Skip to content

Commit de7bd0b

Browse files
authored
fix: each metric should have an expire time (#11838)
1 parent e3c904a commit de7bd0b

File tree

4 files changed

+157
-105
lines changed

4 files changed

+157
-105
lines changed

apisix/plugins/prometheus/exporter.lua

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -136,10 +136,14 @@ function _M.http_init(prometheus_enabled_in_stream)
136136
metric_prefix = attr.metric_prefix
137137
end
138138

139-
local exptime
140-
if attr and attr.expire then
141-
exptime = attr.expire
142-
end
139+
local status_metrics_exptime = core.table.try_read_attr(attr, "metrics",
140+
"http_status", "expire")
141+
local latency_metrics_exptime = core.table.try_read_attr(attr, "metrics",
142+
"http_latency", "expire")
143+
local bandwidth_metrics_exptime = core.table.try_read_attr(attr, "metrics",
144+
"bandwidth", "expire")
145+
local upstream_status_exptime = core.table.try_read_attr(attr, "metrics",
146+
"upstream_status", "expire")
143147

144148
prometheus = base_prometheus.init("prometheus-metrics", metric_prefix)
145149

@@ -172,7 +176,7 @@ function _M.http_init(prometheus_enabled_in_stream)
172176
metrics.upstream_status = prometheus:gauge("upstream_status",
173177
"Upstream status from health check",
174178
{"name", "ip", "port"},
175-
exptime)
179+
upstream_status_exptime)
176180

177181
-- per service
178182

@@ -183,7 +187,7 @@ function _M.http_init(prometheus_enabled_in_stream)
183187
"HTTP status codes per service in APISIX",
184188
{"code", "route", "matched_uri", "matched_host", "service", "consumer", "node",
185189
unpack(extra_labels("http_status"))},
186-
exptime)
190+
status_metrics_exptime)
187191

188192
local buckets = DEFAULT_BUCKETS
189193
if attr and attr.default_buckets then
@@ -193,12 +197,12 @@ function _M.http_init(prometheus_enabled_in_stream)
193197
metrics.latency = prometheus:histogram("http_latency",
194198
"HTTP request latency in milliseconds per service in APISIX",
195199
{"type", "route", "service", "consumer", "node", unpack(extra_labels("http_latency"))},
196-
buckets, exptime)
200+
buckets, latency_metrics_exptime)
197201

198202
metrics.bandwidth = prometheus:counter("bandwidth",
199203
"Total bandwidth in bytes consumed per service in APISIX",
200204
{"type", "route", "service", "consumer", "node", unpack(extra_labels("bandwidth"))},
201-
exptime)
205+
bandwidth_metrics_exptime)
202206

203207
if prometheus_enabled_in_stream then
204208
init_stream_metrics()

conf/config.yaml.example

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -596,25 +596,29 @@ plugin_attr: # Plugin attributes
596596
port: 9091 # Set the port.
597597
# metrics: # Per-metric settings: expiration time, and extra labels created from nginx variables: https://nginx.org/en/docs/varindex.html
598598
# http_status:
599+
# expire: 0 # The expiration time after which metrics are removed. Unit: seconds.
600+
# # 0 means the metrics will not expire
599601
# extra_labels:
600602
# - upstream_addr: $upstream_addr
601603
# - status: $upstream_status # The label name does not need to be the same as the variable name.
602604
# http_latency:
605+
# expire: 0 # The expiration time after which metrics are removed. Unit: seconds.
606+
# # 0 means the metrics will not expire
603607
# extra_labels:
604608
# - upstream_addr: $upstream_addr
605609
# bandwidth:
610+
# expire: 0 # The expiration time after which metrics are removed. Unit: seconds.
611+
# # 0 means the metrics will not expire
606612
# extra_labels:
607613
# - upstream_addr: $upstream_addr
614+
# upstream_status:
615+
# expire: 0 # The expiration time after which metrics are removed. Unit: seconds. 0 means the metrics will not expire.
608616
# default_buckets:
609617
# - 10
610618
# - 50
611619
# - 100
612620
# - 200
613621
# - 500
614-
# expire: 0 # The expiration time of metrics in seconds.
615-
# 0 means the metrics will not expire.
616-
# Only affect apisix_http_status, apisix_bandwidth, and apisix_http_latency.
617-
# If you need to set the expiration time, it is recommended to use 600, which is 10 minutes.
618622
server-info: # Plugin: server-info
619623
report_ttl: 60 # Set the TTL in seconds for server info in etcd.
620624
# Maximum: 86400. Minimum: 3.
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
#
2+
# Licensed to the Apache Software Foundation (ASF) under one or more
3+
# contributor license agreements. See the NOTICE file distributed with
4+
# this work for additional information regarding copyright ownership.
5+
# The ASF licenses this file to You under the Apache License, Version 2.0
6+
# (the "License"); you may not use this file except in compliance with
7+
# the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
#
17+
BEGIN {
18+
if ($ENV{TEST_NGINX_CHECK_LEAK}) {
19+
$SkipReason = "unavailable for the hup tests";
20+
21+
} else {
22+
$ENV{TEST_NGINX_USE_HUP} = 1;
23+
undef $ENV{TEST_NGINX_USE_STAP};
24+
}
25+
}
26+
27+
use t::APISIX 'no_plan';
28+
29+
add_block_preprocessor(sub {
30+
my ($block) = @_;
31+
32+
if (!defined $block->request) {
33+
$block->set_value("request", "GET /t");
34+
}
35+
});
36+
37+
run_tests;
38+
39+
__DATA__
40+
41+
=== TEST 1: set route with prometheus ttl
42+
--- yaml_config
43+
plugin_attr:
44+
prometheus:
45+
default_buckets:
46+
- 15
47+
- 55
48+
- 105
49+
- 205
50+
- 505
51+
metrics:
52+
http_status:
53+
expire: 1
54+
http_latency:
55+
expire: 1
56+
bandwidth:
57+
expire: 1
58+
--- config
59+
location /t {
60+
content_by_lua_block {
61+
local t = require("lib.test_admin").test
62+
local code = t('/apisix/admin/routes/metrics',
63+
ngx.HTTP_PUT,
64+
[[{
65+
"plugins": {
66+
"public-api": {}
67+
},
68+
"uri": "/apisix/prometheus/metrics"
69+
}]]
70+
)
71+
if code >= 300 then
72+
ngx.status = code
73+
return
74+
end
75+
local code, body = t('/apisix/admin/routes/1',
76+
ngx.HTTP_PUT,
77+
[[{
78+
"plugins": {
79+
"prometheus": {}
80+
},
81+
"upstream": {
82+
"nodes": {
83+
"127.0.0.1:1980": 1
84+
},
85+
"type": "roundrobin"
86+
},
87+
"uri": "/hello1"
88+
}]]
89+
)
90+
if code >= 300 then
91+
ngx.status = code
92+
ngx.say(body)
93+
return
94+
end
95+
local code, body = t('/hello1',
96+
ngx.HTTP_GET,
97+
"",
98+
nil,
99+
nil
100+
)
101+
if code >= 300 then
102+
ngx.status = code
103+
ngx.say(body)
104+
return
105+
end
106+
ngx.sleep(2)
107+
local code, pass, body = t('/apisix/prometheus/metrics',
108+
ngx.HTTP_GET,
109+
"",
110+
nil,
111+
nil
112+
)
113+
114+
local metrics_to_check = {"apisix_bandwidth", "http_latency", "http_status",}
115+
116+
-- verify that above mentioned metrics are not in the metrics response
117+
for _, v in pairs(metrics_to_check) do
118+
local match, err = ngx.re.match(body, "\\b" .. v .. "\\b", "m")
119+
if match then
120+
ngx.status = 500
121+
ngx.say("error found " .. v .. " in metrics")
122+
return
123+
end
124+
end
125+
126+
ngx.say("passed")
127+
}
128+
}
129+
--- request
130+
GET /t
131+
--- response_body
132+
passed

t/plugin/prometheus4.t

Lines changed: 5 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -192,95 +192,7 @@ apisix_http_latency_bucket\{type="upstream",route="1",service="",consumer="",nod
192192
193193
194194
195-
=== TEST 9: set route with prometheus ttl
196-
--- yaml_config
197-
plugin_attr:
198-
prometheus:
199-
default_buckets:
200-
- 15
201-
- 55
202-
- 105
203-
- 205
204-
- 505
205-
expire: 1
206-
--- config
207-
location /t {
208-
content_by_lua_block {
209-
local t = require("lib.test_admin").test
210-
211-
local code = t('/apisix/admin/routes/metrics',
212-
ngx.HTTP_PUT,
213-
[[{
214-
"plugins": {
215-
"public-api": {}
216-
},
217-
"uri": "/apisix/prometheus/metrics"
218-
}]]
219-
)
220-
if code >= 300 then
221-
ngx.status = code
222-
return
223-
end
224-
225-
local code, body = t('/apisix/admin/routes/1',
226-
ngx.HTTP_PUT,
227-
[[{
228-
"plugins": {
229-
"prometheus": {}
230-
},
231-
"upstream": {
232-
"nodes": {
233-
"127.0.0.1:1980": 1
234-
},
235-
"type": "roundrobin"
236-
},
237-
"uri": "/hello1"
238-
}]]
239-
)
240-
241-
if code >= 300 then
242-
ngx.status = code
243-
ngx.say(body)
244-
return
245-
end
246-
247-
local code, body = t('/hello1',
248-
ngx.HTTP_GET,
249-
"",
250-
nil,
251-
nil
252-
)
253-
254-
if code >= 300 then
255-
ngx.status = code
256-
ngx.say(body)
257-
return
258-
end
259-
260-
ngx.sleep(2)
261-
262-
local code, pass, body = t('/apisix/prometheus/metrics',
263-
ngx.HTTP_GET,
264-
"",
265-
nil,
266-
nil
267-
)
268-
ngx.status = code
269-
ngx.say(body)
270-
}
271-
}
272-
--- request
273-
GET /t
274-
--- response_body_unlike eval
275-
qr/apisix_http_latency_bucket\{type="upstream",route="1",service="",consumer="",node="127.0.0.1",le="15"\} \d+
276-
apisix_http_latency_bucket\{type="upstream",route="1",service="",consumer="",node="127.0.0.1",le="55"\} \d+
277-
apisix_http_latency_bucket\{type="upstream",route="1",service="",consumer="",node="127.0.0.1",le="105"\} \d+
278-
apisix_http_latency_bucket\{type="upstream",route="1",service="",consumer="",node="127.0.0.1",le="205"\} \d+
279-
apisix_http_latency_bucket\{type="upstream",route="1",service="",consumer="",node="127.0.0.1",le="505"\} \d+/
280-
281-
282-
283-
=== TEST 10: set sys plugins
195+
=== TEST 9: set sys plugins
284196
--- config
285197
location /t {
286198
content_by_lua_block {
@@ -332,7 +244,7 @@ passed
332244
333245
334246
335-
=== TEST 11: remove prometheus -> reload -> send batch request -> add prometheus for next tests
247+
=== TEST 10: remove prometheus -> reload -> send batch request -> add prometheus for next tests
336248
--- yaml_config
337249
deployment:
338250
role: traditional
@@ -366,7 +278,7 @@ qr/404 Not Found/
366278
367279
368280
369-
=== TEST 12: fetch prometheus metrics -> batch_process_entries metrics should not be present
281+
=== TEST 11: fetch prometheus metrics -> batch_process_entries metrics should not be present
370282
--- yaml_config
371283
deployment:
372284
role: traditional
@@ -387,14 +299,14 @@ qr/apisix_batch_process_entries\{name="sys-logger",route_id="9",server_addr="127
387299
388300
389301
390-
=== TEST 13: hit batch-process-metrics with prometheus enabled from TEST 11
302+
=== TEST 12: hit batch-process-metrics with prometheus enabled from TEST 10
391303
--- request
392304
GET /batch-process-metrics
393305
--- error_code: 404
394306
395307
396308
397-
=== TEST 14: batch_process_entries metrics should be present now
309+
=== TEST 13: batch_process_entries metrics should be present now
398310
--- request
399311
GET /apisix/prometheus/metrics
400312
--- error_code: 200

0 commit comments

Comments
 (0)