@@ -14,8 +14,8 @@ groups:
1414
1515- name : tarantool-common
1616 rules :
17- # Warning for any instance that uses too Lua runtime memory.
18- - alert : LuaRuntimeWarning
17+ # Warning for any instance that uses too much Lua runtime memory.
18+ - alert : HighLuaMemoryWarning
1919 expr : tnt_info_memory_lua >= (512 * 1024 * 1024)
2020 for : 1m
2121 labels :
@@ -25,8 +25,8 @@ groups:
2525 description : " '{{ $labels.alias }}' instance of job '{{ $labels.job }}' uses too much Lua memory
2626 and may hit threshold soon."
2727
28- # Alert for any instance that uses too Lua runtime memory.
29- - alert : LuaRuntimeAlert
28+ # Alert for any instance that uses too much Lua runtime memory.
29+ - alert : HighLuaMemory
3030 expr : tnt_info_memory_lua >= (1024 * 1024 * 1024)
3131 for : 1m
3232 labels :
@@ -37,7 +37,7 @@ groups:
3737 and likely to hit threshold soon."
3838
3939 # Warning for any instance that has low remaining arena memory.
40- - alert : MemtxArenaWarning
40+ - alert : LowMemtxArenaRemainingWarning
4141 expr : (tnt_slab_quota_used_ratio >= 80) and (tnt_slab_arena_used_ratio >= 80)
4242 for : 1m
4343 labels :
@@ -48,7 +48,7 @@ groups:
4848 Consider increasing memtx_memory or number of storages in case of sharded data."
4949
5050 # Alert for any instance that has low remaining arena memory.
51- - alert : MemtxArenaAlert
51+ - alert : LowMemtxArenaRemaining
5252 expr : (tnt_slab_quota_used_ratio >= 90) and (tnt_slab_arena_used_ratio >= 90)
5353 for : 1m
5454 labels :
@@ -60,7 +60,7 @@ groups:
6060 It is strongly recommended to increase memtx_memory or number of storages in case of sharded data."
6161
6262 # Warning for any instance that has low remaining items memory.
63- - alert : MemtxItemsWarning
63+ - alert : LowMemtxItemsRemainingWarning
6464 expr : (tnt_slab_quota_used_ratio >= 80) and (tnt_slab_items_used_ratio >= 80)
6565 for : 1m
6666 labels :
@@ -71,7 +71,7 @@ groups:
7171 Consider increasing memtx_memory or number of storages in case of sharded data."
7272
7373 # Alert for any instance that has low remaining items memory.
74- - alert : MemtxItemsAlert
74+ - alert : LowMemtxItemsRemaining
7575 expr : (tnt_slab_quota_used_ratio >= 90) and (tnt_slab_items_used_ratio >= 90)
7676 for : 1m
7777 labels :
@@ -178,7 +178,7 @@ groups:
178178- name : tarantool-crud
179179 rules :
180180 # Alert for CRUD module request errors.
181- - alert : CRUDHighErrorRate
181+ - alert : HighCRUDErrorRate
182182 expr : rate(tnt_crud_stats_count{ job="tarantool", status="error" }[5m]) > 0.1
183183 for : 1m
184184 labels :
@@ -189,7 +189,7 @@ groups:
189189 '{{ $labels.alias }}' instance of job '{{ $labels.job }}' get module error responses."
190190
191191 # Warning for CRUD module requests that take too long to be processed.
192- - alert : CRUDHighLatency
192+ - alert : HighCRUDLatency
193193 expr : tnt_crud_stats{ job="tarantool", quantile="0.99" } > 0.1
194194 for : 1m
195195 labels :
@@ -200,7 +200,7 @@ groups:
200200 '{{ $labels.alias }}' instance of job '{{ $labels.job }}' are processed too long."
201201
202202 # Warning for too many map reduce CRUD module requests.
203- - alert : CRUDHighMapReduceRate
203+ - alert : HighCRUDMapReduceRate
204204 expr : rate(tnt_crud_map_reduces{ job="tarantool" }[5m]) > 0.1
205205 for : 1m
206206 labels :
@@ -218,7 +218,7 @@ groups:
218218 # Beware that metric name depends on name of the collector you use in HTTP metrics middleware
219219 # and request depends on type of this collector.
220220 # This example is based on a summary collector with the default name.
221- - alert : HTTPHighLatency
221+ - alert : HighHTTPLatency
222222 expr : http_server_request_latency{ job="tarantool", quantile="0.99" } > 0.1
223223 for : 5m
224224 labels :
@@ -228,43 +228,43 @@ groups:
228228 description : " Some {{ $labels.method }} requests to {{ $labels.path }} path with {{ $labels.status }} response status
229229 on '{{ $labels.alias }}' instance of job '{{ $labels.job }}' are processed too long."
230230
231- # Warning for any endpoint of an instance in tarantool job that sends too much 4xx responses.
231+ # Alert for any endpoint of an instance in tarantool job that sends too many 4xx responses.
232232 # Beware that metric name depends on name of the collector you use in HTTP metrics middleware
233233 # and request depends on type of this collector.
234234 # This example is based on a summary collector with the default name.
235- - alert : HTTPHighClientErrorRateInstance
235+ - alert : HighInstanceHTTPClientErrorRate
236236 expr : sum by (job, instance, method, path, alias) (rate(http_server_request_latency_count{ job="tarantool", status=~"^4\\d{2}$" }[5m])) > 10
237237 for : 1m
238238 labels :
239- severity : warning
239+ severity : page
240240 annotations :
241241 summary : " Instance '{{ $labels.alias }}' ('{{ $labels.job }}') high rate of client error responses"
242242 description : " Too many {{ $labels.method }} requests to {{ $labels.path }} path
243243 on '{{ $labels.alias }}' instance of job '{{ $labels.job }}' get client error (4xx) responses."
244244
245- # Warning for any endpoint in tarantool job that sends too much 4xx responses (cluster overall).
245+ # Alert for any endpoint in tarantool job that sends too many 4xx responses (cluster overall).
246246 # Beware that metric name depends on name of the collector you use in HTTP metrics middleware
247247 # and request depends on type of this collector.
248248 # This example is based on a summary collector with the default name.
249- - alert : HTTPHighClientErrorRate
249+ - alert : HighHTTPClientErrorRate
250250 expr : sum by (job, method, path) (rate(http_server_request_latency_count{ job="tarantool", status=~"^4\\d{2}$" }[5m])) > 20
251251 for : 1m
252252 labels :
253- severity : warning
253+ severity : page
254254 annotations :
255255 summary : " Job '{{ $labels.job }}' high rate of client error responses"
256256 description : " Too many {{ $labels.method }} requests to {{ $labels.path }} path
257257 on instances of job '{{ $labels.job }}' get client error (4xx) responses."
258258
259- # Warning for any endpoint of an instance in tarantool job that sends 5xx responses.
259+ # Alert for any endpoint of an instance in tarantool job that sends 5xx responses.
260260 # Beware that metric name depends on name of the collector you use in HTTP metrics middleware
261261 # and request depends on type of this collector.
262262 # This example is based on a summary collector with the default name.
263- - alert : HTTPServerErrors
263+ - alert : HighHTTPServerErrorRate
264264 expr : sum by (job, instance, method, path, alias) (rate(http_server_request_latency_count{ job="tarantool", status=~"^5\\d{2}$" }[5m])) > 0
265265 for : 1m
266266 labels :
267- severity : warning
267+ severity : page
268268 annotations :
269269 summary : " Instance '{{ $labels.alias }}' ('{{ $labels.job }}') server error responses"
270270 description : " Some {{ $labels.method }} requests to {{ $labels.path }} path
@@ -274,7 +274,7 @@ groups:
274274 # Beware that metric name depends on name of the collector you use in HTTP metrics middleware
275275 # and request depends on type of this collector.
276276 # This example is based on a summary collector with the default name.
277- - alert : HTTPLowRequestRateRouter
277+ - alert : LowRouterHTTPRequestRate
278278 expr : sum by (job, instance, alias) (rate(http_server_request_latency_count{ job="tarantool", alias=~"^.*router.*$" }[5m])) < 10
279279 for : 5m
280280 labels :
0 commit comments