@@ -3152,3 +3152,55 @@ tests:
31523152 exp_annotations :
31533153 summary : " The average write latency over the last 5 mins has reached 20 ms or more on node-1"
31543154 description : " High latencies may indicate a constraint within the cluster e.g. CPU, network. Please investigate"
3155+
# NVMeoFHostKeepAliveTimeout
#
# Exercises the keep-alive-timeout alert with two hosts attached to the same
# subsystem through the same gateway. Samples are one hour apart, so the
# N-th entry of each `values` string is the sample at hour N (starting at 0).
# `AxN` is promtool's expanding notation for "A, then A repeated N more
# times" (e.g. `0x14` is fifteen consecutive zeros).
- interval: 1h
  input_series:
    # Host nqn.1 sample timeline (hours):
    #   0-3 -> 0, 4 -> 1, 5 -> 0, 6 -> 1, 7 -> 0, 8-9 -> 1,
    #   10-24 -> 0, 25-26 -> 1, 27-30 -> 0
    - series: 'ceph_nvmeof_host_keepalive_timeout{gw_name="client.nvmeof.a", host_nqn="nqn.1", instance="node-1:10008", nqn="nqn.2016-06.io.spdk:cnode1.mygroup"}'
      values: '0 0 0 0 1 0 1 0 1 1 0x14 1 1 0x3'
    # Host nqn.2 sample timeline (hours):
    #   0 -> 0, 1-2 -> 1, 3-29 -> 0
    - series: 'ceph_nvmeof_host_keepalive_timeout{gw_name="client.nvmeof.a", host_nqn="nqn.2", instance="node-1:10008", nqn="nqn.2016-06.io.spdk:cnode1.mygroup"}'
      values: '0 1 1 0 0 0 0 0 0 0 0x19'
  promql_expr_test:
    # The expression halves the number of value changes seen in the trailing
    # 24h window and rounds up; series whose result is 0 are filtered out.
    #
    # At 2h: nqn.1 is still flat (no result); nqn.2 has one change (0 -> 1),
    # ceil(1 / 2) = 1.
    - expr: ceil(changes(ceph_nvmeof_host_keepalive_timeout[24h:]) / 2) > 0
      eval_time: 2h
      exp_samples:
        - labels: '{gw_name="client.nvmeof.a", host_nqn="nqn.2", instance="node-1:10008", nqn="nqn.2016-06.io.spdk:cnode1.mygroup"}'
          value: 1
    # At 8h: nqn.1 has changed five times (hours 4, 5, 6, 7, 8),
    # ceil(5 / 2) = 3; nqn.2 has changed twice (hours 1 and 3),
    # ceil(2 / 2) = 1.
    - expr: ceil(changes(ceph_nvmeof_host_keepalive_timeout[24h:]) / 2) > 0
      eval_time: 8h
      exp_samples:
        - labels: '{gw_name="client.nvmeof.a", host_nqn="nqn.1", instance="node-1:10008", nqn="nqn.2016-06.io.spdk:cnode1.mygroup"}'
          value: 3
        - labels: '{gw_name="client.nvmeof.a", host_nqn="nqn.2", instance="node-1:10008", nqn="nqn.2016-06.io.spdk:cnode1.mygroup"}'
          value: 1
    # At 29h: the window no longer covers the early hours. nqn.2 has been
    # flat for the whole window and drops out; nqn.1 still evaluates to 3
    # from its later transitions (hours 7, 8, 10, 25, 27).
    - expr: ceil(changes(ceph_nvmeof_host_keepalive_timeout[24h:]) / 2) > 0
      eval_time: 29h
      exp_samples:
        - labels: '{gw_name="client.nvmeof.a", host_nqn="nqn.1", instance="node-1:10008", nqn="nqn.2016-06.io.spdk:cnode1.mygroup"}'
          value: 3
  alert_rule_test:
    # No `exp_alerts` key: nothing is expected to be firing at 1h.
    # NOTE(review): nqn.2 already has one change at this point, so this
    # presumably relies on the rule's `for` duration not having elapsed yet -
    # confirm against the alert rule definition.
    - eval_time: 1h
      alertname: NVMeoFHostKeepAliveTimeout
    # At 12h both hosts are expected to fire: nqn.1 with a count of 3
    # (six changes in the window) and nqn.2 with a count of 1 (two changes).
    - eval_time: 12h
      alertname: NVMeoFHostKeepAliveTimeout
      exp_alerts:
        - exp_labels:
            gw_name: client.nvmeof.a
            host_nqn: nqn.1
            instance: node-1:10008
            nqn: nqn.2016-06.io.spdk:cnode1.mygroup
            severity: warning
            type: ceph_default
          exp_annotations:
            summary: "Host (nqn.1) was disconnected 3 times from subsystem (nqn.2016-06.io.spdk:cnode1.mygroup) in last 24 hours"
            description: "Host was disconnected due to host keep alive timeout"
        - exp_labels:
            gw_name: client.nvmeof.a
            host_nqn: nqn.2
            instance: node-1:10008
            nqn: nqn.2016-06.io.spdk:cnode1.mygroup
            severity: warning
            type: ceph_default
          exp_annotations:
            summary: "Host (nqn.2) was disconnected 1 times from subsystem (nqn.2016-06.io.spdk:cnode1.mygroup) in last 24 hours"
            description: "Host was disconnected due to host keep alive timeout"
0 commit comments