Skip to content

Commit 4dc89d9

Browse files
authored
Merge pull request ceph#64031 from VallariAg/wip-71723-tentacle
tentacle: monitoring: Fix NVMeoF subsys/namespace limit alerts

Reviewed-by: Aashish Sharma <[email protected]>
2 parents 87ddaa0 + e0c016c commit 4dc89d9

File tree

3 files changed

+16
-16
lines changed

3 files changed

+16
-16
lines changed

monitoring/ceph-mixin/prometheus_alerts.libsonnet

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -928,21 +928,21 @@
928928
{
929929
alert: 'NVMeoFTooManySubsystems',
930930
'for': '1m',
931-
expr: 'count by(gateway_host, cluster) (label_replace(ceph_nvmeof_subsystem_metadata,"gateway_host","$1","instance","(.*?)(?::.*)?")) > %.2f' % [$._config.NVMeoFMaxSubsystemsPerGateway],
931+
expr: 'count by(gateway_host, cluster) (label_replace(ceph_nvmeof_subsystem_metadata,"gateway_host","$1","instance","(.*?)(?::.*)?")) >= %.2f' % [$._config.NVMeoFMaxSubsystemsPerGateway],
932932
labels: { severity: 'warning', type: 'ceph_default' },
933933
annotations: {
934-
summary: 'The number of subsystems defined to the gateway exceeds supported values%(cluster)s' % $.MultiClusterSummary(),
935-
description: 'Although you may continue to create subsystems in {{ $labels.gateway_host }}, the configuration may not be supported',
934+
summary: 'The number of subsystems defined to the NVMeoF gateway reached or exceeded the supported values%(cluster)s' % $.MultiClusterSummary(),
935+
description: 'NVMeoF gateway {{ $labels.gateway_host }} has reached or exceeded the supported maximum of %(NVMeoFMaxSubsystemsPerGateway)d subsystems. Current count: {{ $value }}.' % $._config,
936936
},
937937
},
938938
{
939939
alert: 'NVMeoFTooManyNamespaces',
940940
'for': '1m',
941-
expr: 'sum by(gateway_host, cluster) (label_replace(ceph_nvmeof_subsystem_namespace_count,"gateway_host","$1","instance","(.*?)(?::.*)?")) > %.2f' % [$._config.NVMeoFMaxNamespaces],
941+
expr: 'sum by(gateway_host, cluster) (label_replace(ceph_nvmeof_subsystem_namespace_count,"gateway_host","$1","instance","(.*?)(?::.*)?")) >= %.2f' % [$._config.NVMeoFMaxNamespaces],
942942
labels: { severity: 'warning', type: 'ceph_default' },
943943
annotations: {
944-
summary: 'The number of namespaces defined to the gateway exceeds supported values%(cluster)s' % $.MultiClusterSummary(),
945-
description: 'Although you may continue to create namespaces in {{ $labels.gateway_host }}, the configuration may not be supported',
944+
summary: 'The number of namespaces defined to the NVMeoF gateway reached or exceeded supported values%(cluster)s' % $.MultiClusterSummary(),
945+
description: 'NVMeoF gateway {{ $labels.gateway_host }} has reached or exceeded the supported maximum of %(NVMeoFMaxNamespaces)d namespaces. Current count: {{ $value }}.' % $._config,
946946
},
947947
},
948948
{

monitoring/ceph-mixin/prometheus_alerts.yml

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -830,18 +830,18 @@ groups:
830830
type: "ceph_default"
831831
- alert: "NVMeoFTooManySubsystems"
832832
annotations:
833-
description: "Although you may continue to create subsystems in {{ $labels.gateway_host }}, the configuration may not be supported"
834-
summary: "The number of subsystems defined to the gateway exceeds supported values on cluster {{ $labels.cluster }}"
835-
expr: "count by(gateway_host, cluster) (label_replace(ceph_nvmeof_subsystem_metadata,\"gateway_host\",\"$1\",\"instance\",\"(.*?)(?::.*)?\")) > 128.00"
833+
description: "NVMeoF gateway {{ $labels.gateway_host }} has reached or exceeded the supported maximum of 128 subsystems. Current count: {{ $value }}."
834+
summary: "The number of subsystems defined to the NVMeoF gateway reached or exceeded the supported values on cluster {{ $labels.cluster }}"
835+
expr: "count by(gateway_host, cluster) (label_replace(ceph_nvmeof_subsystem_metadata,\"gateway_host\",\"$1\",\"instance\",\"(.*?)(?::.*)?\")) >= 128.00"
836836
for: "1m"
837837
labels:
838838
severity: "warning"
839839
type: "ceph_default"
840840
- alert: "NVMeoFTooManyNamespaces"
841841
annotations:
842-
description: "Although you may continue to create namespaces in {{ $labels.gateway_host }}, the configuration may not be supported"
843-
summary: "The number of namespaces defined to the gateway exceeds supported values on cluster {{ $labels.cluster }}"
844-
expr: "sum by(gateway_host, cluster) (label_replace(ceph_nvmeof_subsystem_namespace_count,\"gateway_host\",\"$1\",\"instance\",\"(.*?)(?::.*)?\")) > 2048.00"
842+
description: "NVMeoF gateway {{ $labels.gateway_host }} has reached or exceeded the supported maximum of 2048 namespaces. Current count: {{ $value }}."
843+
summary: "The number of namespaces defined to the NVMeoF gateway reached or exceeded supported values on cluster {{ $labels.cluster }}"
844+
expr: "sum by(gateway_host, cluster) (label_replace(ceph_nvmeof_subsystem_namespace_count,\"gateway_host\",\"$1\",\"instance\",\"(.*?)(?::.*)?\")) >= 2048.00"
845845
for: "1m"
846846
labels:
847847
severity: "warning"

monitoring/ceph-mixin/tests_alerts/test_alerts.yml

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -2838,8 +2838,8 @@ tests:
28382838
cluster: mycluster
28392839
type: ceph_default
28402840
exp_annotations:
2841-
summary: "The number of subsystems defined to the gateway exceeds supported values on cluster mycluster"
2842-
description: "Although you may continue to create subsystems in node-1, the configuration may not be supported"
2841+
summary: "The number of subsystems defined to the NVMeoF gateway reached or exceeded the supported values on cluster mycluster"
2842+
description: "NVMeoF gateway node-1 has reached or exceeded the supported maximum of 128 subsystems. Current count: 129."
28432843

28442844
# NVMeoFTooManyNamespaces
28452845
- interval: 1m
@@ -2882,8 +2882,8 @@ tests:
28822882
cluster: mycluster
28832883
type: ceph_default
28842884
exp_annotations:
2885-
summary: "The number of namespaces defined to the gateway exceeds supported values on cluster mycluster"
2886-
description: "Although you may continue to create namespaces in node-1, the configuration may not be supported"
2885+
summary: "The number of namespaces defined to the NVMeoF gateway reached or exceeded supported values on cluster mycluster"
2886+
description: "NVMeoF gateway node-1 has reached or exceeded the supported maximum of 2048 namespaces. Current count: 2200."
28872887

28882888
# NVMeoFVersionMismatch
28892889
- interval: 1m

0 commit comments

Comments
 (0)