Skip to content

Commit d0d4ae9

Browse files
authored
Merge pull request ceph#60768 from VallariAg/wip-prometheus-tested-limits
monitoring: Add alert NVMeoFTooManyNamespaces

Reviewed-by: Afreen Misbah <[email protected]>
2 parents 202d87a + 614e146 commit d0d4ae9

File tree

4 files changed: +291 / -5 lines changed

monitoring/ceph-mixin/config.libsonnet

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,8 @@
1212
NVMeoFMaxGatewaysPerGroup: 4,
1313
NVMeoFMaxGatewaysPerCluster: 4,
1414
NVMeoFHighGatewayCPU: 80,
15-
NVMeoFMaxSubsystemsPerGateway: 16,
15+
NVMeoFMaxSubsystemsPerGateway: 128,
16+
NVMeoFMaxNamespaces: 1024,
1617
NVMeoFHighClientCount: 32,
1718
NVMeoFHighHostCPU: 80,
1819
//

monitoring/ceph-mixin/prometheus_alerts.libsonnet

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -908,13 +908,23 @@
908908
{
909909
alert: 'NVMeoFTooManySubsystems',
910910
'for': '1m',
911-
expr: 'count by(gateway_host, cluster) (label_replace(ceph_nvmeof_subsystem_metadata,"gateway_host","$1","instance","(.*):.*")) > %.2f' % [$._config.NVMeoFMaxSubsystemsPerGateway],
911+
expr: 'count by(gateway_host, cluster) (label_replace(ceph_nvmeof_subsystem_metadata,"gateway_host","$1","instance","(.*?)(?::.*)?")) > %.2f' % [$._config.NVMeoFMaxSubsystemsPerGateway],
912912
labels: { severity: 'warning', type: 'ceph_default' },
913913
annotations: {
914914
summary: 'The number of subsystems defined to the gateway exceeds supported values%(cluster)s' % $.MultiClusterSummary(),
915915
description: 'Although you may continue to create subsystems in {{ $labels.gateway_host }}, the configuration may not be supported',
916916
},
917917
},
918+
{
919+
alert: 'NVMeoFTooManyNamespaces',
920+
'for': '1m',
921+
expr: 'sum by(gateway_host, cluster) (label_replace(ceph_nvmeof_subsystem_namespace_count,"gateway_host","$1","instance","(.*?)(?::.*)?")) > %.2f' % [$._config.NVMeoFMaxNamespaces],
922+
labels: { severity: 'warning', type: 'ceph_default' },
923+
annotations: {
924+
summary: 'The number of namespaces defined to the gateway exceeds supported values%(cluster)s' % $.MultiClusterSummary(),
925+
description: 'Although you may continue to create namespaces in {{ $labels.gateway_host }}, the configuration may not be supported',
926+
},
927+
},
918928
{
919929
alert: 'NVMeoFVersionMismatch',
920930
'for': '1h',

monitoring/ceph-mixin/prometheus_alerts.yml

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -814,7 +814,16 @@ groups:
814814
annotations:
815815
description: "Although you may continue to create subsystems in {{ $labels.gateway_host }}, the configuration may not be supported"
816816
summary: "The number of subsystems defined to the gateway exceeds supported values on cluster {{ $labels.cluster }}"
817-
expr: "count by(gateway_host, cluster) (label_replace(ceph_nvmeof_subsystem_metadata,\"gateway_host\",\"$1\",\"instance\",\"(.*):.*\")) > 16.00"
817+
expr: "count by(gateway_host, cluster) (label_replace(ceph_nvmeof_subsystem_metadata,\"gateway_host\",\"$1\",\"instance\",\"(.*?)(?::.*)?\")) > 128.00"
818+
for: "1m"
819+
labels:
820+
severity: "warning"
821+
type: "ceph_default"
822+
- alert: "NVMeoFTooManyNamespaces"
823+
annotations:
824+
description: "Although you may continue to create namespaces in {{ $labels.gateway_host }}, the configuration may not be supported"
825+
summary: "The number of namespaces defined to the gateway exceeds supported values on cluster {{ $labels.cluster }}"
826+
expr: "sum by(gateway_host, cluster) (label_replace(ceph_nvmeof_subsystem_namespace_count,\"gateway_host\",\"$1\",\"instance\",\"(.*?)(?::.*)?\")) > 1024.00"
818827
for: "1m"
819828
labels:
820829
severity: "warning"

0 commit comments

Comments
 (0)