Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 80 additions & 0 deletions ibm-mq-mixin/alerts.libsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
{
  // Builds the Prometheus alert rule groups for the IBM MQ mixin.
  //
  // `this.config` must provide `filteringSelector` and the four thresholds
  // `alertsExpiredMessages`, `alertsStaleMessagesSeconds`, `alertsLowDiskSpace` and
  // `alertsHighQueueManagerCpuUsage`; each one is interpolated into the rule text below.
  // Returns an object whose `groups` array holds a single group, 'ibm-mq-alerts'.
  //
  // Descriptions print the firing sample with `{{ $value }}`. `{{$labels.value}}` would look
  // up a label literally named "value", which these series do not have, and render empty.
  new(this):
    {
      groups+: [
        {
          name: 'ibm-mq-alerts',
          rules: [
            {
              // Fires when the expired-message count stays above the threshold for 5m.
              alert: 'IBMMQExpiredMessages',
              expr: |||
                sum without (description,hostname,instance,job,platform) (ibmmq_qmgr_expired_message_count{%(filteringSelector)s}) > %(alertsExpiredMessages)s
              ||| % this.config,
              'for': '5m',
              labels: {
                severity: 'critical',
              },
              annotations: {
                summary: 'There are expired messages, which imply that application resilience is failing.',
                description:
                  (
                    'The number of expired messages in the {{$labels.qmgr}} is {{ $value }} which is above the threshold of %(alertsExpiredMessages)s.'
                  ) % this.config,
              },
            },
            {
              // Fires when the oldest message age is at or above the threshold for 5m.
              // Unlike the other rules, `hostname` is not in the `without` list here.
              alert: 'IBMMQStaleMessages',
              expr: |||
                sum without (description,instance,job,platform) (ibmmq_queue_oldest_message_age{%(filteringSelector)s}) >= %(alertsStaleMessagesSeconds)s
              ||| % this.config,
              'for': '5m',
              labels: {
                severity: 'warning',
              },
              annotations: {
                summary: 'Stale messages have been detected.',
                description:
                  (
                    // The trailing literal 's' after the placeholder is the seconds unit.
                    'A stale message with an age of {{ $value }} has been sitting in the {{$labels.queue}} which is above the threshold of %(alertsStaleMessagesSeconds)ss.'
                  ) % this.config,
              },
            },
            {
              // Fires when the free-space percentage is at or below the threshold for 5m.
              alert: 'IBMMQLowDiskSpace',
              expr: |||
                sum without (description,hostname,instance,job,platform) (ibmmq_qmgr_queue_manager_file_system_free_space_percentage{%(filteringSelector)s}) <= %(alertsLowDiskSpace)s
              ||| % this.config,
              'for': '5m',
              labels: {
                severity: 'critical',
              },
              annotations: {
                summary: 'There is limited disk available for a queue manager.',
                description:
                  (
                    // '%%' is an escaped percent sign for the `%` format operator.
                    'The amount of disk space available for {{$labels.qmgr}} is at {{ $value }}%% which is below the threshold of %(alertsLowDiskSpace)s%%.'
                  ) % this.config,
              },
            },
            {
              // Fires when the CPU usage estimate is at or above the threshold for 5m.
              alert: 'IBMMQHighQueueManagerCpuUsage',
              expr: |||
                sum without (description,hostname,instance,job,platform) (ibmmq_qmgr_user_cpu_time_estimate_for_queue_manager_percentage{%(filteringSelector)s}) >= %(alertsHighQueueManagerCpuUsage)s
              ||| % this.config,
              'for': '5m',
              labels: {
                severity: 'critical',
              },
              annotations: {
                summary: 'There is a high CPU usage estimate for a queue manager.',
                description:
                  (
                    'The amount of CPU usage for the queue manager {{$labels.qmgr}} is at {{ $value }}%% which is above the threshold of %(alertsHighQueueManagerCpuUsage)s%%.'
                  ) % this.config,
              },
            },
          ],
        },
      ],
    },
}
79 changes: 0 additions & 79 deletions ibm-mq-mixin/alerts/alerts.libsonnet

This file was deleted.

47 changes: 32 additions & 15 deletions ibm-mq-mixin/config.libsonnet
Original file line number Diff line number Diff line change
@@ -1,20 +1,37 @@
// Mixin configuration: selectors, dashboard defaults, log settings, alert thresholds and signals.
// NOTE(review): this span looks like a diff rendered without +/- markers. The hunk header
// above it reports 20 old lines becoming 37 new lines, and keys such as dashboardTags,
// enableLokiLogs and the alerts* thresholds appear twice. Confirm against the real
// config.libsonnet before treating it as a single object.
{
_config+:: {
enableMultiCluster: false,
// Selector text depends on enableMultiCluster: a cluster matcher is added when it is true.
ibmmqSelector: if self.enableMultiCluster then 'job=~"$job", cluster=~"$cluster"' else 'job=~"$job"',
dashboardTags: ['ibm-mq-mixin'],
dashboardPeriod: 'now-1h',
dashboardTimezone: 'default',
dashboardRefresh: '1m',
// Same switch as ibmmqSelector, with an extra qmgr matcher in both branches.
logExpression: if self.enableMultiCluster then 'job=~"$job", cluster=~"$cluster", qmgr=~"$qmgr"'
else 'job=~"$job", qmgr=~"$qmgr"',
// Alias for this object; logLabels reads it and it is passed to each signal import below.
local this = self,
filteringSelector: 'job="integrations/ibm-mq"',
groupLabels: ['job', 'cluster', 'mq_cluster'],
instanceLabels: ['instance', 'qmgr'],
uid: 'ibm-mq',

//alerts thresholds
alertsExpiredMessages: 2, //count
alertsStaleMessagesSeconds: 300, //seconds
alertsLowDiskSpace: 5, //percentage: 0-100
alertsHighQueueManagerCpuUsage: 85, //percentage: 0-100
// Dashboard defaults (title prefix, tags, time range start, timezone, refresh interval).
dashboardNamePrefix: 'IBM MQ',
dashboardTags: ['ibm-mq-mixin'],
dashboardPeriod: 'now-1h',
dashboardTimezone: 'default',
dashboardRefresh: '1m',

enableLokiLogs: true,
// Data source configuration
metricsSource: 'prometheus',
enableLokiLogs: true,
// Log labels default to the group labels defined above.
logLabels: this.groupLabels,
extraLogLabels: [],
logsVolumeGroupBy: 'level',
showLogsVolume: true,

// Alerts configuration
alertsExpiredMessages: 2, //count
alertsStaleMessagesSeconds: 300, //seconds
alertsLowDiskSpace: 5, //percentage: 0-100
alertsHighQueueManagerCpuUsage: 85, //percentage: 0-100

// Multi-cluster support (for backward compatibility)
enableMultiCluster: false,

// Each entry calls the function returned by the import, passing this config object.
signals+: {
cluster: (import './signals/cluster.libsonnet')(this),
queueManager: (import './signals/queue-manager.libsonnet')(this),
queue: (import './signals/queue.libsonnet')(this),
topic: (import './signals/topics.libsonnet')(this),
},
}
170 changes: 170 additions & 0 deletions ibm-mq-mixin/dashboards.libsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
local g = import './g.libsonnet';
local logslib = import 'logs-lib/logs/main.libsonnet';

{
  // Handle on this library object so nested objects can still reach applyCommon.
  local root = self,

  // Builds the mixin's dashboards, keyed by output file name.
  //
  // `this` must provide `config` (dashboard defaults, uid, log settings) and `grafana`
  // (links, variables, annotations, rows). The logs dashboard is only included when
  // `config.enableLokiLogs` is true.
  new(this)::
    local cfg = this.config;
    local gf = this.grafana;
    local namePrefix = cfg.dashboardNamePrefix;
    local baseUid = g.util.string.slugify(cfg.uid);
    local dashLinks = gf.links;
    local dashVars = gf.variables;

    // Shared dashboard settings; only the variables, uid suffix and link set differ.
    local common(variables, uidSuffix, linkSet) =
      root.applyCommon(
        variables,
        baseUid + uidSuffix,
        cfg.dashboardTags,
        linkSet,
        gf.annotations,
        cfg.dashboardTimezone,
        cfg.dashboardRefresh,
        cfg.dashboardPeriod
      );

    // One overview dashboard: title, the given rows run through g.util.grid.wrapPanels and
    // g.util.panel.resolveCollapsedFlagOnRows, then the shared settings.
    local overview(name, rows, variables, uidSuffix, linkSet) =
      g.dashboard.new(namePrefix + ' ' + name)
      + g.dashboard.withPanels(
        g.util.panel.resolveCollapsedFlagOnRows(
          g.util.grid.wrapPanels(rows)
        )
      )
      + common(variables, uidSuffix, linkSet);

    // Multi-select query variable whose options are the values of label `name` on `metric`,
    // read from the Prometheus datasource variable, with an "All" option matching '.+'.
    local labelValuesVar(name, title, metric) =
      g.dashboard.variable.query.new(name)
      + g.dashboard.variable.custom.generalOptions.withLabel(title)
      + g.dashboard.variable.custom.selectionOptions.withMulti(true)
      + g.dashboard.variable.query.queryTypes.withLabelValues(label=name, metric=metric)
      + g.dashboard.variable.query.withDatasourceFromVariable(variable=dashVars.datasources.prometheus)
      + g.dashboard.variable.custom.selectionOptions.withIncludeAll(true, '.+')
      + g.dashboard.variable.query.refresh.onTime();

    // Logs dashboard built by logs-lib, then patched with the shared settings, log panel
    // options and a hidden Prometheus datasource variable.
    local logsDashboards =
      if cfg.enableLokiLogs then {
        'ibm-mq-logs.json':
          logslib.new(
            namePrefix + ' logs',
            datasourceName=dashVars.datasources.loki.name,
            datasourceRegex=dashVars.datasources.loki.regex,
            filterSelector=cfg.filteringSelector,
            labels=cfg.groupLabels + cfg.extraLogLabels + ['qmgr'],
            formatParser=null,
            showLogsVolume=cfg.showLogsVolume,
          )
          {
            dashboards+: {
              // Reuses the variables logs-lib already put on its dashboard.
              logs+: common(super.logs.templating.list, '-logs', dashLinks),
            },
            panels+: {
              logs+:
                g.panel.logs.options.withEnableLogDetails(true)
                + g.panel.logs.options.withShowTime(false)
                + g.panel.logs.options.withWrapLogMessage(false),
            },
            variables+: {
              toArray+: [
                dashVars.datasources.prometheus { hide: 2 },
              ],
            },
          }.dashboards.logs,
      } else {};

    // Each overview re-declares its own link as hidden (`+::`); applyCommon lists links with
    // std.objectValues, which skips hidden fields, so a dashboard does not link to itself.
    {
      'ibm-mq-cluster-overview.json':
        overview(
          'cluster overview',
          [gf.rows.clusterOverview],
          dashVars.multiInstance,
          '-cluster-overview',
          dashLinks { ibmMqClusterOverview+:: {} }
        ),

      'ibm-mq-queue-manager-overview.json':
        overview(
          'queue manager overview',
          [
            gf.rows.queueManagerOverview,
            gf.rows.queueManagerLogs,
          ],
          dashVars.multiInstance,
          '-queue-manager-overview',
          dashLinks { ibmMqQueueManagerOverview+:: {} }
        ),

      'ibm-mq-queue-overview.json':
        overview(
          'queue overview',
          [gf.rows.queueOverview],
          dashVars.multiInstance + [
            labelValuesVar('queue', 'Queue', 'ibmmq_queue_average_queue_time_seconds'),
          ],
          '-queue-overview',
          dashLinks { ibmMqQueueOverview+:: {} }
        ),

      'ibm-mq-topics-overview.json':
        overview(
          'topics overview',
          [
            gf.rows.topicsRow,
            gf.rows.subscriptionsRow,
          ],
          dashVars.multiInstance + [
            labelValuesVar('topic', 'Topic', 'ibmmq_topic_subscriber_count{qmgr=~"$qmgr",topic!~"SYSTEM.*|\\\\$SYS.*|"}'),
            labelValuesVar('subscription', 'Subscription', 'ibmmq_subscription_messsages_received{qmgr=~"$qmgr",subscription!~"SYSTEM.*|"}'),
          ],
          '-topics-overview',
          dashLinks { ibmMqTopicsOverview+:: {} }
        ),
    }
    + logsDashboards,

  // Returns the dashboard settings shared by every dashboard: tags, uid, links, timezone,
  // refresh interval, time range start, variables and annotations.
  // `links` and `annotations` are objects; only their visible field values are used.
  applyCommon(vars, uid, tags, links, annotations, timezone, refresh, period):
    local linkList = std.objectValues(links);
    local annotationList = std.objectValues(annotations);
    g.dashboard.withTags(tags)
    + g.dashboard.withUid(uid)
    + g.dashboard.withLinks(linkList)
    + g.dashboard.withTimezone(timezone)
    + g.dashboard.withRefresh(refresh)
    + g.dashboard.time.withFrom(period)
    + g.dashboard.withVariables(vars)
    + g.dashboard.withAnnotations(annotationList),
}
Loading
Loading