Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 85 additions & 0 deletions f5-bigip-mixin/alerts.libsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
// Prometheus alerting rules for the F5 BIG-IP mixin.
//
// `new(this)` reads `this.config` and returns an object holding a single rule
// group named 'f5-bigip-alerts'. Every expression is scoped with
// `config.filteringSelector`, and every threshold is taken from the
// `config.alerts*` field named in the expression.
{
  new(this): {
    local config = this.config,

    groups: [
      {
        name: 'f5-bigip-alerts',
        rules: [
          {
            // 100 * sum(state) / count(series) across the selected node series.
            // clamp_min(..., 1) keeps the denominator at 1 or above.
            alert: 'BigIPLowNodeAvailabilityStatus',
            expr: |||
              100 * (sum(bigip_node_status_availability_state{%(filteringSelector)s}) / clamp_min(count(bigip_node_status_availability_state{%(filteringSelector)s}), 1)) < %(alertsCriticalNodeAvailability)s
            ||| % config,
            'for': '5m',
            labels: {
              severity: 'critical',
            },
            annotations: {
              summary: 'Detecting a significant number of unavailable nodes which can cause potential downtime or degraded performance.',
              // '%%' is an escaped percent sign: the Jsonnet format operator
              // turns it into '%', leaving '%.0f' for the alert template.
              description:
                (
                  '{{ printf "%%.0f" $value }} percent of available nodes, ' +
                  'which is below the threshold of %(alertsCriticalNodeAvailability)s.'
                ) % config,
            },
          },
          {
            // Current server-side connections as a percentage of the maximum,
            // aggregated with the `instance` and `job` labels removed.
            alert: 'BigIPServerSideConnectionLimit',
            expr: |||
              max without(instance, job) (100 * bigip_node_serverside_cur_conns{%(filteringSelector)s} / clamp_min(bigip_node_serverside_max_conns{%(filteringSelector)s}, 1)) > %(alertsWarningServerSideConnectionLimit)s
            ||| % config,
            'for': '5m',
            labels: {
              severity: 'warning',
            },
            annotations: {
              summary: 'Approaching the connection limit may lead to rejecting new connections, impacting availability.',
              description:
                (
                  '{{ printf "%%.0f" $value }} percent of the max number of connections in use on node {{$labels.node}}, ' +
                  'which is above the threshold of %(alertsWarningServerSideConnectionLimit)s percent.'
                ) % config,
            },
          },
          {
            // Request rate over the last 10m as a percentage of the rate over
            // the 50m window that ends 10m ago.
            alert: 'BigIPHighRequestRate',
            expr: |||
              max without(instance, job) (100 * rate(bigip_pool_tot_requests{%(filteringSelector)s}[10m]) / clamp_min(rate(bigip_pool_tot_requests{%(filteringSelector)s}[50m] offset 10m), 1)) > %(alertsCriticalHighRequestRate)s
            ||| % config,
            'for': '10m',
            labels: {
              severity: 'warning',
            },
            annotations: {
              summary: 'An unexpected spike in requests might indicate an issue like a DDoS attack or unexpected high load.',
              description:
                (
                  '{{ printf "%%.0f" $value }} percent increase in requests on pool {{$labels.pool}}, ' +
                  'which is above the threshold of %(alertsCriticalHighRequestRate)s.'
                ) % config,
            },
          },
          {
            // Same shape as BigIPHighRequestRate: the whole ratio sits inside
            // the aggregation. Dividing an already-aggregated numerator by the
            // raw denominator would leave `instance`/`job` on one side only,
            // so the two sides would never match and the alert could not fire.
            alert: 'BigIPHighConnectionQueueDepth',
            expr: |||
              max without(instance, job) (100 * rate(bigip_pool_connq_depth{%(filteringSelector)s}[5m]) / clamp_min(rate(bigip_pool_connq_depth{%(filteringSelector)s}[50m] offset 10m), 1)) > %(alertsCriticalHighConnectionQueueDepth)s
            ||| % config,
            'for': '5m',
            labels: {
              severity: 'warning',
            },
            annotations: {
              summary: 'A sudden spike or sustained high queue depth may indicate a bottleneck in handling incoming connections.',
              description:
                (
                  '{{ printf "%%.0f" $value }} percent increase in connection queue depth on pool {{$labels.pool}}, ' +
                  'which is above the threshold of %(alertsCriticalHighConnectionQueueDepth)s.'
                ) % config,
            },
          },
        ],
      },
    ],
  },
}
38 changes: 26 additions & 12 deletions f5-bigip-mixin/config.libsonnet
Original file line number Diff line number Diff line change
@@ -1,17 +1,31 @@
// Configuration for the F5 BIG-IP mixin: metric selector, dashboard defaults,
// alert thresholds, log-dashboard settings and signal imports.
// NOTE(review): this hunk appears to show removed and added lines interleaved
// without +/- markers (dashboardTags, the alert thresholds and enableLokiLogs
// each appear twice, and `_config+:: {` is never closed) — confirm which lines
// are live before relying on it.
{
_config+:: {
dashboardTags: ['f5-bigip-mixin'],
dashboardPeriod: 'now-30m',
dashboardTimezone: 'default',
dashboardRefresh: '1m',
// `this` is the value handed to every signals import at the end of this object.
local this = self,
// Label matcher spliced into the metric selectors of the alert expressions
// and of the dashboard variable queries.
filteringSelector: 'job="integrations/f5-bigip"',
// Joined with extraLogLabels to form the `labels` argument of the logs dashboard.
groupLabels: ['job'],
instanceLabels: ['instance'],
// Slugified by dashboards.libsonnet and used as the prefix of each dashboard UID.
uid: 'f5-bigip',
// Prepended to every dashboard title.
dashboardNamePrefix: 'F5 BIG-IP',
// Tags, time-range start, timezone and refresh interval applied to every dashboard.
dashboardTags: [self.uid + '-mixin'],
dashboardPeriod: 'now-30m',
dashboardTimezone: 'default',
dashboardRefresh: '1m',

// alerts thresholds
alertsCriticalNodeAvailability: 95, // %
alertsWarningServerSideConnectionLimit: 80, // %
alertsCriticalHighRequestRate: 150, // %
alertsCriticalHighConnectionQueueDepth: 75, // %
// alerts thresholds
// Each value is substituted into the alert expression and description that
// reference it by name.
alertsCriticalNodeAvailability: 95, // %
alertsWarningServerSideConnectionLimit: 80, // %
alertsCriticalHighRequestRate: 150, // %
alertsCriticalHighConnectionQueueDepth: 75, // %

// Logs dashboard settings. When enableLokiLogs is true, dashboards.libsonnet
// also emits 'bigip-logs.json'; showLogsVolume is forwarded to it unchanged.
enableLokiLogs: false,
filterSelector: 'job=~"syslog"',
enableLokiLogs: true,
extraLogLabels: [],
showLogsVolume: true,

// metrics source for signals
metricsSource: 'prometheus',
// One signals import per BIG-IP object type; each import is called with `this`.
signals: {
cluster: (import './signals/cluster.libsonnet')(this),
node: (import './signals/node.libsonnet')(this),
pool: (import './signals/pool.libsonnet')(this),
virtualServer: (import './signals/virtualserver.libsonnet')(this),
},
}
206 changes: 206 additions & 0 deletions f5-bigip-mixin/dashboards.libsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
local g = import './g.libsonnet';
local commonlib = import 'common-lib/common/main.libsonnet';
local logslib = import 'logs-lib/logs/main.libsonnet';

// Grafana dashboards for the F5 BIG-IP mixin.
{
  local root = self,

  // Returns an object keyed by dashboard file name. The four overview
  // dashboards are always present; 'bigip-logs.json' is added only when
  // `this.config.enableLokiLogs` is true.
  new(this)::
    local cfg = this.config;
    local namePrefix = cfg.dashboardNamePrefix;
    local dashboardLinks = this.grafana.links;
    local dashboardTags = cfg.dashboardTags;
    local baseUid = g.util.string.slugify(cfg.uid);
    local variables = this.grafana.variables;
    local dashboardAnnotations = this.grafana.annotations;
    local refreshInterval = cfg.dashboardRefresh;
    local timeFrom = cfg.dashboardPeriod;
    local tz = cfg.dashboardTimezone;
    local rows = this.grafana.rows;

    // Builds `metric{<filteringSelector><extraMatcher>}`.
    local scoped(metric, extraMatcher='') =
      metric + '{' + cfg.filteringSelector + extraMatcher + '}';

    // Multi-select query variable listing the values of `label` on
    // `metricSelector`, with an "All" option whose value is '.+'.
    local labelValuesVariable(name, label, metricSelector, displayLabel) =
      g.dashboard.variable.query.new(name)
      + g.dashboard.variable.query.withDatasourceFromVariable(variables.datasources.prometheus)
      + g.dashboard.variable.query.queryTypes.withLabelValues(label, metricSelector)
      + g.dashboard.variable.query.selectionOptions.withIncludeAll(true, '.+')
      + g.dashboard.variable.query.selectionOptions.withMulti(true)
      + g.dashboard.variable.query.generalOptions.withLabel(displayLabel)
      + g.dashboard.variable.query.refresh.onTime();

    // The partition variable is the same on every dashboard apart from the
    // selector it is read from.
    local partitionVariable(metricSelector) =
      labelValuesVariable('bigip_partition', 'partition', metricSelector, 'BIG-IP partition');

    // Single-select custom variable `k` (label 'Top node count').
    // NOTE(review): query.* option helpers are applied to a custom variable
    // here; kept exactly as in the original composition.
    local topNodeCountVariable =
      g.dashboard.variable.custom.new('k', values=['5', '10', '20', '50'])
      + g.dashboard.variable.query.selectionOptions.withMulti(false)
      + g.dashboard.variable.custom.generalOptions.withCurrent('5')
      + g.dashboard.variable.custom.generalOptions.withLabel('Top node count')
      + g.dashboard.variable.query.refresh.onTime();

    // One overview dashboard: a single row of panels plus the common settings.
    // `extraVariables` is appended to the shared multi-instance variables.
    local overviewDashboard(titleSuffix, row, extraVariables, uidSuffix, visibleLinks) =
      g.dashboard.new(namePrefix + titleSuffix)
      + g.dashboard.withPanels(
        g.util.panel.resolveCollapsedFlagOnRows(
          g.util.grid.wrapPanels([row])
        )
      )
      + root.applyCommon(
        variables.multiInstance + extraVariables,
        baseUid + uidSuffix,
        dashboardTags,
        visibleLinks,
        dashboardAnnotations,
        tz,
        refreshInterval,
        timeFrom,
      );

    // Each dashboard hides the link that points at itself.
    local overviewDashboards = {
      'bigip-cluster-overview.json':
        overviewDashboard(
          ' cluster overview',
          rows.clusterOverviewRow,
          [
            topNodeCountVariable,
            partitionVariable(scoped('bigip_node_status_availability_state')),
          ],
          '_cluster_overview',
          dashboardLinks { f5BigipClusterOverview:: {} },
        ),

      'bigip-node-overview.json':
        overviewDashboard(
          ' node overview',
          rows.nodeOverviewRow,
          [
            labelValuesVariable('bigip_node', 'node', scoped('bigip_node_status_availability_state'), 'BIG-IP node'),
            partitionVariable(scoped('bigip_node_status_availability_state', ', node=~"$bigip_node"')),
          ],
          '_node_overview',
          dashboardLinks { f5BigipNodeOverview:: {} },
        ),

      'bigip-pool-overview.json':
        overviewDashboard(
          ' pool overview',
          rows.poolOverviewRow,
          [
            labelValuesVariable('bigip_pool', 'pool', scoped('bigip_pool_status_availability_state'), 'BIG-IP pool'),
            partitionVariable(scoped('bigip_pool_status_availability_state', ', pool=~"$bigip_pool"')),
          ],
          '_pool_overview',
          dashboardLinks { f5BigipPoolOverview:: {} },
        ),

      'bigip-virtual-server-overview.json':
        overviewDashboard(
          ' virtual server overview',
          rows.virtualServerOverviewRow,
          [
            labelValuesVariable('bigip_vs', 'vs', scoped('bigip_vs_status_availability_state'), 'BIG-IP virtual server'),
            partitionVariable(scoped('bigip_vs_status_availability_state', ', vs=~"$bigip_vs"')),
          ],
          '_virtual_server_overview',
          dashboardLinks { f5BigipVirtualServerOverview:: {} },
        ),
    };

    // Logs dashboard built by logs-lib, then patched with the common settings
    // and a logs panel that shows timestamps.
    local logsDashboards =
      if cfg.enableLokiLogs then {
        'bigip-logs.json':
          logslib.new(
            namePrefix + ' logs',
            datasourceName=variables.datasources.loki.name,
            datasourceRegex=variables.datasources.loki.regex,
            filterSelector=cfg.filteringSelector,
            labels=cfg.groupLabels + cfg.extraLogLabels,
            formatParser=null,
            showLogsVolume=cfg.showLogsVolume,
          )
          {
            dashboards+:
              {
                // `super.logs.templating.list` keeps the variables logs-lib generated.
                logs+:
                  root.applyCommon(
                    super.logs.templating.list,
                    uid=baseUid + '-logs',
                    tags=dashboardTags,
                    links=dashboardLinks { f5BigipLogs+:: {} },
                    annotations=dashboardAnnotations,
                    timezone=tz,
                    refresh=refreshInterval,
                    period=timeFrom,
                  ),
              },
            panels+:
              {
                logs+:
                  {
                    logsPanel+:
                      g.panel.logs.options.withShowTime(true),
                  },
              },
          }.dashboards.logs,
      } else {};

    overviewDashboards + logsDashboards,

  // Dashboard mixin with the settings shared by every dashboard: tags, uid,
  // links, timezone, refresh, time-range start, variables and annotations.
  applyCommon(vars, uid, tags, links, annotations, timezone, refresh, period):
    local identity =
      g.dashboard.withTags(tags)
      + g.dashboard.withUid(uid)
      + g.dashboard.withLinks(links);
    local timeSettings =
      g.dashboard.withTimezone(timezone)
      + g.dashboard.withRefresh(refresh)
      + g.dashboard.time.withFrom(period);
    local content =
      g.dashboard.withVariables(vars)
      + g.dashboard.withAnnotations(annotations);
    identity + timeSettings + content,
}
Loading
Loading