Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,92 +1,77 @@
{
prometheusAlerts+:: {
groups+: [
new(this): {
groups: [
{
name: 'cloudflare-alerts',
rules: [
{
alert: 'CloudflareHighThreatCount',
expr: |||
sum without (instance) (increase(cloudflare_zone_threats_total[5m])) > %(alertsHighThreatCount)s
||| % $._config,
||| % this.config,
'for': '5m',
labels: {
severity: 'critical',
},
annotations: {
summary: 'There are detected threats targeting the zone.',
description:
(
'The number of detected threats targeting the zone {{$labels.zone}} is {{ printf "%%.0f" $value }} which is greater than the threshold of %(alertsHighThreatCount)s.'
) % $._config,
description: 'The number of detected threats targeting the zone {{$labels.zone}} is {{ printf "%%.0f" $value }} which is greater than the threshold of %(alertsHighThreatCount)s.' % this.config,
},
},
{
alert: 'CloudflareHighRequestRate',
expr: |||
sum without (instance) (100 * (rate(cloudflare_zone_requests_total[10m]) / clamp_min(rate(cloudflare_zone_requests_total[50m] offset 10m), 1))) > %(alertsHighRequestRate)s
||| % $._config,
||| % this.config,
'for': '5m',
labels: {
severity: 'warning',
},
annotations: {
summary: 'A high spike in requests is occurring which may indicate an attack or unexpected load.',
description:
(
'The rate of requests to {{$labels.zone}} is {{ printf "%%.0f" $value }}%% of the prior 50 minute baseline which is above the threshold of %(alertsHighRequestRate)s%%.'
) % $._config,
description: 'The rate of requests to {{$labels.zone}} is {{ printf "%%.0f" $value }}%% of the prior 50 minute baseline which is above the threshold of %(alertsHighRequestRate)s%%.' % this.config,
},
},
{
alert: 'CloudflareHighHTTPErrorCodes',
expr: |||
sum without (instance) (increase(cloudflare_zone_requests_status{status=~"4.*|5.*"}[5m])) > %(alertsHighHTTPErrorCodeCount)s
||| % $._config,
||| % this.config,
'for': '5m',
labels: {
severity: 'warning',
},
annotations: {
summary: 'A high number of 4xx or 5xx HTTP status codes are occurring.',
description:
(
'The number of {{$labels.status}} HTTP status codes occurring in the zone {{$labels.zone}} is {{ printf "%%.0f" $value }} which is greater than the threshold of %(alertsHighHTTPErrorCodeCount)s.'
) % $._config,
description: 'The number of {{$labels.status}} HTTP status codes occurring in the zone {{$labels.zone}} is {{ printf "%%.0f" $value }} which is greater than the threshold of %(alertsHighHTTPErrorCodeCount)s.' % this.config,
},
},
{
alert: 'CloudflareUnhealthyPools',
expr: |||
sum without (instance, load_balancer_name) (cloudflare_zone_pool_health_status) == 0
||| % $._config,
||| % this.config,
'for': '5m',
labels: {
severity: 'critical',
},
annotations: {
summary: 'There are unhealthy pools.',
description:
(
'The pool {{$labels.pool_name}} in zone {{$labels.zone}} is currently down and unhealthy.'
) % $._config,
description: 'The pool {{$labels.pool_name}} in zone {{$labels.zone}} is currently down and unhealthy.',
},
},
{
alert: 'CloudflareMetricsDown',
expr: |||
up{job="%(alertsMetricsDownJobName)s"} == 0
||| % $._config,
||| % this.config,
'for': '5m',
labels: {
severity: 'critical',
},
annotations: {
summary: 'Cloudflare metrics are down.',
description:
(
'Grafana is no longer receiving metrics for the Cloudflare integration from instance {{$labels.instance}}.'
) % $._config,
description: 'Grafana is no longer receiving metrics for the Cloudflare integration from instance {{$labels.instance}}.',
},
},
],
Expand Down
39 changes: 28 additions & 11 deletions cloudflare-mixin/config.libsonnet
Original file line number Diff line number Diff line change
@@ -1,16 +1,33 @@
{
_config+:: {
dashboardTags: ['cloudflare-mixin'],
dashboardPeriod: 'now-30m',
dashboardTimezone: 'default',
dashboardRefresh: '1m',
local this = self,
enableMultiCluster: false,
filteringSelector: 'job="integrations/cloudflare"',
groupLabels: ['job', 'cluster', 'zone', 'script_name'],
instanceLabels: ['instance'],

// CloudflareMetricsDown alert filter variable
alertsMetricsDownJobName: 'integrations/cloudflare',
dashboardTags: [self.uid],
legendLabels: ['instance'],
uid: 'cloudflare',
dashboardNamePrefix: 'Cloudflare',

// alerts thresholds
alertsHighThreatCount: 3, // count
alertsHighRequestRate: 150, // percentage
alertsHighHTTPErrorCodeCount: 100, // count
// additional params
dashboardPeriod: 'now-30m',
dashboardTimezone: 'default',
dashboardRefresh: '1m',
metricsSource: 'prometheus',

// CloudflareMetricsDown alert filter variable
alertsMetricsDownJobName: 'integrations/cloudflare',

// alerts thresholds
alertsHighThreatCount: 3, // count
alertsHighRequestRate: 150, // percentage
alertsHighHTTPErrorCodeCount: 100, // count

signals+: {
geomap: (import './signals/geomap.libsonnet')(this),
zone: (import './signals/zone.libsonnet')(this),
worker: (import './signals/worker.libsonnet')(this),
pool: (import './signals/pool.libsonnet')(this),
},
}
91 changes: 91 additions & 0 deletions cloudflare-mixin/dashboards.libsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
local g = import './g.libsonnet';

{
local root = self,
new(this)::
  // Builds the Cloudflare dashboards as an object keyed by output file name.
  // Reads from `this.config`: dashboardNamePrefix, dashboardTags, uid,
  // dashboardRefresh, dashboardPeriod, dashboardTimezone.
  // Reads from `this.grafana`: variables, links, annotations, rows, panels.
  local cfg = this.config;
  local grafana = this.grafana;
  local variables = grafana.variables;

  // Variable set used by the dashboards that also expose the geo metric selector.
  local geoVariables = variables.multiInstance + [variables.geoMetric];

  // Places the given rows on the grid, then resolves the collapsed flag on them.
  local rowLayout(rows) =
    g.util.panel.resolveCollapsedFlagOnRows(g.util.grid.wrapPanels(rows));

  // Settings shared by every dashboard; only the variables, the uid suffix
  // and the links object differ between them.
  local common(dashboardVars, uidSuffix, dashboardLinks) =
    root.applyCommon(
      dashboardVars,
      cfg.uid + uidSuffix,
      cfg.dashboardTags,
      dashboardLinks,
      grafana.annotations,
      cfg.dashboardTimezone,
      cfg.dashboardRefresh,
      cfg.dashboardPeriod
    );

  // In each dashboard below, the dashboard's own key in `links` is re-declared
  // with `+::`, which makes that field hidden; applyCommon turns links into a
  // list with std.objectValues, which only returns visible fields.
  {
    'cloudflare-zone-overview.json':
      g.dashboard.new(cfg.dashboardNamePrefix + ' zone overview')
      + g.dashboard.withPanels(
        rowLayout([
          grafana.rows.zoneOverview + g.panel.row.withCollapsed(false),
        ])
      )
      + common(
        geoVariables,
        '_cloudflare_zone_overview',
        grafana.links + { cloudflareZoneOverview+:: {} }
      ),

    'cloudflare-worker-overview.json':
      g.dashboard.new(cfg.dashboardNamePrefix + ' worker overview')
      + g.dashboard.withPanels(
        rowLayout([
          grafana.rows.workers + g.panel.row.withCollapsed(false),
        ])
      )
      + common(
        variables.multiInstance,
        '_cloudflare_worker_overview',
        grafana.links + { cloudflareWorkerOverview+:: {} }
      ),

    // The geomap dashboard has no rows: just a full-width table above a
    // full-width geomap, so only the grid wrapping is applied.
    'cloudflare-geomap-overview.json':
      g.dashboard.new(cfg.dashboardNamePrefix + ' Geomap overview')
      + g.dashboard.withPanels(
        g.util.grid.wrapPanels([
          grafana.panels.geoMetricsByCountryTablePanel
          + g.panel.table.gridPos.withW(24)
          + g.panel.table.gridPos.withH(7),
          grafana.panels.geoMetricByCountryGeomapPanel
          + g.panel.geomap.gridPos.withW(24)
          + g.panel.geomap.gridPos.withH(12),
        ])
      )
      + common(
        geoVariables,
        '_cloudflare_geomap_overview',
        grafana.links + { cloudflareGeomapOverview+:: {} }
      ),
  },
applyCommon(vars, uid, tags, links, annotations, timezone, refresh, period):
  // Returns the dashboard mixin carrying the settings every dashboard shares:
  // tags, uid, links, timezone, refresh interval, time range start,
  // template variables and annotations.
  //   vars        - list of dashboard variables
  //   links       - object whose visible field values become the dashboard links
  //   annotations - object whose visible field values become the annotations
  //   period      - value used as the `from` side of the dashboard time range
  // NOTE(review): this field is declared with ':' (visible) whereas `new` uses
  // '::' (hidden) -- confirm whether it is meant to be hidden as well.
  local dashboard = g.dashboard;
  // std.objectValues drops the keys and skips hidden fields.
  local linkList = std.objectValues(links);
  local annotationList = std.objectValues(annotations);

  dashboard.withTags(tags)
  + dashboard.withUid(uid)
  + dashboard.withLinks(linkList)
  + dashboard.withTimezone(timezone)
  + dashboard.withRefresh(refresh)
  + dashboard.time.withFrom(period)
  + dashboard.withVariables(vars)
  + dashboard.withAnnotations(annotationList),
}
Loading
Loading