Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 55 additions & 0 deletions .github/workflows/mixin.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# CI for celery-mixin: each matrix entry below becomes one job that runs a
# single make target.
name: mixin
# Grant the workflow read access to repository contents only.
permissions:
  contents: read
on:
  # Trigger only when the mixin sources or this workflow file change.
  push:
    branches:
      - master
    paths:
      - "celery-mixin/**"
      - ".github/workflows/mixin.yml"
  pull_request:
    paths:
      - "celery-mixin/**"
      - ".github/workflows/mixin.yml"

defaults:
  run:
    # Every `run` step executes inside the mixin directory.
    working-directory: ./celery-mixin
jobs:
  matrix:
    runs-on: ubuntu-latest
    name: ${{ matrix.name }}
    strategy:
      # Keep the remaining checks running when one of them fails.
      fail-fast: false
      matrix:
        # `name` labels the job and its final step; `run` is the command that
        # final step executes. Entries ending in `git diff --exit-code` also
        # fail when the make target left tracked files modified.
        include:
          - name: Lint Alerts
            run: make --always-make alerts-lint
          - name: Generate yaml
            run: make --always-make generate && git diff --exit-code
          - name: Lint Grafana Dashboards
            run: make --always-make dashboards-lint
          - name: Format Jsonnet
            run: make --always-make jsonnet-fmt && git diff --exit-code
          - name: Lint Jsonnet
            run: make --always-make jsonnet-lint
          - name: Format Markdown
            run: make --always-make markdownfmt && git diff --exit-code
          - name: Lint Markdown
            run: make --always-make vale && git diff --exit-code
          - name: Lint YAML
            run: make --always-make pint-lint
          - name: Run unit tests
            run: make --always-make test

    steps:
      # Actions are pinned to full commit SHAs; the trailing comment records the release tag.
      - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
        with:
          persist-credentials: false
      # The Go version and the cache key are read from the module under scripts/.
      - uses: actions/setup-go@d35c59abb061a4a6fb18e82ac0862c26744d6ab5 # v5.5.0
        with:
          go-version-file: ./celery-mixin/scripts/go.mod
          cache-dependency-path: ./celery-mixin/scripts/go.sum
      # Execute the command selected by the current matrix entry.
      - run: ${{ matrix.run }}
        name: Run ${{ matrix.name }}
4 changes: 3 additions & 1 deletion celery-mixin/.gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
vendor
jsonnetfile.lock.json
tmp
dashboards_out/.lint
.vale
17 changes: 17 additions & 0 deletions celery-mixin/.lint
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
---
exclusions:
template-job-rule:
reason: Jobs are set to multi in our case.
target-job-rule:
reason: Jobs are set to multi in our case.
template-instance-rule:
reason: We don't use instances.
panel-datasource-rule:
reason: Using a datasource for each panel.
panel-title-description-rule:
reason: TODO(adinhodovic)
target-instance-rule:
reason: We don't use instances.
target-rate-interval-rule:
reason: Intended 1h range.
entries:
20 changes: 20 additions & 0 deletions celery-mixin/.pint.hcl
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Per-alert pint overrides: each block below turns off the promql/regexp
# check for the rule whose name matches.
# NOTE(review): presumably required because the alert expressions use a
# regex job selector — confirm against the generated rules.
rule {
  match {
    name = "CeleryTaskHighFailRate"
  }
  disable = ["promql/regexp"]
}

rule {
  match {
    name = "CeleryHighQueueLength"
  }
  disable = ["promql/regexp"]
}

rule {
  match {
    name = "CeleryWorkerDown"
  }
  disable = ["promql/regexp"]
}
8 changes: 8 additions & 0 deletions celery-mixin/.vale.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
StylesPath = .vale/styles

MinAlertLevel = error

Packages = Readability, write-good, alex

[*]
BasedOnStyles = Readability, write-good, alex
110 changes: 91 additions & 19 deletions celery-mixin/Makefile
Original file line number Diff line number Diff line change
@@ -1,28 +1,100 @@
JSONNET_FMT := jsonnetfmt -n 2 --max-blank-lines 2 --string-style s --comment-style s
BIN_DIR ?= $(shell pwd)/tmp/bin

all: fmt prometheus-alerts.yaml dashboards_out lint
# Target directory of the `jb install` rule.
JSONNET_VENDOR=vendor
# Paths of the tool binaries; every one of them is listed in TOOLING below.
GRAFANA_DASHBOARD_LINTER_BIN=$(BIN_DIR)/dashboard-linter
JB_BIN=$(BIN_DIR)/jb
JSONNET_BIN=$(BIN_DIR)/jsonnet
JSONNETLINT_BIN=$(BIN_DIR)/jsonnet-lint
JSONNETFMT_BIN=$(BIN_DIR)/jsonnetfmt
# Markdown files to format and lint, skipping the .vale and vendor trees.
# Simply expanded (:=) so `find` runs once per make invocation instead of
# once for every reference to the variable.
MD_FILES := $(shell find . \( -type d -name '.vale' -o -type d -name 'vendor' \) -prune -o -type f -name "*.md" -print)
MARKDOWNFMT_BIN=$(BIN_DIR)/markdownfmt
VALE_BIN=$(BIN_DIR)/vale
PROMTOOL_BIN=$(BIN_DIR)/promtool
PINT_BIN=$(BIN_DIR)/pint
# All tool binaries that share the single install recipe.
TOOLING=$(JB_BIN) $(JSONNETLINT_BIN) $(JSONNET_BIN) $(JSONNETFMT_BIN) $(PROMTOOL_BIN) $(GRAFANA_DASHBOARD_LINTER_BIN) $(MARKDOWNFMT_BIN) $(VALE_BIN) $(PINT_BIN)
# Formatting options passed to jsonnetfmt.
JSONNETFMT_ARGS=-n 2 --max-blank-lines 2 --string-style s --comment-style s
# Dashboard sources and generated dashboard output; both can be overridden.
SRC_DIR ?=dashboards
OUT_DIR ?=dashboards_out

fmt:
find . -name 'vendor' -prune -o -name '*.libsonnet' -print -o -name '*.jsonnet' -print | \
xargs -n 1 -- $(JSONNET_FMT) -i
# Aggregate target: runs the fmt, generate, lint and test targets.
.PHONY: all
all: fmt generate lint test

prometheus-alerts.yaml: mixin.libsonnet config.libsonnet $(wildcard alerts/*)
jsonnet -S alerts.jsonnet > $@
# Build both generated YAML files and the dashboard output directory.
.PHONY: generate
generate: prometheus_alerts.yaml prometheus_rules.yaml $(OUT_DIR)

dashboards_out: mixin.libsonnet config.libsonnet $(wildcard dashboards/*)
@mkdir -p dashboards_out
jsonnet -J vendor -m dashboards_out dashboards.jsonnet
# Run `jb install` whenever jsonnetfile.json or the jb binary is newer than
# the vendor directory. $< is the first prerequisite, i.e. $(JB_BIN).
$(JSONNET_VENDOR): $(JB_BIN) jsonnetfile.json
	$< install

lint: prometheus-alerts.yaml
find . -name 'vendor' -prune -o -name '*.libsonnet' -print -o -name '*.jsonnet' -print | \
while read f; do \
$(JSONNET_FMT) "$$f" | diff -u "$$f" -; \
done
# Aggregate target: format jsonnet sources and markdown files.
.PHONY: fmt
fmt: jsonnet-fmt markdownfmt

promtool check rules prometheus-alerts.yaml
# Format every *.libsonnet / *.jsonnet file outside vendor/ in place.
# File names are passed NUL-delimited (-print0 / -0) so paths containing
# whitespace reach jsonnetfmt as a single argument.
.PHONY: jsonnet-fmt
jsonnet-fmt: $(JSONNETFMT_BIN)
	@find . -name 'vendor' -prune -o -name '*.libsonnet' -print0 -o -name '*.jsonnet' -print0 | \
		xargs -0 -n 1 -- $(JSONNETFMT_BIN) $(JSONNETFMT_ARGS) -i

test: prometheus-alerts.yaml
promtool test rules tests.yaml
# Format every markdown file in $(MD_FILES) in place.
# The loop exits on the first formatter failure; without that, a shell `for`
# loop reports only the status of its last iteration and earlier errors
# would be silently ignored.
.PHONY: markdownfmt
markdownfmt: $(MARKDOWNFMT_BIN)
	@for file in $(MD_FILES); do $(MARKDOWNFMT_BIN) -w -gofmt "$$file" || exit 1; done

# Render lib/alerts.jsonnet to prometheus_alerts.yaml.
# Output is written to a temporary file and moved into place only on
# success, so a failed jsonnet run cannot leave an empty or truncated
# target that make would consider up to date.
prometheus_alerts.yaml: $(JSONNET_BIN) mixin.libsonnet lib/alerts.jsonnet alerts/*.libsonnet
	@$(JSONNET_BIN) -J vendor -S lib/alerts.jsonnet > $@.tmp || { rm -f $@.tmp; exit 1; }
	@mv -f $@.tmp $@

# Render lib/rules.jsonnet to prometheus_rules.yaml, using the same
# write-then-rename scheme.
prometheus_rules.yaml: $(JSONNET_BIN) mixin.libsonnet lib/rules.jsonnet rules/*.libsonnet
	@$(JSONNET_BIN) -J vendor -S lib/rules.jsonnet > $@.tmp || { rm -f $@.tmp; exit 1; }
	@mv -f $@.tmp $@

# Render lib/dashboards.jsonnet in multi-file mode (-m) into the output
# directory. $@ is the rule target, i.e. $(OUT_DIR).
$(OUT_DIR): $(JSONNET_BIN) $(JSONNET_VENDOR) mixin.libsonnet lib/dashboards.jsonnet $(SRC_DIR)/*.libsonnet
	@mkdir -p $@
	@$(JSONNET_BIN) -J vendor -m $@ lib/dashboards.jsonnet

# Aggregate target: run every linter defined in this Makefile.
.PHONY: lint
lint: jsonnet-lint alerts-lint dashboards-lint vale pint-lint

# Lint every *.libsonnet / *.jsonnet file outside vendor/, one file per
# jsonnet-lint invocation. File names are passed NUL-delimited
# (-print0 / -0) so paths containing whitespace stay a single argument.
.PHONY: jsonnet-lint
jsonnet-lint: $(JSONNETLINT_BIN) $(JSONNET_VENDOR)
	@find . -name 'vendor' -prune -o -name '*.libsonnet' -print0 -o -name '*.jsonnet' -print0 | \
		xargs -0 -n 1 -- $(JSONNETLINT_BIN) -J vendor

# Validate both generated rule files with `promtool check rules`, rules file
# first, stopping at the first failure and propagating promtool's exit status.
.PHONY: alerts-lint
alerts-lint: $(PROMTOOL_BIN) prometheus_alerts.yaml prometheus_rules.yaml
	@for rule_file in prometheus_rules.yaml prometheus_alerts.yaml; do \
		$(PROMTOOL_BIN) check rules $$rule_file || exit $$?; \
	done

# Copy the linter exclusions file next to the generated dashboards.
# `.lint` is listed as a prerequisite so that editing it refreshes the copy;
# $< expands to `.lint`, the first prerequisite.
$(OUT_DIR)/.lint: .lint $(OUT_DIR)
	@cp $< $@

# Lint every generated dashboard except celery-tasks.json with the dashboard
# linter in strict mode, one file per invocation.
# NOTE: the sed step rewrites the JSON files in $(OUT_DIR) in place before
# linting.
.PHONY: dashboards-lint
dashboards-lint: $(GRAFANA_DASHBOARD_LINTER_BIN) $(OUT_DIR)/.lint
	# Replace $$interval:$$resolution var with $$__rate_interval to make dashboard-linter happy.
	@sed -i -e 's/$$interval:$$resolution/$$__rate_interval/g' $(OUT_DIR)/*.json
	@find $(OUT_DIR) -name '*.json' ! -name 'celery-tasks.json' -print0 | xargs -n 1 -0 $(GRAFANA_DASHBOARD_LINTER_BIN) lint --strict

# Run `vale sync`, then run vale over the markdown files. Each recipe line
# must succeed for make to continue, so the check only runs after a
# successful sync.
.PHONY: vale
vale: $(VALE_BIN)
	@$(VALE_BIN) sync
	@$(VALE_BIN) $(MD_FILES)

# Lint the generated rule files with pint and fail when it prints anything.
.PHONY: pint-lint
pint-lint: generate $(PINT_BIN)
	@# Pint will not exit with a non-zero status code if there are linting issues.
	@# Any captured output is therefore treated as a failure. printf is used
	@# instead of echo because echo's handling of "\n" differs between shells.
	@output=$$($(PINT_BIN) -n -o -l WARN lint prometheus_alerts.yaml prometheus_rules.yaml 2>&1); \
	if [ -n "$$output" ]; then \
		printf '\n%s\n' "$$output"; \
		exit 1; \
	fi

# Remove build outputs: deletes dashboards_out and prometheus-alerts.yaml,
# then every git-ignored file and directory below the current directory.
# NOTE(review): the rm line names prometheus-alerts.yaml (hyphen) while the
# generate rules produce prometheus_alerts.yaml (underscore) — confirm the
# rm line is still wanted.
.PHONY: clean
clean:
	rm -rf dashboards_out prometheus-alerts.yaml
	# Remove all files and directories ignored by git.
	git clean -Xfd .

# Run `promtool test rules` on tests/*.yaml. The generated alert and rule
# files are prerequisites, so they are built before the tests run.
.PHONY: test
test: $(PROMTOOL_BIN) prometheus_alerts.yaml prometheus_rules.yaml
	@$(PROMTOOL_BIN) test rules tests/*.yaml

# Create the directory that holds the tool binaries ($@ is $(BIN_DIR)).
$(BIN_DIR):
	mkdir -p $@

# Build the tool binaries into $(BIN_DIR).
# The recipe lists the imports of the Go package in scripts/ (build tag
# `tools`) and runs `go build` for each of them, so a single run builds every
# tool, whichever target triggered it.
# $(BIN_DIR) is an order-only prerequisite: it must exist, but its
# modification time (which changes whenever a file is added to it) must not
# make the tools look out of date.
$(TOOLING): | $(BIN_DIR)
	@echo Installing tools from scripts/
	@cd scripts && go list -e -mod=mod -tags tools -f '{{ range .Imports }}{{ printf "%s\n" .}}{{end}}' ./ | xargs -tI % go build -mod=mod -o $(BIN_DIR) %
11 changes: 3 additions & 8 deletions celery-mixin/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@ A set of Grafana dashboards and Prometheus alerts for Celery.

## How to use

This mixin is designed to be vendored into the repo with your infrastructure config.
To do this, use [jsonnet-bundler](https://github.com/jsonnet-bundler/jsonnet-bundler):
This mixin is designed to be vendored into the repo with your infrastructure config. To do this, use [jsonnet-bundler](https://github.com/jsonnet-bundler/jsonnet-bundler):

You then have three options for deploying your dashboards

Expand All @@ -15,8 +14,7 @@ You then have three options for deploying your dashboards

## Generate config files

You can manually generate the alerts, dashboards and rules files, but first you
must install some tools:
You can manually generate the alerts, dashboards and rules files, but first you must install some tools:

```sh
go get github.com/jsonnet-bundler/jsonnet-bundler/cmd/jb
Expand All @@ -38,10 +36,7 @@ make prometheus-alerts.yaml
make dashboards_out
```

The `prometheus-alerts.yaml` file then needs to be passed
to your Prometheus server, and the files in `dashboards_out` need to be imported
into your Grafana server. The exact details will depend on how you deploy your
monitoring stack.
The `prometheus-alerts.yaml` file then needs to be passed to your Prometheus server, and the files in `dashboards_out` need to be imported into your Grafana server. The exact details will depend on how you deploy your monitoring stack.

## Alerts

Expand Down
6 changes: 3 additions & 3 deletions celery-mixin/alerts/alerts.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
annotations: {
summary: 'Celery high task fail rate.',
description: 'More than %(celeryTaskFailedThreshold)s%% tasks failed for the task {{ $labels.job }}/{{ $labels.queue_name }}/{{ $labels.name }} the past %(celeryTaskFailedInterval)s.' % $._config,
dashboard_url: $._config.celeryTasksByTaskUrl + '?var-job={{ $labels.job }}&var-queue_name={{ $labels.queue_name }}&var-task={{ $labels.name }}',
dashboard_url: $._config.dashboardUrls['celery-tasks-by-task'] + '?var-job={{ $labels.job }}&var-queue_name={{ $labels.queue_name }}&var-task={{ $labels.name }}',
},
'for': '1m',
labels: {
Expand All @@ -68,7 +68,7 @@
annotations: {
summary: 'Celery high queue length.',
description: 'More than %(celeryHighQueueLengthThreshold)s tasks in the queue {{ $labels.job }}/{{ $labels.queue_name }} the past %(celeryHighQueueLengthInterval)s.' % $._config,
dashboard_url: $._config.celeryTasksOverviewUrl + '?&var-job={{ $labels.job }}&var-queue_name={{ $labels.queue_name }}',
dashboard_url: $._config.dashboardUrls['celery-tasks-overview'] + '?&var-job={{ $labels.job }}&var-queue_name={{ $labels.queue_name }}',
},
},
if $._config.celeryWorkerDownAlertEnabled then {
Expand All @@ -83,7 +83,7 @@
annotations: {
summary: 'A Celery worker is offline.',
description: 'The Celery worker {{ $labels.job }}/{{ $labels.hostname }} is offline.',
dashboard_url: $._config.celeryTasksOverviewUrl + '?&var-job={{ $labels.job }}',
dashboard_url: $._config.dashboardUrls['celery-tasks-overview'] + '?&var-job={{ $labels.job }}',
},
},
]),
Expand Down
46 changes: 23 additions & 23 deletions celery-mixin/config.libsonnet
Original file line number Diff line number Diff line change
@@ -1,24 +1,22 @@
local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet';
local annotation = g.dashboard.annotation;

{
_config+:: {
local this = self,

// Selectors are inserted between {} in Prometheus queries.
celerySelector: 'job=~".*celery.*"',

// Default datasource name
datasourceName: 'default',

// Opt-in to multiCluster dashboards by overriding this and the clusterLabel.
showMultiCluster: false,
clusterLabel: 'cluster',

grafanaUrl: 'https://grafana.com',

celeryIgnoredTasks: 'None',
celeryIgnoredQueues: 'None',

celeryTasksOverviewUid: 'celery-tasks-overview-32s3',
celeryTasksByTaskUid: 'celery-tasks-by-task-32s3',

celeryTasksOverviewUrl: '%s/d/%s/celery-tasks-overview' % [self.grafanaUrl, self.celeryTasksOverviewUid],
celeryTasksByTaskUrl: '%s/d/%s/celery-tasks-by-task' % [self.grafanaUrl, self.celeryTasksByTaskUid],

tags: ['celery', 'celery-mixin'],

// If you have autoscaling workers, you may not want to alert on workers that are down.
celeryWorkerDownAlertEnabled: true,
celeryCeleryHighQueueLengthAlertEnabled: true,
Expand All @@ -29,23 +27,25 @@ local annotation = g.dashboard.annotation;
celeryHighQueueLengthThreshold: '100',
celeryWorkerDownInterval: '15m',

dashboardIds: {
'celery-tasks-overview': 'celery-tasks-overview-32s3',
'celery-tasks-by-task': 'celery-tasks-by-task-32s3',
},
dashboardUrls: {
'celery-tasks-overview': '%s/d/%s/celery-tasks-overview' % [this.grafanaUrl, this.dashboardIds['celery-tasks-overview']],
'celery-tasks-by-task': '%s/d/%s/celery-tasks-by-task' % [this.grafanaUrl, this.dashboardIds['celery-tasks-by-task']],
},

tags: ['celery', 'celery-mixin'],

// Custom annotations to display in graphs
annotation: {
enabled: false,
name: 'Custom Annotation',
datasource: '-- Grafana --',
iconColor: 'green',
tags: [],
datasource: '-- Grafana --',
iconColor: 'blue',
type: 'tags',
},

customAnnotation:: if $._config.annotation.enabled then
annotation.withName($._config.annotation.name) +
annotation.withIconColor($._config.annotation.iconColor) +
annotation.withHide(false) +
annotation.datasource.withUid($._config.annotation.datasource) +
annotation.target.withMatchAny(true) +
annotation.target.withTags($._config.annotation.tags) +
annotation.target.withType('tags')
else {},
},
}
Loading