diff --git a/docs/guides/migration-recipe.md b/docs/guides/migration-recipe.md index 7b0aedc5..5f8befad 100644 --- a/docs/guides/migration-recipe.md +++ b/docs/guides/migration-recipe.md @@ -155,7 +155,7 @@ index 68387c9..7a8ace1 100644 sudo service nginx stop ``` ```{note} - Don't forget to pause service checks for both the old and new hosts in things like Dead Man's Snitch, Pingdom, etc. + Don't forget to pause service checks for both the old and new hosts in things like Sentry monitors, Pingdom, etc. ``` 4. Ensure that any additional volumes are mounted and in the correct location: - Check what disks are currently mounted and where: `df` diff --git a/docs/overview.rst b/docs/overview.rst index 8e2ff8b2..c637c286 100644 --- a/docs/overview.rst +++ b/docs/overview.rst @@ -63,6 +63,11 @@ Pingdom `Pingdom `_ provides monitoring and complains to us when services are down. +Sentry + `Sentry <https://sentry.io/>`_ is used for error reporting and monitoring of + many services. It also provides Salt highstate cron monitoring, which + notifies us when runs fail over a certain threshold. + PagerDuty `PagerDuty `_ is used for on-call rotation for PSF Infrastructure employees on the front-line, and volunteers as backup. 
diff --git a/pillar/dev/secrets/sentry.sls b/pillar/dev/secrets/sentry.sls
new file mode 100644
index 00000000..660b8adc
--- /dev/null
+++ b/pillar/dev/secrets/sentry.sls
@@ -0,0 +1,3 @@
+project_id: 123456789012345
+project_key: deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef
+ingest_url: deadbeef.ingest
diff --git a/pillar/dev/top.sls b/pillar/dev/top.sls
index de9057ec..fceb9a85 100644
--- a/pillar/dev/top.sls
+++ b/pillar/dev/top.sls
@@ -8,6 +8,7 @@ base:
     - tls
     - users.*
     - postgres.clusters
+    # - secrets.sentry  # Uncomment and update sentry secrets if you want to work in dev
 
   'backup-server':
     - match: nodegroup
diff --git a/salt/_extensions/pillar/dms.py b/salt/_extensions/pillar/dms.py
deleted file mode 100644
index 47861001..00000000
--- a/salt/_extensions/pillar/dms.py
+++ /dev/null
@@ -1,48 +0,0 @@
-import pathlib
-
-try:
-    import requests
-    from requests.auth import HTTPBasicAuth
-
-    HAS_REQUESTS = True
-except ImportError:
-    HAS_REQUESTS = False
-
-
-def ext_pillar(minion_id, pillar, api_key=None, base_path="/etc/deadmanssnitch/"):
-    base_path = pathlib.Path(base_path)
-    # Ensure base path exists
-    base_path.mkdir(parents=True, exist_ok=True)
-
-    minion_path = base_path / minion_id
-
-    if minion_path.exists():
-        token = minion_path.read_text()
-        if token:
-            return {"deadmanssnitch": {"token": token}}
-
-    snitches = requests.get(
-        "https://api.deadmanssnitch.com/v1/snitches",
-        params={"tags": "salt-master"},
-        auth=HTTPBasicAuth(api_key, ""),
-    )
-
-    for snitch in snitches.json():
-        if snitch["name"] == f"salt-highstate {minion_id}":
-            token = snitch["token"]
-            minion_path.write_text(token)
-            return {"deadmanssnitch": {"token": token}}
-
-    snitch = requests.post(
-        "https://api.deadmanssnitch.com/v1/snitches",
-        auth=HTTPBasicAuth(api_key, ""),
-        json={
-            "name": f"salt-highstate {minion_id}",
-            "interval": "hourly",
-            "alert_type": "basic",
-            "tags": ["salt-master"],
-        },
-    )
-    token = snitch.json()["token"]
-    minion_path.write_text(token)
-    return {"deadmanssnitch": {"token": token}}
diff --git a/salt/base/auto-highstate.sls b/salt/base/auto-highstate.sls
index e33d7b00..459cae3e 100644
--- a/salt/base/auto-highstate.sls
+++ b/salt/base/auto-highstate.sls
@@ -1,18 +1,27 @@
-{% set dms_token = salt["pillar.get"]("deadmanssnitch:token") %}
+{% set sentry_enabled = salt["pillar.get"]("project_id") and salt["pillar.get"]("project_key") and salt["pillar.get"]("ingest_url") %}
+
+{% if sentry_enabled %}
+curl:
+  pkg.installed
+
+/usr/local/bin/sentry-checkin.sh:
+  file.managed:
+    - source: salt://base/scripts/sentry-checkin.sh.jinja
+    - template: jinja
+    - mode: '0755'
+    - user: root
+    - group: root
+{% endif %}
 
-{% if dms_token %}
-15m-interval-highstate:
-  cron.present:
-    - identifier: 15m-interval-highstate
-    - name: "timeout 5m salt-call state.highstate >> /var/log/salt/cron-highstate.log 2>&1; curl https://nosnch.in/{{ dms_token }} &> /dev/null"
-    - minute: '*/15'
-{% else %}
 15m-interval-highstate:
   cron.present:
     - identifier: 15m-interval-highstate
-    - name: "timeout 5m salt-call state.highstate >> /var/log/salt/cron-highstate.log 2>&1"
+    - name: "{% if sentry_enabled %}/usr/local/bin/sentry-checkin.sh {% endif %}timeout 5m salt-call state.highstate >> /var/log/salt/cron-highstate.log 2>&1"
     - minute: '*/15'
-{% endif %}
+    {% if sentry_enabled %}
+    - require:
+      - file: /usr/local/bin/sentry-checkin.sh
+    {% endif %}
 
 /etc/logrotate.d/salt:
 {% if grains["oscodename"] == "xenial" %}
diff --git a/salt/base/scripts/sentry-checkin.sh.jinja b/salt/base/scripts/sentry-checkin.sh.jinja
new file mode 100644
index 00000000..220b011d
--- /dev/null
+++ b/salt/base/scripts/sentry-checkin.sh.jinja
@@ -0,0 +1,45 @@
+#!/bin/bash
+#
+# Run a command wrapped in Sentry cron check-ins.
+#
+# Usage: sentry-checkin.sh COMMAND [ARG...]
+#
+# Sends an "in_progress" check-in before COMMAND starts and an "ok" or "error"
+# check-in afterwards, then exits with COMMAND's exit status.
+
+MINION_ID="{{ grains['id'] }}"
+SENTRY_INGEST_URL="{{ pillar.get('ingest_url', '') }}"
+SENTRY_PROJECT_ID="{{ pillar.get('project_id', '') }}"
+SENTRY_PROJECT_KEY="{{ pillar.get('project_key', '') }}"
+
+# The monitor slug embeds the minion id with its dots removed.
+MONITOR_SLUG="salt-highstate-${MINION_ID//./}"
+
+# Monitoring must never keep the wrapped command from running: when the Sentry
+# settings are incomplete, run the command without any check-ins.
+if [ -z "$SENTRY_INGEST_URL" ] || [ -z "$SENTRY_PROJECT_ID" ] || [ -z "$SENTRY_PROJECT_KEY" ]; then
+    "$@"
+    exit $?
+fi
+
+CHECKIN_URL="https://${SENTRY_INGEST_URL}/api/${SENTRY_PROJECT_ID}/cron/${MONITOR_SLUG}/${SENTRY_PROJECT_KEY}/"
+
+# Upper bound (seconds) per check-in request, so an unresponsive ingest
+# endpoint cannot stall the wrapped command indefinitely.
+CURL_MAX_TIME=10
+
+# Opening check-in; its payload also carries the monitor configuration.
+curl --max-time "$CURL_MAX_TIME" -X POST "$CHECKIN_URL" \
+    --header 'Content-Type: application/json' \
+    --data-raw '{"monitor_config": {"schedule": {"type": "crontab", "value": "*/15 * * * *"}, "checkin_margin": 5, "max_runtime": 30, "timezone": "UTC"}, "status": "in_progress"}' &> /dev/null
+
+"$@"
+COMMAND_EXIT=$?
+
+if [ "$COMMAND_EXIT" -eq 0 ]; then
+    curl --max-time "$CURL_MAX_TIME" "${CHECKIN_URL}?status=ok" &> /dev/null
+else
+    curl --max-time "$CURL_MAX_TIME" "${CHECKIN_URL}?status=error" &> /dev/null
+fi
+
+exit "$COMMAND_EXIT"