From f0d8cf044e3a94e63fbe75fb3d242d2e0e259dd6 Mon Sep 17 00:00:00 2001
From: Dustin Kaiser
Date: Thu, 19 Sep 2024 16:49:57 +0200
Subject: [PATCH 01/10] wip

---
 charts/Makefile  | 33 +++++++++++++++++++++------------
 charts/README.md |  8 ++++++++
 2 files changed, 29 insertions(+), 12 deletions(-)

diff --git a/charts/Makefile b/charts/Makefile
index 8bf61fee..f33c5391 100644
--- a/charts/Makefile
+++ b/charts/Makefile
@@ -7,33 +7,32 @@ CONFIG_DIR := $(shell dirname $(REPO_CONFIG_LOCATION))
 CHART_DIRS := $(wildcard $(REPO_BASE_DIR)/charts/*/)
 
 .PHONY: .check-helmfile-installed
-.check-helmfile-installed:
+.check-helmfile-installed: ## Checks if helmfile is installed
 	@if ! command -v helmfile >/dev/null 2>&1; then \
 		echo "'helmfile' is not installed. Install it to continue ...";\
 	fi
 
-helmfile.yaml: simcore-charts/helmfile.yaml
+helmfile.yaml: simcore-charts/helmfile.yaml ## Copies the helmfile.yaml to the charts directory
 	cp $(CONFIG_DIR)/$@ $(REPO_BASE_DIR)/charts/helmfile.yaml
 
-simcore-charts/helmfile.yaml:
+simcore-charts/helmfile.yaml: ## Copies the simcore helmfile to the charts directory
 	cp $(CONFIG_DIR)/helmfile.simcore.yaml $(REPO_BASE_DIR)/charts/$@
 
 .PHONY: helmfile-lint
-helmfile-lint: .check-helmfile-installed helmfile.yaml
+helmfile-lint: .check-helmfile-installed helmfile.yaml ## Lints the helmfile
 	set -a; source $(REPO_CONFIG_LOCATION); set +a; \
 	helmfile lint
 
 .PHONY: .helmfile-local-post-install
-.helmfile-local-post-install:
+.helmfile-local-post-install: ## Post install steps for local helmfile deployment
 	@$(MAKE) -s configure-local-hosts
 	@echo "";
 	@echo "Cluster has been deployed locally: http://$(MACHINE_FQDN)";
 	@echo " For secure connections self-signed certificates are used.";
-	@echo " Install their root-ca certificate in your system for smooth experience.";
-	@echo " For insecure connections make sure to disable automatic https redirects in your browser.";
+	@echo " Install their root-ca certificate in your system for smooth experience.";
 
 .PHONY: helmfile-apply
-helmfile-apply: .check-helmfile-installed helmfile.yaml
+helmfile-apply: .check-helmfile-installed helmfile.yaml ## Applies the helmfile configuration
 	set -a; source $(REPO_CONFIG_LOCATION); set +a; \
 	helmfile -f $(REPO_BASE_DIR)/charts/helmfile.yaml apply
 
@@ -41,17 +40,27 @@ helmfile-apply: .check-helmfile-installed helmfile.yaml
 		$(MAKE) -s .helmfile-local-post-install; \
 	fi
 
+.PHONY: helmfile-sync
+helmfile-sync: .check-helmfile-installed helmfile.yaml ## Syncs the helmfile configuration
+	set -a; source $(REPO_CONFIG_LOCATION); set +a; \
+	helmfile -f $(REPO_BASE_DIR)/charts/helmfile.yaml sync
+
+	@if [ "$(MACHINE_FQDN)" = "osparc.local" ]; then \
+		$(MAKE) -s .helmfile-local-post-install; \
+	fi
+
+
 .PHONY: configure-local-hosts
-configure-local-hosts:
-	@echo "Addings $(MACHINE_FQDN) hosts to /etc/hosts ..."
+configure-local-hosts: ## Adds local hosts entries for the machine
+	@echo "Adding $(MACHINE_FQDN) hosts to /etc/hosts ..."
 	@grep -q '127.0.0.1 k8s.monitoring.$(MACHINE_FQDN)' /etc/hosts || echo '127.0.0.1 k8s.monitoring.$(MACHINE_FQDN)' | sudo tee -a /etc/hosts
 
 .PHONY: helmfile-diff
-helmfile-diff: .check-helmfile-installed helmfile.yaml
+helmfile-diff: .check-helmfile-installed helmfile.yaml ## Shows the differences that would be applied by helmfile
 	@set -a; source $(REPO_CONFIG_LOCATION); set +a; \
 	helmfile -f $(REPO_BASE_DIR)/charts/helmfile.yaml diff
 
 .PHONY: helmfile-delete
-helmfile-delete: .check-helmfile-installed helmfile.yaml
+helmfile-delete: .check-helmfile-installed helmfile.yaml ## Deletes the helmfile configuration
 	@set -a; source $(REPO_CONFIG_LOCATION); set +a; \
 	helmfile -f $(REPO_BASE_DIR)/charts/helmfile.yaml delete
diff --git a/charts/README.md b/charts/README.md
index 13767e80..45d83630 100644
--- a/charts/README.md
+++ b/charts/README.md
@@ -23,6 +23,14 @@ source: https://kind.sigs.k8s.io/docs/user/quick-start
 
 Follow the instructions here: https://helm.sh/docs/intro/install/
 
+Install the helm-diff plugin: `helm plugin install https://github.com/databus23/helm-diff`
+
+Install traefik-v3 CRDs (source: https://doc.traefik.io/traefik/user-guides/crd-acme/#ingressroute-definition):
+`kubectl apply -f https://raw.githubusercontent.com/traefik/traefik/v3.1/docs/content/reference/dynamic-configuration/kubernetes-crd-definition-v1.yml`
+
+Install traefik-v3 RBAC (source: https://doc.traefik.io/traefik/user-guides/crd-acme/#ingressroute-definition):
+`kubectl apply -f https://raw.githubusercontent.com/traefik/traefik/v3.1/docs/content/reference/dynamic-configuration/kubernetes-crd-rbac.yml`
+
 #### helmfile
 
 If you have a different OS / architecture, pick a different link from [release artifacts](https://github.com/helmfile/helmfile/releases)
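Editor's note: the targets in PATCH 01 assume that helmfile, the helm-diff plugin and the traefik CRDs are already in place before `make helmfile-diff` / `make helmfile-apply` are useful. Below is a minimal preflight sketch in Python; it is illustrative only — the probe commands (`helm plugin list`, `kubectl get crd`) and the CRD name are assumptions of this note, not something the patch ships.

```python
#!/usr/bin/env python3
"""Preflight check before running `make helmfile-diff` / `make helmfile-apply`."""
import shutil
import subprocess
import sys


def have_binary(name: str) -> bool:
    # Same idea as `command -v <name>` in the .check-helmfile-installed target
    return shutil.which(name) is not None


def have_helm_diff() -> bool:
    # `helm plugin list` prints one row per installed plugin
    out = subprocess.run(["helm", "plugin", "list"], capture_output=True, text=True)
    return out.returncode == 0 and "diff" in out.stdout


def have_traefik_crds() -> bool:
    # IngressRoute is one of the CRDs installed by the kubectl commands in the README
    out = subprocess.run(
        ["kubectl", "get", "crd", "ingressroutes.traefik.io"],
        capture_output=True,
        text=True,
    )
    return out.returncode == 0


if __name__ == "__main__":
    ok = all(
        [
            have_binary("helmfile"),
            have_binary("helm"),
            have_binary("kubectl"),
            have_helm_diff(),
            have_traefik_crds(),
        ]
    )
    sys.exit(0 if ok else 1)
```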
From 293f63c8c7971afeb0de64af16d01153bf76eed4 Mon Sep 17 00:00:00 2001
From: Dustin Kaiser
Date: Thu, 24 Oct 2024 10:13:28 +0200
Subject: [PATCH 02/10] Add csi-s3 and have portainer use it

---
 .gitignore                          | 3 +++
 charts/csi-s3/values.yaml.gotmpl    | 7 +++++++
 charts/portainer/values.yaml.gotmpl | 6 ++++++
 3 files changed, 16 insertions(+)
 create mode 100644 charts/csi-s3/values.yaml.gotmpl

diff --git a/.gitignore b/.gitignore
index 0c825bcd..24edb7f6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -149,3 +149,6 @@ docker-compose.simcore.yml
 repo.config
 .temp
 .temp/**
+
+# By convention: `.secret` files are gitignored
+**/*.secret
diff --git a/charts/csi-s3/values.yaml.gotmpl b/charts/csi-s3/values.yaml.gotmpl
new file mode 100644
index 00000000..7e6ff4c9
--- /dev/null
+++ b/charts/csi-s3/values.yaml.gotmpl
@@ -0,0 +1,7 @@
+secret:
+  accessKey: {{ requiredEnv "S3_ACCESS_KEY" }}
+  secretKey: {{ requiredEnv "S3_SECRET_KEY" }}
+  region: {{ requiredEnv "S3_REGION" }}
+  endpoint: {{ requiredEnv "S3_ENDPOINT" }}
+storageClass:
+  singleBucket: {{ requiredEnv "S3_K8S_CSI_BUCKET_NAME" }}
diff --git a/charts/portainer/values.yaml.gotmpl b/charts/portainer/values.yaml.gotmpl
index e89f2457..edc56479 100644
--- a/charts/portainer/values.yaml.gotmpl
+++ b/charts/portainer/values.yaml.gotmpl
@@ -18,6 +18,12 @@ serviceAccount:
   # The name of the service account to use.
   # If not set and create is true, a name is generated using the fullname template
   name: portainer-sa-clusteradmin
+persistence:
+  enabled: true
+  size: "10Gi"
+  annotations: {}
+  storageClass: "csi-s3"
+  existingClaim:
 
 podAnnotations: {}
 podLabels: {}
From f7f72ec27e13232dababef209c92e2a3a1b983d4 Mon Sep 17 00:00:00 2001
From: Dustin Kaiser
Date: Fri, 25 Oct 2024 08:37:18 +0200
Subject: [PATCH 03/10] Change request @hrytsuk 1GB max portainer volume size

---
 charts/portainer/values.yaml.gotmpl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/charts/portainer/values.yaml.gotmpl b/charts/portainer/values.yaml.gotmpl
index edc56479..1f5f5c44 100644
--- a/charts/portainer/values.yaml.gotmpl
+++ b/charts/portainer/values.yaml.gotmpl
@@ -20,7 +20,7 @@ serviceAccount:
   name: portainer-sa-clusteradmin
 persistence:
   enabled: true
-  size: "10Gi"
+  size: "1Gi"
   annotations: {}
   storageClass: "csi-s3"
   existingClaim:

From c9c70d642e632e920a25cc76395cc56f1be8f134 Mon Sep 17 00:00:00 2001
From: Dustin Kaiser
Date: Tue, 3 Dec 2024 16:08:00 +0100
Subject: [PATCH 04/10] Arch Linux Certificates Customization

---
 certificates/Makefile | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/certificates/Makefile b/certificates/Makefile
index a9326900..9af700df 100644
--- a/certificates/Makefile
+++ b/certificates/Makefile
@@ -66,16 +66,10 @@ install-root-certificate: rootca.crt ## installs a certificate in the host syste
 		echo "Is the DOCKER service ready? press when ready" && read -n 1; \
 		fi;\
 		echo "======================================";,\
-	$(if $(IS_OSX), \
-		sudo security add-trusted-cert -d -k /Library/Keychains/System.keychain $<; \
-		echo "Please restart the DOCKER service now..." && read -n 1; \
-		echo "Is the DOCKER service ready? press when ready" && read -n 1; \
-	, \
-		sudo cp $< /usr/local/share/ca-certificates/osparc.crt; \
-		sudo update-ca-certificates -f; \
-		echo "# restarting docker daemon"; \
+	sudo cp $< /etc/ca-certificates/trust-source/anchors/osparc.crt; \
+	sudo trust extract-compat && \
+	echo "# restarting docker daemon" && \
 	sudo systemctl restart docker \
-	) \
 )
From 94b996ac6e7030a2756ea7a0d047696f04964c29 Mon Sep 17 00:00:00 2001
From: Dustin Kaiser
Date: Fri, 28 Mar 2025 09:52:19 +0100
Subject: [PATCH 05/10] Fixes https://github.com/ITISFoundation/osparc-simcore/issues/7363

---
 .../contentpacks/osparc-custom-content-pack-v2.json | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/services/graylog/data/contentpacks/osparc-custom-content-pack-v2.json b/services/graylog/data/contentpacks/osparc-custom-content-pack-v2.json
index f50cec48..2bd87c57 100644
--- a/services/graylog/data/contentpacks/osparc-custom-content-pack-v2.json
+++ b/services/graylog/data/contentpacks/osparc-custom-content-pack-v2.json
@@ -1,9 +1,9 @@
 {
   "v": "1",
-  "id": "dfaeea11-bde6-4203-9cfe-6ca2a23ca22e",
-  "rev": 42,
-  "name": "osparc-custom-content-pack-v2",
-  "summary": "osparc-custom-content-pack-v2",
+  "id": "daaeea11-bde6-4203-9cfe-6ca2a23ca22e",
+  "rev": 1,
+  "name": "osparc-custom-content-pack-v3",
+  "summary": "osparc-custom-content-pack-v3",
   "description": "",
   "vendor": "Osparc team",
   "url": "",
@@ -623,7 +623,7 @@
         "configuration": {
           "grok_pattern": {
             "@type": "string",
-            "@value": "log_level=%{WORD:log_level} \\| log_timestamp=%{TIMESTAMP_ISO8601:log_timestamp} \\| log_source=%{DATA:log_source} \\| (log_uid=%{WORD:log_uid} \\| )?log_msg=%{GREEDYDATA:log_msg}"
+            "@value": "log_level=%{WORD:log_level} \\| log_timestamp=%{TIMESTAMP_ISO8601:log_timestamp} \\| log_source=%{DATA:log_source} \\| (log_uid=%{WORD:log_uid} \\| )?(log_oec=%{WORD:log_oec} \\| )?log_msg=%{GREEDYDATA:log_msg}"
           },
           "named_captures_only": {
            "@type": "boolean",
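Editor's note: the new optional `log_oec` capture is easiest to sanity-check outside Graylog. Below is a rough Python `re` equivalent of the (parenthesis-balanced) grok pattern above; the `%{WORD}`/`%{DATA}`/`%{TIMESTAMP_ISO8601}`/`%{GREEDYDATA}` translations are approximations and both sample log lines are invented for illustration.

```python
import re

# Approximate PCRE translation of the grok pattern from the content pack:
# %{WORD} -> \w+, %{DATA} -> .*?, %{GREEDYDATA} -> .*, timestamp simplified
LOG_RE = re.compile(
    r"log_level=(?P<log_level>\w+) \| "
    r"log_timestamp=(?P<log_timestamp>\d{4}-\d{2}-\d{2}T[\d:.+-]+) \| "
    r"log_source=(?P<log_source>.*?) \| "
    r"(?:log_uid=(?P<log_uid>\w+) \| )?"
    r"(?:log_oec=(?P<log_oec>\w+) \| )?"
    r"log_msg=(?P<log_msg>.*)"
)

for line in [
    "log_level=ERROR | log_timestamp=2025-03-28T09:00:00.000 | log_source=webserver | log_msg=boom",
    "log_level=ERROR | log_timestamp=2025-03-28T09:00:00.000 | log_source=webserver | log_uid=42 | log_oec=OEC123 | log_msg=boom",
]:
    m = LOG_RE.match(line)
    assert m, line
    # Only non-empty captures are printed; log_uid/log_oec stay None when absent
    print({k: v for k, v in m.groupdict().items() if v is not None})
```

Both optional groups must be independently optional — `(log_uid=... \| )?(log_oec=... \| )?` — so that old log lines without `log_oec` keep parsing; that is why the unbalanced variant would be a regression.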
From 018b70838adac40aca0322cbb244a8326bfd6f4c Mon Sep 17 00:00:00 2001
From: Dustin Kaiser
Date: Mon, 31 Mar 2025 09:24:52 +0200
Subject: [PATCH 06/10] fixes

---
 certificates/Makefile                               | 12 +++++++++---
 .../contentpacks/osparc-custom-content-pack-v2.json |  8 ++++----
 2 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/certificates/Makefile b/certificates/Makefile
index 9af700df..a9326900 100644
--- a/certificates/Makefile
+++ b/certificates/Makefile
@@ -66,10 +66,16 @@ install-root-certificate: rootca.crt ## installs a certificate in the host syste
 		echo "Is the DOCKER service ready? press when ready" && read -n 1; \
 		fi;\
 		echo "======================================";,\
-	sudo cp $< /etc/ca-certificates/trust-source/anchors/osparc.crt; \
-	sudo trust extract-compat && \
-	echo "# restarting docker daemon" && \
+	$(if $(IS_OSX), \
+		sudo security add-trusted-cert -d -k /Library/Keychains/System.keychain $<; \
+		echo "Please restart the DOCKER service now..." && read -n 1; \
+		echo "Is the DOCKER service ready? press when ready" && read -n 1; \
+	, \
+		sudo cp $< /usr/local/share/ca-certificates/osparc.crt; \
+		sudo update-ca-certificates -f; \
+		echo "# restarting docker daemon"; \
 	sudo systemctl restart docker \
+	) \
 )
diff --git a/services/graylog/data/contentpacks/osparc-custom-content-pack-v2.json b/services/graylog/data/contentpacks/osparc-custom-content-pack-v2.json
index 2bd87c57..3b5627f2 100644
--- a/services/graylog/data/contentpacks/osparc-custom-content-pack-v2.json
+++ b/services/graylog/data/contentpacks/osparc-custom-content-pack-v2.json
@@ -1,9 +1,9 @@
 {
   "v": "1",
-  "id": "daaeea11-bde6-4203-9cfe-6ca2a23ca22e",
-  "rev": 1,
-  "name": "osparc-custom-content-pack-v3",
-  "summary": "osparc-custom-content-pack-v3",
+  "id": "dfaeea11-bde6-4203-9cfe-6ca2a23ca22e",
+  "rev": 42,
+  "name": "osparc-custom-content-pack-v2",
+  "summary": "osparc-custom-content-pack-v2",
   "description": "",
   "vendor": "Osparc team",
   "url": "",
From 7e19a8880a9732356a87d8bab9d594efc21a25e0 Mon Sep 17 00:00:00 2001
From: Dustin Kaiser
Date: Mon, 31 Mar 2025 09:42:57 +0200
Subject: [PATCH 07/10] Make graylog alerts work again

---
 services/graylog/scripts/alerts.template.yaml | 315 +-----------------
 services/graylog/scripts/configure.py         | 187 ++++++++---
 services/graylog/template.env                 |  15 +-
 3 files changed, 143 insertions(+), 374 deletions(-)

diff --git a/services/graylog/scripts/alerts.template.yaml b/services/graylog/scripts/alerts.template.yaml
index 992375ca..9338b57e 100644
--- a/services/graylog/scripts/alerts.template.yaml
+++ b/services/graylog/scripts/alerts.template.yaml
-- title: "${MACHINE_FQDN}: Interactive Study Data too large for AWS S3 and hanging"
-  description: "${MACHINE_FQDN}: Study Hanging"
-  priority: 2
-  config:
-    query: >
-      "EntityTooLarge" AND NOT container_name:/.*graylog_graylog.*/
-    query_parameters: []
-    search_within_ms: 600000
-    execute_every_ms: 600000
-    group_by: []
-    series: []
-    conditions: {}
-    type: aggregation-v1
-  field_spec:
-    source:
-      data_type: string
-      providers:
-        - type: template-v1
-          template: "${source.source}"
-          require_values: false
-    container_name:
-      data_type: string
-      providers:
-        - type: template-v1
-          template: "${source.container_name}"
-          require_values: false
-    full_message:
-      data_type: string
-      providers:
-        - type: template-v1
-          template: "${source.full_message}"
-  key_spec:
-    - source
-    - container_name
-    - full_message
-  notification_settings:
-    grace_period_ms: 0
-    backlog_size: 99
-  alert: true
-- title: "${MACHINE_FQDN}: Writer Is None Error in Webserver"
-  description: "${MACHINE_FQDN}: Alert if \"writer is None\" pops up. Communication with rabbitMQ is disrupted and this will make simcore go crazy"
-  priority: 2
-  config:
-    query: >
-      "writer is None" AND NOT container_name:/.*graylog_graylog.*/
-    query_parameters: []
-    search_within_ms: 600000
-    execute_every_ms: 600000
-    group_by: []
-    series: []
-    conditions: {}
-    type: aggregation-v1
-  field_spec:
-    source:
-      data_type: string
-      providers:
-        - type: template-v1
-          template: "${source.source}"
-          require_values: false
-    container_name:
-      data_type: string
-      providers:
-        - type: template-v1
-          template: "${source.container_name}"
-          require_values: false
-    full_message:
-      data_type: string
-      providers:
-        - type: template-v1
-          template: "${source.full_message}"
-  key_spec:
-    - source
-    - container_name
-    - full_message
-  notification_settings:
-    grace_period_ms: 0
-    backlog_size: 99
-  alert: true
-- title: "${MACHINE_FQDN}: Dynamic Sidecar failed to save with S3TransferError"
-  description: "${MACHINE_FQDN}: Alert if Dynamic Sidecar failed to save with S3TransferError"
-  priority: 2
-  config:
-    query: >
-      "simcore_sdk.node_ports_common.exceptions.S3TransferError: Could not upload file" AND NOT container_name:/.*graylog_graylog.*/
-    query_parameters: []
-    search_within_ms: 600000
-    execute_every_ms: 600000
-    group_by: []
-    series: []
-    conditions: {}
-    type: aggregation-v1
-  field_spec:
-    source:
-      data_type: string
-      providers:
-        - type: template-v1
-          template: "${source.source}"
-          require_values: false
-    container_name:
-      data_type: string
-      providers:
-        - type: template-v1
-          template: "${source.container_name}"
-          require_values: false
-    full_message:
-      data_type: string
-      providers:
-        - type: template-v1
-          template: "${source.full_message}"
-  key_spec:
-    - source
-    - container_name
-    - full_message
-  notification_settings:
-    grace_period_ms: 0
-    backlog_size: 99
-  alert: true
-- title: "${MACHINE_FQDN}: Dynamic Sidecar failed to save - 2"
-  description: "${MACHINE_FQDN}: Alert if Dynamic Sidecar failed to save - 2"
-  priority: 2
-  config:
-    query: >
-      "Could not contact dynamic-sidecar to save service" AND NOT container_name:/.*graylog_graylog.*/
-    query_parameters: []
-    search_within_ms: 60000
-    execute_every_ms: 60000
-    group_by: []
-    series: []
-    conditions: {}
-    type: aggregation-v1
-  field_spec:
-    source:
-      data_type: string
-      providers:
-        - type: template-v1
-          template: "${source.source}"
-          require_values: false
-    container_name:
-      data_type: string
-      providers:
-        - type: template-v1
-          template: "${source.container_name}"
-          require_values: false
-    full_message:
-      data_type: string
-      providers:
-        - type: template-v1
-          template: "${source.full_message}"
-  key_spec:
-    - source
-    - container_name
-    - full_message
-  notification_settings:
-    grace_period_ms: 0
-    backlog_size: 99
-  alert: true
-- title: "${MACHINE_FQDN}: simcore-agent failed pushing docker volume data to backup S3 bucket"
-  description: "${MACHINE_FQDN}: simcore-agent failed pushing docker volume data to backup S3 bucket"
-  priority: 2
-  config:
-    query: >
-      container_name: /.*agent.*/ AND "Shell subprocesses yielded nonzero error code" AND NOT container_name:/.*graylog_graylog.*/
-    query_parameters: []
-    search_within_ms: 600000
-    execute_every_ms: 600000
-    group_by: []
-    series: []
-    conditions: {}
-    type: aggregation-v1
-  field_spec:
-    source:
-      data_type: string
-      providers:
-        - type: template-v1
-          template: "${source.source}"
-          require_values: false
-    container_name:
-      data_type: string
-      providers:
-        - type: template-v1
-          template: "${source.container_name}"
-          require_values: false
-    full_message:
-      data_type: string
-      providers:
-        - type: template-v1
-          template: "${source.full_message}"
-  key_spec:
-    - source
-    - container_name
-    - full_message
-  notification_settings:
-    grace_period_ms: 0
-    backlog_size: 99
-  alert: true
-- title: "${MACHINE_FQDN}: faulty env-var setup"
-  description: "${MACHINE_FQDN}: Look e.g. here https://git.speag.com/oSparc/osparc-ops-environments/-/issues/564"
-  priority: 2
-  config:
-    query: >
-      "unresolved, defaulting to None" AND NOT container_name:/.*graylog_graylog.*/
-    query_parameters: []
-    search_within_ms: 600000
-    execute_every_ms: 600000
-    group_by: []
-    series: []
-    conditions: {}
-    type: aggregation-v1
-  field_spec:
-    source:
-      data_type: string
-      providers:
-        - type: template-v1
-          template: "${source.source}"
-          require_values: false
-    container_name:
-      data_type: string
-      providers:
-        - type: template-v1
-          template: "${source.container_name}"
-          require_values: false
-    full_message:
-      data_type: string
-      providers:
-        - type: template-v1
-          template: "${source.full_message}"
-  key_spec:
-    - source
-    - container_name
-    - full_message
-  notification_settings:
-    grace_period_ms: 0
-    backlog_size: 99
-  alert: true
 - title: "${MACHINE_FQDN}: DOCKER IP POOL EXHAUSTED, no service can start"
   description: "${MACHINE_FQDN}: DOCKER IP POOL EXHAUSTED, no service can start. See: https://github.com/moby/moby/issues/30820"
   priority: 3
   config:
     query: >
       container_name: /.*director-v2.*/ AND "could not find an available, non-overlapping IPv4 address pool among the defaults to assign to the network" AND NOT container_name:/.*graylog_graylog.*/
     query_parameters: []
     search_within_ms: 600000
+    event_limit: 1
     execute_every_ms: 600000
     group_by: []
     series: []
     conditions: {}
     type: aggregation-v1
   field_spec:
     source:
       data_type: string
       providers:
         - type: template-v1
           template: "${source.source}"
           require_values: false
     container_name:
       data_type: string
       providers:
         - type: template-v1
           template: "${source.container_name}"
           require_values: false
     full_message:
       data_type: string
       providers:
         - type: template-v1
           template: "${source.full_message}"
   key_spec:
     - source
     - container_name
     - full_message
   notification_settings:
     grace_period_ms: 0
     backlog_size: 99
   alert: true
-- title: "${MACHINE_FQDN}: Potential hanging dy-sidecar service detected."
-  description: "${MACHINE_FQDN}: Potential hanging dy-sidecar service detected. Human intervention required. Please investigate."
-  priority: 3
-  config:
-    query: >
-      "waiting for manual intervention" AND container_name:/.*director-v2.*/
-    query_parameters: []
-    search_within_ms: 3600000
-    execute_every_ms: 3600000
-    group_by: []
-    series: []
-    conditions: {}
-    type: aggregation-v1
-  field_spec:
-    source:
-      data_type: string
-      providers:
-        - type: template-v1
-          template: "${source.source}"
-          require_values: false
-    container_name:
-      data_type: string
-      providers:
-        - type: template-v1
-          template: "${source.container_name}"
-          require_values: false
-    full_message:
-      data_type: string
-      providers:
-        - type: template-v1
-          template: "${source.full_message}"
-  key_spec:
-    - source
-    - container_name
-  notification_settings:
-    grace_period_ms: 0
-    backlog_size: 99
-  alert: true
-- title: "${MACHINE_FQDN}: Syslogs indicate OOM-Event"
-  description: "${MACHINE_FQDN}: Likely the oom-killer has reaped a container. Please investigate and adjust service limitations."
-  priority: 2
-  config:
-    query: >
-      "Memory cgroup out of memory:"
-    query_parameters: []
-    search_within_ms: 3600000
-    execute_every_ms: 3600000
-    group_by: []
-    series: []
-    conditions: {}
-    type: aggregation-v1
-  field_spec:
-    source:
-      data_type: string
-      providers:
-        - type: template-v1
-          template: "${source.source}"
-          require_values: false
-    container_name:
-      data_type: string
-      providers:
-        - type: template-v1
-          template: "${source.container_name}"
-          require_values: false
-    full_message:
-      data_type: string
-      providers:
-        - type: template-v1
-          template: "${source.full_message}"
-  key_spec:
-    - source
-    - container_name
-  notification_settings:
-    grace_period_ms: 0
-    backlog_size: 99
-  alert: true
 - title: "${MACHINE_FQDN}: Unexpected error with redis lock detected"
   description: "${MACHINE_FQDN}: This error should only occur in unit tests due to very low timings, maybe something happened here"
   priority: 2
   config:
     query: >
       "lock is no longer owned. This is unexpected and requires investigation" AND NOT container_name:/.*graylog_graylog.*/
     query_parameters: []
     search_within_ms: 3600000
+    event_limit: 1
     execute_every_ms: 3600000
     group_by: []
     series: []
     conditions: {}
     type: aggregation-v1
   field_spec:
     source:
       data_type: string
       providers:
         - type: template-v1
           template: "${source.source}"
           require_values: false
     container_name:
       data_type: string
       providers:
         - type: template-v1
           template: "${source.container_name}"
           require_values: false
     full_message:
       data_type: string
       providers:
         - type: template-v1
           template: "${source.full_message}"
   key_spec:
     - source
     - container_name
     - full_message
   notification_settings:
     grace_period_ms: 0
     backlog_size: 99
   alert: true

       "LockNotOwnedError" AND NOT container_name:/.*graylog_graylog.*/
     query_parameters: []
     search_within_ms: 3600000
+    event_limit: 1
     execute_every_ms: 3600000
     group_by: []
     series: []

     query_parameters: []
     search_within_ms: 86400000
     execute_every_ms: 86400000
+    event_limit: 1
     group_by: []
     series: []
     conditions: {}

     query: log_service:/.+payments/ AND (log_level:ERROR OR log_level:WARNING)
     query_parameters: []
     search_within_ms: 600000
+    event_limit: 1
     execute_every_ms: 600000
     group_by: []
     series: []
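Editor's note: these YAML entries only take effect once they are pushed to Graylog's Events API, which is what configure.py (next file in this patch) does. A condensed sketch of that call follows — the host and credentials are placeholders, `alerts.yaml` is the rendered form of the template above, and the real script additionally wires notification IDs and the stream ID into each definition before posting.

```python
import requests
import yaml

GRAYLOG_API = "https://monitoring.example.com/graylog/api"  # placeholder host
AUTH = ("admin", "password")                                # placeholder credentials
HEADERS = {
    "Content-Type": "application/json",
    "Accept": "application/json",
    "X-Requested-By": "cli",  # Graylog requires this header for write requests
}

with open("alerts.yaml") as f:
    alerts = yaml.safe_load(f)

for alert in alerts:
    # Mirrors the ?schedule=true POST that configure.py issues per definition
    resp = requests.post(
        f"{GRAYLOG_API}/events/definitions",
        params={"schedule": "true"},
        headers=HEADERS,
        json=alert,
        auth=AUTH,
        verify=False,
        timeout=30,
    )
    resp.raise_for_status()
```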
diff --git a/services/graylog/scripts/configure.py b/services/graylog/scripts/configure.py
index 37a415d4..73de7917 100644
--- a/services/graylog/scripts/configure.py
+++ b/services/graylog/scripts/configure.py
@@ -1,3 +1,4 @@
+# pylint: disable=invalid-name
 # pylint: disable=logging-fstring-interpolation
 import json
 import logging

 env = Env()
 env.read_env("./../.env", recurse=False)
 
 SUPPORTED_GRAYLOG_MAJOR_VERSION = 6
 
-GRAYLOG_BASE_DOMAIN = "https://monitoring." + env.str("MACHINE_FQDN") + "/graylog"
-GRAYLOG_WAIT_ONLINE_TIMEOUT_SEC = env.int("GRAYLOG_WAIT_ONLINE_TIMEOUT_SEC")
+MACHINE_FQDN = env.str("MACHINE_FQDN")
+GRAYLOG_BASE_DOMAIN = "https://monitoring." + MACHINE_FQDN + "/graylog"
+GRAYLOG_WAIT_ONLINE_TIMEOUT_SEC = env.int("GRAYLOG_WAIT_ONLINE_TIMEOUT_SEC", 30)
 REQUESTS_AUTH = (env.str("SERVICES_USER"), env.str("SERVICES_PASSWORD"))
-
+GRAYLOG_SYSLOG_CAPTURE_PORT = env.int("GRAYLOG_SYSLOG_CAPTURE_PORT")
 GRAYLOG_LOG_MAX_DAYS_IN_STORAGE = env.int("GRAYLOG_LOG_MAX_DAYS_IN_STORAGE")
 GRAYLOG_LOG_MIN_DAYS_IN_STORAGE = env.int("GRAYLOG_LOG_MIN_DAYS_IN_STORAGE")
+GRAYLOG_SLACK_WEBHOOK_URL = env.str("GRAYLOG_SLACK_WEBHOOK_URL")
+GRAYLOG_ALERT_MAIL_ADDRESS = env.str("GRAYLOG_ALERT_MAIL_ADDRESS")
+GRAYLOG_SLACK_WEBHOOK_ICON_URL = env.str("GRAYLOG_SLACK_WEBHOOK_ICON_URL")
+GRAYLOG_SLACK_WEBHOOK_CHANNEL = env.str("GRAYLOG_SLACK_WEBHOOK_CHANNEL")
+assert MACHINE_FQDN
+assert REQUESTS_AUTH
+assert GRAYLOG_SYSLOG_CAPTURE_PORT
+assert GRAYLOG_LOG_MAX_DAYS_IN_STORAGE
+assert GRAYLOG_LOG_MIN_DAYS_IN_STORAGE
 
 
 @retry(
     stop=stop_after_attempt(GRAYLOG_WAIT_ONLINE_TIMEOUT_SEC / 5),
     wait=wait_fixed(5),
     retry=retry_if_exception_type(HTTPError),
     before=before_log(logger, logging.INFO),
 )
 def wait_graylog_is_online():

@@ -97,28 +108,23 @@ def get_graylog_inputs(_session, _headers, _url):
 
 
 def configure_email_notifications(_session, _headers):
-    _url = (
-        "https://monitoring."
-        + env.str("MACHINE_FQDN")
-        + "/graylog/api/events/notifications"
-    )
+    _url = GRAYLOG_BASE_DOMAIN + "/api/events/notifications"
     _r = _session.get(_url, headers=_headers, verify=False)
     if (
         len(
             [
                 noti
                 for noti in _r.json()["notifications"]
-                if noti["title"]
-                == "Graylog " + env.str("MACHINE_FQDN") + " mail notification"
+                if noti["title"] == "Graylog " + MACHINE_FQDN + " mail notification"
             ]
         )
         == 0
     ):
         raw_data = (
             '{"title":"Graylog '
-            + env.str("MACHINE_FQDN")
+            + MACHINE_FQDN
             + ' mail notification","description":"","config":{"sender":"","subject":"Graylog event notification: ${event_definition_title}","user_recipients":[],"email_recipients":["'
-            + env.str("GRAYLOG_ALERT_MAIL_ADDRESS")
+            + GRAYLOG_ALERT_MAIL_ADDRESS
             + '"],"type":"email-notification-v1"}}'
         )
         _r = _session.post(_url, headers=_headers, data=raw_data, verify=False)

     else:
         print("Graylog Mail Notification already present - skipping...")
     # Keeping notification ID
     _r = _session.get(_url, headers=_headers, verify=False)
     _mail_notification_id = [
         noti
         for noti in _r.json()["notifications"]
-        if noti["title"] == "Graylog " + env.str("MACHINE_FQDN") + " mail notification"
+        if noti["title"] == "Graylog " + MACHINE_FQDN + " mail notification"
     ][0]["id"]
     return _mail_notification_id
+def configure_slack_notification_channel(_session, _hed) -> str:
+    # Configure sending Slack notifications
+    if GRAYLOG_SLACK_WEBHOOK_URL != "":
+        assert GRAYLOG_SLACK_WEBHOOK_CHANNEL
+        assert GRAYLOG_SLACK_WEBHOOK_ICON_URL
+        print(
+            f"Starting Graylog Slack Channel Setup. Assuming:\nGRAYLOG_SLACK_WEBHOOK_URL={GRAYLOG_SLACK_WEBHOOK_URL}\nGRAYLOG_SLACK_WEBHOOK_CHANNEL={GRAYLOG_SLACK_WEBHOOK_CHANNEL}\nGRAYLOG_SLACK_WEBHOOK_ICON_URL={GRAYLOG_SLACK_WEBHOOK_ICON_URL}"
+        )
+        _url = (
+            "https://monitoring." + MACHINE_FQDN + "/graylog/api/events/notifications"
+        )
+        _r = _session.get(_url, headers=_hed, verify=False)
+        if (
+            len(
+                [
+                    noti
+                    for noti in _r.json()["notifications"]
+                    if noti["title"]
+                    == "Graylog " + MACHINE_FQDN + " Slack notification"
+                ]
+            )
+            == 0
+        ):
+            raw_data = (
+                '{"title":"Graylog '
+                + MACHINE_FQDN
+                + """ Slack notification","description":"Slack notification","config": {
+                "color": "#FF0000",
+                "webhook_url": \""""
+                + GRAYLOG_SLACK_WEBHOOK_URL
+                + """\",
+                "channel": "#"""
+                + GRAYLOG_SLACK_WEBHOOK_CHANNEL
+                + """\",
+                "custom_message":"--- [Event Definition] ---------------------------\\nTitle: ${event_definition_title}\\nType: ${event_definition_type}\\n--- [Event] --------------------------------------\\nTimestamp: ${event.timestamp}\\nMessage: ${event.message}\\nSource: ${event.source}\\nKey: ${event.key}\\nPriority: ${event.priority}\\nAlert: ${event.alert}\\nTimestamp Processing: ${event.timestamp}\\nTimerange Start: ${event.timerange_start}\\nTimerange End: ${event.timerange_end}\\nEvent Fields:\\n${foreach event.fields field}\\n${field.key}: ${field.value}\\n${end}\\n${if backlog}\\n--- [Backlog] ------------------------------------\\nLast messages accounting for this alert:\\n${foreach backlog message}\\n"""
+                + "https://monitoring."
+                + MACHINE_FQDN
+                + "/graylog/messages"
+                + """/${message.index}/${message.id}\\n${end}${end}\\n",
+                "user_name": "Graylog",
+                "notify_channel": true,
+                "link_names": false,
+                "icon_url": \""""
+                + GRAYLOG_SLACK_WEBHOOK_ICON_URL
+                + """\",
+                "icon_emoji": "",
+                "backlog_size": 5,
+                "type": "slack-notification-v1"}}"""
+            )
+            _r = _session.post(
+                _url, headers=_hed, verify=False, data=raw_data.encode("utf-8")
+            )
+            if _r.status_code == 200:
+                print("Slack Notification added with success !")
+                # Keeping notification ID
+                _r = _session.get(_url, headers=_hed, verify=False)
+                _slack_notification_id = [
+                    noti
+                    for noti in _r.json()["notifications"]
+                    if noti["title"]
+                    == "Graylog " + MACHINE_FQDN + " Slack notification"
+                ][0]["id"]
+                return _slack_notification_id
+            print(
+                "Error while adding the Slack Notification. Status code of the request : "
+                + str(_r.status_code)
+                + " "
+                + _r.text
+            )
+            sys.exit(1)
+        print("Graylog Slack Notification already present - skipping...")
+        _r = _session.get(_url, headers=_hed, verify=False)
+        _slack_notification_id = [
+            noti
+            for noti in _r.json()["notifications"]
+            if noti["title"] == "Graylog " + MACHINE_FQDN + " Slack notification"
+        ][0]["id"]
+        return _slack_notification_id
+    return ""
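Editor's note: the Slack payload above is assembled by string concatenation, which is fragile around quoting and escaping. A sketch of the same payload built as a dict and serialized with `json.dumps` — the field names are taken from the code above, but this is an editorial alternative, not what the patch ships (the `custom_message` template string is elided here for brevity).

```python
import json


def slack_notification_payload(
    machine_fqdn: str, webhook_url: str, channel: str, icon_url: str
) -> str:
    # json.dumps handles quoting/escaping and non-string types (bool, int) for us
    payload = {
        "title": f"Graylog {machine_fqdn} Slack notification",
        "description": "Slack notification",
        "config": {
            "color": "#FF0000",
            "webhook_url": webhook_url,
            "channel": f"#{channel}",
            "custom_message": "...",  # elided: same template string as above
            "user_name": "Graylog",
            "notify_channel": True,
            "link_names": False,
            "icon_url": icon_url,
            "icon_emoji": "",
            "backlog_size": 5,
            "type": "slack-notification-v1",
        },
    }
    return json.dumps(payload)
```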
 def configure_log_retention(_session, _headers):
     _url = (
-        "https://monitoring."
-        + env.str("MACHINE_FQDN")
-        + "/graylog/api/system/indices/index_sets"
+        "https://monitoring." + MACHINE_FQDN + "/graylog/api/system/indices/index_sets"
     )
     _r = _session.get(_url, headers=_headers, verify=False)
     index_of_interest = [

         "index_lifetime_min": f"P{GRAYLOG_LOG_MIN_DAYS_IN_STORAGE}D",
         "index_lifetime_max": f"P{GRAYLOG_LOG_MAX_DAYS_IN_STORAGE}D",
     }
     _url = (
-        "https://monitoring."
-        + env.str("MACHINE_FQDN")
-        + "/graylog/api/system/indices/index_sets"
+        "https://monitoring." + MACHINE_FQDN + "/graylog/api/system/indices/index_sets"
     )
     raw_data = json.dumps(index_of_interest)
     _r = _session.put(
         print(
             "Error while updating the log retention time. Status code of the request : "
             + str(_r.status_code)
             + " "
             + _r.text
         )
 
+
+def configure_syslog_capture(_session, _headers):
     try:
         _url = (
-            "https://monitoring."
-            + env.str("MACHINE_FQDN")
-            + "/graylog/api/system/cluster/nodes"
+            "https://monitoring." + MACHINE_FQDN + "/graylog/api/system/cluster/nodes"
         )
         _r = _session.get(_url, headers=_headers, verify=False).json()
         assert len(_r["nodes"]) == 1
         node_uuid = _r["nodes"][0]["node_id"]
         #
-        _url = (
-            "https://monitoring."
-            + env.str("MACHINE_FQDN")
-            + "/graylog/api/system/inputs"
-        )
+        _url = "https://monitoring." + MACHINE_FQDN + "/graylog/api/system/inputs"
         r2 = _session.get(_url, headers=_headers, verify=False).json()
         if len([i for i in r2["inputs"] if i["title"] == "Syslog"]) == 0:
             raw_data = (
                 '{"title":"Syslog","type":"org.graylog2.inputs.syslog.udp.SyslogUDPInput","configuration":{"bind_address":"0.0.0.0","port":'
-                + env.str("GRAYLOG_SYSLOG_CAPTURE_PORT")
+                + str(GRAYLOG_SYSLOG_CAPTURE_PORT)
                 + ',"recv_buffer_size":262144,"number_worker_threads":8,"override_source":null,"force_rdns":false,"allow_override_date":true,"store_full_message":true,"expand_structured_data":false},"global":true,"node":"'
                 + node_uuid
                 + '"}'
             )
             _r = _session.post(_url, headers=_headers, data=raw_data, verify=False)
             input_id = _r.json()["id"]
             sleep(0.3)
             _url = (
                 "https://monitoring."
-                + env.str("MACHINE_FQDN")
+                + MACHINE_FQDN
                 + "/graylog/api/system/inputs/"
                 + input_id
                 + "/extractors"
             )
     except (EnvError, AssertionError):
         print("Error setting up graylog syslog capturing.")
 
 
-def configure_alerts(_mail_notification_id, _session, _headers):
+def configure_alerts(
+    _session,
+    _headers,
+    _mail_notification_id: str | None = None,
+    _slack_notification_id: str | None = None,
+):
     print("Configuring Graylog Alerts...")
     with open("alerts.yaml") as f:
         data = yaml.load(f, Loader=SafeLoader)
-    _url = "https://monitoring." + env.str("MACHINE_FQDN") + "/graylog/api/streams"
+    _url = "https://monitoring." + MACHINE_FQDN + "/graylog/api/streams"
     _r = _session.get(_url, headers=_headers, verify=False)
     if _r.status_code == 200:
         streams_list = _r.json()["streams"]

         print(
             "Could not determine ID of stream containing all events. Is graylog misconfigured? Exiting with error!"
         )
         sys.exit(1)
-    _url = (
-        "https://monitoring."
-        + env.str("MACHINE_FQDN")
-        + "/graylog/api/events/definitions"
-    )
+    _url = "https://monitoring." + MACHINE_FQDN + "/graylog/api/events/definitions"
+    # Deleting existing alerts - this ensures idempotency
     _r = _session.get(
         _url, headers=_headers, params={"per_page": 2500}, verify=False
     )

             sys.exit(1)
     for i in data:
         i["notifications"] = []
-        if env.str("GRAYLOG_ALERT_MAIL_ADDRESS"):
+        if GRAYLOG_ALERT_MAIL_ADDRESS != "" and _mail_notification_id:
             i["notifications"] += [{"notification_id": str(_mail_notification_id)}]
+        if GRAYLOG_SLACK_WEBHOOK_URL != "" and _slack_notification_id:
+            i["notifications"] += [{"notification_id": str(_slack_notification_id)}]
         i["config"]["streams"] = [str(stream_id_for_all_messages)]
         _url = (
             "https://monitoring."
-            + env.str("MACHINE_FQDN")
+            + MACHINE_FQDN
             + "/graylog/api/events/definitions?schedule=true"
         )
         resp = _session.post(_url, headers=_headers, json=i, verify=False)

 if __name__ == "__main__":

     session = requests.Session()
     session.verify = False
-    session.auth = (
-        env.str("SERVICES_USER"),
-        env.str("SERVICES_PASSWORD"),
-    )  # Graylog username is always "admin"
+    session.auth = REQUESTS_AUTH  # Graylog username is always "admin"
     hed = {
         "Content-Type": "application/json",
         "Accept": "application/json",
         "X-Requested-By": "cli",
     }
     url = "https://monitoring."
+ MACHINE_FQDN + "/graylog/api/system/inputs"
     r = get_graylog_inputs(session, hed, url)
 
     configure_log_retention(session, hed)
-
+    configure_syslog_capture(session, hed)
     # Configure sending email notifications
-    if env.str("GRAYLOG_ALERT_MAIL_ADDRESS") != "":
+    mail_notification_id = None
+    slack_notification_id = None
+    if GRAYLOG_ALERT_MAIL_ADDRESS != "":
         mail_notification_id = configure_email_notifications(session, hed)
+    if GRAYLOG_SLACK_WEBHOOK_URL != "":
+        slack_notification_id = configure_slack_notification_channel(session, hed)
+    if mail_notification_id or slack_notification_id:
         # Configure Alerts
-        configure_alerts(mail_notification_id, session, hed)
-    # Configure log retention time
+        configure_alerts(
+            session,
+            hed,
+            _mail_notification_id=mail_notification_id,
+            _slack_notification_id=slack_notification_id,
+        )
 
     # content pack will create GELF UDP Input
     # NOTE: When you introduce changes, revision number increase is mandatory
     # Autoloader is only good at loading content packs first time but not updating / adding new ones to existing.
     # https://community.graylog.org/t/update-content-packs-using-autoloading-functionality/6205
     # https://github.com/Graylog2/graylog2-server/issues/14672
-    content_pack_base_url = (
-        "https://monitoring." + env.str("MACHINE_FQDN") + "/graylog/api"
-    )
+    content_pack_base_url = "https://monitoring." + MACHINE_FQDN + "/graylog/api"
     configure_content_packs(session, hed, content_pack_base_url)
diff --git a/services/graylog/template.env b/services/graylog/template.env
index 01bcf8e6..121f3e97 100644
--- a/services/graylog/template.env
+++ b/services/graylog/template.env
@@ -1,29 +1,16 @@
 GRAYLOG_PASSWORD_SECRET=${GRAYLOG_PASSWORD_SECRET}
 GRAYLOG_ROOT_PASSWORD_SHA2=${GRAYLOG_ROOT_PASSWORD_SHA2}
-GRAYLOG_HTTP_EXTERNAL_URI=https://${MONITORING_DOMAIN}/graylog/
 MONITORING_DOMAIN=${MONITORING_DOMAIN}
+GRAYLOG_HTTP_EXTERNAL_URI=https://${MONITORING_DOMAIN}/graylog/
 GRAYLOG_ALERT_MAIL_ADDRESS=${GRAYLOG_ALERT_MAIL_ADDRESS}
-GRAYLOG_TRANSPORT_EMAIL_ENABLED=${GRAYLOG_TRANSPORT_EMAIL_ENABLED}
-GRAYLOG_TRANSPORT_EMAIL_HOSTNAME=${SMTP_HOST}
-GRAYLOG_TRANSPORT_EMAIL_PORT=${SMTP_PORT}
-GRAYLOG_TRANSPORT_EMAIL_USE_AUTH=${GRAYLOG_TRANSPORT_EMAIL_USE_AUTH}
-GRAYLOG_TRANSPORT_EMAIL_USE_TLS=${GRAYLOG_TRANSPORT_EMAIL_USE_TLS}
-GRAYLOG_TRANSPORT_EMAIL_USE_SSL=${GRAYLOG_TRANSPORT_EMAIL_USE_SSL}
-GRAYLOG_TRANSPORT_EMAIL_AUTH_USERNAME=${SMTP_USERNAME}
-GRAYLOG_TRANSPORT_EMAIL_AUTH_PASSWORD=${SMTP_PASSWORD}
-GRAYLOG_TRANSPORT_EMAIL_SUBJECT_PREFIX=[graylog-${CLUSTER_NAME}]
-GRAYLOG_TRANSPORT_EMAIL_FROM_EMAIL=${SMTP_USERNAME}
 GRAYLOG_SYSLOG_CAPTURE_PORT=${GRAYLOG_SYSLOG_CAPTURE_PORT}
 SERVICES_PASSWORD=${SERVICES_PASSWORD}
 SERVICES_USER=${SERVICES_USER}
-
 MACHINE_FQDN=${MACHINE_FQDN}
 GRAYLOG_SLACK_WEBHOOK_URL=${GRAYLOG_SLACK_WEBHOOK_URL}
 GRAYLOG_SLACK_WEBHOOK_ICON_URL=${GRAYLOG_SLACK_WEBHOOK_ICON_URL}
 GRAYLOG_SLACK_WEBHOOK_CHANNEL=${GRAYLOG_SLACK_WEBHOOK_CHANNEL}
-
 GRAYLOG_WAIT_ONLINE_TIMEOUT_SEC=${GRAYLOG_WAIT_ONLINE_TIMEOUT_SEC}
 GRAYLOG_LOG_MAX_DAYS_IN_STORAGE=${GRAYLOG_LOG_MAX_DAYS_IN_STORAGE}
 GRAYLOG_LOG_MIN_DAYS_IN_STORAGE=${GRAYLOG_LOG_MIN_DAYS_IN_STORAGE}
-
 PUBLIC_NETWORK=${PUBLIC_NETWORK}
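Editor's note: template.env is a `${VAR}` placeholder file; this patch series does not show how it is rendered into a concrete `.env`, so the following is just one plausible way to do it. `string.Template.substitute` raises `KeyError` on any missing variable, failing fast much like `requiredEnv` does in the gotmpl values files earlier in this series.

```python
# Hypothetical renderer for template.env; the file paths are assumptions.
import os
from string import Template

with open("services/graylog/template.env") as f:
    # substitute() (unlike safe_substitute()) errors out on unset variables
    rendered = Template(f.read()).substitute(os.environ)

with open(".env", "w") as f:
    f.write(rendered)
```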
From 18385f7bc698cfe8979ebfade12fe1d69e5913bc Mon Sep 17 00:00:00 2001
From: Dustin Kaiser
Date: Thu, 3 Apr 2025 10:53:44 +0200
Subject: [PATCH 08/10] Change request @YuryHrytsuk bump content pack rev

---
 .../data/contentpacks/osparc-custom-content-pack-v2.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/services/graylog/data/contentpacks/osparc-custom-content-pack-v2.json b/services/graylog/data/contentpacks/osparc-custom-content-pack-v2.json
index 3b5627f2..ad948470 100644
--- a/services/graylog/data/contentpacks/osparc-custom-content-pack-v2.json
+++ b/services/graylog/data/contentpacks/osparc-custom-content-pack-v2.json
@@ -1,7 +1,7 @@
 {
   "v": "1",
   "id": "dfaeea11-bde6-4203-9cfe-6ca2a23ca22e",
-  "rev": 42,
+  "rev": 43,
   "name": "osparc-custom-content-pack-v2",
   "summary": "osparc-custom-content-pack-v2",
   "description": "",

From fec5e6d1d72e91cc20c3761474c37657aa09f683 Mon Sep 17 00:00:00 2001
From: Dustin Kaiser <8209087+mrnicegyu11@users.noreply.github.com>
Date: Thu, 3 Apr 2025 13:19:13 +0200
Subject: [PATCH 09/10] graylog/configure.py add type annotations

---
 services/graylog/scripts/configure.py | 112 ++++++++++++++------------
 1 file changed, 61 insertions(+), 51 deletions(-)

diff --git a/services/graylog/scripts/configure.py b/services/graylog/scripts/configure.py
index 73de7917..788e318c 100644
--- a/services/graylog/scripts/configure.py
+++ b/services/graylog/scripts/configure.py
@@ -1,16 +1,19 @@
 # pylint: disable=invalid-name
 # pylint: disable=logging-fstring-interpolation
+
 import json
 import logging
 import os
 import sys
 import warnings
 from time import sleep
+from typing import Any, Dict, Optional, Tuple
 
 import requests
 import yaml
 from environs import Env, EnvError
 from requests.exceptions import HTTPError
+from requests.sessions import Session
 from tenacity import (
     before_log,
     retry,
@@ -22,8 +25,7 @@
 from yaml.loader import SafeLoader
 
 logging.basicConfig(level="INFO")
-logger = logging.getLogger()
-
+logger: logging.Logger = logging.getLogger()
 warnings.filterwarnings(
     "ignore",
     ".*Adding certificate verification is strongly advised.*",

 env = Env()
 env.read_env("./../.env", recurse=False)
 
-SUPPORTED_GRAYLOG_MAJOR_VERSION = 6
-
-MACHINE_FQDN = env.str("MACHINE_FQDN")
-GRAYLOG_BASE_DOMAIN = "https://monitoring." + MACHINE_FQDN + "/graylog"
-GRAYLOG_WAIT_ONLINE_TIMEOUT_SEC = env.int("GRAYLOG_WAIT_ONLINE_TIMEOUT_SEC", 30)
-REQUESTS_AUTH = (env.str("SERVICES_USER"), env.str("SERVICES_PASSWORD"))
-GRAYLOG_SYSLOG_CAPTURE_PORT = env.int("GRAYLOG_SYSLOG_CAPTURE_PORT")
-GRAYLOG_LOG_MAX_DAYS_IN_STORAGE = env.int("GRAYLOG_LOG_MAX_DAYS_IN_STORAGE")
-GRAYLOG_LOG_MIN_DAYS_IN_STORAGE = env.int("GRAYLOG_LOG_MIN_DAYS_IN_STORAGE")
-GRAYLOG_SLACK_WEBHOOK_URL = env.str("GRAYLOG_SLACK_WEBHOOK_URL")
-GRAYLOG_ALERT_MAIL_ADDRESS = env.str("GRAYLOG_ALERT_MAIL_ADDRESS")
-GRAYLOG_SLACK_WEBHOOK_ICON_URL = env.str("GRAYLOG_SLACK_WEBHOOK_ICON_URL")
-GRAYLOG_SLACK_WEBHOOK_CHANNEL = env.str("GRAYLOG_SLACK_WEBHOOK_CHANNEL")
+SUPPORTED_GRAYLOG_MAJOR_VERSION: int = 6
+MACHINE_FQDN: str = env.str("MACHINE_FQDN")
+GRAYLOG_BASE_DOMAIN: str = f"https://monitoring.{MACHINE_FQDN}/graylog"
+GRAYLOG_WAIT_ONLINE_TIMEOUT_SEC: int = env.int("GRAYLOG_WAIT_ONLINE_TIMEOUT_SEC", 30)
+REQUESTS_AUTH: Tuple[str, str] = (
+    env.str("SERVICES_USER"),
+    env.str("SERVICES_PASSWORD"),
+)
+GRAYLOG_SYSLOG_CAPTURE_PORT: int = env.int("GRAYLOG_SYSLOG_CAPTURE_PORT")
+GRAYLOG_LOG_MAX_DAYS_IN_STORAGE: int = env.int("GRAYLOG_LOG_MAX_DAYS_IN_STORAGE")
+GRAYLOG_LOG_MIN_DAYS_IN_STORAGE: int = env.int("GRAYLOG_LOG_MIN_DAYS_IN_STORAGE")
+GRAYLOG_SLACK_WEBHOOK_URL: str = env.str("GRAYLOG_SLACK_WEBHOOK_URL")
+GRAYLOG_ALERT_MAIL_ADDRESS: str = env.str("GRAYLOG_ALERT_MAIL_ADDRESS")
+GRAYLOG_SLACK_WEBHOOK_ICON_URL: str = env.str("GRAYLOG_SLACK_WEBHOOK_ICON_URL")
+GRAYLOG_SLACK_WEBHOOK_CHANNEL: str = env.str("GRAYLOG_SLACK_WEBHOOK_CHANNEL")
+
 assert MACHINE_FQDN
 assert REQUESTS_AUTH
 assert GRAYLOG_SYSLOG_CAPTURE_PORT
 assert GRAYLOG_LOG_MAX_DAYS_IN_STORAGE
 assert GRAYLOG_LOG_MIN_DAYS_IN_STORAGE
 
 
 @retry(
-    stop=stop_after_attempt(GRAYLOG_WAIT_ONLINE_TIMEOUT_SEC / 5),
+    stop=stop_after_attempt(GRAYLOG_WAIT_ONLINE_TIMEOUT_SEC // 5),
     wait=wait_fixed(5),
     retry=retry_if_exception_type(HTTPError),
     before=before_log(logger, logging.INFO),
 )
-def wait_graylog_is_online():
-    _r = requests.get(
+def wait_graylog_is_online() -> None:
+    _r: requests.Response = requests.get(
         GRAYLOG_BASE_DOMAIN + "/api/system",
         auth=REQUESTS_AUTH,
         verify=False,
         timeout=10,
     )
-
     if _r.status_code == 401:
         raise TypeError(f"Graylog unauthorized HTTP response: {_r}")
-
     _r.raise_for_status()
     logger.info("Graylog is online")
 
 
-def validate_graylog_version_is_supported():
-    _r = requests.get(
-        GRAYLOG_BASE_DOMAIN + "/api/system", auth=REQUESTS_AUTH, verify=False
+def validate_graylog_version_is_supported() -> None:
+    _r: requests.Response = requests.get(
+        GRAYLOG_BASE_DOMAIN + "/api/system",
+        auth=REQUESTS_AUTH,
+        verify=False,
+        timeout=30,
     )
     _r.raise_for_status()
-
-    graylog_version = _r.json()["version"]
-    major_version = int(graylog_version.split(".")[0])
-
+    graylog_version: str = _r.json()["version"]
+    major_version: int = int(graylog_version.split(".")[0])
     if major_version != SUPPORTED_GRAYLOG_MAJOR_VERSION:
         raise TypeError(
             f"Graylog major version {major_version} is not supported by this script. "
" @@ -90,14 +94,16 @@ def validate_graylog_version_is_supported(): @retry(stop=stop_after_attempt(5), wait=wait_random(min=1, max=10)) -def get_graylog_inputs(_session, _headers, _url): +def get_graylog_inputs( + _session: Session, _headers: Dict[str, str], _url: str +) -> requests.Response: # We check if graylog has inputs, if not we add a new one - _r = _session.get(_url, headers=_headers, verify=False) + _r: requests.Response = _session.get(_url, headers=_headers, verify=False) # DEBUG if _r.status_code == 200: print("Successfully send GET /api/system/inputs") return _r - error_message = ( + error_message: str = ( "Error while sending GET /api/system/inputs. Status code of the request : " + str(_r.status_code) + " " @@ -107,7 +113,7 @@ def get_graylog_inputs(_session, _headers, _url): raise RuntimeError(error_message) -def configure_email_notifications(_session, _headers): +def configure_email_notifications(_session: requests.Session, _headers: dict) -> str: _url = GRAYLOG_BASE_DOMAIN + "/api/events/notifications" _r = _session.get(_url, headers=_headers, verify=False) if ( @@ -137,7 +143,7 @@ def configure_email_notifications(_session, _headers): + " " + _r.text ) - sys.exit(1) + sys.exit(os.EX_USAGE) else: print("Graylog Mail Notification already present - skipping...") # Keeping notification ID @@ -151,7 +157,7 @@ def configure_email_notifications(_session, _headers): return _mail_notification_id -def configure_slack_notification_channel(_session, _hed) -> str: +def configure_slack_notification_channel(_session: requests.Session, _hed: dict) -> str: # Configure sending Slack notifications if GRAYLOG_SLACK_WEBHOOK_URL != "": assert GRAYLOG_SLACK_WEBHOOK_CHANNEL @@ -220,7 +226,7 @@ def configure_slack_notification_channel(_session, _hed) -> str: + " " + r.text ) - sys.exit(1) + sys.exit(os.EX_USAGE) print("Graylog Slack Notification already present - skipping...") _r = _session.get(_url, headers=_hed, verify=False) _slack_notification_id = [ @@ -232,7 +238,7 @@ def configure_slack_notification_channel(_session, _hed) -> str: return "" -def configure_log_retention(_session, _headers): +def configure_log_retention(_session: requests.Session, _headers: dict) -> None: _url = ( "https://monitoring." + MACHINE_FQDN + "/graylog/api/system/indices/index_sets" ) @@ -272,7 +278,7 @@ def configure_log_retention(_session, _headers): ) -def configure_syslog_capture(_session, _headers): +def configure_syslog_capture(_session: requests.Session, _headers: dict) -> None: try: _url = ( "https://monitoring." + MACHINE_FQDN + "/graylog/api/system/cluster/nodes" @@ -313,11 +319,11 @@ def configure_syslog_capture(_session, _headers): def configure_alerts( - _session, - _headers, - _mail_notification_id: str | None = None, - _slack_notification_id: str | None = None, -): + _session: requests.Session, + _headers: dict, + _mail_notification_id: Optional[str] = None, + _slack_notification_id: Optional[str] = None, +) -> None: print("Configuring Graylog Alerts...") with open("alerts.yaml") as f: data = yaml.load(f, Loader=SafeLoader) @@ -336,7 +342,7 @@ def configure_alerts( print( "Could not determine ID of stream containing all events. Is graylog misconfigured? Exiting with error!" ) - sys.exit(1) + sys.exit(os.EX_USAGE) _url = "https://monitoring." 
+ MACHINE_FQDN + "/graylog/api/events/definitions"
     # Deleting existing alerts - this ensures idempotency
     _r = _session.get(
         _url, headers=_headers, params={"per_page": 2500}, verify=False
     )

             print(resp.status_code)
             print(resp.json())
-            sys.exit(1)
+            sys.exit(os.EX_USAGE)
     for i in data:
         i["notifications"] = []
         if GRAYLOG_ALERT_MAIL_ADDRESS != "" and _mail_notification_id:

         else:
             print("Could not add alert. Failure:", resp.status_code)
             print(resp.json())
-            sys.exit(1)
+            sys.exit(os.EX_USAGE)
 
 
-def configure_content_packs(_session, _headers, base_url):
-    def get_installation(content_pack):
+def configure_content_packs(
+    _session: Session, _headers: Dict[str, str], base_url: str
+) -> None:
+    def get_installation(content_pack: Dict[str, Any]) -> Optional[Dict[str, Any]]:
         logger.debug(f"Getting installations for content pack {content_pack['id']}")
         resp = _session.get(
             base_url + "/system/content_packs/" + content_pack["id"] + "/installations"
         )

         return installations[0] if installations else None
 
-    def delete_installation(content_pack, installation):
+    def delete_installation(
+        content_pack: Dict[str, Any], installation: Dict[str, Any]
+    ) -> None:
         logger.debug(f"Deleting installation {installation['_id']}")
 
         resp = _session.delete(

             f"Error while deleting installation {installation['_id']} for content pack {content_pack['id']}"
         )
 
-    def create_content_pack_revision(content_pack):
+    def create_content_pack_revision(content_pack: Dict[str, Any]) -> None:
         logger.debug(
             f"Uploading content pack {content_pack['id']} revision {content_pack['rev']}"
         )

             f"Unexpected {resp.status_code=} while uploading content pack {content_pack['id']} revision {content_pack['rev']}. Error: {resp.text}"
         )
 
-    def install_content_pack_revision(content_pack):
+    def install_content_pack_revision(content_pack: Dict[str, Any]) -> None:
         logger.debug(
             f"Installing content pack {content_pack['id']} revision {content_pack['rev']}"
         )

     for file in os.listdir("../data/contentpacks"):
         with open(f"../data/contentpacks/{file}") as f:
-            logger.debug(f"Configuring content pack {f.name}")
+            logger.debug(f"Configuring content pack {file}")
             content_pack = json.loads(f.read())
 
             create_content_pack_revision(content_pack)

                 "This revision of content pack is already installed. Nothing to do..."
             )
 
-        logging.info(f"{f.name} content pack has been configured")
+        logging.info(f"{file} content pack has been configured")
 
 
 if __name__ == "__main__":

From de6cc1c74e9d71c63872faeae0fb1505d0c03732 Mon Sep 17 00:00:00 2001
From: Dustin Kaiser <8209087+mrnicegyu11@users.noreply.github.com>
Date: Thu, 3 Apr 2025 13:28:36 +0200
Subject: [PATCH 10/10] Fix minor makefile bug

---
 scripts/common.Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/common.Makefile b/scripts/common.Makefile
index a5e36327..294dfc3c 100644
--- a/scripts/common.Makefile
+++ b/scripts/common.Makefile
@@ -274,7 +274,7 @@ $(REPO_BASE_DIR)/.venv/bin/activate:
 	$(REPO_BASE_DIR)/.venv/bin/pip3 install --upgrade pip wheel setuptools
 	$(REPO_BASE_DIR)/.venv/bin/pip3 install jinja2 j2cli[yaml] typer
 	@echo "To activate the venv, execute 'source $(REPO_BASE_DIR)/.venv/bin/activate'"
-PHONY: .venv
+.PHONY: .venv
 .venv: $(REPO_BASE_DIR)/.venv/bin/activate ## Creates a python virtual environment with dev tools (pip, pylint, ...)
 .PHONY: venv
 venv: $(REPO_BASE_DIR)/.venv/bin/activate ## Creates a python virtual environment with dev tools (pip, pylint, ...)
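Editor's note on PATCH 09's `/` → `//` change in stop_after_attempt: `env.int(...) / 5` yields a float (`30 / 5 == 6.0`), which happens to work at runtime but does not match the `int` that tenacity's API is annotated with; `//` keeps the types honest. A small self-contained check of the resulting retry behavior (the flaky function and attempt counter are invented for the demo):

```python
from tenacity import RetryError, retry, stop_after_attempt, wait_fixed

attempts = 0


@retry(stop=stop_after_attempt(30 // 5), wait=wait_fixed(0))
def flaky() -> None:
    global attempts
    attempts += 1
    raise RuntimeError("still failing")


try:
    flaky()
except RetryError:
    # tenacity wraps the last exception in RetryError once attempts are exhausted
    print(f"gave up after {attempts} attempts")  # gave up after 6 attempts
```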