diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 91c1bea4..33120588 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -79,6 +79,10 @@ repos: hooks: - id: shellcheck name: Shell scripts conform to shellcheck + - repo: https://github.com/antonbabenko/pre-commit-terraform + rev: v1.89.1 # Get the latest from: https://github.com/antonbabenko/pre-commit-terraform/releases + hooks: + - id: terraform_fmt - repo: local hooks: - id: run-pylint diff --git a/scripts/common.Makefile b/scripts/common.Makefile index 4a34bd83..a5e36327 100644 --- a/scripts/common.Makefile +++ b/scripts/common.Makefile @@ -16,7 +16,7 @@ IS_WIN := $(strip $(if $(or $(IS_LINUX),$(IS_OSX),$(IS_WSL)),,$(OS))) $(if $(IS_WSL2),,$(if $(IS_WSL),$(error WSL1 is not supported in all recipes. Use WSL2 instead. Follow instructions in README.md),)) # Check that a valid location to a config file is set. -REPO_BASE_DIR := $(shell git rev-parse --show-toplevel) +REPO_BASE_DIR := $(abspath $(dir $(abspath $(lastword $(MAKEFILE_LIST))))..) export REPO_CONFIG_LOCATION := $(shell cat $(REPO_BASE_DIR)/.config.location) $(if $(REPO_CONFIG_LOCATION),,$(error The location of the repo.config file given in .config.location is invalid. Aborting)) $(if $(shell cat $(REPO_CONFIG_LOCATION)),,$(error The location of the repo.config file given in .config.location is invalid. 
Aborting)) diff --git a/services/graylog/scripts/configure.py b/services/graylog/scripts/configure.py index 8cb91ed9..37a415d4 100644 --- a/services/graylog/scripts/configure.py +++ b/services/graylog/scripts/configure.py @@ -49,7 +49,10 @@ ) def wait_graylog_is_online(): _r = requests.get( - GRAYLOG_BASE_DOMAIN + "/api/system", auth=REQUESTS_AUTH, verify=False + GRAYLOG_BASE_DOMAIN + "/api/system", + auth=REQUESTS_AUTH, + verify=False, + timeout=10, ) if _r.status_code == 401: diff --git a/services/monitoring/Makefile b/services/monitoring/Makefile index 71be12e8..9ff5b242 100644 --- a/services/monitoring/Makefile +++ b/services/monitoring/Makefile @@ -4,7 +4,7 @@ # STACK_NAME defaults to name of the current directory. Should not to be changed if you follow GitOps operating procedures. STACK_NAME = $(notdir $(CURDIR)) TEMP_COMPOSE=.stack.${STACK_NAME}.yaml -REPO_BASE_DIR := $(shell git rev-parse --show-toplevel) +REPO_BASE_DIR := $(abspath $(dir $(abspath $(lastword $(MAKEFILE_LIST))))../..) 
# TARGETS --------------------------------------------------
 include ${REPO_BASE_DIR}/scripts/common.Makefile
@@ -76,28 +76,12 @@ ${TEMP_COMPOSE}-local: docker-compose.yml docker-compose.letsencrypt.dns.yml con
 docker-compose.yml: docker-compose.yml.j2 .env .venv pgsql_query_exporter_config.yaml
 	$(call jinja,$<,.env,$@)
 
-.PHONY: update.grafana.pwd
-update.grafana.pwd: .env ## Change grafana pwd
-	@set -o allexport; \
-	source $(REPO_CONFIG_LOCATION); \
-	set +o allexport; \
-	grafanacontainerid=$$(docker ps | grep grafana | awk '{print $$1;}');\
-	docker exec -ti $$grafanacontainerid grafana-cli admin reset-admin-password $$TRAEFIK_PASSWORD
-
-
-.PHONY: grafana-export
-grafana-export: .venv## Export the remote grafana dashboards and datasources TO YOUR LOCAL MACHINE
-	@cd grafana/scripts;\
-	source ${REPO_BASE_DIR}/.venv/bin/activate;\
-	pip install -r requirements.txt > /dev/null 2>&1;\
-	python3 export.py;
-
 .PHONY: grafana-import
-grafana-import: grafana/assets .venv ## Imports AND OVERWRITES the remote grafana dashboards and datasources FROM YOUR LOCAL MACHINE
-	@cd grafana/scripts;\
-	source ${REPO_BASE_DIR}/.venv/bin/activate;\
-	pip install -r requirements.txt > /dev/null 2>&1;\
-	python3 import.py
+# Runs terraform-plan and then terraform-apply in grafana/ as separate recipe lines,
+# so a failure of either step aborts the target with a non-zero exit status.
+grafana-import: grafana/assets ## Imports the remote grafana dashboards and datasources FROM YOUR LOCAL MACHINE
+	@$(MAKE) --directory=${REPO_BASE_DIR}/services/monitoring/grafana terraform-plan
+	@$(MAKE) --directory=${REPO_BASE_DIR}/services/monitoring/grafana terraform-apply
 
 .PHONY: config.grafana.dashboards
 config.grafana.dashboards: grafana/templates-provisioning/dashboards/simcore/Metrics-dashboard.json.j2 .venv #Configure dashboards for aws or dalco clusters
diff --git a/services/monitoring/grafana/Makefile b/services/monitoring/grafana/Makefile
new file mode 100644
index 00000000..149d5f3f
--- /dev/null
+++ b/services/monitoring/grafana/Makefile
@@ -0,0 +1,75 @@
+.DEFAULT_GOAL := help
+REPO_BASE_DIR := $(abspath $(dir $(abspath $(lastword $(MAKEFILE_LIST))))../../..)
+include ${REPO_BASE_DIR}/scripts/common.Makefile
+
+
+
+# Internal VARIABLES ------------------------------------------------
+# File target of the `terraform init` rule below; terraform-plan/-apply depend on it.
+TF_STATE_FILE := terraform/.terraform/terraform.tfstate
+
+# Render terraform/main.tf from its jinja template with the values of the repo config.
+# No blanks after the commas: $(call) keeps whitespace as part of each argument.
+terraform/main.tf: terraform/main.tf.j2 .venv $(REPO_CONFIG_LOCATION)
+	# generate $@
+	@$(call jinja,$<,$(REPO_CONFIG_LOCATION),$@)
+
+.PHONY: terraform-init
+terraform-init: $(TF_STATE_FILE) ## init terraform
+
+# Run `terraform init`; for a non-local backend the access/secret keys from the repo config are passed as -backend-config.
+$(TF_STATE_FILE): $(REPO_CONFIG_LOCATION) terraform/main.tf
+	# terraform init
+	@set -a; source $<; set +a; \
+	if [ "$${GRAFANA_TERRAFORM_STATE_BACKEND_TYPE}" = "local" ]; then \
+		terraform -chdir=./terraform init; \
+	else \
+		terraform -chdir=./terraform init -backend-config="access_key=$${TF_GRAFANA_STATE_BACKEND_AWS_ACCESS_KEY_ID}" -backend-config="secret_key=$${TF_GRAFANA_STATE_BACKEND_AWS_SECRET_ACCESS_KEY}"; \
+	fi
+
+# Fails on purpose when the plan file is missing: it is only produced by `make terraform-plan`.
+terraform/plan.cache:
+	@echo "$@ file not found. Run 'make terraform-plan' to generate it."
+	@exit 1
+
+.PHONY: terraform-plan
+terraform-plan: $(REPO_CONFIG_LOCATION) $(TF_STATE_FILE) ensure-grafana-online ## terraform plan
+	# terraform plan
+	@set -a; source $<; set +a; \
+	terraform -chdir=./terraform plan -out=plan.cache
+
+.PHONY: terraform-apply
+terraform-apply: $(REPO_CONFIG_LOCATION) terraform/plan.cache $(TF_STATE_FILE) ensure-grafana-online ## terraform apply
+	# terraform apply
+	@set -a; source $<; set +a; \
+	terraform -chdir=./terraform apply plan.cache
+
+# Poll TF_VAR_GRAFANA_URL (at most 10 attempts, 5s apart) until it answers with a 2xx/3xx status.
+.PHONY: ensure-grafana-online
+ensure-grafana-online:
+	@set -o allexport; \
+	source $(REPO_CONFIG_LOCATION); \
+	set +o allexport; \
+	url=$${TF_VAR_GRAFANA_URL}; \
+	if [ -z "$$url" ]; then \
+		echo "TF_VAR_GRAFANA_URL is empty. Aborting."; \
+		exit 1; \
+	fi; \
+	echo "Waiting for grafana at $$url to become reachable..."; \
+	attempts=0; \
+	max_attempts=10; \
+	while [ $$attempts -lt $$max_attempts ]; do \
+		status_code=$$(curl -k -o /dev/null -s -w "%{http_code}" --max-time 10 "$$url"); \
+		if [ "$$status_code" -ge 200 ] && [ "$$status_code" -lt 400 ]; then \
+			echo "Grafana is online"; \
+			break; \
+		else \
+			echo "Grafana still unreachable, waiting 5s for grafana to become reachable... (Attempt $$((attempts+1)))"; \
+			sleep 5; \
+			attempts=$$((attempts + 1)); \
+		fi; \
+	done; \
+	if [ $$attempts -eq $$max_attempts ]; then \
+		echo "Max attempts reached, Grafana is still unreachable."; \
+		exit 1; \
+	fi;
diff --git a/services/monitoring/grafana/scripts/.gitignore b/services/monitoring/grafana/scripts/.gitignore
deleted file mode 100644
index 99fa7e12..00000000
--- a/services/monitoring/grafana/scripts/.gitignore
+++ /dev/null
@@ -1,2 +0,0 @@
-venv
-venv/*
diff --git a/services/monitoring/grafana/scripts/README.MD b/services/monitoring/grafana/scripts/README.MD
deleted file mode 100644
index 7383674b..00000000
--- a/services/monitoring/grafana/scripts/README.MD
+++ /dev/null
@@ -1,26 +0,0 @@
-# Purpose
-
-This python scripts export and import all the folders/datasources/dashboards of a grafana instance and is used to handle the different configuration of all Osparc Grafana instances.
-At each update on grafana dashboards and/or datasources, the export script has to be run, and the changes pushed to the git repo.
- -# Installation - -* Create a virtualenvironment with python 3.X and activate it -```console -python3 -m venv venv -source venv/bin/activate -``` -* Install the dependancies -```console -pip install -r requirements.txt -``` - -* To export everything (has to be run each time something is updated on Grafana) -```console -python export.py -``` - -* To import everything -```console -python import.py -``` diff --git a/services/monitoring/grafana/scripts/export.py b/services/monitoring/grafana/scripts/export.py deleted file mode 100644 index c403607e..00000000 --- a/services/monitoring/grafana/scripts/export.py +++ /dev/null @@ -1,150 +0,0 @@ -# pylint: disable=pointless-string-statement,too-many-statements -import json -import os -import shutil -import sys -import warnings -from pathlib import Path - -import requests -import typer -from environs import Env - -repo_config_location = os.getenv("REPO_CONFIG_LOCATION") -if not repo_config_location: - print("ERROR: Env-Var REPO_CONFIG_LOCATION not set.") - sys.exit(1) -if "\n" in repo_config_location: - repo_config_location = repo_config_location.split("\n")[0] - -env = Env() -env.read_env(repo_config_location, recurse=False) - -warnings.filterwarnings( - "ignore", - ".*Adding certificate verification is strongly advised.*", -) - - -def main(foldername: str = ""): - # We delete the previous files - if foldername == "": - directory = "./../provisioning/exported/" + env.str("MACHINE_FQDN") - - export_dir = Path.cwd() / ".." / "provisioning/exported" - export_dir.mkdir(parents=True, exist_ok=True) - else: - directory = foldername - if os.path.exists(directory): - shutil.rmtree(directory) - - os.mkdir(directory) - - # We export the Datasources - print("**************** Export datasources *******************") - os.mkdir(directory + "/datasources") - url = "https://monitoring." 
+ env.str("MACHINE_FQDN") + "/grafana/api/" - session = requests.Session() - session.auth = (env.str("SERVICES_USER"), env.str("SERVICES_PASSWORD")) - hed = {"Content-Type": "application/json"} - - r = session.get(url + "datasources", headers=hed, verify=False) - for datasource in r.json(): - r_datasource = session.get( - url + "datasources/" + str(datasource["id"]), headers=hed, verify=False - ) - with open( - directory + "/datasources/" + str(datasource["id"]) + ".json", "w" - ) as outfile: - # If the datasource is Prometheus, we remove the login/password credentials - json_data = r_datasource.json() - if json_data["type"] == "prometheus": - json_data["basicAuthUser"] = "" - json_data["basicAuthPassword"] = "" - json.dump(json_data, outfile, sort_keys=True, indent=2) - print("Export datasource " + json_data["name"]) - - # We export the dashboards - print("**************** Export dashboards *******************") - os.mkdir(directory + "/dashboards") - r = session.get(url + "search?query=%", headers=hed, verify=False) - for dashboard in r.json(): - r_dashboard = session.get( - url + "dashboards/uid/" + str(dashboard["uid"]), headers=hed, verify=False - ) - if r_dashboard.json()["meta"]["isFolder"] is not True: - if ( - os.path.exists( - directory - + "/dashboards/" - + r_dashboard.json()["meta"]["folderTitle"] - ) - == False - ): - os.mkdir( - directory - + "/dashboards/" - + r_dashboard.json()["meta"]["folderTitle"] - ) - - with open( - directory - + "/dashboards/" - + r_dashboard.json()["meta"]["folderTitle"] - + "/" - + str(r_dashboard.json()["dashboard"]["title"]) - + ".json", - "w", - ) as outfile: - print("Export Dashboard " + r_dashboard.json()["dashboard"]["title"]) - exported_dashboard = r_dashboard.json() - exported_dashboard["meta"].pop("updated", None) - exported_dashboard["meta"].pop("created", None) - exported_dashboard["meta"].pop("folderId", None) - exported_dashboard["meta"].pop("folderUid", None) - exported_dashboard["meta"].pop("folderUrl", None) 
- exported_dashboard["meta"].pop("version", None) - exported_dashboard.pop("id", None) - exported_dashboard["dashboard"].pop("id", None) - exported_dashboard.pop("iteration", None) - json.dump(exported_dashboard, outfile, sort_keys=True, indent=2) - - # Export Alerts - print("**************** Export alerts *******************") - if not os.path.exists(directory + "/alerts/"): - os.mkdir(directory + "/alerts/") - r = session.get(url + "ruler/grafana/api/v1/rules", headers=hed, verify=False) - for alert in r.json()["ops"]: - with open(directory + "/alerts/" + alert["name"] + ".json", "w") as outfile: - print("Export Alert " + alert["name"]) - # Remove UID if present - for rule_iter in range(len(alert["rules"])): - alert["rules"][rule_iter]["grafana_alert"].pop("uid", None) - # Remove orgId - alert["rules"][rule_iter]["grafana_alert"].pop("orgId", None) - # Remove id - alert["rules"][rule_iter]["grafana_alert"].pop("id", None) - # Remove id - alert["rules"][rule_iter]["grafana_alert"].pop("namespace_id", None) - # Remove id - alert["rules"][rule_iter]["grafana_alert"].pop("namespace_uid", None) - if ( - str(env.str("MACHINE_FQDN") + " - ") - in alert["rules"][rule_iter]["grafana_alert"]["title"] - ): - alert["rules"][rule_iter]["grafana_alert"]["title"] = alert[ - "rules" - ][rule_iter]["grafana_alert"]["title"].replace( - str(env.str("MACHINE_FQDN") + " - "), "" - ) - json.dump(alert, outfile, sort_keys=True, indent=2) - - -if __name__ == "__main__": - """ - Imports grafana dashboard from dumped json files via the Grafana API - - If --foldername is used, the data is taken from this location. - Otherwise, the default ops-repo folder is assumed. 
- """ - typer.run(main) diff --git a/services/monitoring/grafana/scripts/import.py b/services/monitoring/grafana/scripts/import.py deleted file mode 100644 index 796f848b..00000000 --- a/services/monitoring/grafana/scripts/import.py +++ /dev/null @@ -1,521 +0,0 @@ -# pylint: disable=invalid-name,pointless-string-statement,too-many-statements,too-many-branches -import glob -import json -import os -import sys -import time -import warnings -from pathlib import Path - -import requests -import typer -import yaml -from environs import Env - -warnings.filterwarnings( - "ignore", - ".*Adding certificate verification is strongly advised.*", -) - -repo_config_location = os.getenv("REPO_CONFIG_LOCATION") -assert repo_config_location is not None -if "\n" in repo_config_location: - repo_config_location = repo_config_location.split("\n")[0] - -env = Env() -env.read_env(repo_config_location, recurse=False) - - -def dictionary_traversal_datasource_uid_replacement( - _input, datasourceType, replacementID -): - """ - Traverses Dictionary, potentially with lists of dicts, recursively. - If a "datasource" dict tis found, the uid value is replaced. 
- """ - for key, value in _input.items(): - # If we found the target dict "datasource" - if ( - key == "datasource" - and "uid" in value - and "type" in value - and isinstance(value, dict) - ): - if value["type"] == datasourceType: - value["uid"] = replacementID - # Recursively step down if value is a dict - elif isinstance(value, dict): - # if "datasource" in value: - # print("v: ",value) - dictionary_traversal_datasource_uid_replacement( - value, datasourceType, replacementID - ) - # Iterate list of dict - elif isinstance(value, list) and len(value) > 0 and isinstance(value[0], dict): - for i in value: - dictionary_traversal_datasource_uid_replacement( - i, datasourceType, replacementID - ) - # Ignore endpoints of recursive traversal - else: - time.sleep(0) # Do nothing - - -def subsituteDatasources( - directoriesDatasources, - configFilePath, - dashboardTitle, - jsonObject, -): - if configFilePath.is_file(): - with open(str(configFilePath)) as cfile: - configYaml = yaml.load(cfile, Loader=yaml.FullLoader) - else: - print("ERROR: Config file missing at: " + str(configFilePath)) - sys.exit(1) - ###### - ###### - listOfDatasources = [] - for file in directoriesDatasources: - with open(file) as jsonFile: - jsonObjectDatasource = json.load(jsonFile) - objectToKeepTrack = { - "name": jsonObjectDatasource["name"], - "uid": jsonObjectDatasource["uid"], - "type": jsonObjectDatasource["type"], - } - listOfDatasources.append(objectToKeepTrack) - - listOfDatasourcesWhichAreUnique = [ - i - for i in listOfDatasources - if [j["type"] for j in listOfDatasources].count(i["type"]) == 1 - ] - listOfDatasourcesWhichAreNotUnique = [ - i - for i in listOfDatasources - if [j["type"] for j in listOfDatasources].count(i["type"]) > 1 - ] - # - ####### - # - for presentDatasource in listOfDatasourcesWhichAreUnique: - # print("DEBUG: Subsituting unqiue type ",presentDatasource["type"]) - dictionary_traversal_datasource_uid_replacement( - jsonObject, presentDatasource["type"], 
presentDatasource["uid"] - ) - for nonUniqueDatasource in listOfDatasourcesWhichAreNotUnique: - assert nonUniqueDatasource["type"] in { - i["type"] for i in configYaml["defaults"] - } - defaultNameForCurrent = [ - i["datasource_name"] - for i in configYaml["defaults"] - if i["type"] == nonUniqueDatasource["type"] - ][0] - if nonUniqueDatasource["name"] == defaultNameForCurrent: - # print("DEBUG: Subsituting non-unqiue type ",nonUniqueDatasource["type"], " as given in defaults.") - dictionary_traversal_datasource_uid_replacement( - jsonObject, nonUniqueDatasource["type"], nonUniqueDatasource["uid"] - ) - # Subsitute custom dashboard mappings now - if "datasources2dashboards" in configYaml: - if len(configYaml["datasources2dashboards"]) > 0: - if dashboardTitle in [ - i["dashboard_name"] for i in configYaml["datasources2dashboards"] - ]: - currentConfigMapping = [ - i - for i in configYaml["datasources2dashboards"] - if i["dashboard_name"] == dashboardTitle - ][0]["mapping"] - for j in currentConfigMapping.copy(): - j["uid"] = [ - i["uid"] - for i in listOfDatasources - if i["name"] == j["datasource_name"] - ][0] - # print("DEBUG: Subsituting custom type ",j["type"], " as given in config.") - dictionary_traversal_datasource_uid_replacement( - jsonObject, j["type"], j["uid"] - ) - - -def main(foldername: str = "", overwrite: bool = True): - # Get mail adress for alerts: - grafanaAlertingMailTarget = env.str("GRAFANA_ALERTS_MAIL", default=None) - grafanaAlertingSlackTarget = env.str("GRAFANA_ALERTS_SLACK", default=None) - - # We first import the datasources - url = "https://monitoring." 
+ env.str("MACHINE_FQDN") + "/grafana/api/" - # - # - print("**************** GRAFANA PROVISIONING *******************") - print("Assuming deployment", env.str("MACHINE_FQDN"), "at", url) - if grafanaAlertingMailTarget: - print("Assuming alerting mail address", grafanaAlertingMailTarget) - if grafanaAlertingSlackTarget: - print("Assuming alerting slack webhook", grafanaAlertingSlackTarget) - # - # - session = requests.Session() - session.auth = (env.str("SERVICES_USER"), env.str("SERVICES_PASSWORD")) - hed = {"Content-Type": "application/json"} - - if foldername == "": - directoriesDatasources = glob.glob("./../assets/datasources/*") - directoriesDatasources += glob.glob("./../assets/shared" + "/datasources/*") - else: - directoriesDatasources = glob.glob(foldername + "/datasources/*") - # - print("**************** Add datasources *******************") - if overwrite: - # Get all datasources - # print("Deleting datasource " + str(i["uid"]) + " - " + str(i["name"])) - r = session.get(url + "datasources", headers=hed, verify=False) - if r.status_code > 300: - print("Recieved non-200 status code upon import: ", str(r.status_code)) - print("ABORTING!") - print(r.json()) - sys.exit(1) - for i in r.json(): - print("Response: ", r.status_code) - r = session.delete( - url + "datasources/uid/" + str(i["uid"]), headers=hed, verify=False - ) - listOfDatasources = [] - for file in directoriesDatasources: - with open(file) as jsonFile: - jsonObjectDatasource = json.load(jsonFile) - jsonFile.close() - - # We add the credentials for the PGSQL Databases with the secureJsonData field - # DK Mar2023 : THIS IS CURRENTLY NOT USED - if jsonObjectDatasource["type"].lower() == "postgres": - print("postgres datasource is currently not supported (deprecated)") - sys.exit(1) - elif jsonObjectDatasource["type"] == "Prometheus": - jsonObjectDatasource["basicAuthUser"] = env.str("SERVICES_USER") - jsonObjectDatasource["basicAuthPassword"] = env.str("SERVICES_PASSWORD") - 
jsonObjectDatasource["url"] = "http://prometheus:" + env.str( - "MONITORING_PROMETHEUS_PORT" - ) - r = session.post( - url + "datasources", json=jsonObjectDatasource, headers=hed, verify=False - ) - objectToKeepTrack = { - "name": jsonObjectDatasource["name"], - "uid": jsonObjectDatasource["uid"], - "type": jsonObjectDatasource["type"], - } - listOfDatasources.append(objectToKeepTrack) - # print(r.json()) - print("Import of datasource " + jsonObjectDatasource["name"]) - if r.status_code != 200: - print("Received non-200 status code upon import: ", str(r.status_code)) - print("JSON file failed uploading.") - # - # Second, we import the folders structure - directoriesData = [] - if foldername == "": - directoriesDashboards = glob.glob("./../assets/dashboards/*") - directoriesDashboards = [ - *directoriesDashboards, - *list(glob.glob("./../assets/shared" + "/dashboards/*")), - ] - else: - directoriesDashboards = glob.glob(foldername + "/dashboards/*") - for directory in directoriesDashboards: - if ".json" in directory: - print( - "Error: Looking for folders but got json file. Is your folder structure organized properly?\nABORTING" - ) - sys.exit(1) - for file in glob.glob(directory + "/*"): - with open(file) as jsonFile: - jsonObject = json.load( - jsonFile - ) # Assert the file is valid json, otherwise will give an error - break - directoriesData.append(os.path.basename(os.path.normpath(directory))) - directoriesData = list(set(directoriesData)) - - print("Deleting alerts") - r = session.get(url + "v1/provisioning/alert-rules", headers=hed, verify=False) - # Status code is 404 if no alerts are present. 
Handling it: - if r.status_code != 404: - for alert in r.json(): - deleteResponse = session.delete( - url + f"v1/provisioning/alert-rules/{alert['uid']}", - headers=hed, - verify=False, - ) - if deleteResponse.status_code < 200 or deleteResponse.status_code > 204: - print( - "Received status code not 200-204 upon delete: ", - str(deleteResponse.status_code), - ) - print("ABORTING!") - sys.exit(1) - - # We add them in grafana - print("**************** Add folders *******************") - if overwrite: - print("Deleting all folders and dashboards") - # Get all datasources - r = session.get(url + "folders", headers=hed, verify=False) - for i in r.json(): - r = session.delete( - url + "folders/" + str(i["uid"]), headers=hed, verify=False - ) - print("Adding folders") - for directoryData in directoriesData: - r = session.post( - url + "folders", json={"title": directoryData}, headers=hed, verify=False - ) - if r.status_code != 200: - print("Received non-200 status code upon import: ", str(r.status_code)) - print("JSON file failed uploading:") - print(json.dumps(directoryData, sort_keys=True, indent=2)) - print("**************** Add dashboards *******************") - # - # - configFilePath = Path("./../assets/datasources2dashboards.yaml") - - # Finally we import the dashboards - for directory in directoriesDashboards: - for file in glob.glob(directory + "/*.json"): - with open(file) as jsonFile: - jsonObject = json.load(jsonFile) - # We set the folder ID - r = session.get(url + "folders", headers=hed, verify=False) - folderID = None - for i in r.json(): - if i["title"] == file.split("/")[-2]: - folderID = i["id"] - break - assert folderID - print("Add dashboard " + jsonObject["dashboard"]["title"]) - # Subsitute datasource UID - # pylint: disable=too-many-function-args - subsituteDatasources( - directoriesDatasources, - configFilePath, - jsonObject["dashboard"]["title"], - jsonObject, - ) - dashboard = {"Dashboard": jsonObject["dashboard"]} - # DEBUGPRINT - # with 
open(".out.temp","w") as ofile: - # ofile.write(json.dumps(jsonObject,indent=2)) - - dashboard["Dashboard"]["id"] = "null" - dashboard["overwrite"] = True - dashboard["folderId"] = folderID - r = session.post( - url + "dashboards/db", json=dashboard, headers=hed, verify=False - ) - - if r.status_code != 200: - print( - "Received non-200 status code upon import: ", str(r.status_code) - ) - # print(r.json()) - print("JSON file failed uploading.") - sys.exit() - - # IMPORT ALERTS - # 1. Provision Alerting User - print("**************** Add Target Mail Bucket / Slack Webhook *******************") - if grafanaAlertingMailTarget: - mailAddressProvisioningJSON = ( - '''{ - "template_files": {}, - "alertmanager_config": { - "route": { - "receiver": "''' - + grafanaAlertingMailTarget.split("@")[0] - + '''", - "continue": false, - "group_by": [], - "routes": [] - }, - "templates": null, - "receivers": [{ - "name": "''' - + grafanaAlertingMailTarget.split("@")[0] - + '''", - "grafana_managed_receiver_configs": [{ - "name": "''' - + grafanaAlertingMailTarget.split("@")[0] - + '''", - "type": "email", - "disableResolveMessage": false, - "settings": { - "addresses": "''' - + grafanaAlertingMailTarget - + """" - }, - "secureFields": {} - }] - }] - } - }""" - ) - else: - slackWebhookProvisioningJSON = ( - '''{ - "template_files": {}, - "alertmanager_config": { - "route": { - "receiver": "''' - + "slackwebhook" - + '''", - "continue": false, - "group_by": [], - "routes": [] - }, - "templates": null, - "receivers": [{ - "name": "''' - + "slackwebhook" - + '''", - "grafana_managed_receiver_configs": [{ - "name": "''' - + "slackwebhook" - + '''", - "type": "slack", - "disableResolveMessage": false, - "settings": { - "username": "''' - + "grafana-alert" - + '''" - }, - "secureSettings": - { - "url": "''' - + str(grafanaAlertingSlackTarget) - + """", - "token": "" - } - }] - }] - } - }""" - ) - r = session.post( - url + "alertmanager/grafana/config/api/v1/alerts", - json=json.loads( - 
mailAddressProvisioningJSON - if grafanaAlertingMailTarget - else slackWebhookProvisioningJSON - ), - verify=False, - headers=hed, - ) - if r.status_code != 202: - print( - "Received non-202 status code upon mail address provisioning: ", - str(r.status_code), - ) - print( - "POST to URL", url + "alertmanager/grafana/config/api/v1/alerts", "failed" - ) - print("JSON file failed uploading:") - print( - mailAddressProvisioningJSON - if grafanaAlertingMailTarget - else slackWebhookProvisioningJSON - ) - print("Response Error:") - print(r.json()) - sys.exit(1) - # 2. Import alerts - print("**************** Add alerts *******************") - # Finally we import the dashboards - if foldername == "": - directoriesAlerts = glob.glob("./../assets/alerts/*") - directoriesAlerts += glob.glob("./../assets/shared" + "/alerts/*") - else: - directoriesAlerts = glob.glob(foldername + "/alerts/*") - # - print("***************** Add folders ******************") - r = session.get( - url + "folders", - headers=hed, - verify=False, - ) - ops_uid = ( - r.json()[ - next((i for i, item in enumerate(r.json()) if item["title"] == "ops"), None) - ]["uid"] - if next((i for i, item in enumerate(r.json()) if item["title"] == "ops"), None) - else None - ) - if not ops_uid: - print("Could not find required grafana folder named `ops`. 
Aborting.") - sys.exit(1) - print(f"Info: Adding alerts always to folder `ops`, determined with uid {ops_uid}.") - if r.status_code != 200: - print( - "Received non-200 status code upon alerts folder creation: ", - str(r.status_code), - ) - sys.exit(1) - # - for file in directoriesAlerts: - with open(file) as jsonFile: - jsonObject = json.load(jsonFile) - # pylint: disable=too-many-nested-blocks - for rule in jsonObject["rules"]: - # Add deployment name to alert name - rule["grafana_alert"]["title"] = ( - env.str("MACHINE_FQDN") + " - " + rule["grafana_alert"]["title"] - ) - # Subsitute UIDs of datasources - # pylint: disable=too-many-function-args - subsituteDatasources( - directoriesDatasources, - configFilePath, - jsonObject["name"], - rule, - ) - # Propagate subsituted UIDs to other fields - for i in rule["grafana_alert"]["data"]: - if "datasourceUid" in i: - if "model" in i: - if "datasource" in i["model"]: - if "type" in i["model"]["datasource"]: - if ( - i["model"]["datasource"]["type"] - != "grafana-expression" - ): - i["datasourceUid"] = i["model"]["datasource"][ - "uid" - ] - # Remove UID if present - rule["grafana_alert"].pop("uid", None) - - print("Add alerts " + jsonObject["name"]) - - r = session.post( - url + f"ruler/grafana/api/v1/rules/{ops_uid}", - json=jsonObject, - headers=hed, - verify=False, - ) - if r.status_code != 202: - print("Received non-202 status code upon import: ", str(r.status_code)) - print(r.json()) - print("JSON file failed uploading.") - sys.exit() - - -if __name__ == "__main__": - """ - Imports grafana dashboard from dumped json files via the Grafana API - - If --foldername is used, the data is taken from this location. - Otherwise, the default ops-repo folder is assumed. 
-    """
-    typer.run(main)
diff --git a/services/monitoring/grafana/scripts/requirements.txt b/services/monitoring/grafana/scripts/requirements.txt
deleted file mode 100644
index a94c2e09..00000000
--- a/services/monitoring/grafana/scripts/requirements.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-environs
-requests
-typer
-pyyaml
diff --git a/services/monitoring/grafana/terraform/.gitignore b/services/monitoring/grafana/terraform/.gitignore
new file mode 100644
index 00000000..a99c01d2
--- /dev/null
+++ b/services/monitoring/grafana/terraform/.gitignore
@@ -0,0 +1,6 @@
+main.tf
+.terraform/**
+.terraform.lock.hcl
+plan.cache
+terraform.tfstate
+terraform.tfstate.backup
diff --git a/services/monitoring/grafana/terraform/.terraform-version b/services/monitoring/grafana/terraform/.terraform-version
new file mode 100644
index 00000000..4dae2985
--- /dev/null
+++ b/services/monitoring/grafana/terraform/.terraform-version
@@ -0,0 +1 @@
+1.10.1
diff --git a/services/monitoring/grafana/terraform/dashboards.tf b/services/monitoring/grafana/terraform/dashboards.tf
new file mode 100644
index 00000000..ae73bbc4
--- /dev/null
+++ b/services/monitoring/grafana/terraform/dashboards.tf
@@ -0,0 +1,36 @@
+// Import subfolders using an external script
+data "external" "subfolders" {
+  program = ["bash", "${path.module}/tf_helper_list_subfolders.sh", "${path.module}/../assets/shared/dashboards"]
+}
+
+// Local mappings of folder names to their paths
+locals {
+  folder_map = data.external.subfolders.result
+}
+
+// Create Grafana folders for each subfolder
+resource "grafana_folder" "subfolders" {
+  for_each = local.folder_map
+
+  title = each.key // Use each.key to access each folder's name
+}
+
+// Function to list all JSON files within a directory
+data "external" "dashboard_files" {
+  for_each = local.folder_map
+
+  program = ["bash", "${path.module}/tf_helper_list_json_files_in_folder.sh", "${path.module}/../assets/shared/dashboards/${each.key}"]
+}
+
+// Create Grafana dashboards from JSON files
+resource "grafana_dashboard" "dashboards" {
+  for_each = toset(flatten([
+    for folder_name in keys(local.folder_map) : [
+      for file in values(data.external.dashboard_files[folder_name].result) : "${folder_name},${file}"
+    ]]
+  ))
+  # CSV approach
+  # NOTE: each key is split on "," below, so folder names and file paths must not contain commas
+  config_json = jsonencode(jsondecode(file(split(",", each.value)[1])).dashboard)
+  folder      = grafana_folder.subfolders[split(",", each.value)[0]].id
+}
diff --git a/services/monitoring/grafana/terraform/datasources.tf b/services/monitoring/grafana/terraform/datasources.tf
new file mode 100644
index 00000000..ecfd9686
--- /dev/null
+++ b/services/monitoring/grafana/terraform/datasources.tf
@@ -0,0 +1,17 @@
+
+resource "grafana_data_source" "prometheusfederation" {
+  type               = "prometheus"
+  name               = "prometheus-federation"
+  url                = var.PROMETHEUS_FEDERATION_URL
+  basic_auth_enabled = false
+  is_default         = true
+}
+
+resource "grafana_data_source" "prometheuscatchall" {
+  type               = "prometheus"
+  name               = "prometheus-catchall"
+  url                = var.PROMETHEUS_CATCHALL_URL
+  basic_auth_enabled = false
+  is_default         = false
+  uid                = "RmZEr52nz"
+}
diff --git a/services/monitoring/grafana/terraform/main.tf.j2 b/services/monitoring/grafana/terraform/main.tf.j2
new file mode 100644
index 00000000..f64ec34b
--- /dev/null
+++ b/services/monitoring/grafana/terraform/main.tf.j2
@@ -0,0 +1,41 @@
+terraform {
+  required_version = "~> 1.10.1"
+  {% if GRAFANA_TERRAFORM_STATE_BACKEND_TYPE == "local" %}
+  backend "{{ GRAFANA_TERRAFORM_STATE_BACKEND_TYPE }}" {
+    path = "terraform.tfstate" # Specify the path for the state file, can be a different path if needed
+  }
+  {% endif %}
+  {% if GRAFANA_TERRAFORM_STATE_BACKEND_TYPE == "s3" %}
+  backend "s3" {
+    key = "{{ GRAFANA_TERRAFORM_STATE_BACKEND_S3_KEY }}"
+    use_lockfile = true
+    encrypt = false
+    bucket = "{{ TF_GRAFANA_STATE_BACKEND_S3_BUCKET_NAME }}"
+    region = "{{ TF_GRAFANA_STATE_BACKEND_AWS_REGION }}"
+    {% if GRAFANA_TERRAFORM_STATE_BACKEND_S3_ENDPOINT %}
+    skip_credentials_validation = true
+    skip_requesting_account_id = true
+    skip_metadata_api_check = true
+    use_path_style = true
+    endpoints = {
+      s3 = "{{ GRAFANA_TERRAFORM_STATE_BACKEND_S3_ENDPOINT }}"
+    }
+    {% endif %}
+  }
+  {% endif %}
+  required_providers {
+    grafana = {
+      source = "grafana/grafana"
+      version = "~> 3.13"
+    }
+    random = {
+      source = "hashicorp/random"
+      version = "~> 3.1"
+    }
+  }
+}
+
+provider "grafana" {
+  url = var.GRAFANA_URL
+  auth = var.GRAFANA_AUTH
+}
diff --git a/services/monitoring/grafana/terraform/tf_helper_list_json_files_in_folder.sh b/services/monitoring/grafana/terraform/tf_helper_list_json_files_in_folder.sh
new file mode 100755
index 00000000..65f0267b
--- /dev/null
+++ b/services/monitoring/grafana/terraform/tf_helper_list_json_files_in_folder.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+# Helper for the `external` data source "dashboard_files" in dashboards.tf.
+# Usage: tf_helper_list_json_files_in_folder.sh DIRECTORY
+# Prints a JSON object mapping the basename (without .json) of every *.json file
+# located directly inside DIRECTORY to that file's path.
+set -euo pipefail
+
+DIRECTORY="${1:?usage: $0 DIRECTORY}"
+
+# One path per line -> JSON object. jq performs the string escaping, and an empty
+# directory yields {} instead of an entry with an empty key and an empty path.
+find "$DIRECTORY" -mindepth 1 -maxdepth 1 -type f -name '*.json' | jq -Rn '
+  [inputs
+   | select(length > 0)
+   | {key: (split("/") | last | rtrimstr(".json")), value: .}]
+  | from_entries'
diff --git a/services/monitoring/grafana/terraform/tf_helper_list_subfolders.sh b/services/monitoring/grafana/terraform/tf_helper_list_subfolders.sh
new file mode 100755
index 00000000..2b61270a
--- /dev/null
+++ b/services/monitoring/grafana/terraform/tf_helper_list_subfolders.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+# Helper for the `external` data source "subfolders" in dashboards.tf.
+# Usage: tf_helper_list_subfolders.sh DIRECTORY
+# Prints a JSON object whose keys and values are both the names of the
+# directories located directly inside DIRECTORY.
+set -euo pipefail
+
+DIRECTORY="${1:?usage: $0 DIRECTORY}"
+
+# `find` emits one name per line and the names are not split on blanks, so a
+# folder called "My Dashboards" stays a single entry.
+find "$DIRECTORY" -mindepth 1 -maxdepth 1 -type d -exec basename {} \; | jq -Rn '
+  [inputs | select(. != "") | {key: ., value: .}] | from_entries'
diff --git a/services/monitoring/grafana/terraform/variables.tf b/services/monitoring/grafana/terraform/variables.tf
new file mode 100644
index 00000000..fd1d8ed7
--- /dev/null
+++ b/services/monitoring/grafana/terraform/variables.tf
@@ -0,0 +1,20 @@
+variable "GRAFANA_URL" {
+  description = "grafana_url"
+  type        = string
+  sensitive   = false
+}
+variable "GRAFANA_AUTH" {
+  description = "Username:Password"
+  type        = string
+  sensitive   = true
+}
+variable "PROMETHEUS_FEDERATION_URL" {
+  description = "Prometheus Federation URL"
+  type        = string
+  sensitive   = false
+}
+variable "PROMETHEUS_CATCHALL_URL" {
+  description = "Prometheus Catchall URL"
+  type        = string
+  sensitive   = false
+}
diff --git a/services/simcore/docker-compose.deploy.local.yml b/services/simcore/docker-compose.deploy.local.yml
index ad894127..341cc825 100644
--- a/services/simcore/docker-compose.deploy.local.yml
+++ b/services/simcore/docker-compose.deploy.local.yml
@@ -27,9 +27,9 @@ services:
       - DIRECTOR_SELF_SIGNED_SSL_SECRET_NAME=rootca.crt
       - DIRECTOR_SELF_SIGNED_SSL_SECRET_ID=${DIRECTOR_SELF_SIGNED_SSL_SECRET_ID}
       - DIRECTOR_REGISTRY_CACHING=False
-      - DIRECTOR_REGISTRY_CACHING_TTL=15
+      - DIRECTOR_REGISTRY_CACHING_TTL=00:15:00
       - REQUESTS_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt # via https://stackoverflow.com/questions/31448854/how-to-force-requests-use-the-certificates-on-my-ubuntu-system#comment78596389_37447847
-      #- SSL_CERT_FILE=/usr/local/share/ca-certificates/osparc.crt
+      - SSL_CERT_FILE=/usr/local/share/ca-certificates/osparc.crt
     secrets:
       - source: rootca.crt
         target: /usr/local/share/ca-certificates/osparc.crt