diff --git a/scripts/deployments/deploy_everything_locally.bash b/scripts/deployments/deploy_everything_locally.bash
index e1c0c1f1..a86c911c 100755
--- a/scripts/deployments/deploy_everything_locally.bash
+++ b/scripts/deployments/deploy_everything_locally.bash
@@ -243,9 +243,9 @@ if [ "$start_opsstack" -eq 0 ]; then
call_make "." up-"$stack_target";
popd
- # -------------------------------- GRAYLOG -------------------------------
- log_info "starting graylog..."
- service_dir="${repo_basedir}"/services/graylog
+ # -------------------------------- LOGGING -------------------------------
+ log_info "starting logging..."
+ service_dir="${repo_basedir}"/services/logging
pushd "${service_dir}"
call_make "." up-"$stack_target"
sleep 1
diff --git a/services/graylog/GraylogWorkflow.png b/services/graylog/GraylogWorkflow.png
deleted file mode 100644
index 0f795fa6..00000000
Binary files a/services/graylog/GraylogWorkflow.png and /dev/null differ
diff --git a/services/graylog/docker-compose.aws.yml b/services/graylog/docker-compose.aws.yml
deleted file mode 100644
index b360b20c..00000000
--- a/services/graylog/docker-compose.aws.yml
+++ /dev/null
@@ -1,18 +0,0 @@
-services:
- mongodb:
- deploy:
- placement:
- constraints:
- - node.labels.graylog==true
- elasticsearch:
- deploy:
- placement:
- constraints:
- - node.labels.graylog==true
- graylog:
- dns: # Add this always for AWS, otherwise we get "No such image: " for docker services
- 8.8.8.8
- deploy:
- placement:
- constraints:
- - node.labels.graylog==true
diff --git a/services/graylog/docker-compose.dalco.yml b/services/graylog/docker-compose.dalco.yml
deleted file mode 100644
index ad187885..00000000
--- a/services/graylog/docker-compose.dalco.yml
+++ /dev/null
@@ -1,18 +0,0 @@
-services:
- mongodb:
- deploy:
- placement:
- constraints:
- - node.labels.graylog==true
-
- elasticsearch:
- deploy:
- placement:
- constraints:
- - node.labels.graylog==true
-
- graylog:
- deploy:
- placement:
- constraints:
- - node.labels.graylog==true
diff --git a/services/graylog/docker-compose.master.yml b/services/graylog/docker-compose.master.yml
deleted file mode 100644
index ad187885..00000000
--- a/services/graylog/docker-compose.master.yml
+++ /dev/null
@@ -1,18 +0,0 @@
-services:
- mongodb:
- deploy:
- placement:
- constraints:
- - node.labels.graylog==true
-
- elasticsearch:
- deploy:
- placement:
- constraints:
- - node.labels.graylog==true
-
- graylog:
- deploy:
- placement:
- constraints:
- - node.labels.graylog==true
diff --git a/services/graylog/.gitignore b/services/logging/.gitignore
similarity index 100%
rename from services/graylog/.gitignore
rename to services/logging/.gitignore
diff --git a/services/graylog/Makefile b/services/logging/Makefile
similarity index 100%
rename from services/graylog/Makefile
rename to services/logging/Makefile
diff --git a/services/graylog/README.md b/services/logging/README.md
similarity index 100%
rename from services/graylog/README.md
rename to services/logging/README.md
diff --git a/services/graylog/data/contentpacks/osparc-custom-content-pack-v2.json b/services/logging/data/contentpacks/osparc-custom-content-pack-v2.json
similarity index 100%
rename from services/graylog/data/contentpacks/osparc-custom-content-pack-v2.json
rename to services/logging/data/contentpacks/osparc-custom-content-pack-v2.json
diff --git a/services/logging/docker-compose.aws.yml b/services/logging/docker-compose.aws.yml
new file mode 100644
index 00000000..cae555cc
--- /dev/null
+++ b/services/logging/docker-compose.aws.yml
@@ -0,0 +1,28 @@
+services:
+ mongodb:
+ deploy:
+ placement:
+ constraints:
+ - node.labels.logging==true
+ elasticsearch:
+ deploy:
+ placement:
+ constraints:
+ - node.labels.logging==true
+ graylog:
+ deploy:
+ placement:
+ constraints:
+ - node.labels.logging==true
+
+ fluentd:
+ deploy:
+ placement:
+ constraints:
+ - node.labels.logging==true
+
+ loki:
+ deploy:
+ placement:
+ constraints:
+ - node.labels.logging==true
diff --git a/services/logging/docker-compose.dalco.yml b/services/logging/docker-compose.dalco.yml
new file mode 100644
index 00000000..c8c7f863
--- /dev/null
+++ b/services/logging/docker-compose.dalco.yml
@@ -0,0 +1,30 @@
+services:
+ mongodb:
+ deploy:
+ placement:
+ constraints:
+ - node.labels.logging==true
+
+ elasticsearch:
+ deploy:
+ placement:
+ constraints:
+ - node.labels.logging==true
+
+ graylog:
+ deploy:
+ placement:
+ constraints:
+ - node.labels.logging==true
+
+ fluentd:
+ deploy:
+ placement:
+ constraints:
+ - node.labels.logging==true
+
+ loki:
+ deploy:
+ placement:
+ constraints:
+ - node.labels.logging==true
diff --git a/services/graylog/docker-compose.letsencrypt.dns.yml b/services/logging/docker-compose.letsencrypt.dns.yml
similarity index 100%
rename from services/graylog/docker-compose.letsencrypt.dns.yml
rename to services/logging/docker-compose.letsencrypt.dns.yml
diff --git a/services/graylog/docker-compose.letsencrypt.http.yml b/services/logging/docker-compose.letsencrypt.http.yml
similarity index 100%
rename from services/graylog/docker-compose.letsencrypt.http.yml
rename to services/logging/docker-compose.letsencrypt.http.yml
diff --git a/services/graylog/docker-compose.local.yml b/services/logging/docker-compose.local.yml
similarity index 76%
rename from services/graylog/docker-compose.local.yml
rename to services/logging/docker-compose.local.yml
index 036d0dee..69bc86ad 100644
--- a/services/graylog/docker-compose.local.yml
+++ b/services/logging/docker-compose.local.yml
@@ -13,3 +13,8 @@ services:
deploy:
placement:
constraints: []
+
+ fluentd:
+ deploy:
+ placement:
+ constraints: []
diff --git a/services/logging/docker-compose.master.yml b/services/logging/docker-compose.master.yml
new file mode 100644
index 00000000..c8c7f863
--- /dev/null
+++ b/services/logging/docker-compose.master.yml
@@ -0,0 +1,30 @@
+services:
+ mongodb:
+ deploy:
+ placement:
+ constraints:
+ - node.labels.logging==true
+
+ elasticsearch:
+ deploy:
+ placement:
+ constraints:
+ - node.labels.logging==true
+
+ graylog:
+ deploy:
+ placement:
+ constraints:
+ - node.labels.logging==true
+
+ fluentd:
+ deploy:
+ placement:
+ constraints:
+ - node.labels.logging==true
+
+ loki:
+ deploy:
+ placement:
+ constraints:
+ - node.labels.logging==true
diff --git a/services/logging/docker-compose.yml.j2 b/services/logging/docker-compose.yml.j2
new file mode 100644
index 00000000..1f822ee8
--- /dev/null
+++ b/services/logging/docker-compose.yml.j2
@@ -0,0 +1,196 @@
+services:
+ # MongoDB: https://hub.docker.com/_/mongo/
+ mongodb:
+ image: mongo:6.0.6
+ init: true
+ volumes:
+ # data persistency
+ - mongo_data:/data/db
+ deploy:
+ replicas: 1
+ restart_policy:
+ condition: on-failure
+ resources:
+ limits:
+ memory: 1.2G
+ cpus: "1"
+ reservations:
+ memory: 300M
+ cpus: "0.1"
+ networks:
+ graylog:
+ aliases:
+ - mongo # needed because of graylog configuration
+
+ # Elasticsearch: https://www.elastic.co/guide/en/elasticsearch/reference/7.10/docker.html
+ elasticsearch:
+ image: docker.elastic.co/elasticsearch/elasticsearch-oss:7.10.2
+ init: true
+ volumes:
+ # data persistency
+ - elasticsearch_data:/usr/share/elasticsearch/data
+ environment:
+ - http.host=0.0.0.0
+ - transport.host=localhost
+ - network.host=0.0.0.0
+ - "ES_JAVA_OPTS=-Xms512m -Xmx512m"
+ deploy:
+ replicas: 1
+ restart_policy:
+ condition: on-failure
+ resources:
+ limits:
+ memory: 2G
+ cpus: "2"
+ reservations:
+ memory: 1G
+ cpus: "0.1"
+ networks:
+ graylog:
+ # Graylog: https://hub.docker.com/r/graylog/graylog/
+ graylog:
+ image: graylog/graylog:6.0.5
+ init: true
+ # user: "1000:1001"
+ configs:
+ - source: graylog_config
+ target: /files/osparc-custom-content-pack-v2.json
+ volumes:
+ # Mount local configuration directory into Docker container
+ # - graylog_config:/usr/share/graylog/data/config
+ # data persistency
+ - graylog_journal:/usr/share/graylog/data/journal
+ env_file:
+ - .env
+ environment:
+ # CHANGE ME (must be at least 16 characters)!
+ - GRAYLOG_PASSWORD_SECRET=${GRAYLOG_PASSWORD_SECRET}
+ # Username: admin
+ - GRAYLOG_ROOT_PASSWORD_SHA2=${GRAYLOG_ROOT_PASSWORD_SHA2}
+ - GRAYLOG_HTTP_EXTERNAL_URI=${GRAYLOG_HTTP_EXTERNAL_URI}
+ - GRAYLOG_ELASTICSEARCH_HOSTS=http://elasticsearch:9200,
+ networks:
+ public:
+ monitoring:
+ graylog:
+ aliases:
+ - graylog
+ ports:
+ - 12201:12201/udp
+ - 12202:12202/udp
+ deploy:
+ replicas: 1
+ restart_policy:
+ condition: on-failure
+ resources:
+ limits:
+ cpus: "2.00"
+ memory: 5G
+ reservations:
+ cpus: "0.1"
+ memory: 1G
+ labels:
+ - traefik.enable=true
+ - traefik.docker.network=${PUBLIC_NETWORK}
+ # direct access through port
+ - traefik.http.services.graylog.loadbalancer.server.port=9000
+ - traefik.http.routers.graylog.rule=Host(`${MONITORING_DOMAIN}`) && PathPrefix(`/graylog`)
+ - traefik.http.routers.graylog.entrypoints=https
+ - traefik.http.routers.graylog.tls=true
+ - traefik.http.middlewares.graylog_replace_regex.replacepathregex.regex=^/graylog/?(.*)$$
+ - traefik.http.middlewares.graylog_replace_regex.replacepathregex.replacement=/$${1}
+ - traefik.http.routers.graylog.middlewares=ops_whitelist_ips@swarm, ops_gzip@swarm, graylog_replace_regex
+ fluentd:
+ image: itisfoundation/fluentd:v1.16.9-1.0
+ configs:
+ - source: fluentd_config
+ target: /fluentd/etc/fluent.conf
+ environment:
+ - GRAYLOG_HOST=graylog
+ - GRAYLOG_PORT=12201
+ - LOKI_URL=http://loki:3100
+ - FLUENTD_HOSTNAME={% raw %}{{.Node.Hostname}}{% endraw %}
+ ports:
+ - "24224:24224/tcp"
+ deploy:
+ #mode: global # Run on all nodes
+ restart_policy:
+ condition: on-failure
+ resources:
+ limits:
+ cpus: '1.0'
+ memory: 1G
+ reservations:
+ cpus: '0.5'
+ memory: 512M
+ update_config:
+ parallelism: 1
+ delay: 10s
+ order: start-first
+ networks:
+ - monitoring
+ - graylog
+ healthcheck:
+ test: ["CMD", "curl", "-f", "http://0.0.0.0:24220/api/plugins"]
+ interval: 30s
+ timeout: 10s
+ retries: 3
+ start_period: 40s
+
+ loki:
+ image: grafana/loki:3.5.0
+ configs:
+ - source: loki_config
+ target: /etc/loki/loki.yaml
+ command: -config.file=/etc/loki/loki.yaml # NOTE(review): loki.yaml contains env-style placeholders; confirm they are substituted before deploy, otherwise Loki needs -config.expand-env=true and the S3/retention variables in its environment
+ deploy:
+ placement:
+ constraints: []
+ replicas: 1
+ restart_policy:
+ condition: any
+ delay: 5s
+ resources:
+ limits:
+ cpus: '1.0'
+ memory: 2G
+ reservations:
+ cpus: '0.5'
+ memory: 1G
+ update_config:
+ parallelism: 1
+ delay: 10s
+ order: start-first
+ networks:
+ - monitoring
+ healthcheck:
+ test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://0.0.0.0:3100/ready"]
+ interval: 30s
+ timeout: 10s
+ retries: 3
+ start_period: 40s
+
+
+volumes:
+ mongo_data:
+ elasticsearch_data:
+ graylog_journal:
+
+networks:
+ graylog:
+ public:
+ external: true
+ name: ${PUBLIC_NETWORK}
+ monitoring:
+ external: true
+ name: ${MONITORED_NETWORK}
+configs:
+ graylog_config:
+ name: ${STACK_NAME}_graylog_config_{{ "./data/contentpacks/osparc-custom-content-pack-v2.json" | sha256file | substring(0,10) }}
+ file: ./data/contentpacks/osparc-custom-content-pack-v2.json
+ fluentd_config:
+ name: ${STACK_NAME}_fluentd_config_{{ "./fluentd/fluent.conf" | sha256file | substring(0,10) }}
+ file: ./fluentd/fluent.conf
+ loki_config:
+ name: ${STACK_NAME}_loki_config_{{ "./loki.yaml" | sha256file | substring(0,10) }}
+ file: ./loki.yaml
diff --git a/services/logging/fluentd/Dockerfile b/services/logging/fluentd/Dockerfile
new file mode 100644
index 00000000..ee66adff
--- /dev/null
+++ b/services/logging/fluentd/Dockerfile
@@ -0,0 +1,26 @@
+FROM fluent/fluentd:v1.16.9-1.0
+
+USER root
+
+# Install dependencies and plugins
+RUN apk add --no-cache --update --virtual .build-deps \
+ sudo build-base ruby-dev curl \
+ && sudo gem install fluent-plugin-grafana-loki \
+ && sudo gem install fluent-plugin-gelf-best \
+ && sudo gem install fluent-plugin-prometheus \
+ && apk del .build-deps \
+ && apk add --no-cache curl jq \
+ && rm -rf /var/cache/apk/* \
+ && rm -rf /tmp/* /var/tmp/* /usr/lib/ruby/gems/*/cache/*.gem
+
+# Create directories with appropriate permissions
+RUN mkdir -p /fluentd/buffer /fluentd/log \
+ && chown -R fluent:fluent /fluentd/buffer /fluentd/log
+
+# Health check
+HEALTHCHECK --interval=30s --timeout=30s --retries=3 \
+ CMD curl -s http://localhost:24220/api/plugins | jq -e '.plugins | length > 0' || exit 1
+
+USER fluent
+
+ENTRYPOINT ["fluentd", "-c", "/fluentd/etc/fluent.conf"]
diff --git a/services/logging/fluentd/Makefile b/services/logging/fluentd/Makefile
new file mode 100644
index 00000000..c353f2f3
--- /dev/null
+++ b/services/logging/fluentd/Makefile
@@ -0,0 +1,9 @@
+.DEFAULT_GOAL := help
+STACK_NAME = $(notdir $(abspath $(shell pwd)/..))
+REPO_BASE_DIR := $(shell git rev-parse --show-toplevel)
+
+# TARGETS --------------------------------------------------
+include ${REPO_BASE_DIR}/scripts/common.Makefile
+
+build:
+ @docker build -t itisfoundation/fluentd:v1.16.9-1.0 .
diff --git a/services/logging/fluentd/README.md b/services/logging/fluentd/README.md
new file mode 100644
index 00000000..40979a86
--- /dev/null
+++ b/services/logging/fluentd/README.md
@@ -0,0 +1,4 @@
+There is currently no CD for building the fluentd image.
+It has to be built and pushed manually:
+
+Run `make build` (or e.g. `docker build -t itisfoundation/fluentd:v1.16.9-1.0 .`) in this folder, then push the image to Docker Hub with `docker push itisfoundation/fluentd:v1.16.9-1.0`.
diff --git a/services/logging/fluentd/fluent.conf b/services/logging/fluentd/fluent.conf
new file mode 100644
index 00000000..e562e59c
--- /dev/null
+++ b/services/logging/fluentd/fluent.conf
@@ -0,0 +1,111 @@
+# Monitoring
+
+ @type monitor_agent
+ bind 0.0.0.0
+ port 24220
+
+
+# Prometheus metrics
+
+ @type prometheus
+ bind 0.0.0.0
+ port 24231
+ metrics_path metrics
+
+
+
+ workers 1
+
+
+
+ @type prometheus_output_monitor
+ interval 10
+
+ hostname ${hostname}
+
+
+
+# Input: Receive logs from Docker containers
+
+ @type forward
+ port 24224
+ bind 0.0.0.0
+
+
+# Add additional metadata
+
+ @type record_transformer
+
+ hostname "#{Socket.gethostname}"
+ fluentd_hostname "#{ENV['FLUENTD_HOSTNAME']}"
+ tag ${tag}
+
+
+
+# Output to both Graylog (GELF) and Loki
+
+ @type copy
+
+ # Output to Graylog using GELF
+
+ @type gelf
+ host "#{ENV['GRAYLOG_HOST']}"
+ port "#{ENV['GRAYLOG_PORT']}"
+ protocol udp
+ add_msec_time true
+ flush_interval 5s
+
+ @type file
+ path /fluentd/buffer/graylog
+ flush_thread_count 8
+ flush_interval 5s
+ retry_forever false
+ retry_timeout 1h
+ retry_max_times 30
+ retry_randomize true
+ chunk_limit_size 8M
+ total_limit_size 2G
+ overflow_action drop_oldest_chunk
+ flush_mode interval
+
+
+ @type file
+ path /fluentd/log/graylog-error
+ append true
+
+ @type json
+
+
+
+
+ # Output to Loki
+
+ @type loki
+ url "#{ENV['LOKI_URL']}"
+ extra_labels {"job": "docker"}
+ line_format json
+ username ""
+ password ""
+ flush_interval 5s
+
+ @type file
+ path /fluentd/buffer/loki
+ flush_thread_count 8
+ flush_interval 5s
+ retry_forever false
+ retry_max_interval 30
+ retry_max_times 30
+ retry_randomize true
+ chunk_limit_size 8M
+ total_limit_size 2G
+
+
+ @type file
+ path /fluentd/log/loki-error
+ append true
+
+ @type json
+
+
+
+
diff --git a/services/logging/loki.yaml b/services/logging/loki.yaml
new file mode 100644
index 00000000..5100e6f7
--- /dev/null
+++ b/services/logging/loki.yaml
@@ -0,0 +1,44 @@
+auth_enabled: false
+
+server:
+ http_listen_port: 3100
+
+common:
+ path_prefix: /tmp/loki # Required for internal directories
+
+ingester:
+ lifecycler:
+ address: 0.0.0.0
+ ring:
+ kvstore:
+ store: inmemory
+ replication_factor: 1
+
+schema_config:
+ configs:
+ - from: 2020-10-15
+ store: tsdb
+ object_store: s3
+ schema: v13
+ index:
+ prefix: index_
+ period: 24h
+
+storage_config:
+ tsdb_shipper:
+ active_index_directory: /tmp/loki/tsdb-index # Local cache for index metadata
+ cache_location: /tmp/loki/tsdb-cache # Local cache for tsdb
+ aws:
+ s3: s3://${S3_BUCKET_NAME_LOKI}
+ region: ${S3_REGION_LOKI}
+ access_key_id: ${S3_ACCESS_KEY_LOKI}
+ secret_access_key: ${S3_SECRET_KEY_LOKI}
+ s3forcepathstyle: ${S3_FORCE_PATH_STYLE_LOKI} # Set to true if using MinIO or S3-compatible API; optional for AWS
+ endpoint: ${S3_ENDPOINT_LOKI} # Optional; use for non-default endpoints
+
+compactor:
+ working_directory: /tmp/loki/compactor
+ retention_enabled: false
+
+limits_config:
+ retention_period: ${LOKI_RETENTION_PERIOD} # must be >= 24h and a multiple of the index period (24h); only applied when compactor.retention_enabled is true (currently false)
diff --git a/services/graylog/scripts/.gitignore b/services/logging/scripts/.gitignore
similarity index 100%
rename from services/graylog/scripts/.gitignore
rename to services/logging/scripts/.gitignore
diff --git a/services/graylog/scripts/README.md b/services/logging/scripts/README.md
similarity index 100%
rename from services/graylog/scripts/README.md
rename to services/logging/scripts/README.md
diff --git a/services/graylog/scripts/alerts.template.yaml b/services/logging/scripts/alerts.template.yaml
similarity index 95%
rename from services/graylog/scripts/alerts.template.yaml
rename to services/logging/scripts/alerts.template.yaml
index fcadc6cf..8568215e 100644
--- a/services/graylog/scripts/alerts.template.yaml
+++ b/services/logging/scripts/alerts.template.yaml
@@ -3,7 +3,7 @@
priority: 3
config:
query: >
- container_name: /.*director-v2.*/ AND "could not find an available, non-overlapping IPv4 address pool among the defaults to assign to the network" AND NOT container_name:/.*graylog_graylog.*/
+ container_name: /.*director-v2.*/ AND "could not find an available, non-overlapping IPv4 address pool among the defaults to assign to the network" AND NOT container_name:/.*logging_graylog.*/
query_parameters: []
search_within_ms: 600000
event_limit: 1000
@@ -43,7 +43,7 @@
priority: 2
config:
query: >
- "lock is no longer owned. This is unexpected and requires investigation" AND NOT container_name:/.*graylog_graylog.*/
+ "lock is no longer owned. This is unexpected and requires investigation" AND NOT container_name:/.*logging_graylog.*/
query_parameters: []
search_within_ms: 3600000
event_limit: 1000
@@ -82,7 +82,7 @@
priority: 2
config:
query: >
- "LockNotOwnedError" AND NOT container_name:/.*graylog_graylog.*/
+ "LockNotOwnedError" AND NOT container_name:/.*logging_graylog.*/
query_parameters: []
search_within_ms: 3600000
event_limit: 1000
diff --git a/services/graylog/scripts/configure.py b/services/logging/scripts/configure.py
similarity index 100%
rename from services/graylog/scripts/configure.py
rename to services/logging/scripts/configure.py
diff --git a/services/graylog/scripts/requirements.txt b/services/logging/scripts/requirements.txt
similarity index 100%
rename from services/graylog/scripts/requirements.txt
rename to services/logging/scripts/requirements.txt
diff --git a/services/graylog/template.env b/services/logging/template.env
similarity index 72%
rename from services/graylog/template.env
rename to services/logging/template.env
index 6d8bd056..d660de18 100644
--- a/services/graylog/template.env
+++ b/services/logging/template.env
@@ -15,3 +15,11 @@ GRAYLOG_WAIT_ONLINE_TIMEOUT_SEC=${GRAYLOG_WAIT_ONLINE_TIMEOUT_SEC}
GRAYLOG_LOG_MAX_DAYS_IN_STORAGE=${GRAYLOG_LOG_MAX_DAYS_IN_STORAGE}
GRAYLOG_LOG_MIN_DAYS_IN_STORAGE=${GRAYLOG_LOG_MIN_DAYS_IN_STORAGE}
PUBLIC_NETWORK=${PUBLIC_NETWORK}
+MONITORED_NETWORK=${MONITORED_NETWORK}
+LOKI_RETENTION_PERIOD=${LOKI_RETENTION_PERIOD}
+S3_ENDPOINT_LOKI=${S3_ENDPOINT_LOKI}
+S3_ACCESS_KEY_LOKI=${S3_ACCESS_KEY_LOKI}
+S3_BUCKET_NAME_LOKI=${S3_BUCKET_NAME_LOKI}
+S3_FORCE_PATH_STYLE_LOKI=${S3_FORCE_PATH_STYLE_LOKI}
+S3_REGION_LOKI=${S3_REGION_LOKI}
+S3_SECRET_KEY_LOKI=${S3_SECRET_KEY_LOKI}
diff --git a/services/monitoring/grafana/terraform/datasources.tf b/services/monitoring/grafana/terraform/datasources.tf
index c5fcefc6..ee299f6a 100644
--- a/services/monitoring/grafana/terraform/datasources.tf
+++ b/services/monitoring/grafana/terraform/datasources.tf
@@ -35,6 +35,13 @@ resource "grafana_data_source" "tempo" {
uid = "delr011tpeupsc"
}
+resource "grafana_data_source" "loki" {
+ type = "loki"
+ name = "loki"
+ url = "http://loki:3100"
+ basic_auth_enabled = false
+ is_default = false
+}
resource "grafana_data_source" "cloudwatch" {
# This resource is only created if the AWS Deployments
count = var.IS_AWS_DEPLOYMENT ? 1 : 0