diff --git a/services/logging/docker-compose.aws.yml b/services/logging/docker-compose.aws.yml
index cae555cc5..0b1e81f9f 100644
--- a/services/logging/docker-compose.aws.yml
+++ b/services/logging/docker-compose.aws.yml
@@ -15,7 +15,7 @@ services:
constraints:
- node.labels.logging==true
- fluentd:
+ vector:
deploy:
placement:
constraints:
diff --git a/services/logging/docker-compose.dalco.yml b/services/logging/docker-compose.dalco.yml
index c8c7f8631..9968232e7 100644
--- a/services/logging/docker-compose.dalco.yml
+++ b/services/logging/docker-compose.dalco.yml
@@ -17,7 +17,7 @@ services:
constraints:
- node.labels.logging==true
- fluentd:
+ vector:
deploy:
placement:
constraints:
diff --git a/services/logging/docker-compose.local.yml b/services/logging/docker-compose.local.yml
index 69bc86ade..ca93bc624 100644
--- a/services/logging/docker-compose.local.yml
+++ b/services/logging/docker-compose.local.yml
@@ -14,7 +14,7 @@ services:
placement:
constraints: []
- fluentd:
+ vector:
deploy:
placement:
constraints: []
diff --git a/services/logging/docker-compose.master.yml b/services/logging/docker-compose.master.yml
index c8c7f8631..9968232e7 100644
--- a/services/logging/docker-compose.master.yml
+++ b/services/logging/docker-compose.master.yml
@@ -17,7 +17,7 @@ services:
constraints:
- node.labels.logging==true
- fluentd:
+ vector:
deploy:
placement:
constraints:
diff --git a/services/logging/docker-compose.yml.j2 b/services/logging/docker-compose.yml.j2
index 7b1052649..be2aa1a9a 100644
--- a/services/logging/docker-compose.yml.j2
+++ b/services/logging/docker-compose.yml.j2
@@ -51,7 +51,6 @@ services:
graylog:
image: graylog/graylog:6.0.5
init: true
- # user: "1000:1001"
configs:
- source: graylog_config
target: /files/osparc-custom-content-pack-v2.json
@@ -76,7 +75,7 @@ services:
aliases:
- graylog
ports:
- - 12201:12201/udp
+ - 12200:12201/udp  # host port is 12200 because the vector service publishes 12201/udp on the host
- 12202:12202/udp
deploy:
replicas: 1
@@ -100,42 +99,29 @@ services:
- traefik.http.middlewares.graylog_replace_regex.replacepathregex.regex=^/graylog/?(.*)$$
- traefik.http.middlewares.graylog_replace_regex.replacepathregex.replacement=/$${1}
- traefik.http.routers.graylog.middlewares=ops_whitelist_ips@swarm, ops_gzip@swarm, graylog_replace_regex
- fluentd:
- image: itisfoundation/fluentd:v1.16.9-1.0
- configs:
- - source: fluentd_config
- target: /fluentd/etc/fluent.conf
- environment:
- - GRAYLOG_HOST=graylog
- - GRAYLOG_PORT=12201
- - LOKI_URL=http://loki:3100
- - FLUENTD_HOSTNAME={% raw %}{{.Node.Hostname}}{% endraw %}
+ vector:
+ image: timberio/vector:0.49.X-debian
ports:
- - "24224:24224/tcp"
+ - "12201:12201/udp" # GELF input
+ volumes:  # NOTE(review): vector.yaml in this change defines only a UDP socket source; this mount looks unused - confirm
+ - /var/run/docker.sock:/var/run/docker.sock:ro
+ environment:
+ - VECTOR_CONFIG=/etc/vector/vector.yaml
+ - VECTOR_LOG=info
+ configs:
+ - source: vector_config
+ target: /etc/vector/vector.yaml
deploy:
- #mode: global # Run on all nodes
- restart_policy:
- condition: on-failure
resources:
limits:
- cpus: '1.0'
- memory: 1G
- reservations:
- cpus: '0.5'
+ cpus: "1.0"
memory: 512M
- update_config:
- parallelism: 1
- delay: 10s
- order: start-first
+ reservations:
+ memory: 256M
+ labels: []
networks:
- monitoring
- graylog
- healthcheck:
- test: ["CMD", "curl", "-f", "http://0.0.0.0:24220/api/plugins"]
- interval: 30s
- timeout: 10s
- retries: 3
- start_period: 40s
loki:
image: grafana/loki:3.5.4
@@ -196,9 +182,9 @@ configs:
graylog_config:
name: ${STACK_NAME}_graylog_config_{{ "./data/contentpacks/osparc-custom-content-pack-v2.json" | sha256file | substring(0,10) }}
file: ./data/contentpacks/osparc-custom-content-pack-v2.json
- fluentd_config:
- name: ${STACK_NAME}_fluentd_config_{{ "./fluentd/fluent.conf" | sha256file | substring(0,10) }}
- file: ./fluentd/fluent.conf
+ vector_config:
+ name: ${STACK_NAME}_vector_config_{{ "./vector.yaml" | sha256file | substring(0,10) }}
+ file: ./vector.yaml
loki_config:
name: ${STACK_NAME}_loki_config_{{ "./loki.yaml" | sha256file | substring(0,10) }}
file: ./loki.yaml
diff --git a/services/logging/fluentd/Dockerfile b/services/logging/fluentd/Dockerfile
deleted file mode 100644
index 0d47084ba..000000000
--- a/services/logging/fluentd/Dockerfile
+++ /dev/null
@@ -1,28 +0,0 @@
-FROM fluent/fluentd:v1.16.9-debian-1.0
-
-USER root
-
-# Install dependencies and plugins using apt instead of apk
-RUN apt-get update && apt-get install -y --no-install-recommends \
- build-essential \
- ruby-dev \
- curl \
- jq \
- && gem install fluent-plugin-grafana-loki \
- && gem install fluent-plugin-gelf-best \
- && gem install fluent-plugin-prometheus \
- && apt-get purge -y --auto-remove build-essential ruby-dev \
- && rm -rf /var/lib/apt/lists/* \
- && rm -rf /tmp/* /var/tmp/* /usr/lib/ruby/gems/*/cache/*.gem
-
-# Create directories with appropriate permissions
-RUN mkdir -p /fluentd/buffer /fluentd/log \
- && chown -R fluent:fluent /fluentd/buffer /fluentd/log
-
-# Health check
-HEALTHCHECK --interval=30s --timeout=30s --retries=3 \
- CMD curl -s http://localhost:24220/api/plugins | jq -e '.plugins | length > 0' || exit 1
-
-USER fluent
-
-ENTRYPOINT ["fluentd", "-c", "/fluentd/etc/fluent.conf"]
diff --git a/services/logging/fluentd/Makefile b/services/logging/fluentd/Makefile
deleted file mode 100644
index c353f2f32..000000000
--- a/services/logging/fluentd/Makefile
+++ /dev/null
@@ -1,9 +0,0 @@
-.DEFAULT_GOAL := help
-STACK_NAME = $(notdir $(shell pwd)/..)
-REPO_BASE_DIR := $(shell git rev-parse --show-toplevel)
-
-# TARGETS --------------------------------------------------
-include ${REPO_BASE_DIR}/scripts/common.Makefile
-
-build:
- @docker build -t itisfoundation/fluentd:v1.16.9-1.0 .
diff --git a/services/logging/fluentd/README.md b/services/logging/fluentd/README.md
deleted file mode 100644
index 6e152979b..000000000
--- a/services/logging/fluentd/README.md
+++ /dev/null
@@ -1,5 +0,0 @@
-There is currently no CD for building the fluentd image.
-It has to be built and pushed manually:
-
-Run e.g. `docker buildx build --platform linux/amd64,linux/arm64 --push -t itisfoundation/fluentd:v1.16.9-1.0 .` in this folder, then push the image to dockerhub.
-Keep in mind that some ops machines run on ARM, so we need an ARM image as well.
diff --git a/services/logging/fluentd/fluent.conf b/services/logging/fluentd/fluent.conf
deleted file mode 100644
index bd32cc1a5..000000000
--- a/services/logging/fluentd/fluent.conf
+++ /dev/null
@@ -1,131 +0,0 @@
-# Monitoring
-
- @type monitor_agent
- bind 0.0.0.0
- port 24220
-
-
-# Prometheus metrics
-
- @type prometheus
- bind 0.0.0.0
- port 24231
- metrics_path metrics
-
-
-
- workers 1
-
-
-
- @type prometheus_output_monitor
- interval 10
-
- hostname ${hostname}
-
-
-
-# Input: Receive logs from Docker containers
-
- @type forward
- port 24224
- bind 0.0.0.0
- # Add source hostname to records
- source_hostname_key source_hostname
-
-
-# Add additional metadata
-
- @type record_transformer
-
- hostname "#{Socket.gethostname}"
- fluentd_hostname "#{ENV['FLUENTD_HOSTNAME']}"
-
-
-
-# Clean container names and set proper host field
-
- @type record_transformer
- enable_ruby true
-
- # cleanup container names by removing leading slashes
- container_name ${record["container_name"] ? record["container_name"].sub(/^\//, '') : record["container_name"]}
- # Use source hostname from forward input as the host field for GELF
- host ${record["source_hostname"] || record["source"] || record["_hostname"] || "unknown"}
-
-
-
-
-# Output to both Graylog (GELF) and Loki
-
- @type copy
-
- # Output to Graylog using GELF
-
- @type gelf
- host logging_graylog
- port 12201
- protocol udp
- add_msec_time true
- flush_interval 5s
- # Use the host field from record for GELF host field
- use_record_host true
- # Map the correct fields for Graylog
-
- @type json
-
-
- @type file
- path /fluentd/buffer/graylog
- flush_thread_count 8
- flush_interval 5s
- retry_forever false
- retry_timeout 1h
- retry_max_times 30
- retry_randomize true
- chunk_limit_size 8M
- total_limit_size 2G
- overflow_action drop_oldest_chunk
- flush_mode interval
-
-
- @type file
- path /fluentd/log/graylog-error
- append true
-
- @type json
-
-
-
-
- # Output to Loki
-
- @type loki
- url "#{ENV['LOKI_URL']}"
- extra_labels {"job": "docker"}
- line_format json
- username ""
- password ""
- flush_interval 5s
-
- @type file
- path /fluentd/buffer/loki
- flush_thread_count 8
- flush_interval 5s
- retry_forever false
- retry_max_interval 30
- retry_max_times 30
- retry_randomize true
- chunk_limit_size 8M
- total_limit_size 2G
-
-
- @type file
- path /fluentd/log/loki-error
- append true
-
- @type json
-
-
-
-
diff --git a/services/logging/vector.yaml b/services/logging/vector.yaml
new file mode 100644
index 000000000..16ddf97d8
--- /dev/null
+++ b/services/logging/vector.yaml
@@ -0,0 +1,98 @@
+# Vector configuration replacing Fluentd
+# Ingests GELF logs from Docker daemon and forwards to Loki and Graylog
+
+sources:
+  # GELF over UDP from Docker containers; published on the host as 12201/udp
+  docker_gelf:
+    type: socket
+    address: "0.0.0.0:12201"
+    mode: udp
+    decoding:
+      codec: gelf
+    framing:
+      method: chunked_gelf  # reassemble GELF chunks before decoding
+      chunked_gelf:  # options for the chunked_gelf framer live under this key
+        decompression: Auto  # auto-detect gzip, zlib, or uncompressed payloads
+
+transforms:
+ # Process and enrich the logs
+ process_logs:
+ type: remap
+ inputs: ["docker_gelf"]
+ source: |
+ if !exists(.host) {
+ .host = get_hostname!()
+ }
+
+ # Map short_message to message for Loki compatibility
+ if exists(.short_message) {
+ .message = .short_message
+ }
+
+ # Handle container name - GELF uses _container_name (with underscore prefix)
+ if exists(._container_name) {
+ .container_name = ._container_name
+ } else {
+ .container_name = "unknown"
+ }
+
+ # Handle container ID
+ if exists(._container_id) {
+ .container_id = ._container_id
+ } else {
+ .container_id = "unknown"
+ }
+
+ # Handle image name
+ if exists(._image_name) {
+ .image_name = ._image_name
+ } else {
+ .image_name = "unknown"
+ }
+
+ # Add processing metadata
+ .processed_by = "vector"
+
+sinks:
+ # Send to Loki
+ loki:
+ type: loki
+ inputs: ["process_logs"]
+ endpoint: "http://loki:3100"
+ encoding:
+ codec: json
+ labels:
+ job: "docker"
+ source: "vector"
+ # Simple field references - defaults are set in transform above
+ host: "{{ host }}"
+ container_name: "{{ container_name }}"
+ # Remove label fields from the log line to avoid duplication
+ remove_label_fields: true
+ healthcheck:
+ enabled: true
+
+ # Send to Graylog via GELF over UDP (not TCP with framing); 12201 is Graylog's container port, not the host-published 12200
+ graylog:
+ type: socket
+ inputs: ["process_logs"]
+ address: "logging_graylog:12201"  # NOTE(review): hard-codes the "logging_" stack prefix; the compose template uses ${STACK_NAME} - confirm
+ mode: udp
+ encoding:
+ codec: gelf
+ healthcheck:
+ enabled: true
+
+ # Temporary: Output to console for debugging
+ #console_debug:
+ # type: console
+ # inputs: ["process_logs"]
+ # encoding:
+ # codec: json
+
+# Global configuration
+api:
+ enabled: true
+ address: "0.0.0.0:8686"
+
+data_dir: "/var/lib/vector"