diff --git a/services/logging/docker-compose.aws.yml b/services/logging/docker-compose.aws.yml index cae555cc5..0b1e81f9f 100644 --- a/services/logging/docker-compose.aws.yml +++ b/services/logging/docker-compose.aws.yml @@ -15,7 +15,7 @@ services: constraints: - node.labels.logging==true - fluentd: + vector: deploy: placement: constraints: diff --git a/services/logging/docker-compose.dalco.yml b/services/logging/docker-compose.dalco.yml index c8c7f8631..9968232e7 100644 --- a/services/logging/docker-compose.dalco.yml +++ b/services/logging/docker-compose.dalco.yml @@ -17,7 +17,7 @@ services: constraints: - node.labels.logging==true - fluentd: + vector: deploy: placement: constraints: diff --git a/services/logging/docker-compose.local.yml b/services/logging/docker-compose.local.yml index 69bc86ade..ca93bc624 100644 --- a/services/logging/docker-compose.local.yml +++ b/services/logging/docker-compose.local.yml @@ -14,7 +14,7 @@ services: placement: constraints: [] - fluentd: + vector: deploy: placement: constraints: [] diff --git a/services/logging/docker-compose.master.yml b/services/logging/docker-compose.master.yml index c8c7f8631..9968232e7 100644 --- a/services/logging/docker-compose.master.yml +++ b/services/logging/docker-compose.master.yml @@ -17,7 +17,7 @@ services: constraints: - node.labels.logging==true - fluentd: + vector: deploy: placement: constraints: diff --git a/services/logging/docker-compose.yml.j2 b/services/logging/docker-compose.yml.j2 index 7b1052649..be2aa1a9a 100644 --- a/services/logging/docker-compose.yml.j2 +++ b/services/logging/docker-compose.yml.j2 @@ -51,7 +51,6 @@ services: graylog: image: graylog/graylog:6.0.5 init: true - # user: "1000:1001" configs: - source: graylog_config target: /files/osparc-custom-content-pack-v2.json @@ -76,7 +75,7 @@ services: aliases: - graylog ports: - - 12201:12201/udp + - 12200:12201/udp - 12202:12202/udp deploy: replicas: 1 @@ -100,42 +99,29 @@ services: - 
traefik.http.middlewares.graylog_replace_regex.replacepathregex.regex=^/graylog/?(.*)$$ - traefik.http.middlewares.graylog_replace_regex.replacepathregex.replacement=/$${1} - traefik.http.routers.graylog.middlewares=ops_whitelist_ips@swarm, ops_gzip@swarm, graylog_replace_regex - fluentd: - image: itisfoundation/fluentd:v1.16.9-1.0 - configs: - - source: fluentd_config - target: /fluentd/etc/fluent.conf - environment: - - GRAYLOG_HOST=graylog - - GRAYLOG_PORT=12201 - - LOKI_URL=http://loki:3100 - - FLUENTD_HOSTNAME={% raw %}{{.Node.Hostname}}{% endraw %} + vector: + image: timberio/vector:0.49.X-debian ports: - - "24224:24224/tcp" + - "12201:12201/udp" # GELF input + volumes: + - /var/run/docker.sock:/var/run/docker.sock:ro + environment: + - VECTOR_CONFIG=/etc/vector/vector.yaml + - VECTOR_LOG=info + configs: + - source: vector_config + target: /etc/vector/vector.yaml deploy: - #mode: global # Run on all nodes - restart_policy: - condition: on-failure resources: limits: - cpus: '1.0' - memory: 1G - reservations: - cpus: '0.5' + cpus: "1.0" memory: 512M - update_config: - parallelism: 1 - delay: 10s - order: start-first + reservations: + memory: 256M + labels: [] networks: - monitoring - graylog - healthcheck: - test: ["CMD", "curl", "-f", "http://0.0.0.0:24220/api/plugins"] - interval: 30s - timeout: 10s - retries: 3 - start_period: 40s loki: image: grafana/loki:3.5.4 @@ -196,9 +182,9 @@ configs: graylog_config: name: ${STACK_NAME}_graylog_config_{{ "./data/contentpacks/osparc-custom-content-pack-v2.json" | sha256file | substring(0,10) }} file: ./data/contentpacks/osparc-custom-content-pack-v2.json - fluentd_config: - name: ${STACK_NAME}_fluentd_config_{{ "./fluentd/fluent.conf" | sha256file | substring(0,10) }} - file: ./fluentd/fluent.conf + vector_config: + name: ${STACK_NAME}_vector_config_{{ "./vector.yaml" | sha256file | substring(0,10) }} + file: ./vector.yaml loki_config: name: ${STACK_NAME}_loki_config_{{ "./loki.yaml" | sha256file | substring(0,10) }} 
file: ./loki.yaml diff --git a/services/logging/fluentd/Dockerfile b/services/logging/fluentd/Dockerfile deleted file mode 100644 index 0d47084ba..000000000 --- a/services/logging/fluentd/Dockerfile +++ /dev/null @@ -1,28 +0,0 @@ -FROM fluent/fluentd:v1.16.9-debian-1.0 - -USER root - -# Install dependencies and plugins using apt instead of apk -RUN apt-get update && apt-get install -y --no-install-recommends \ - build-essential \ - ruby-dev \ - curl \ - jq \ - && gem install fluent-plugin-grafana-loki \ - && gem install fluent-plugin-gelf-best \ - && gem install fluent-plugin-prometheus \ - && apt-get purge -y --auto-remove build-essential ruby-dev \ - && rm -rf /var/lib/apt/lists/* \ - && rm -rf /tmp/* /var/tmp/* /usr/lib/ruby/gems/*/cache/*.gem - -# Create directories with appropriate permissions -RUN mkdir -p /fluentd/buffer /fluentd/log \ - && chown -R fluent:fluent /fluentd/buffer /fluentd/log - -# Health check -HEALTHCHECK --interval=30s --timeout=30s --retries=3 \ - CMD curl -s http://localhost:24220/api/plugins | jq -e '.plugins | length > 0' || exit 1 - -USER fluent - -ENTRYPOINT ["fluentd", "-c", "/fluentd/etc/fluent.conf"] diff --git a/services/logging/fluentd/Makefile b/services/logging/fluentd/Makefile deleted file mode 100644 index c353f2f32..000000000 --- a/services/logging/fluentd/Makefile +++ /dev/null @@ -1,9 +0,0 @@ -.DEFAULT_GOAL := help -STACK_NAME = $(notdir $(shell pwd)/..) -REPO_BASE_DIR := $(shell git rev-parse --show-toplevel) - -# TARGETS -------------------------------------------------- -include ${REPO_BASE_DIR}/scripts/common.Makefile - -build: - @docker build -t itisfoundation/fluentd:v1.16.9-1.0 . diff --git a/services/logging/fluentd/README.md b/services/logging/fluentd/README.md deleted file mode 100644 index 6e152979b..000000000 --- a/services/logging/fluentd/README.md +++ /dev/null @@ -1,5 +0,0 @@ -There is currently no CD for building the fluentd image. -It has to be built and pushed manually: - -Run e.g. 
`docker buildx build --platform linux/amd64,linux/arm64 --push -t itisfoundation/fluentd:v1.16.9-1.0 .` in this folder, then push the image to dockerhub. -Keep in mind that some ops machines run on ARM, so we need an ARM image as well. diff --git a/services/logging/fluentd/fluent.conf b/services/logging/fluentd/fluent.conf deleted file mode 100644 index bd32cc1a5..000000000 --- a/services/logging/fluentd/fluent.conf +++ /dev/null @@ -1,131 +0,0 @@ -# Monitoring - - @type monitor_agent - bind 0.0.0.0 - port 24220 - - -# Prometheus metrics - - @type prometheus - bind 0.0.0.0 - port 24231 - metrics_path metrics - - - - workers 1 - - - - @type prometheus_output_monitor - interval 10 - - hostname ${hostname} - - - -# Input: Receive logs from Docker containers - - @type forward - port 24224 - bind 0.0.0.0 - # Add source hostname to records - source_hostname_key source_hostname - - -# Add additional metadata - - @type record_transformer - - hostname "#{Socket.gethostname}" - fluentd_hostname "#{ENV['FLUENTD_HOSTNAME']}" - - - -# Clean container names and set proper host field - - @type record_transformer - enable_ruby true - - # cleanup container names by removing leading slashes - container_name ${record["container_name"] ? 
record["container_name"].sub(/^\//, '') : record["container_name"]} - # Use source hostname from forward input as the host field for GELF - host ${record["source_hostname"] || record["source"] || record["_hostname"] || "unknown"} - - - - -# Output to both Graylog (GELF) and Loki - - @type copy - - # Output to Graylog using GELF - - @type gelf - host logging_graylog - port 12201 - protocol udp - add_msec_time true - flush_interval 5s - # Use the host field from record for GELF host field - use_record_host true - # Map the correct fields for Graylog - - @type json - - - @type file - path /fluentd/buffer/graylog - flush_thread_count 8 - flush_interval 5s - retry_forever false - retry_timeout 1h - retry_max_times 30 - retry_randomize true - chunk_limit_size 8M - total_limit_size 2G - overflow_action drop_oldest_chunk - flush_mode interval - - - @type file - path /fluentd/log/graylog-error - append true - - @type json - - - - - # Output to Loki - - @type loki - url "#{ENV['LOKI_URL']}" - extra_labels {"job": "docker"} - line_format json - username "" - password "" - flush_interval 5s - - @type file - path /fluentd/buffer/loki - flush_thread_count 8 - flush_interval 5s - retry_forever false - retry_max_interval 30 - retry_max_times 30 - retry_randomize true - chunk_limit_size 8M - total_limit_size 2G - - - @type file - path /fluentd/log/loki-error - append true - - @type json - - - - diff --git a/services/logging/vector.yaml b/services/logging/vector.yaml new file mode 100644 index 000000000..16ddf97d8 --- /dev/null +++ b/services/logging/vector.yaml @@ -0,0 +1,98 @@ +# Vector configuration to replace Fluentd +# Ingests GELF logs from Docker daemon and forwards to Loki and Graylog + +sources: + # Receive GELF messages from Docker containers via UDP + docker_gelf: + type: socket + address: "0.0.0.0:12201" + mode: udp + decoding: + codec: gelf + framing: + method: chunked_gelf + # Auto-detect compression (gzip, zlib, or uncompressed) + decompression: Auto + +transforms: 
+ # Process and enrich the logs + process_logs: + type: remap + inputs: ["docker_gelf"] + source: | + if !exists(.host) { + .host = get_hostname!() + } + + # Map short_message to message for Loki compatibility + if exists(.short_message) { + .message = .short_message + } + + # Handle container name - GELF uses _container_name (with underscore prefix) + if exists(._container_name) { + .container_name = ._container_name + } else { + .container_name = "unknown" + } + + # Handle container ID + if exists(._container_id) { + .container_id = ._container_id + } else { + .container_id = "unknown" + } + + # Handle image name + if exists(._image_name) { + .image_name = ._image_name + } else { + .image_name = "unknown" + } + + # Add processing metadata + .processed_by = "vector" + +sinks: + # Send to Loki + loki: + type: loki + inputs: ["process_logs"] + endpoint: "http://loki:3100" + encoding: + codec: json + labels: + job: "docker" + source: "vector" + # Simple field references - defaults are set in transform above + host: "{{ host }}" + container_name: "{{ container_name }}" + # Remove label fields from the log line to avoid duplication + remove_label_fields: true + healthcheck: + enabled: true + + # Send to Graylog via GELF over UDP (not TCP with framing) + graylog: + type: socket + inputs: ["process_logs"] + address: "logging_graylog:12201" + mode: udp + encoding: + codec: gelf + healthcheck: + enabled: true + + # Temporary: Output to console for debugging + #console_debug: + # type: console + # inputs: ["process_logs"] + # encoding: + # codec: json + +# Global configuration +api: + enabled: true + address: "0.0.0.0:8686" + +data_dir: "/var/lib/vector"