Skip to content
Closed
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
2249efc
Refactor ops-traefik - Remove custom docker compose overwrites, add c…
mrnicegyu11 May 12, 2025
df6482b
remove ununsed env-vars
mrnicegyu11 May 12, 2025
a35cea8
rename log-level env-vars
mrnicegyu11 May 12, 2025
fb5b138
Merge remote-tracking branch 'upstream/main' into 2025/refactor/traefik
mrnicegyu11 May 13, 2025
5234228
only bind ports when needed
mrnicegyu11 May 13, 2025
70c8908
Add env_file back to traefik compose spec
mrnicegyu11 May 13, 2025
1cf605d
Merge remote-tracking branch 'upstream/main' into 2025/add/fluentd
mrnicegyu11 May 14, 2025
bcc67d4
wip
mrnicegyu11 May 14, 2025
4fa4f26
Merge remote-tracking branch 'upstream/main' into 2025/refactor/traefik
mrnicegyu11 May 20, 2025
858c82d
Merge remote-tracking branch 'upstream/main' into 2025/refactor/traefik
mrnicegyu11 May 21, 2025
57947e3
Merge remote-tracking branch 'upstream/main' into 2025/add/fluentd
mrnicegyu11 May 21, 2025
88e4ed5
Merge remote-tracking branch 'upstream/main' into 2025/add/fluentd
mrnicegyu11 May 28, 2025
1eba82c
Merge branch '2025/add/fluentd' into 2025/refactor/traefik
mrnicegyu11 May 28, 2025
236dda9
Merge branch '2025/add/traefikOpenTelemetry' into 2025/refactor/traefik
mrnicegyu11 Jul 3, 2025
de1e17e
Kubernetes: add local storage (#1100)
YuryHrytsuk Jul 3, 2025
87875e0
Merge branch '2025/add/traefikOpenTelemetry' into 2025/refactor/traefik
mrnicegyu11 Jul 3, 2025
b0f1710
Fix deploy_ops CD step - monitoring
mrnicegyu11 Jul 4, 2025
1970766
Merge remote-tracking branch 'upstream/main' into 2025/refactor/traefik
mrnicegyu11 Jul 4, 2025
221c930
wip
mrnicegyu11 Jul 8, 2025
0969ffe
wip
mrnicegyu11 Jul 8, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions scripts/deployments/deploy_everything_locally.bash
Original file line number Diff line number Diff line change
Expand Up @@ -243,9 +243,9 @@ if [ "$start_opsstack" -eq 0 ]; then
call_make "." up-"$stack_target";
popd

# -------------------------------- GRAYLOG -------------------------------
log_info "starting graylog..."
service_dir="${repo_basedir}"/services/graylog
# -------------------------------- LOGGING -------------------------------
log_info "starting logging..."
service_dir="${repo_basedir}"/services/logging
pushd "${service_dir}"
call_make "." up-"$stack_target"
sleep 1
Expand Down
Binary file removed services/graylog/GraylogWorkflow.png
Binary file not shown.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
version: '3.7'
version: '3.8'
services:
mongodb:
deploy:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
version: "3.7"
version: "3.8"
services:
mongodb:
deploy:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
version: '3.7'
version: '3.8'
services:
graylog:
deploy:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
version: '3.7'
version: '3.8'
services:
graylog:
deploy:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
version: "3.7"
version: "3.8"
services:
mongodb:
deploy:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
version: "3.7"
version: "3.8"
services:
mongodb:
deploy:
Expand Down
198 changes: 198 additions & 0 deletions services/logging/docker-compose.yml.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,198 @@
version: "3.8"
Copy link
Collaborator

@YuryHrytsuk YuryHrytsuk Jul 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this will not work in CI (aka on machines), as our servers have a different docker compose version. It will probably cause a version mismatch error and fail in the deploy_ops CI.

I propose to ditch this label altogether. `version` is deprecated and is not used anymore (except that, when defined, it may cause errors).

See https://docs.docker.com/reference/compose-file/version-and-name/#version-top-level-element-obsolete

Copy link
Collaborator

@YuryHrytsuk YuryHrytsuk Jul 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See example of no version definition in the newest metabase stack ITISFoundation#1093. Works like a charm 🚀

services:
# MongoDB service (image documentation: https://hub.docker.com/_/mongo/)
mongodb:
  image: mongo:6.0.6
  init: true
  networks:
    graylog:
      aliases:
        # alias required by the graylog configuration
        - mongo
  volumes:
    # named volume so the database files persist
    - mongo_data:/data/db
  deploy:
    replicas: 1
    restart_policy:
      condition: on-failure
    resources:
      reservations:
        cpus: '0.1'
        memory: 300M
      limits:
        cpus: '1'
        memory: 1.2G

# Elasticsearch (OSS 7.10.2):
# https://www.elastic.co/guide/en/elasticsearch/reference/7.10/docker.html
elasticsearch:
  image: docker.elastic.co/elasticsearch/elasticsearch-oss:7.10.2
  init: true
  environment:
    - http.host=0.0.0.0
    - transport.host=localhost
    - network.host=0.0.0.0
    # fixed JVM heap: initial and maximum size are both 512 MB
    - "ES_JAVA_OPTS=-Xms512m -Xmx512m"
  volumes:
    # named volume so the index data persists
    - elasticsearch_data:/usr/share/elasticsearch/data
  networks:
    - graylog
  deploy:
    replicas: 1
    restart_policy:
      condition: on-failure
    resources:
      reservations:
        cpus: '0.1'
        memory: 1G
      limits:
        cpus: '2'
        memory: 2G
# Graylog: https://hub.docker.com/r/graylog/graylog/
# Reachable through traefik under https://${MONITORING_DOMAIN}/graylog and
# listening for GELF traffic on the published UDP ports below.
graylog:
  image: graylog/graylog:6.0.5
  init: true
  # user: "1000:1001"
  configs:
    # content pack shipped as a swarm config (declared in the top-level `configs`)
    - source: graylog_config
      target: /files/osparc-custom-content-pack-v2.json
  volumes:
    # Mount local configuration directory into Docker container
    # - graylog_config:/usr/share/graylog/data/config
    # data persistency
    - graylog_journal:/usr/share/graylog/data/journal
  env_file:
    - .env
  environment:
    # CHANGE ME (must be at least 16 characters)!
    - GRAYLOG_PASSWORD_SECRET=${GRAYLOG_PASSWORD_SECRET}
    # Username: admin
    - GRAYLOG_ROOT_PASSWORD_SHA2=${GRAYLOG_ROOT_PASSWORD_SHA2}
    - GRAYLOG_HTTP_EXTERNAL_URI=${GRAYLOG_HTTP_EXTERNAL_URI}
    # comma-separated list of Elasticsearch URLs; only one node here, so no
    # trailing separator (it would add an empty list element)
    - GRAYLOG_ELASTICSEARCH_HOSTS=http://elasticsearch:9200
  networks:
    public:
    monitoring:
    graylog:
      aliases:
        - graylog
  ports:
    # quoted so the mappings are always read as strings
    - "12201:12201/udp"
    - "12202:12202/udp"
  deploy:
    replicas: 1
    restart_policy:
      condition: on-failure
    resources:
      limits:
        cpus: "2.00"
        memory: 5G
      reservations:
        cpus: "0.1"
        memory: 1G
    labels:
      - traefik.enable=true
      - traefik.docker.network=${PUBLIC_NETWORK}
      # direct access through port
      - traefik.http.services.graylog.loadbalancer.server.port=9000
      - traefik.http.routers.graylog.rule=Host(`${MONITORING_DOMAIN}`) && PathPrefix(`/graylog`)
      - traefik.http.routers.graylog.entrypoints=https
      - traefik.http.routers.graylog.tls=true
      # rewrite /graylog/<rest> to /<rest>; `$$` is the Compose escape for a literal `$`
      - traefik.http.middlewares.graylog_replace_regex.replacepathregex.regex=^/graylog/?(.*)$$
      - traefik.http.middlewares.graylog_replace_regex.replacepathregex.replacement=/$${1}
      - traefik.http.routers.graylog.middlewares=ops_whitelist_ips@swarm, ops_gzip@swarm, graylog_replace_regex
fluentd:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm, is this the "refactor OPS traefik" PR or the "introduce fluentd / loki" PR? 🤔

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is true, I made a bad merge and everything got mixed up :O I will make a fresh PR for traefik, sorry @YuryHrytsuk

image: itisfoundation/fluentd:v1.16.8-1.0
configs:
  # pipeline definition, declared in the top-level `configs` section
  - source: fluentd_config
    target: /fluentd/etc/fluent.conf
environment:
  - GRAYLOG_HOST=graylog
  - GRAYLOG_PORT=12201
  - LOKI_URL=http://loki:3100
  # the raw/endraw pair keeps Jinja from expanding the {{...}} placeholder,
  # so it reaches the rendered compose file unchanged
  - FLUENTD_HOSTNAME={% raw %}{{.Node.Hostname}}{% endraw %}
networks:
  - monitoring
  - graylog
ports:
  # same port as the `forward` input in fluent.conf
  - "24224:24224/tcp"
healthcheck:
  # queries the monitor_agent endpoint configured in fluent.conf (port 24220)
  test:
    - "CMD"
    - "curl"
    - "-f"
    - "http://0.0.0.0:24220/api/plugins"
  start_period: 40s
  interval: 30s
  timeout: 10s
  retries: 3
deploy:
  #mode: global # Run on all nodes
  restart_policy:
    condition: on-failure
  update_config:
    order: start-first
    parallelism: 1
    delay: 10s
  resources:
    reservations:
      cpus: "0.5"
      memory: 512M
    limits:
      cpus: "1.0"
      memory: 1G

# Loki, started with the configuration file provided as a swarm config.
# NOTE(review): the top-level `loki-data` volume is declared in this file but
# is not mounted here - confirm whether Loki's storage is meant to persist.
loki:
  image: grafana/loki:3.5.0
  command: -config.file=/etc/loki/loki.yaml
  configs:
    - source: loki_config
      target: /etc/loki/loki.yaml
  networks:
    - monitoring
  healthcheck:
    # probes the /ready endpoint on port 3100 without downloading a body
    test:
      - "CMD"
      - "wget"
      - "--no-verbose"
      - "--tries=1"
      - "--spider"
      - "http://0.0.0.0:3100/ready"
    start_period: 40s
    interval: 30s
    timeout: 10s
    retries: 3
  deploy:
    replicas: 1
    placement:
      constraints: []
    restart_policy:
      condition: any
      delay: 5s
    update_config:
      order: start-first
      parallelism: 1
      delay: 10s
    resources:
      reservations:
        cpus: "0.5"
        memory: 1G
      limits:
        cpus: "1.0"
        memory: 2G


# Named volumes used by the services above.
volumes:
  # NOTE(review): no service in this file mounts `loki-data` - confirm intent
  loki-data:
  mongo_data:
  elasticsearch_data:
  graylog_journal:

networks:
  # stack-internal network
  graylog:
  # pre-existing networks, looked up by the names given in the environment
  public:
    name: ${PUBLIC_NETWORK}
    external: true
  monitoring:
    name: ${MONITORED_NETWORK}
    external: true

# Each config name embeds the first 10 characters of the file's sha256, so a
# changed file results in a differently named config.
configs:
  graylog_config:
    file: ./data/contentpacks/osparc-custom-content-pack-v2.json
    name: ${STACK_NAME}_graylog_config_{{ "./data/contentpacks/osparc-custom-content-pack-v2.json" | sha256file | substring(0,10) }}
  fluentd_config:
    file: ./fluentd/fluent.conf
    name: ${STACK_NAME}_fluentd_config_{{ "./fluentd/fluent.conf" | sha256file | substring(0,10) }}
  loki_config:
    file: ./loki.yaml
    name: ${STACK_NAME}_loki_config_{{ "./loki.yaml" | sha256file | substring(0,10) }}
26 changes: 26 additions & 0 deletions services/logging/fluentd/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Fluentd image extended with the Loki, GELF and Prometheus plugins.
FROM fluent/fluentd:v1.16.8-1.0

# Package and gem installation needs root; privileges are dropped again below.
USER root

# Install dependencies and plugins.
# Build tools go into the virtual package `.build-deps` so they can be removed
# in the same layer; curl and jq are re-added because the health check uses them.
RUN apk add --no-cache --update --virtual .build-deps \
        sudo build-base ruby-dev curl \
    && sudo gem install fluent-plugin-grafana-loki \
    && sudo gem install fluent-plugin-gelf-best \
    && sudo gem install fluent-plugin-prometheus \
    && apk del .build-deps \
    && apk add --no-cache curl jq \
    && rm -rf /var/cache/apk/* \
    && rm -rf /tmp/* /var/tmp/* /usr/lib/ruby/gems/*/cache/*.gem

# Create directories with appropriate permissions
# (buffer and log paths referenced by fluent.conf)
RUN mkdir -p /fluentd/buffer /fluentd/log \
    && chown -R fluent:fluent /fluentd/buffer /fluentd/log

# Health check: healthy when monitor_agent reports at least one plugin.
# The `.json` endpoint is required because jq can only parse JSON (the plain
# /api/plugins endpoint answers in LTSV); `-f` makes curl fail on HTTP errors.
HEALTHCHECK --interval=30s --timeout=30s --retries=3 \
    CMD curl -fsS http://localhost:24220/api/plugins.json | jq -e '.plugins | length > 0' || exit 1

USER fluent

ENTRYPOINT ["fluentd", "-c", "/fluentd/etc/fluent.conf"]
105 changes: 105 additions & 0 deletions services/logging/fluentd/fluent.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
# HTTP monitoring endpoint (monitor_agent), all interfaces, port 24220
<source>
  @type monitor_agent
  port 24220
  bind 0.0.0.0
</source>

# Prometheus metrics endpoint, all interfaces, port 24231
<source>
  @type prometheus
  port 24231
  bind 0.0.0.0
  metrics_path metrics
</source>

# Uncomment to raise Fluentd's own log level to debug
#<system>
# log_level debug
#</system>

# Output-plugin metrics for Prometheus, collected every 10 (interval),
# labelled with the hostname
<source>
  @type prometheus_output_monitor
  interval 10
  <labels>
    hostname ${hostname}
  </labels>
</source>

# Log input: forward protocol on port 24224, all interfaces
# (receives the logs from Docker containers)
<source>
  @type forward
  bind 0.0.0.0
  port 24224
</source>

# Add additional metadata
# Applies to every event whose tag matches docker.** and adds three fields:
#   hostname          - value returned by Ruby's Socket.gethostname
#   fluentd_hostname  - value of the FLUENTD_HOSTNAME environment variable
#   tag               - the event's own tag
<filter docker.**>
@type record_transformer
<record>
hostname "#{Socket.gethostname}"
fluentd_hostname "#{ENV['FLUENTD_HOSTNAME']}"
tag ${tag}
</record>
</filter>

# Output to both Graylog (GELF) and Loki.
# Every event tagged docker.** is copied to each <store> below. Both stores use
# a file buffer under /fluentd/buffer and a JSON file under /fluentd/log as
# their <secondary> output.
<match docker.**>
  @type copy

  # Output to Graylog using GELF.
  # Host and port are read from GRAYLOG_HOST / GRAYLOG_PORT (the same way the
  # Loki store reads LOKI_URL); when a variable is unset the previous
  # hard-coded value is used, so existing deployments behave as before.
  <store>
    @type gelf
    host "#{ENV['GRAYLOG_HOST'] || 'graylog'}"
    port "#{ENV['GRAYLOG_PORT'] || '12201'}"
    protocol udp
    add_msec_time true
    flush_interval 5s
    <buffer>
      @type file
      path /fluentd/buffer/graylog
      flush_thread_count 8
      flush_interval 5s
      retry_forever true
      retry_max_interval 30
      chunk_limit_size 8M
      total_limit_size 1G
    </buffer>
    <secondary>
      @type file
      path /fluentd/log/graylog-error
      append true
      <format>
        @type json
      </format>
    </secondary>
  </store>

  # Output to Loki (URL taken from the LOKI_URL environment variable)
  <store>
    @type loki
    url "#{ENV['LOKI_URL']}"
    extra_labels {"job": "docker"}
    line_format json
    username ""
    password ""
    flush_interval 5s
    <buffer>
      @type file
      path /fluentd/buffer/loki
      flush_thread_count 8
      flush_interval 5s
      retry_forever true
      retry_max_interval 30
      chunk_limit_size 8M
      total_limit_size 1G
    </buffer>
    <secondary>
      @type file
      path /fluentd/log/loki-error
      append true
      <format>
        @type json
      </format>
    </secondary>
  </store>
</match>
Loading