Skip to content

Commit 657a819

Browse files
committed
Merge remote-tracking branch 'upstream/main' into 2024/jaeger/increaseRAM
2 parents 60741e4 + 8126d0f commit 657a819

File tree

11 files changed

+138
-99
lines changed

11 files changed

+138
-99
lines changed

charts/Makefile

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,9 @@ helmfile-lint: .check-helmfile-installed helmfile.yaml ## Lints the helmfile
2727
.helmfile-local-post-install: ## Post install steps for local helmfile deployment
2828
@$(MAKE) -s configure-local-hosts
2929
@echo "";
30-
@echo "Cluster has been deployed locally: http://$(MACHINE_FQDN)";
30+
@echo "Cluster has been deployed locally: https://$(MACHINE_FQDN)";
3131
@echo " For secure connections self-signed certificates are used.";
32-
@echo "
32+
@echo "";
3333

3434
.PHONY: helmfile-apply
3535
helmfile-apply: .check-helmfile-installed helmfile.yaml ## Applies the helmfile configuration
@@ -41,7 +41,7 @@ helmfile-apply: .check-helmfile-installed helmfile.yaml ## Applies the helmfile
4141
fi
4242

4343
.PHONY: helmfile-sync
44-
helmfile-sync: .check-helmfile-installed helmfile.yaml ## Syncs the helmfile configuration
44+
helmfile-sync: .check-helmfile-installed helmfile.yaml ## Syncs the helmfile configuration (use `helmfile-apply` to deploy the app)
4545
set -a; source $(REPO_CONFIG_LOCATION); set +a; \
4646
helmfile -f $(REPO_BASE_DIR)/charts/helmfile.yaml sync
4747

@@ -64,3 +64,10 @@ helmfile-diff: .check-helmfile-installed helmfile.yaml ## Shows the differences
6464
helmfile-delete: .check-helmfile-installed helmfile.yaml ## Deletes the helmfile configuration
6565
@set -a; source $(REPO_CONFIG_LOCATION); set +a; \
6666
helmfile -f $(REPO_BASE_DIR)/charts/helmfile.yaml delete
67+
68+
.PHONY: up
69+
up: helmfile-apply ## Start the stack
70+
71+
.PHONY: leave
72+
leave: ## Leaves kind cluster
73+
kind delete clusters kind

charts/adminer/values.yaml.gotmpl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ ingress:
4949
enabled: true
5050
className: ""
5151
annotations:
52+
namespace: {{ .Release.Namespace }}
5253
cert-manager.io/cluster-issuer: "cert-issuer"
5354
traefik.ingress.kubernetes.io/router.entrypoints: websecure
5455
tls:

charts/cert-manager/values.selfsigned.yaml.gotmpl

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,12 @@ extraObjects:
2222
"helm.sh/hook": post-install,post-upgrade
2323
"helm.sh/hook-weight": "1"
2424
spec:
25+
secretTemplate:
26+
annotations:
27+
reflector.v1.k8s.emberstack.com/reflection-allowed: "true"
28+
reflector.v1.k8s.emberstack.com/reflection-allowed-namespaces: "" # Control destination namespaces: an empty string means all
29+
reflector.v1.k8s.emberstack.com/reflection-auto-enabled: "true" # Auto create reflection for matching namespaces
30+
reflector.v1.k8s.emberstack.com/reflection-auto-namespaces: "" # Control auto-reflection namespaces
2531
isCA: true
2632
commonName: local-ca
2733
subject:

charts/traefik/values.insecure.yaml.gotmpl

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,12 @@ extraObjects:
33
kind: Service
44
metadata:
55
name: traefik-api
6+
namespace: {{.Release.Namespace}}
67
spec:
78
type: ClusterIP
89
selector:
910
app.kubernetes.io/name: traefik
10-
app.kubernetes.io/instance: traefik-default
11+
app.kubernetes.io/instance: {{.Release.Namespace}}-traefik
1112
ports:
1213
- port: 8080
1314
name: traefik
@@ -17,7 +18,7 @@ extraObjects:
1718
kind: Secret
1819
metadata:
1920
name: traefik-authorized-users
20-
namespace: default
21+
namespace: {{.Release.Namespace}}
2122
data:
2223
users: |2
2324
{{ requiredEnv "TRAEFIK_K8S_AUTHORIZED_USER" }}
@@ -32,16 +33,18 @@ extraObjects:
3233
kind: Ingress
3334
metadata:
3435
name: traefik-dashboard
36+
namespace: {{.Release.Namespace}}
3537
annotations:
3638
traefik.ingress.kubernetes.io/router.entrypoints: web,websecure # allow http(s) for local deployment
37-
traefik.ingress.kubernetes.io/router.middlewares: default-traefik-basic-auth@kubernetescrd # namespace + middleware name
39+
traefik.ingress.kubernetes.io/router.middlewares: {{.Release.Namespace}}-traefik-basic-auth@kubernetescrd # namespace + middleware name
40+
cert-manager.io/cluster-issuer: "cert-issuer"
3841
spec:
3942
tls:
40-
- hosts:
41-
- k8s.monitoring.{{ requiredEnv "MACHINE_FQDN" }}
42-
secretName: monitoring-tls
43+
- hosts:
44+
- {{ requiredEnv "K8S_MONITORING_FQDN" }}
45+
secretName: monitoring-tls
4346
rules:
44-
- host: k8s.monitoring.{{ requiredEnv "MACHINE_FQDN" }}
47+
- host: {{ requiredEnv "K8S_MONITORING_FQDN" }}
4548
http:
4649
paths:
4750
- path: /dashboard
@@ -51,7 +54,7 @@ extraObjects:
5154
name: traefik-api
5255
port:
5356
name: traefik
54-
- host: k8s.monitoring.{{ requiredEnv "MACHINE_FQDN" }}
57+
- host: {{ requiredEnv "K8S_MONITORING_FQDN" }}
5558
http:
5659
paths:
5760
- path: /api

charts/traefik/values.secure.yaml.gotmpl

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,12 @@ extraObjects:
1010
kind: Service
1111
metadata:
1212
name: traefik-api
13+
namespace: {{.Release.Namespace}}
1314
spec:
1415
type: ClusterIP
1516
selector:
1617
app.kubernetes.io/name: traefik
17-
app.kubernetes.io/instance: traefik-default
18+
app.kubernetes.io/instance: {{.Release.Namespace}}-traefik
1819
ports:
1920
- port: 8080
2021
name: traefik
@@ -25,7 +26,7 @@ extraObjects:
2526
kind: Secret
2627
metadata:
2728
name: traefik-authorized-users
28-
namespace: default
29+
namespace: {{.Release.Namespace}}
2930
data:
3031
users: |2
3132
{{ requiredEnv "TRAEFIK_K8S_AUTHORIZED_USER" }}
@@ -53,9 +54,11 @@ extraObjects:
5354
kind: Ingress
5455
metadata:
5556
name: traefik-dashboard
57+
namespace: {{.Release.Namespace}}
5658
annotations:
5759
traefik.ingress.kubernetes.io/router.entrypoints: websecure
58-
traefik.ingress.kubernetes.io/router.middlewares: default-traefik-basic-auth@kubernetescrd
60+
traefik.ingress.kubernetes.io/router.middlewares: {{.Release.Namespace}}-traefik-basic-auth@kubernetescrd # namespace + middleware name
61+
cert-manager.io/cluster-issuer: "cert-issuer"
5962
spec:
6063
tls:
6164
- hosts:

charts/traefik/values.webinternal.yaml.gotmpl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,4 +9,4 @@ ports:
99
http3:
1010
enabled: false
1111
middlewares:
12-
- default-internal-ipallowlist@kubernetescrd
12+
- {{.Release.Namespace}}-internal-ipallowlist@kubernetescrd

services/graylog/scripts/configure.py

Lines changed: 84 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,17 @@
99
import requests
1010
import yaml
1111
from environs import Env, EnvError
12-
from tenacity import retry, stop_after_attempt, wait_random
12+
from requests.exceptions import HTTPError
13+
from tenacity import (
14+
before_log,
15+
retry,
16+
retry_if_exception_type,
17+
stop_after_attempt,
18+
wait_fixed,
19+
wait_random,
20+
)
1321
from yaml.loader import SafeLoader
1422

15-
RETRY_SLEEP_DURATION_SEC = 15
16-
1723
logging.basicConfig(level="INFO")
1824
logger = logging.getLogger()
1925

@@ -25,31 +31,44 @@
2531
env = Env()
2632
env.read_env("./../.env", recurse=False)
2733

34+
SUPPORTED_GRAYLOG_MAJOR_VERSION = 6
35+
36+
GRAYLOG_BASE_DOMAIN = "https://monitoring." + env.str("MACHINE_FQDN") + "/graylog"
37+
GRAYLOG_WAIT_ONLINE_TIMEOUT_SEC = env.int("GRAYLOG_WAIT_ONLINE_TIMEOUT_SEC")
38+
REQUESTS_AUTH = (env.str("SERVICES_USER"), env.str("SERVICES_PASSWORD"))
2839

29-
def log_attempt_number(retry_state):
30-
"""return the result of the last call attempt"""
31-
print(f"Retrying: {retry_state.attempt_number}...")
40+
GRAYLOG_LOG_MAX_DAYS_IN_STORAGE = env.int("GRAYLOG_LOG_MAX_DAYS_IN_STORAGE")
41+
GRAYLOG_LOG_MIN_DAYS_IN_STORAGE = env.int("GRAYLOG_LOG_MIN_DAYS_IN_STORAGE")
3242

3343

3444
@retry(
35-
stop=stop_after_attempt(10),
36-
wait=wait_random(min=1, max=10),
37-
after=log_attempt_number,
45+
stop=stop_after_attempt(GRAYLOG_WAIT_ONLINE_TIMEOUT_SEC / 5),
46+
wait=wait_fixed(5),
47+
retry=retry_if_exception_type(HTTPError),
48+
before=before_log(logger, logging.INFO),
3849
)
39-
def check_graylog_online():
40-
_url = "https://monitoring." + env.str("MACHINE_FQDN") + "/graylog/api/users"
41-
_urlhed = {"Content-Type": "application/json", "Accept": "application/json"}
42-
_session = requests.Session()
43-
_session.auth = (
44-
"admin",
45-
env.str("SERVICES_PASSWORD"),
46-
)
47-
_r = _session.get(_url, headers=_urlhed, verify=False)
48-
if _r.status_code != 401 and str(_r.status_code) != "200":
49-
print(_r.status_code)
50-
sleep(RETRY_SLEEP_DURATION_SEC)
51-
raise RuntimeError("Could not connect to graylog.")
52-
return True
50+
def wait_graylog_is_online():
51+
_r = requests.get(GRAYLOG_BASE_DOMAIN + "/api/system", auth=REQUESTS_AUTH)
52+
53+
if _r.status_code == 401:
54+
raise TypeError(f"Graylog unauthorized HTTP response: {_r}")
55+
56+
_r.raise_for_status()
57+
logger.info("Graylog is online")
58+
59+
60+
def validate_graylog_version_is_supported():
61+
_r = requests.get(GRAYLOG_BASE_DOMAIN + "/api/system", auth=REQUESTS_AUTH)
62+
_r.raise_for_status()
63+
64+
graylog_version = _r.json()["version"]
65+
major_version = int(graylog_version.split(".")[0])
66+
67+
if major_version != SUPPORTED_GRAYLOG_MAJOR_VERSION:
68+
raise TypeError(
69+
f"Graylog major version {major_version} is not supported by this script. "
70+
f"Supported version is {SUPPORTED_GRAYLOG_MAJOR_VERSION}"
71+
)
5372

5473

5574
@retry(stop=stop_after_attempt(5), wait=wait_random(min=1, max=10))
@@ -59,7 +78,6 @@ def get_graylog_inputs(_session, _headers, _url):
5978
# DEBUG
6079
if _r.status_code == 200:
6180
print("Successfully send GET /api/system/inputs")
62-
print("Graylog is online :)")
6381
return _r
6482
error_message = (
6583
"Error while sending GET /api/system/inputs. Status code of the request : "
@@ -121,54 +139,46 @@ def configure_email_notifications(_session, _headers):
121139

122140

123141
def configure_log_retention(_session, _headers):
124-
try:
125-
_url = (
126-
"https://monitoring."
127-
+ env.str("MACHINE_FQDN")
128-
+ "/graylog/api/system/indices/index_sets"
129-
)
130-
_r = _session.get(_url, headers=_headers, verify=False)
131-
index_of_interest = [
132-
index
133-
for index in _r.json()["index_sets"]
134-
if index["title"] == "Default index set"
135-
][0]
136-
index_of_interest[
137-
"rotation_strategy_class"
138-
] = "org.graylog2.indexer.rotation.strategies.TimeBasedRotationStrategy"
139-
# Rotate logs every day
140-
index_of_interest["rotation_strategy"] = {
141-
"rotation_period": "P1D",
142-
"type": "org.graylog2.indexer.rotation.strategies.TimeBasedRotationStrategyConfig",
143-
}
144-
index_of_interest["retention_strategy"] = {
145-
"max_number_of_indices": str(env.str("GRAYLOG_RETENTION_TIME_DAYS")),
146-
"type": "org.graylog2.indexer.retention.strategies.DeletionRetentionStrategyConfig",
147-
}
148-
_url = (
149-
"https://monitoring."
150-
+ env.str("MACHINE_FQDN")
151-
+ "/graylog/api/system/indices/index_sets"
152-
)
153-
raw_data = json.dumps(index_of_interest)
154-
_r = _session.put(
155-
_url + "/" + str(index_of_interest["id"]),
156-
headers=_headers,
157-
data=raw_data,
158-
verify=False,
159-
)
160-
if _r.status_code == 200:
161-
print("Log retention time successfully updated !")
162-
else:
163-
print(
164-
"Error updating log retention time! Status code of the request : "
165-
+ str(_r.status_code)
166-
+ " "
167-
+ r.text
168-
)
169-
except EnvError:
142+
_url = (
143+
"https://monitoring."
144+
+ env.str("MACHINE_FQDN")
145+
+ "/graylog/api/system/indices/index_sets"
146+
)
147+
_r = _session.get(_url, headers=_headers, verify=False)
148+
index_of_interest = [
149+
index
150+
for index in _r.json()["index_sets"]
151+
if index["title"] == "Default index set"
152+
][0]
153+
154+
# https://graylog.org/post/understanding-data-tiering-in-60-seconds/
155+
# https://community.graylog.org/t/graylog-6-0-data-tiering-for-rotation-and-retention/33302
156+
index_of_interest["use_legacy_rotation"] = False
157+
index_of_interest["data_tiering"] = {
158+
"type": "hot_only",
159+
"index_lifetime_min": f"P{GRAYLOG_LOG_MIN_DAYS_IN_STORAGE}D",
160+
"index_lifetime_max": f"P{GRAYLOG_LOG_MAX_DAYS_IN_STORAGE}D",
161+
}
162+
_url = (
163+
"https://monitoring."
164+
+ env.str("MACHINE_FQDN")
165+
+ "/graylog/api/system/indices/index_sets"
166+
)
167+
raw_data = json.dumps(index_of_interest)
168+
_r = _session.put(
169+
_url + "/" + str(index_of_interest["id"]),
170+
headers=_headers,
171+
data=raw_data,
172+
verify=False,
173+
)
174+
if _r.status_code == 200:
175+
print("Log retention time successfully updated !")
176+
else:
170177
print(
171-
"Setting retention time: GRAYLOG_RETENTION_TIME_DAYS not set or failed, default retention is used..."
178+
"Error updating log retention time! Status code of the request : "
179+
+ str(_r.status_code)
180+
+ " "
181+
+ r.text
172182
)
173183

174184
try:
@@ -380,21 +390,13 @@ def install_content_pack_revision(content_pack):
380390

381391

382392
if __name__ == "__main__":
383-
print(
384-
"Waiting for graylog to run for provisioning. This can take up to some minutes, please be patient..."
385-
)
386-
try:
387-
check_graylog_online()
388-
except RuntimeError as e:
389-
print(e)
390-
print("Exception or: Graylog is still not online.")
391-
print("Graylog script will now stop.")
392-
sys.exit(1)
393+
wait_graylog_is_online()
394+
validate_graylog_version_is_supported()
393395

394396
session = requests.Session()
395397
session.verify = False
396398
session.auth = (
397-
"admin",
399+
env.str("SERVICES_USER"),
398400
env.str("SERVICES_PASSWORD"),
399401
) # Graylog credentials are read from SERVICES_USER / SERVICES_PASSWORD
400402
hed = {

services/graylog/template.env

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,14 @@ GRAYLOG_TRANSPORT_EMAIL_FROM_EMAIL=${SMTP_USERNAME}
1616
GRAYLOG_SYSLOG_CAPTURE_PORT=${GRAYLOG_SYSLOG_CAPTURE_PORT}
1717
SERVICES_PASSWORD=${SERVICES_PASSWORD}
1818
SERVICES_USER=${SERVICES_USER}
19-
GRAYLOG_RETENTION_TIME_DAYS=${GRAYLOG_RETENTION_TIME_DAYS}
19+
2020
MACHINE_FQDN=${MACHINE_FQDN}
2121
GRAYLOG_SLACK_WEBHOOK_URL=${GRAYLOG_SLACK_WEBHOOK_URL}
2222
GRAYLOG_SLACK_WEBHOOK_ICON_URL=${GRAYLOG_SLACK_WEBHOOK_ICON_URL}
2323
GRAYLOG_SLACK_WEBHOOK_CHANNEL=${GRAYLOG_SLACK_WEBHOOK_CHANNEL}
2424

25+
GRAYLOG_WAIT_ONLINE_TIMEOUT_SEC=${GRAYLOG_WAIT_ONLINE_TIMEOUT_SEC}
26+
GRAYLOG_LOG_MAX_DAYS_IN_STORAGE=${GRAYLOG_LOG_MAX_DAYS_IN_STORAGE}
27+
GRAYLOG_LOG_MIN_DAYS_IN_STORAGE=${GRAYLOG_LOG_MIN_DAYS_IN_STORAGE}
28+
2529
PUBLIC_NETWORK=${PUBLIC_NETWORK}

0 commit comments

Comments
 (0)