Skip to content

Commit ffc51a7

Browse files
authored
Merge pull request #104 from opencrvs/fix-apm-service-name
fix: Improved Pod metrics visibility
2 parents 91abd1f + e5b82a8 commit ffc51a7

34 files changed

+225
-124
lines changed

.github/TEMPLATES/secret-mapping-deps.yml

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -25,24 +25,6 @@ postgres-admin-user:
2525
- POSTGRES_USER
2626
- POSTGRES_PASSWORD
2727

28-
# SMTP_HOST
29-
30-
# SMTP_PORT
31-
32-
# SMTP_USERNAME
33-
34-
# SMTP_PASSWORD
35-
36-
# SMTP_SECURE
37-
38-
# Whether or not your SMTP port requires TLS
39-
40-
# ALERT_EMAIL
41-
42-
# Email address or Slack channel address to send system technical alerts to.
43-
44-
# SENDER_EMAIL_ADDRESS
45-
4628

4729
# REPLICAS
4830

.github/TEMPLATES/secret-mapping-opencrvs.yml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,13 +35,14 @@ smtp-config:
3535
- SMTP_PORT
3636
- SMTP_SECURE
3737
- SMTP_USERNAME
38+
- ALERT_EMAIL
39+
40+
# TODO:
3841

3942
# CONTENT_SECURITY_POLICY_WILDCARD
4043

4144
# This string is supplied to the clients and the nginx config so that the format of your domain above can be configured for CORS purposes.
4245

4346
# ACTIVATE_USERS
44-
4547
# NOTIFICATION_TRANSPORT
46-
4748
# A property used to configure Email, SMS, or both for staff and beneficiary communications.

.github/workflows/k8s-reset-data.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ jobs:
6767
-s templates/postgres-on-update-core.yaml \
6868
oci://ghcr.io/opencrvs/opencrvs-services | kubectl apply -n ${namespace} --wait=true -f -;
6969
kubectl wait --for=condition=complete job/postgres-on-update-core -n ${namespace} --timeout=600s || true
70-
kubectl logs job/postgres-on-update-core -f --all-containers=true -n ${namespace}; || true
70+
kubectl logs job/postgres-on-update-core -f --all-containers=true -n ${namespace} || true
7171
- name: Re-run postgres-data-migration
7272
run: |
7373
kubectl delete job -n ${namespace} --ignore-not-found=true postgres-data-migration;

charts/dependencies/README.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,18 @@ elasticsearch:
266266
use_default_credentials: false
267267
```
268268

269+
For backward compatibility, the `HTTP_POST2_ALERT_URL` environment variable needs to be added to the elastalert configuration. All alerts will be sent to the country config service and forwarded to an email address.
270+
271+
See example:
272+
```yaml
273+
elastalert:
274+
env:
275+
HTTP_POST2_ALERT_URL: http://countryconfig.opencrvs-dev.svc.cluster.local:3040/email
276+
```
277+
278+
> NOTE: This behavior will be changed in future releases, see [#10608](https://github.com/opencrvs/opencrvs-core/issues/10608)
279+
280+
269281
## Backup Configuration
270282

271283
The dependencies chart includes a built-in backup feature that supports automated backups for internal components. Backups are stored on an external server via an SSH connection.

charts/dependencies/files/beats/rollover-policy.json renamed to charts/dependencies/files/beats/filebeat-rollover-policy.json

File renamed without changes.

charts/dependencies/files/beats/filebeat.yml

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,12 @@ filebeat.autodiscover:
1616
- type: kubernetes
1717
hints.enabled: true
1818

19+
filebeat.modules:
20+
- module: system
21+
syslog:
22+
enabled: true
23+
auth:
24+
enabled: true
1925
#================================ Processors ===================================
2026
processors:
2127
- add_kubernetes_metadata: ~
@@ -66,7 +72,11 @@ processors:
6672
event.Put("level_name", levelName);
6773
event.Put("log.level", levelName); // ECS standard
6874
}
69-
75+
}
76+
- script:
77+
lang: javascript
78+
source: >
79+
function process(event) {
7080
// Kubernetes correlation
7181
var k8s = event.Get("kubernetes");
7282
if (k8s) {
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
{
2+
"policy": {
3+
"phases": {
4+
"hot": {
5+
"actions": {
6+
"rollover": {
7+
"max_size": "2GB",
8+
"max_age": "1d"
9+
},
10+
"set_priority": {
11+
"priority": 100
12+
}
13+
}
14+
},
15+
"warm": {
16+
"min_age": "3d",
17+
"actions": {
18+
"allocate": {
19+
"number_of_replicas": 0
20+
},
21+
"forcemerge": {
22+
"max_num_segments": 1
23+
},
24+
"set_priority": {
25+
"priority": 50
26+
}
27+
}
28+
},
29+
"cold": {
30+
"min_age": "7d",
31+
"actions": {
32+
"allocate": {
33+
"number_of_replicas": 0
34+
},
35+
"set_priority": {
36+
"priority": 0
37+
}
38+
}
39+
},
40+
"delete": {
41+
"min_age": "30d",
42+
"actions": {
43+
"delete": {}
44+
}
45+
}
46+
},
47+
"_meta": {
48+
"managed": true,
49+
"description": "ILM policy using the hot, warm, cold and delete phases with a retention of 30 days"
50+
}
51+
}
52+
}
Lines changed: 39 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -1,94 +1,42 @@
1-
# This Source Code Form is subject to the terms of the Mozilla Public
2-
# License, v. 2.0. If a copy of the MPL was not distributed with this
3-
# file, You can obtain one at https://mozilla.org/MPL/2.0/.
4-
#
5-
# OpenCRVS is also distributed under the terms of the Civil Registration
6-
# & Healthcare Disclaimer located at http://opencrvs.org/license.
7-
#
8-
# Copyright (C) The OpenCRVS Authors located at https://github.com/opencrvs/opencrvs-core/blob/master/AUTHORS.
9-
---
10-
#-------------------------------- Autodiscovery -------------------------------
11-
# metricbeat.autodiscover:
12-
# providers:
13-
# - type: kubernetes
14-
# node: ${NODE_IP}
15-
# hints.enabled: true
1+
system.hostfs: /hostfs
162

173
metricbeat.modules:
18-
#------------------------------- System Module -------------------------------
19-
- module: system
20-
metricsets:
21-
- cpu
22-
- load
23-
- memory
24-
- network
25-
- process
26-
- process_summary
27-
- core
28-
- diskio
29-
- socket
30-
processes: ['.*']
31-
process.include_top_n:
32-
by_cpu: 5
33-
by_memory: 5
34-
period: 10s
35-
cpu.metrics: ['percentages']
36-
core.metrics: ['percentages']
37-
38-
- module: system
39-
period: 1m
40-
metricsets:
41-
- filesystem
42-
- fsstat
43-
processors:
44-
- drop_event.when.regexp:
45-
system.filesystem.mount_point: '^/(sys|cgroup|proc|dev|etc|host|lib)($|/)'
46-
47-
- module: system
48-
period: 15m
49-
metricsets:
50-
- uptime
51-
52-
#------------------------------- Kubernetes Module -------------------------------
53-
# Kubernetes pod metrics
54-
- module: kubernetes
55-
period: 10s
56-
host: ${NODE_IP}
57-
hosts: ["https://${NODE_IP}:10250"]
58-
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
59-
ssl.verification_mode: "none"
60-
metricsets: ["node", "pod", "container", "volume"]
61-
62-
#================================ Processors ===================================
4+
- module: kubernetes
5+
period: 10s
6+
host: ${NODE_NAME}
7+
hosts: ["https://${NODE_IP}:10250"]
8+
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
9+
ssl.verification_mode: "none"
10+
metricsets:
11+
- node
12+
- pod
13+
- container
14+
- volume
15+
- system
16+
add_metadata: true
17+
- module: system
18+
metricsets:
19+
- cpu
20+
- memory
21+
- network
22+
- process
23+
- diskio
24+
period: 10s
25+
- module: system
26+
metricsets:
27+
- filesystem
28+
period: 60s
29+
processors:
30+
- drop_event.when.regexp:
31+
system.filesystem.mount_point: "^/hostfs/run"
6332
processors:
64-
- add_fields:
65-
target: orchestrator.cluster.name
66-
fields:
67-
name: opencrvs-k8s
68-
33+
- add_host_metadata: ~
6934
- add_kubernetes_metadata:
70-
host: ${NODE_IP}
71-
- add_locale:
72-
format: offset
73-
- add_host_metadata:
74-
netinfo.enabled: true
75-
# # Ensure proper field mapping for Infrastructure correlation
76-
- script:
77-
lang: javascript
78-
source: >
79-
function process(event) {
80-
var k8s = event.Get("kubernetes");
81-
if (k8s && k8s.pod && k8s.pod.name) {
82-
event.Put("host.name", k8s.pod.name);
83-
event.Put("host.hostname", k8s.pod.name);
84-
if (k8s.container && k8s.container.name) {
85-
event.Put("container.name", k8s.container.name);
86-
}
87-
if (k8s && k8s.labels && k8s.labels["service_name"]) {
88-
event.Put("service.name", k8s.labels["service_name"]);
89-
}
90-
}
91-
}
35+
host: ${NODE_NAME}
36+
# Enable more indexers for better pod correlation
37+
default_indexers.enabled: true
38+
default_matchers.enabled: true
39+
9240
#========================== Elasticsearch output ===============================
9341
output.elasticsearch:
9442
hosts: ['${ELASTICSEARCH_HOST}']
@@ -97,7 +45,7 @@ output.elasticsearch:
9745

9846
#============================== Dashboards =====================================
9947
setup.dashboards:
100-
enabled: true
48+
enabled: false
10149

10250
#============================== Kibana =========================================
10351
setup.kibana:
@@ -120,4 +68,6 @@ setup.ilm.overwrite: true
12068

12169
#============================== Logging ===============================
12270
logging.level: info
123-
logging.to_stderr: true
71+
logging.selectors: ["*"]
72+
logging.metrics.enabled: true
73+
logging.to_files: false

charts/dependencies/files/elastalert/elastalert.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,5 +20,7 @@ es_port: "${ES_HOST}"
2020
# es_password: <passed as environment variables>
2121
writeback_index: elastalert_status
2222

23+
http_post2_url: "${HTTP_POST2_ALERT_URL}"
24+
2325
alert_time_limit:
2426
days: 2

charts/dependencies/files/elastalert/rules/alert.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ filter:
3636
minimum_should_match: 1
3737

3838
alert: post2
39-
http_post2_url: 'http://countryconfig:3040/email'
39+
http_post2_url: '${HTTP_POST2_ALERT_URL}'
4040
http_post2_payload:
4141
subject: '{% raw %}{{DOMAIN}}{% endraw %} {{kibana.alert.context.metrics__alert__inventory__threshold.alertState}}: {{rule.name}} 🚨'
4242
html: 'Reason: {{kibana.alert.context.metrics__alert__inventory__threshold.reason}}. Login to https://kibana.{% raw %}{{DOMAIN}}{% endraw %} to view the alert.'

0 commit comments

Comments
 (0)