Skip to content

Commit f734c8d

Browse files
committed
WIP
1 parent 74f08e4 commit f734c8d

File tree

14 files changed

+346
-286
lines changed

14 files changed

+346
-286
lines changed

.github/workflows/helm-tests.yml

Lines changed: 19 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ jobs:
2626
cd charts
2727
helm unittest eoapi -f 'tests/*.yaml' -v eoapi/test-helm-values.yaml
2828
# Run autoscaling-specific unit tests
29-
helm unittest eoapi -f 'tests/autoscaling_tests.yaml' -v eoapi/test-autoscaling-values.yaml
29+
helm unittest eoapi -f 'tests/autoscaling_tests.yaml' -v '../docs/examples/values-autoscaling.yaml'
3030
# Run observability chart tests if they exist
3131
if [ -d "eoapi-observability/tests" ]; then
3232
helm unittest eoapi-observability -f 'tests/*.yaml'
@@ -128,6 +128,7 @@ jobs:
128128
./scripts/test.sh integration --debug
129129
130130
- name: Debug deployment status
131+
id: watchservices
131132
if: always()
132133
run: |
133134
echo "=== Final Deployment Status ==="
@@ -138,28 +139,28 @@ jobs:
138139
echo ""
139140
140141
echo "Waiting for raster service to be ready..."
141-
kubectl wait --for=condition=Ready pod -l app=${RELEASE_NAME}-raster --timeout=180s || {
142+
kubectl wait --for=condition=Ready pod -l app="${RELEASE_NAME}"-raster --timeout=180s || {
142143
echo "Raster service failed to become ready. Checking status..."
143-
kubectl get pods -l app=${RELEASE_NAME}-raster -o wide
144-
kubectl describe pods -l app=${RELEASE_NAME}-raster
144+
kubectl get pods -l app="${RELEASE_NAME}"-raster -o wide
145+
kubectl describe pods -l app="${RELEASE_NAME}"-raster
145146
exit 1
146147
}
147148
echo "raster service is ready, moving on..."
148149
149150
echo "Waiting for vector service to be ready..."
150-
kubectl wait --for=condition=Ready pod -l app=${RELEASE_NAME}-vector --timeout=180s || {
151+
kubectl wait --for=condition=Ready pod -l app="${RELEASE_NAME}"-vector --timeout=180s || {
151152
echo "Vector service failed to become ready. Checking status..."
152-
kubectl get pods -l app=${RELEASE_NAME}-vector -o wide
153-
kubectl describe pods -l app=${RELEASE_NAME}-vector
153+
kubectl get pods -l app="${RELEASE_NAME}"-vector -o wide
154+
kubectl describe pods -l app="${RELEASE_NAME}"-vector
154155
exit 1
155156
}
156157
echo "vector service is ready, moving on..."
157158
158159
echo "Waiting for stac service to be ready..."
159-
kubectl wait --for=condition=Ready pod -l app=${RELEASE_NAME}-stac --timeout=180s || {
160+
kubectl wait --for=condition=Ready pod -l app="${RELEASE_NAME}"-stac --timeout=180s || {
160161
echo "STAC service failed to become ready. Checking status..."
161-
kubectl get pods -l app=${RELEASE_NAME}-stac -o wide
162-
kubectl describe pods -l app=${RELEASE_NAME}-stac
162+
kubectl get pods -l app="${RELEASE_NAME}"-stac -o wide
163+
kubectl describe pods -l app="${RELEASE_NAME}"-stac
163164
exit 1
164165
}
165166
echo "all services are ready, moving on..."
@@ -181,20 +182,20 @@ jobs:
181182
# Check init container logs for all services
182183
for SERVICE in raster vector stac multidim; do
183184
echo "===== $SERVICE Service Pod Status ====="
184-
kubectl get pods -l app=$RELEASE_NAME-$SERVICE -o wide || echo "No $SERVICE pods found"
185+
kubectl get pods -l app="$RELEASE_NAME"-"$SERVICE" -o wide || echo "No $SERVICE pods found"
185186
186-
POD_NAME=$(kubectl get pod -l app=$RELEASE_NAME-$SERVICE -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || echo "")
187+
POD_NAME=$(kubectl get pod -l app="$RELEASE_NAME"-"$SERVICE" -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || echo "")
187188
if [ -n "$POD_NAME" ]; then
188189
echo "===== $SERVICE Pod ($POD_NAME) Init Container Logs ====="
189-
kubectl logs pod/$POD_NAME -c wait-for-pgstac-jobs --tail=100 || echo "Could not get $SERVICE init container logs"
190+
kubectl logs pod/"$POD_NAME" -c wait-for-pgstac-jobs --tail=100 || echo "Could not get $SERVICE init container logs"
190191
echo ""
191192
192193
echo "===== $SERVICE Pod ($POD_NAME) Main Container Logs ====="
193-
kubectl logs pod/$POD_NAME --tail=100 || echo "Could not get $SERVICE main container logs"
194+
kubectl logs pod/"$POD_NAME" --tail=100 || echo "Could not get $SERVICE main container logs"
194195
echo ""
195196
196197
echo "===== $SERVICE Pod ($POD_NAME) Description ====="
197-
kubectl describe pod/$POD_NAME
198+
kubectl describe pod/"$POD_NAME"
198199
echo ""
199200
fi
200201
done
@@ -206,8 +207,8 @@ jobs:
206207
207208
# Check pgstac jobs using labels instead of hardcoded names
208209
for APP_LABEL in pgstac-migrate pgstac-load-samples; do
209-
echo "===== Jobs with app=$RELEASE_NAME-$APP_LABEL Status ====="
210-
JOBS=$(kubectl get jobs -l app=$RELEASE_NAME-$APP_LABEL -o name 2>/dev/null || true)
210+
echo "===== Jobs with app=\"$RELEASE_NAME\"-\"$APP_LABEL\" Status ====="
211+
JOBS=$(kubectl get jobs -l app="$RELEASE_NAME"-"$APP_LABEL" -o name 2>/dev/null || true)
211212
if [ -n "$JOBS" ]; then
212213
for JOB in $JOBS; do
213214
echo "--- Job $JOB ---"
@@ -223,4 +224,4 @@ jobs:
223224
- name: Cleanup
224225
if: always()
225226
run: |
226-
helm uninstall "$RELEASE_NAME" || true
227+
helm uninstall "$RELEASE_NAME" || true

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1313
- Added code formatting and linting with pre-commit hooks [#283](https://github.com/developmentseed/eoapi-k8s/pull/283)
1414
- Added values.schema.json validation [#296](https://github.com/developmentseed/eoapi-k8s/pull/296)
1515

16+
### Changed
17+
18+
- Excluded renovate.json from CHANGELOG.md edits [#301](https://github.com/developmentseed/eoapi-k8s/pull/301)
19+
- Refactors eoapi-support into core eoapi chart [#262](https://github.com/developmentseed/eoapi-k8s/pull/262)
20+
21+
1622
## [0.7.8] - 2025-09-10
1723

1824
### Added
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
{{/*
2+
Shared configurations for observability components
3+
*/}}
4+
5+
{{/*
6+
Standard Grafana service configuration
7+
*/}}
8+
{{- define "observability.grafana.service" -}}
9+
type: {{ .Values.grafana.service.type | default "LoadBalancer" }}
10+
annotations:
11+
service.beta.kubernetes.io/aws-load-balancer-type: "nlb"
12+
service.beta.kubernetes.io/aws-load-balancer-internal: {{ .Values.grafana.service.internal | default "false" | quote }}
13+
{{- end -}}
14+
15+
{{/*
16+
Standard Grafana deployment configuration
17+
*/}}
18+
{{- define "observability.grafana.deployment" -}}
19+
persistence:
20+
enabled: {{ .Values.grafana.persistence.enabled | default false }}
21+
deploymentStrategy:
22+
type: Recreate
23+
rbac:
24+
namespaced: true
25+
pspEnabled: false
26+
initChownData:
27+
enabled: false
28+
{{- end -}}
29+
30+
{{/*
31+
Grafana resources based on observed usage patterns
32+
*/}}
33+
{{- define "observability.grafana.resources" -}}
34+
limits:
35+
cpu: 100m
36+
memory: 200Mi
37+
requests:
38+
cpu: 50m
39+
memory: 100Mi
40+
{{- end -}}
41+
42+
{{/*
43+
Common datasource configuration for Prometheus
44+
*/}}
45+
{{- define "observability.prometheus.datasource" -}}
46+
name: Prometheus
47+
type: prometheus
48+
url: {{ .Values.prometheus.url | default "http://eoapi-prometheus-server" }}
49+
access: proxy
50+
isDefault: true
51+
{{- end -}}
Lines changed: 28 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -1,60 +1,40 @@
11
######################
22
# EOAPI OBSERVABILITY
33
######################
4-
# This chart provides observability and dashboarding tools for eoAPI monitoring.
5-
# It expects a Prometheus instance to already be available (deployed by main eoapi chart or externally).
4+
# Observability tools for eoAPI monitoring
5+
# Expects Prometheus instance from main eoapi chart or external source
66

77
grafana:
88
enabled: true
9-
persistence:
10-
enabled: false
11-
deploymentStrategy:
12-
type: Recreate
9+
10+
# Service configuration
1311
service:
1412
type: LoadBalancer
1513
annotations:
1614
service.beta.kubernetes.io/aws-load-balancer-type: "nlb"
1715
service.beta.kubernetes.io/aws-load-balancer-internal: "false"
16+
17+
# Deployment configuration
18+
persistence:
19+
enabled: false
20+
deploymentStrategy:
21+
type: Recreate
1822
rbac:
1923
namespaced: true
2024
pspEnabled: false
21-
# initChownData refers to an init container enabled by default that isn't
22-
# needed as we don't reconfigure the linux user the grafana server will run as.
2325
initChownData:
2426
enabled: false
2527

26-
# Resources for grafana based on observed usage patterns
27-
# Memory use increases over time but stays reasonable below 200Mi
28-
# CPU use is minimal with peaks at up to 9m during dashboard browsing
28+
# Resources based on observed usage patterns
2929
resources:
3030
limits:
3131
cpu: 100m
3232
memory: 200Mi
3333
requests:
34-
cpu: 10m
35-
memory: 200Mi
34+
cpu: 50m
35+
memory: 100Mi
3636

37-
# Prometheus datasource configuration
38-
# Configure this to point to your Prometheus instance
39-
datasources:
40-
datasources.yaml:
41-
apiVersion: 1
42-
datasources:
43-
- name: prometheus
44-
45-
orgId: 1
46-
type: prometheus
47-
# Default: assumes Prometheus deployed by main eoapi chart in same namespace
48-
# Override prometheusUrl to point to external Prometheus if needed
49-
url: "{{ .Values.prometheusUrl | default (printf \"http://%s-prometheus-server.%s.svc.cluster.local\" .Release.Name .Release.Namespace) }}"
50-
access: proxy
51-
jsonData:
52-
timeInterval: "5s"
53-
isDefault: true
54-
editable: true
55-
version: 1
56-
57-
# Dashboard providers configuration
37+
# Dashboard provisioning
5838
dashboardProviders:
5939
dashboardproviders.yaml:
6040
apiVersion: 1
@@ -70,30 +50,21 @@ grafana:
7050

7151
# Dashboard ConfigMaps
7252
dashboardsConfigMaps:
73-
# References the ConfigMap created by templates/dashboard.config.yaml
7453
default: "{{ .Release.Name }}-dashboards"
7554

55+
# Prometheus datasource configuration
56+
datasources:
57+
datasources.yaml:
58+
apiVersion: 1
59+
datasources:
60+
- name: Prometheus
61+
type: prometheus
62+
url: "{{ .Values.prometheusUrl | default (printf \"http://%s-prometheus-server.%s.svc.cluster.local\" .Release.Name .Release.Namespace) }}"
63+
access: proxy
64+
isDefault: true
65+
jsonData:
66+
timeInterval: "5s"
67+
editable: true
68+
7669
# Prometheus connection configuration
77-
# Override this if connecting to external Prometheus instance
7870
prometheusUrl: ""
79-
80-
# Advanced Prometheus features (optional)
81-
# These can be enabled if you want additional Prometheus functionality
82-
# beyond what's provided by the main eoapi chart
83-
prometheus:
84-
enabled: false
85-
# If enabled, provides alertmanager functionality
86-
alertmanager:
87-
enabled: false
88-
# If enabled, provides pushgateway functionality
89-
prometheus-pushgateway:
90-
enabled: false
91-
# Prometheus server - only enable if you want a separate instance
92-
# for advanced monitoring beyond the core metrics in main chart
93-
server:
94-
enabled: false
95-
service:
96-
annotations:
97-
service.beta.kubernetes.io/aws-load-balancer-type: "nlb"
98-
service.beta.kubernetes.io/aws-load-balancer-internal: "false"
99-
type: LoadBalancer

charts/eoapi/Chart.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,20 +53,20 @@ dependencies:
5353
version: 5.7.4
5454
repository: "https://devseed.com/eoapi-k8s/"
5555
condition: postgrescluster.enabled
56-
56+
5757
# Optional monitoring components for metrics collection and autoscaling
5858
# These are disabled by default to keep deployments lightweight
5959
# Enable via: monitoring.prometheus.enabled=true, monitoring.metricsServer.enabled=true
6060
- name: metrics-server
6161
version: 7.2.8
6262
repository: https://charts.bitnami.com/bitnami
6363
condition: monitoring.metricsServer.enabled
64-
64+
6565
- name: prometheus
6666
version: 25.3.1
6767
repository: https://prometheus-community.github.io/helm-charts
6868
condition: monitoring.prometheus.enabled
69-
69+
7070
- name: prometheus-adapter
7171
version: 4.7.1
7272
repository: https://prometheus-community.github.io/helm-charts
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
{{/*
2+
Common monitoring configurations to avoid duplication across values files
3+
*/}}
4+
5+
{{/*
6+
Basic monitoring stack configuration
7+
*/}}
8+
{{- define "eoapi.monitoring.basic" -}}
9+
metricsServer:
10+
enabled: true
11+
apiService:
12+
create: true
13+
prometheus:
14+
enabled: true
15+
alertmanager:
16+
enabled: false
17+
prometheus-pushgateway:
18+
enabled: false
19+
kube-state-metrics:
20+
enabled: true
21+
prometheus-node-exporter:
22+
enabled: true
23+
resources: {{- include "eoapi.resources.small" . | nindent 6 }}
24+
server:
25+
service:
26+
type: ClusterIP
27+
{{- end -}}
28+
29+
{{/*
30+
Production monitoring with persistence
31+
*/}}
32+
{{- define "eoapi.monitoring.production" -}}
33+
metricsServer:
34+
enabled: true
35+
apiService:
36+
create: true
37+
prometheus:
38+
enabled: true
39+
alertmanager:
40+
enabled: true
41+
prometheus-pushgateway:
42+
enabled: false
43+
kube-state-metrics:
44+
enabled: true
45+
prometheus-node-exporter:
46+
enabled: true
47+
resources: {{- include "eoapi.resources.small" . | nindent 6 }}
48+
server:
49+
service:
50+
type: ClusterIP
51+
persistentVolume:
52+
enabled: true
53+
size: 10Gi
54+
{{- end -}}
55+
56+
{{/*
57+
Testing monitoring with minimal resources
58+
*/}}
59+
{{- define "eoapi.monitoring.testing" -}}
60+
metricsServer:
61+
enabled: true
62+
apiService:
63+
create: true
64+
prometheus:
65+
enabled: true
66+
alertmanager:
67+
enabled: false
68+
prometheus-pushgateway:
69+
enabled: false
70+
kube-state-metrics:
71+
enabled: true
72+
prometheus-node-exporter:
73+
enabled: true
74+
resources: {{- include "eoapi.resources.small" . | nindent 6 }}
75+
server:
76+
service:
77+
type: ClusterIP
78+
persistentVolume:
79+
enabled: false
80+
{{- end -}}

0 commit comments

Comments
 (0)