Skip to content

Commit 33dc4ef

Browse files
dsessler7cbandy
authored andcommitted
Add an OTel Collector with Patroni metrics
Issue: PGO-2043
1 parent 97b31a6 commit 33dc4ef

File tree

9 files changed

+167
-8
lines changed

9 files changed

+167
-8
lines changed

internal/collector/config.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2024 Crunchy Data Solutions, Inc.
1+
// Copyright 2024 - 2025 Crunchy Data Solutions, Inc.
22
//
33
// SPDX-License-Identifier: Apache-2.0
44

internal/collector/config_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2024 Crunchy Data Solutions, Inc.
1+
// Copyright 2024 - 2025 Crunchy Data Solutions, Inc.
22
//
33
// SPDX-License-Identifier: Apache-2.0
44

internal/collector/instance.go

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
// Copyright 2024 - 2025 Crunchy Data Solutions, Inc.
2+
//
3+
// SPDX-License-Identifier: Apache-2.0
4+
5+
package collector
6+
7+
import (
8+
"context"
9+
10+
corev1 "k8s.io/api/core/v1"
11+
12+
"github.com/crunchydata/postgres-operator/internal/feature"
13+
"github.com/crunchydata/postgres-operator/internal/initialize"
14+
"github.com/crunchydata/postgres-operator/internal/naming"
15+
"github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1"
16+
)
17+
18+
// AddToConfigMap populates the shared ConfigMap with fields needed to run the Collector.
19+
func AddToConfigMap(
20+
ctx context.Context,
21+
inConfig *Config,
22+
outInstanceConfigMap *corev1.ConfigMap,
23+
) error {
24+
var err error
25+
if outInstanceConfigMap.Data == nil {
26+
outInstanceConfigMap.Data = make(map[string]string)
27+
}
28+
29+
outInstanceConfigMap.Data["collector.yaml"], err = inConfig.ToYAML()
30+
31+
return err
32+
}
33+
34+
// AddToPod adds the OpenTelemetry collector container to a given Pod
35+
func AddToPod(
36+
ctx context.Context,
37+
inCluster *v1beta1.PostgresCluster,
38+
inInstanceConfigMap *corev1.ConfigMap,
39+
outPod *corev1.PodSpec,
40+
volumeMounts []corev1.VolumeMount,
41+
) {
42+
if !feature.Enabled(ctx, feature.OpenTelemetryMetrics) {
43+
return
44+
}
45+
46+
configVolumeMount := corev1.VolumeMount{
47+
Name: "collector-config",
48+
MountPath: "/etc/otel-collector",
49+
ReadOnly: true,
50+
}
51+
configVolume := corev1.Volume{Name: configVolumeMount.Name}
52+
configVolume.Projected = &corev1.ProjectedVolumeSource{
53+
Sources: []corev1.VolumeProjection{{
54+
ConfigMap: &corev1.ConfigMapProjection{
55+
LocalObjectReference: corev1.LocalObjectReference{
56+
Name: inInstanceConfigMap.Name,
57+
},
58+
Items: []corev1.KeyToPath{{
59+
Key: "collector.yaml",
60+
Path: "config.yaml",
61+
}},
62+
},
63+
}},
64+
}
65+
66+
container := corev1.Container{
67+
Name: naming.ContainerCollector,
68+
69+
Image: "ghcr.io/open-telemetry/opentelemetry-collector-releases/opentelemetry-collector-contrib:0.116.1",
70+
ImagePullPolicy: inCluster.Spec.ImagePullPolicy,
71+
Command: []string{"/otelcol-contrib", "--config", "/etc/otel-collector/config.yaml"},
72+
73+
SecurityContext: initialize.RestrictedSecurityContext(),
74+
VolumeMounts: append(volumeMounts, configVolumeMount),
75+
}
76+
77+
container.Ports = []corev1.ContainerPort{{
78+
ContainerPort: int32(8889),
79+
Name: "otel-metrics",
80+
Protocol: corev1.ProtocolTCP,
81+
}}
82+
83+
outPod.Containers = append(outPod.Containers, container)
84+
outPod.Volumes = append(outPod.Volumes, configVolume)
85+
}

internal/collector/naming.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2024 Crunchy Data Solutions, Inc.
1+
// Copyright 2024 - 2025 Crunchy Data Solutions, Inc.
22
//
33
// SPDX-License-Identifier: Apache-2.0
44

@@ -8,3 +8,5 @@ const CompactingProcessor = "groupbyattrs/compact"
88
const DebugExporter = "debug"
99
const OneSecondBatchProcessor = "batch/1s"
1010
const SubSecondBatchProcessor = "batch/200ms"
11+
const Prometheus = "prometheus"
12+
const Metrics = "metrics"

internal/collector/postgres.go

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
// Copyright 2024 - 2025 Crunchy Data Solutions, Inc.
2+
//
3+
// SPDX-License-Identifier: Apache-2.0
4+
5+
package collector
6+
7+
import (
8+
"context"
9+
10+
"github.com/crunchydata/postgres-operator/internal/feature"
11+
)
12+
13+
func NewConfigForPostgresPod(ctx context.Context) *Config {
14+
config := NewConfig()
15+
16+
if feature.Enabled(ctx, feature.OpenTelemetryMetrics) {
17+
// Add Prometheus exporter
18+
config.Exporters[Prometheus] = map[string]any{
19+
"endpoint": "0.0.0.0:8889",
20+
}
21+
22+
// Add Prometheus Receiver
23+
config.Receivers[Prometheus] = map[string]any{
24+
"config": map[string]any{
25+
"scrape_configs": []map[string]any{
26+
{
27+
"job_name": "patroni",
28+
"scheme": "https",
29+
"tls_config": map[string]any{
30+
"insecure_skip_verify": true,
31+
},
32+
"scrape_interval": "10s",
33+
"static_configs": []map[string]any{
34+
{
35+
"targets": []string{
36+
"0.0.0.0:8008",
37+
},
38+
},
39+
},
40+
},
41+
},
42+
},
43+
}
44+
45+
// Add Metrics Pipeline
46+
config.Pipelines[Metrics] = Pipeline{
47+
Receivers: []ComponentID{Prometheus},
48+
Exporters: []ComponentID{Prometheus},
49+
}
50+
}
51+
52+
return config
53+
}

internal/controller/postgrescluster/controller.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import (
2727
"sigs.k8s.io/controller-runtime/pkg/manager"
2828
"sigs.k8s.io/controller-runtime/pkg/reconcile"
2929

30+
"github.com/crunchydata/postgres-operator/internal/collector"
3031
"github.com/crunchydata/postgres-operator/internal/config"
3132
"github.com/crunchydata/postgres-operator/internal/controller/runtime"
3233
"github.com/crunchydata/postgres-operator/internal/initialize"
@@ -241,6 +242,8 @@ func (r *Reconciler) Reconcile(
241242
pgbackrest.PostgreSQL(cluster, &pgParameters, backupsSpecFound)
242243
pgmonitor.PostgreSQLParameters(cluster, &pgParameters)
243244

245+
otelConfig := collector.NewConfigForPostgresPod(ctx)
246+
244247
// Set huge_pages = try if a hugepages resource limit > 0, otherwise set "off"
245248
postgres.SetHugePages(cluster, &pgParameters)
246249

@@ -349,7 +352,7 @@ func (r *Reconciler) Reconcile(
349352
ctx, cluster, clusterConfigMap, clusterReplicationSecret, rootCA,
350353
clusterPodService, instanceServiceAccount, instances, patroniLeaderService,
351354
primaryCertificate, clusterVolumes, exporterQueriesConfig, exporterWebConfig,
352-
backupsSpecFound,
355+
backupsSpecFound, otelConfig,
353356
)
354357
}
355358

internal/controller/postgrescluster/instance.go

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ import (
2424
"sigs.k8s.io/controller-runtime/pkg/client"
2525
"sigs.k8s.io/controller-runtime/pkg/reconcile"
2626

27+
"github.com/crunchydata/postgres-operator/internal/collector"
2728
"github.com/crunchydata/postgres-operator/internal/config"
2829
"github.com/crunchydata/postgres-operator/internal/controller/runtime"
2930
"github.com/crunchydata/postgres-operator/internal/feature"
@@ -591,6 +592,7 @@ func (r *Reconciler) reconcileInstanceSets(
591592
clusterVolumes []*corev1.PersistentVolumeClaim,
592593
exporterQueriesConfig, exporterWebConfig *corev1.ConfigMap,
593594
backupsSpecFound bool,
595+
otelConfig *collector.Config,
594596
) error {
595597

596598
// Go through the observed instances and check if a primary has been determined.
@@ -628,7 +630,7 @@ func (r *Reconciler) reconcileInstanceSets(
628630
patroniLeaderService, primaryCertificate,
629631
findAvailableInstanceNames(*set, instances, clusterVolumes),
630632
numInstancePods, clusterVolumes, exporterQueriesConfig, exporterWebConfig,
631-
backupsSpecFound,
633+
backupsSpecFound, otelConfig,
632634
)
633635

634636
if err == nil {
@@ -1063,6 +1065,7 @@ func (r *Reconciler) scaleUpInstances(
10631065
clusterVolumes []*corev1.PersistentVolumeClaim,
10641066
exporterQueriesConfig, exporterWebConfig *corev1.ConfigMap,
10651067
backupsSpecFound bool,
1068+
otelConfig *collector.Config,
10661069
) ([]*appsv1.StatefulSet, error) {
10671070
log := logging.FromContext(ctx)
10681071

@@ -1109,7 +1112,7 @@ func (r *Reconciler) scaleUpInstances(
11091112
rootCA, clusterPodService, instanceServiceAccount,
11101113
patroniLeaderService, primaryCertificate, instances[i],
11111114
numInstancePods, clusterVolumes, exporterQueriesConfig, exporterWebConfig,
1112-
backupsSpecFound,
1115+
backupsSpecFound, otelConfig,
11131116
)
11141117
}
11151118
if err == nil {
@@ -1140,6 +1143,7 @@ func (r *Reconciler) reconcileInstance(
11401143
clusterVolumes []*corev1.PersistentVolumeClaim,
11411144
exporterQueriesConfig, exporterWebConfig *corev1.ConfigMap,
11421145
backupsSpecFound bool,
1146+
otelConfig *collector.Config,
11431147
) error {
11441148
log := logging.FromContext(ctx).WithValues("instance", instance.Name)
11451149
ctx = logging.NewContext(ctx, log)
@@ -1164,7 +1168,7 @@ func (r *Reconciler) reconcileInstance(
11641168
)
11651169

11661170
if err == nil {
1167-
instanceConfigMap, err = r.reconcileInstanceConfigMap(ctx, cluster, spec, instance)
1171+
instanceConfigMap, err = r.reconcileInstanceConfigMap(ctx, cluster, spec, instance, otelConfig)
11681172
}
11691173
if err == nil {
11701174
instanceCertificates, err = r.reconcileInstanceCertificates(
@@ -1196,6 +1200,10 @@ func (r *Reconciler) reconcileInstance(
11961200
spec, instanceCertificates, instanceConfigMap, &instance.Spec.Template)
11971201
}
11981202

1203+
if err == nil && feature.Enabled(ctx, feature.OpenTelemetryMetrics) {
1204+
collector.AddToPod(ctx, cluster, instanceConfigMap, &instance.Spec.Template.Spec, nil)
1205+
}
1206+
11991207
// Add pgMonitor resources to the instance Pod spec
12001208
if err == nil {
12011209
err = addPGMonitorToInstancePodSpec(ctx, cluster, &instance.Spec.Template, exporterQueriesConfig, exporterWebConfig)
@@ -1377,7 +1385,7 @@ func addPGBackRestToInstancePodSpec(
13771385
// files (etc) that apply to instance of cluster.
13781386
func (r *Reconciler) reconcileInstanceConfigMap(
13791387
ctx context.Context, cluster *v1beta1.PostgresCluster, spec *v1beta1.PostgresInstanceSetSpec,
1380-
instance *appsv1.StatefulSet,
1388+
instance *appsv1.StatefulSet, otelConfig *collector.Config,
13811389
) (*corev1.ConfigMap, error) {
13821390
instanceConfigMap := &corev1.ConfigMap{ObjectMeta: naming.InstanceConfigMap(instance)}
13831391
instanceConfigMap.SetGroupVersionKind(corev1.SchemeGroupVersion.WithKind("ConfigMap"))
@@ -1397,6 +1405,9 @@ func (r *Reconciler) reconcileInstanceConfigMap(
13971405
naming.LabelInstance: instance.Name,
13981406
})
13991407

1408+
if err == nil && feature.Enabled(ctx, feature.OpenTelemetryMetrics) {
1409+
err = collector.AddToConfigMap(ctx, otelConfig, instanceConfigMap)
1410+
}
14001411
if err == nil {
14011412
err = patroni.InstanceConfigMap(ctx, cluster, spec, instanceConfigMap)
14021413
}

internal/feature/features.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,9 @@ const (
8181
// Support custom sidecars for PostgreSQL instance Pods
8282
InstanceSidecars = "InstanceSidecars"
8383

84+
// Export metrics using OpenTelemetry
85+
OpenTelemetryMetrics = "OpenTelemetryMetrics"
86+
8487
// Support custom sidecars for pgBouncer Pods
8588
PGBouncerSidecars = "PGBouncerSidecars"
8689

@@ -104,6 +107,7 @@ func NewGate() MutableGate {
104107
AutoGrowVolumes: {Default: false, PreRelease: featuregate.Alpha},
105108
BridgeIdentifiers: {Default: false, PreRelease: featuregate.Deprecated},
106109
InstanceSidecars: {Default: false, PreRelease: featuregate.Alpha},
110+
OpenTelemetryMetrics: {Default: false, PreRelease: featuregate.Alpha},
107111
PGBouncerSidecars: {Default: false, PreRelease: featuregate.Alpha},
108112
PGUpgradeCPUConcurrency: {Default: false, PreRelease: featuregate.Alpha},
109113
TablespaceVolumes: {Default: false, PreRelease: featuregate.Alpha},

internal/feature/features_test.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ func TestDefaults(t *testing.T) {
2121
assert.Assert(t, false == gate.Enabled(AutoGrowVolumes))
2222
assert.Assert(t, false == gate.Enabled(BridgeIdentifiers))
2323
assert.Assert(t, false == gate.Enabled(InstanceSidecars))
24+
assert.Assert(t, false == gate.Enabled(OpenTelemetryMetrics))
2425
assert.Assert(t, false == gate.Enabled(PGBouncerSidecars))
2526
assert.Assert(t, false == gate.Enabled(PGUpgradeCPUConcurrency))
2627
assert.Assert(t, false == gate.Enabled(TablespaceVolumes))

0 commit comments

Comments
 (0)