Skip to content

Commit eee1b37

Browse files
dsessler7cbandy
andcommitted
Add PgBouncer metrics
A generator converts YAML with comments to JSON to avoid errors at runtime and comments in the binary. Co-authored-by: Chris Bandy <[email protected]> Issue: PGO-2054
1 parent 33dc4ef commit eee1b37

File tree

18 files changed

+332
-65
lines changed

18 files changed

+332
-65
lines changed

Makefile

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -254,16 +254,16 @@ generate-kuttl: ## Generate kuttl tests
254254
##@ Generate
255255

256256
.PHONY: check-generate
257-
check-generate: ## Check crd, deepcopy functions, and rbac generation
258-
check-generate: generate-crd
259-
check-generate: generate-deepcopy
260-
check-generate: generate-rbac
257+
check-generate: ## Check everything generated is also committed
258+
check-generate: generate
261259
git diff --exit-code -- config/crd
262260
git diff --exit-code -- config/rbac
261+
git diff --exit-code -- internal/collector
263262
git diff --exit-code -- pkg/apis
264263

265264
.PHONY: generate
266-
generate: ## Generate crd, deepcopy functions, and rbac
265+
generate: ## Generate everything
266+
generate: generate-collector
267267
generate: generate-crd
268268
generate: generate-deepcopy
269269
generate: generate-rbac
@@ -276,6 +276,10 @@ generate-crd: tools/controller-gen
276276
paths='./pkg/apis/...' \
277277
output:dir='config/crd/bases' # {directory}/{group}_{plural}.yaml
278278

279+
.PHONY: generate-collector
280+
generate-collector: ## Generate OTel Collector files
281+
$(GO) generate ./internal/collector
282+
279283
.PHONY: generate-deepcopy
280284
generate-deepcopy: ## Generate DeepCopy functions
281285
generate-deepcopy: tools/controller-gen
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
// Copyright 2024 - 2025 Crunchy Data Solutions, Inc.
2+
//
3+
// SPDX-License-Identifier: Apache-2.0
4+
5+
//go:build generate
6+
7+
//go:generate go run generate_json.go
8+
9+
package main
10+
11+
import (
12+
"bytes"
13+
"log/slog"
14+
"os"
15+
"path/filepath"
16+
"strings"
17+
18+
"sigs.k8s.io/yaml"
19+
)
20+
21+
func main() {
22+
cwd := need(os.Getwd())
23+
yamlFileNames := []string{}
24+
25+
slog.Info("Reading", "directory", cwd)
26+
for _, entry := range need(os.ReadDir(cwd)) {
27+
if entry.Type() == 0 && strings.HasSuffix(entry.Name(), ".yaml") {
28+
yamlFileNames = append(yamlFileNames, entry.Name())
29+
}
30+
}
31+
32+
for _, yamlName := range yamlFileNames {
33+
slog.Info("Reading", "file", yamlName)
34+
jsonData := need(yaml.YAMLToJSONStrict(need(os.ReadFile(yamlName))))
35+
jsonPath := filepath.Join("generated", strings.TrimSuffix(yamlName, ".yaml")+".json")
36+
37+
slog.Info("Writing", "file", jsonPath)
38+
must(os.WriteFile(jsonPath, append(bytes.TrimSpace(jsonData), '\n'), 0o644))
39+
}
40+
}
41+
42+
// must panics with err when err is non-nil and otherwise does nothing.
func must(err error) { _ = need(struct{}{}, err) }

// need returns v unchanged when err is nil and panics with err otherwise.
// It lets a call that returns (value, error) be used inline as an expression.
func need[V any](v V, err error) V {
	if err == nil {
		return v
	}
	panic(err)
}
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# https://docs.github.com/en/repositories/working-with-files/managing-files/customizing-how-changed-files-appear-on-github
2+
/*.json linguist-generated=true

internal/collector/generated/pgbouncer_metrics_queries.json

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

internal/collector/instance.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ func AddToPod(
3838
inInstanceConfigMap *corev1.ConfigMap,
3939
outPod *corev1.PodSpec,
4040
volumeMounts []corev1.VolumeMount,
41+
sqlQueryPassword string,
4142
) {
4243
if !feature.Enabled(ctx, feature.OpenTelemetryMetrics) {
4344
return
@@ -69,6 +70,12 @@ func AddToPod(
6970
Image: "ghcr.io/open-telemetry/opentelemetry-collector-releases/opentelemetry-collector-contrib:0.116.1",
7071
ImagePullPolicy: inCluster.Spec.ImagePullPolicy,
7172
Command: []string{"/otelcol-contrib", "--config", "/etc/otel-collector/config.yaml"},
73+
Env: []corev1.EnvVar{
74+
{
75+
Name: "PGPASSWORD",
76+
Value: sqlQueryPassword,
77+
},
78+
},
7279

7380
SecurityContext: initialize.RestrictedSecurityContext(),
7481
VolumeMounts: append(volumeMounts, configVolumeMount),

internal/collector/naming.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,4 @@ const OneSecondBatchProcessor = "batch/1s"
1010
const SubSecondBatchProcessor = "batch/200ms"
1111
const Prometheus = "prometheus"
1212
const Metrics = "metrics"
13+
const SqlQuery = "sqlquery"

internal/collector/patroni.go

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
// Copyright 2024 - 2025 Crunchy Data Solutions, Inc.
2+
//
3+
// SPDX-License-Identifier: Apache-2.0
4+
5+
package collector
6+
7+
import (
8+
"context"
9+
10+
"github.com/crunchydata/postgres-operator/internal/feature"
11+
"github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1"
12+
)
13+
14+
func EnablePatroniMetrics(ctx context.Context,
15+
inCluster *v1beta1.PostgresCluster,
16+
outConfig *Config,
17+
) {
18+
if feature.Enabled(ctx, feature.OpenTelemetryMetrics) {
19+
// Add Prometheus exporter
20+
outConfig.Exporters[Prometheus] = map[string]any{
21+
"endpoint": "0.0.0.0:8889",
22+
}
23+
24+
// Add Prometheus Receiver
25+
outConfig.Receivers[Prometheus] = map[string]any{
26+
"config": map[string]any{
27+
"scrape_configs": []map[string]any{
28+
{
29+
"job_name": "patroni",
30+
"scheme": "https",
31+
"tls_config": map[string]any{
32+
"insecure_skip_verify": true,
33+
},
34+
"scrape_interval": "10s",
35+
"static_configs": []map[string]any{
36+
{
37+
"targets": []string{
38+
"0.0.0.0:8008",
39+
},
40+
},
41+
},
42+
},
43+
},
44+
},
45+
}
46+
47+
// Add Metrics Pipeline
48+
outConfig.Pipelines[Metrics] = Pipeline{
49+
Receivers: []ComponentID{Prometheus},
50+
Exporters: []ComponentID{Prometheus},
51+
}
52+
}
53+
}

internal/collector/pgbouncer.go

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
// Copyright 2024 - 2025 Crunchy Data Solutions, Inc.
2+
//
3+
// SPDX-License-Identifier: Apache-2.0
4+
5+
package collector
6+
7+
import (
8+
"context"
9+
_ "embed"
10+
"encoding/json"
11+
"fmt"
12+
"slices"
13+
14+
"github.com/crunchydata/postgres-operator/internal/feature"
15+
"github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1"
16+
)
17+
18+
// https://pkg.go.dev/embed
19+
//
20+
//go:embed "generated/pgbouncer_metrics_queries.json"
21+
var pgBouncerMetricsQueries json.RawMessage
22+
23+
// NewConfigForPgBouncerPod creates a config for the OTel collector container
24+
// that runs as a sidecar in the pgBouncer Pod
25+
func NewConfigForPgBouncerPod(
26+
ctx context.Context, cluster *v1beta1.PostgresCluster, sqlQueryUsername string,
27+
) *Config {
28+
if cluster.Spec.Proxy == nil || cluster.Spec.Proxy.PGBouncer == nil {
29+
// pgBouncer is disabled; return nil
30+
return nil
31+
}
32+
33+
config := NewConfig()
34+
35+
EnablePgBouncerMetrics(ctx, config, sqlQueryUsername)
36+
37+
return config
38+
}
39+
40+
func EnablePgBouncerMetrics(ctx context.Context, config *Config, sqlQueryUsername string) {
41+
if feature.Enabled(ctx, feature.OpenTelemetryMetrics) {
42+
// Add Prometheus exporter
43+
config.Exporters[Prometheus] = map[string]any{
44+
"endpoint": "0.0.0.0:8889",
45+
}
46+
47+
// Add SqlQuery Receiver
48+
config.Receivers[SqlQuery] = map[string]any{
49+
"driver": "postgres",
50+
"datasource": fmt.Sprintf(`host=localhost dbname=pgbouncer port=5432 user=%s password=${env:PGPASSWORD}`,
51+
sqlQueryUsername),
52+
"queries": slices.Clone(pgBouncerMetricsQueries),
53+
}
54+
55+
// Add Metrics Pipeline
56+
config.Pipelines[Metrics] = Pipeline{
57+
Receivers: []ComponentID{SqlQuery},
58+
Exporters: []ComponentID{Prometheus},
59+
}
60+
}
61+
}
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
# This list of queries configures an OTel SQL Query Receiver to read pgMonitor
2+
# metrics from PgBouncer.
3+
#
4+
# https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/receiver/sqlqueryreceiver#metrics-queries
5+
# https://github.com/CrunchyData/pgmonitor/blob/v5.1.1/sql_exporter/common/crunchy_pgbouncer_121_collector.yml
6+
7+
- sql: "SHOW CLIENTS"
8+
metrics:
9+
- metric_name: ccp_pgbouncer_clients_wait_seconds
10+
value_column: wait
11+
attribute_columns: ["database", "user", "state", "application_name", "link"]
12+
description: "Current waiting time in seconds"
13+
14+
- sql: "SHOW DATABASES"
15+
metrics:
16+
- metric_name: ccp_pgbouncer_databases_pool_size
17+
value_column: pool_size
18+
attribute_columns: ["name", "host", "port", "database", "force_user", "pool_mode"]
19+
description: "Maximum number of server connections"
20+
21+
- metric_name: ccp_pgbouncer_databases_min_pool_size
22+
value_column: min_pool_size
23+
attribute_columns: ["name", "host", "port", "database", "force_user", "pool_mode"]
24+
description: "Minimum number of server connections"
25+
26+
- metric_name: ccp_pgbouncer_databases_reserve_pool
27+
value_column: reserve_pool
28+
attribute_columns: ["name", "host", "port", "database", "force_user", "pool_mode"]
29+
description: "Maximum number of additional connections for this database"
30+
31+
- metric_name: ccp_pgbouncer_databases_max_connections
32+
value_column: max_connections
33+
attribute_columns: ["name", "host", "port", "database", "force_user", "pool_mode"]
34+
description: >-
35+
Maximum number of allowed connections for this database,
36+
as set by max_db_connections, either globally or per database
37+
38+
- metric_name: ccp_pgbouncer_databases_current_connections
39+
value_column: current_connections
40+
attribute_columns: ["name", "host", "port", "database", "force_user", "pool_mode"]
41+
description: "Current number of connections for this database"
42+
43+
- metric_name: ccp_pgbouncer_databases_paused
44+
value_column: paused
45+
attribute_columns: ["name", "host", "port", "database", "force_user", "pool_mode"]
46+
description: "1 if this database is currently paused, else 0"
47+
48+
- metric_name: ccp_pgbouncer_databases_disabled
49+
value_column: disabled
50+
attribute_columns: ["name", "host", "port", "database", "force_user", "pool_mode"]
51+
description: "1 if this database is currently disabled, else 0"
52+
53+
- sql: "SHOW LISTS"
54+
metrics:
55+
- metric_name: ccp_pgbouncer_lists_item_count
56+
value_column: items
57+
attribute_columns: ["list"]
58+
description: "Count of items registered with pgBouncer"
59+
60+
- sql: "SHOW POOLS"
61+
metrics:
62+
- metric_name: ccp_pgbouncer_pools_client_active
63+
value_column: cl_active
64+
attribute_columns: ["database", "user"]
65+
description: >-
66+
Client connections that are either linked to server connections or
67+
are idle with no queries waiting to be processed
68+
69+
- metric_name: ccp_pgbouncer_pools_client_waiting
70+
value_column: cl_waiting
71+
attribute_columns: ["database", "user"]
72+
description: "Client connections that have sent queries but have not yet got a server connection"
73+
74+
- metric_name: ccp_pgbouncer_pools_server_active
75+
value_column: sv_active
76+
attribute_columns: ["database", "user"]
77+
description: "Server connections that are linked to a client"
78+
79+
- metric_name: ccp_pgbouncer_pools_server_idle
80+
value_column: sv_idle
81+
attribute_columns: ["database", "user"]
82+
description: "Server connections that are unused and immediately usable for client queries"
83+
84+
- metric_name: ccp_pgbouncer_pools_server_used
85+
value_column: sv_used
86+
attribute_columns: ["database", "user"]
87+
description: >-
88+
Server connections that have been idle for more than server_check_delay,
89+
so they need server_check_query to run on them before they can be used again
90+
91+
- sql: "SHOW SERVERS"
92+
metrics:
93+
- metric_name: ccp_pgbouncer_servers_close_needed
94+
value_column: close_needed
95+
attribute_columns: ["database", "user", "state", "application_name", "link"]
96+
description: >-
97+
1 if the connection will be closed as soon as possible,
98+
because a configuration file reload or DNS update changed the connection information
99+
or RECONNECT was issued

internal/collector/postgres.go

Lines changed: 3 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -7,47 +7,13 @@ package collector
77
import (
88
"context"
99

10-
"github.com/crunchydata/postgres-operator/internal/feature"
10+
"github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1"
1111
)
1212

13-
func NewConfigForPostgresPod(ctx context.Context) *Config {
13+
func NewConfigForPostgresPod(ctx context.Context, inCluster *v1beta1.PostgresCluster) *Config {
1414
config := NewConfig()
1515

16-
if feature.Enabled(ctx, feature.OpenTelemetryMetrics) {
17-
// Add Prometheus exporter
18-
config.Exporters[Prometheus] = map[string]any{
19-
"endpoint": "0.0.0.0:8889",
20-
}
21-
22-
// Add Prometheus Receiver
23-
config.Receivers[Prometheus] = map[string]any{
24-
"config": map[string]any{
25-
"scrape_configs": []map[string]any{
26-
{
27-
"job_name": "patroni",
28-
"scheme": "https",
29-
"tls_config": map[string]any{
30-
"insecure_skip_verify": true,
31-
},
32-
"scrape_interval": "10s",
33-
"static_configs": []map[string]any{
34-
{
35-
"targets": []string{
36-
"0.0.0.0:8008",
37-
},
38-
},
39-
},
40-
},
41-
},
42-
},
43-
}
44-
45-
// Add Metrics Pipeline
46-
config.Pipelines[Metrics] = Pipeline{
47-
Receivers: []ComponentID{Prometheus},
48-
Exporters: []ComponentID{Prometheus},
49-
}
50-
}
16+
EnablePatroniMetrics(ctx, inCluster, config)
5117

5218
return config
5319
}

0 commit comments

Comments
 (0)