Skip to content

Commit 77bdfa5

Browse files
committed
Combine ccp_archive_command_status queries into one query.
Add semicolons to the end of all queries. Make ccp_replication_lag_size return the replica name for the Grafana dashboard legend. DROP functions rather than CREATE OR REPLACE to avoid errors caused by changes to the function definitions.
1 parent 6b65607 commit 77bdfa5

File tree

5 files changed

+25
-34
lines changed

5 files changed

+25
-34
lines changed

internal/collector/generated/pgbouncer_metrics_queries.json

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

internal/collector/generated/postgres_5s_metrics.json

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

internal/collector/pgbouncer_metrics_queries.yaml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
# https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/-/receiver/sqlqueryreceiver#metrics-queries
55
# https://github.com/CrunchyData/pgmonitor/blob/v5.1.1/sql_exporter/common/crunchy_pgbouncer_121_collector.yml
66

7-
- sql: "SHOW CLIENTS"
7+
- sql: "SHOW CLIENTS;"
88
metrics:
99
- metric_name: ccp_pgbouncer_clients_wait_seconds
1010
value_column: wait
@@ -15,7 +15,7 @@
1515
# can be NULL; the collector will warn against NULL even when not used. But it will emit
1616
# an error log if those columns are used.
1717
# The host column should always point either to pgBouncer's virtual database (the null case) or to the primary.
18-
- sql: "SHOW DATABASES"
18+
- sql: "SHOW DATABASES;"
1919
metrics:
2020
- metric_name: ccp_pgbouncer_databases_pool_size
2121
value_column: pool_size
@@ -54,14 +54,14 @@
5454
attribute_columns: ["name", "port", "database"]
5555
description: "1 if this database is currently disabled, else 0"
5656

57-
- sql: "SHOW LISTS"
57+
- sql: "SHOW LISTS;"
5858
metrics:
5959
- metric_name: ccp_pgbouncer_lists_item_count
6060
value_column: items
6161
attribute_columns: ["list"]
6262
description: "Count of items registered with pgBouncer"
6363

64-
- sql: "SHOW POOLS"
64+
- sql: "SHOW POOLS;"
6565
metrics:
6666
- metric_name: ccp_pgbouncer_pools_client_active
6767
value_column: cl_active
@@ -92,7 +92,7 @@
9292
Server connections that have been idle for more than server_check_delay,
9393
so they need server_check_query to run on them before they can be used again
9494
95-
- sql: "SHOW SERVERS"
95+
- sql: "SHOW SERVERS;"
9696
metrics:
9797
- metric_name: ccp_pgbouncer_servers_close_needed
9898
value_column: close_needed

internal/collector/postgres_5s_metrics.yaml

Lines changed: 12 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,14 @@
4343
4444
- sql: >
4545
SELECT
46-
COALESCE(EXTRACT(epoch from (CURRENT_TIMESTAMP - last_archived_time)), 0) AS seconds_since_last_archive
46+
COALESCE(EXTRACT(epoch from (CURRENT_TIMESTAMP - last_archived_time)), 0) AS seconds_since_last_archive,
47+
archived_count,
48+
failed_count,
49+
CASE
50+
WHEN EXTRACT(epoch from (last_failed_time - last_archived_time)) IS NULL THEN 0
51+
WHEN EXTRACT(epoch from (last_failed_time - last_archived_time)) < 0 THEN 0
52+
ELSE EXTRACT(epoch from (last_failed_time - last_archived_time))
53+
END AS seconds_since_last_fail
4754
FROM pg_catalog.pg_stat_archiver;
4855
metrics:
4956
- metric_name: ccp_archive_command_status_seconds_since_last_archive
@@ -52,35 +59,16 @@
5259
description: Seconds since the last successful archive operation
5360
static_attributes:
5461
server: "localhost:5432"
55-
56-
- sql: >
57-
SELECT archived_count
58-
FROM pg_catalog.pg_stat_archiver
59-
metrics:
6062
- metric_name: ccp_archive_command_status_archived_count
6163
value_column: archived_count
6264
description: Number of WAL files that have been successfully archived
6365
static_attributes:
6466
server: "localhost:5432"
65-
66-
- sql: >
67-
SELECT failed_count
68-
FROM pg_catalog.pg_stat_archiver
69-
metrics:
7067
- metric_name: ccp_archive_command_status_failed_count
7168
value_column: failed_count
7269
description: Number of failed attempts for archiving WAL files
7370
static_attributes:
7471
server: "localhost:5432"
75-
76-
- sql: >
77-
SELECT CASE
78-
WHEN EXTRACT(epoch from (last_failed_time - last_archived_time)) IS NULL THEN 0
79-
WHEN EXTRACT(epoch from (last_failed_time - last_archived_time)) < 0 THEN 0
80-
ELSE EXTRACT(epoch from (last_failed_time - last_archived_time))
81-
END AS seconds_since_last_fail
82-
FROM pg_catalog.pg_stat_archiver
83-
metrics:
8472
- metric_name: ccp_archive_command_status_seconds_since_last_fail
8573
value_column: seconds_since_last_fail
8674
description: Seconds since the last recorded failure of the archive_command
@@ -198,7 +186,7 @@
198186
199187
- sql: >
200188
SELECT monitor.kdapi_scalar_bigint('cpu_request') AS request
201-
, monitor.kdapi_scalar_bigint('cpu_limit') AS limit
189+
, monitor.kdapi_scalar_bigint('cpu_limit') AS limit;
202190
metrics:
203191
- metric_name: ccp_nodemx_cpu_limit
204192
value_column: limit
@@ -297,7 +285,7 @@
297285
FROM monitor.proc_mountinfo() m
298286
JOIN monitor.fsinfo(m.mount_point) f USING (major_number, minor_number)
299287
WHERE m.mount_point IN ('/pgdata', '/pgwal') OR
300-
m.mount_point like '/tablespaces/%'
288+
m.mount_point like '/tablespaces/%';
301289
metrics:
302290
- metric_name: ccp_nodemx_data_disk_available_bytes
303291
value_column: available_bytes
@@ -469,7 +457,7 @@
469457
,tx_bytes
470458
,tx_packets
471459
,rx_bytes
472-
,rx_packets from monitor.proc_network_stats()
460+
,rx_packets from monitor.proc_network_stats();
473461
metrics:
474462
- metric_name: ccp_nodemx_network_rx_bytes
475463
value_column: rx_bytes
@@ -632,6 +620,7 @@
632620
value_column: bytes
633621
value_type: double
634622
description: Replication lag in bytes.
623+
attribute_columns: ['replica']
635624
static_attributes:
636625
server: "localhost:5432"
637626

internal/controller/postgrescluster/metrics_setup.sql

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -71,23 +71,25 @@ $function$;
7171
GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA monitor TO ccp_monitoring;
7272
GRANT ALL ON ALL TABLES IN SCHEMA monitor TO ccp_monitoring;
7373

74+
DROP FUNCTION IF EXISTS get_replication_lag();
7475
--- get_replication_lag is used by the OTel collector.
7576
--- get_replication_lag is created as a function, so that we can query without warning on a replica.
76-
CREATE OR REPLACE FUNCTION get_replication_lag() RETURNS TABLE(bytes NUMERIC) AS $$
77+
CREATE FUNCTION get_replication_lag() RETURNS TABLE(replica text, bytes NUMERIC) AS $$
7778
BEGIN
7879
IF pg_is_in_recovery() THEN
79-
RETURN QUERY SELECT 0::NUMERIC AS bytes;
80+
RETURN QUERY SELECT ''::text as replica, 0::NUMERIC AS bytes;
8081
ELSE
81-
RETURN QUERY SELECT pg_wal_lsn_diff(sent_lsn, replay_lsn) AS bytes
82+
RETURN QUERY SELECT application_name AS replica, pg_wal_lsn_diff(sent_lsn, replay_lsn) AS bytes
8283
FROM pg_catalog.pg_stat_replication;
8384
END IF;
8485
END;
8586
$$ LANGUAGE plpgsql;
8687

88+
DROP FUNCTION IF EXISTS get_pgbackrest_info();
8789
--- get_pgbackrest_info is used by the OTel collector.
8890
--- get_pgbackrest_info is created as a function so that no ddl runs on a replica.
8991
--- In the query, the --stanza argument matches DefaultStanzaName, defined in internal/pgbackrest/config.go.
90-
CREATE OR REPLACE FUNCTION get_pgbackrest_info()
92+
CREATE FUNCTION get_pgbackrest_info()
9193
RETURNS TABLE (
9294
last_diff_backup BIGINT,
9395
last_full_backup BIGINT,

0 commit comments

Comments
 (0)