Skip to content

Commit 0894079

Browse files
author
Dementii Priadko
committed
Merge branch 'multixact-members-folder-size-metric' into 'main'
Added a metric for multixact members folder size, fixed grants in readme.md. See merge request postgres-ai/postgres_ai!66
2 parents a9f7af4 + 220a8cd commit 0894079

File tree

4 files changed

+289
-32
lines changed

4 files changed

+289
-32
lines changed

README.md

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -95,13 +95,9 @@ create user postgres_ai_mon with password '<password>';
9595
grant connect on database <database_name> to postgres_ai_mon;
9696

9797
grant pg_monitor to postgres_ai_mon;
98-
grant select on pg_stat_statements to postgres_ai_mon;
99-
grant select on pg_stat_database to postgres_ai_mon;
100-
grant select on pg_stat_user_tables to postgres_ai_mon;
101-
grant select on pg_stat_user_indexes to postgres_ai_mon;
10298
grant select on pg_index to postgres_ai_mon;
10399

104-
-- Create a public view for pg_statistic access (required for bloat metrics on user schemas)
100+
-- Create a public view for pg_statistic access (optional, for bloat analysis)
105101
create view public.pg_statistic as
106102
select
107103
n.nspname as schemaname,
@@ -116,11 +112,29 @@ join pg_namespace n on n.oid = c.relnamespace
116112
join pg_attribute a on a.attrelid = s.starelid and a.attnum = s.staattnum
117113
where a.attnum > 0 and not a.attisdropped;
118114

119-
grant select on public.pg_statistic to pg_monitor;
115+
grant select on public.pg_statistic to postgres_ai_mon;
120116
alter user postgres_ai_mon set search_path = "$user", public, pg_catalog;
121117
commit;
122118
```
123119

120+
### Optional permissions to analyze risks of certain performance cliffs
121+
122+
For RDS Postgres and Aurora:
123+
124+
```sql
125+
create extension if not exists rds_tools;
126+
grant execute on function rds_tools.pg_ls_multixactdir() to postgres_ai_mon;
127+
```
128+
129+
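For self-managed Postgres (note: the `multixact_size` metric only runs this probe when the monitoring role is a superuser or a member of `pg_read_server_files`; the grants below are needed in addition to that):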
130+
131+
```sql
132+
grant execute on function pg_stat_file(text) to postgres_ai_mon;
133+
grant execute on function pg_stat_file(text, boolean) to postgres_ai_mon;
134+
grant execute on function pg_ls_dir(text) to postgres_ai_mon;
135+
grant execute on function pg_ls_dir(text, boolean, boolean) to postgres_ai_mon;
136+
```
137+
124138
**One command setup:**
125139

126140
```bash

config/grafana/dashboards/Dashboard_1_Node_performance_overview.json

Lines changed: 142 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
"editable": true,
1919
"fiscalYearStartMonth": 0,
2020
"graphTooltip": 1,
21-
"id": 2,
21+
"id": 1,
2222
"links": [],
2323
"panels": [
2424
{
@@ -4588,6 +4588,145 @@
45884588
],
45894589
"type": "timeseries"
45904590
},
4591+
{
4592+
"datasource": {
4593+
"type": "datasource",
4594+
"uid": "-- Mixed --"
4595+
},
4596+
"fieldConfig": {
4597+
"defaults": {
4598+
"color": {
4599+
"mode": "palette-classic"
4600+
},
4601+
"custom": {
4602+
"axisBorderShow": false,
4603+
"axisCenteredZero": false,
4604+
"axisColorMode": "text",
4605+
"axisLabel": "",
4606+
"axisPlacement": "auto",
4607+
"axisSoftMin": 0,
4608+
"barAlignment": 0,
4609+
"barWidthFactor": 0.6,
4610+
"drawStyle": "line",
4611+
"fillOpacity": 0,
4612+
"gradientMode": "none",
4613+
"hideFrom": {
4614+
"legend": false,
4615+
"tooltip": false,
4616+
"viz": false
4617+
},
4618+
"insertNulls": false,
4619+
"lineInterpolation": "linear",
4620+
"lineWidth": 1,
4621+
"pointSize": 5,
4622+
"scaleDistribution": {
4623+
"type": "linear"
4624+
},
4625+
"showPoints": "auto",
4626+
"spanNulls": true,
4627+
"stacking": {
4628+
"group": "A",
4629+
"mode": "none"
4630+
},
4631+
"thresholdsStyle": {
4632+
"mode": "off"
4633+
}
4634+
},
4635+
"mappings": [],
4636+
"thresholds": {
4637+
"mode": "absolute",
4638+
"steps": [
4639+
{
4640+
"color": "green"
4641+
},
4642+
{
4643+
"color": "red",
4644+
"value": 80
4645+
}
4646+
]
4647+
},
4648+
"unit": "bytes"
4649+
},
4650+
"overrides": [
4651+
{
4652+
"matcher": {
4653+
"id": "byName",
4654+
"options": "Safe threshold"
4655+
},
4656+
"properties": [
4657+
{
4658+
"id": "custom.lineStyle",
4659+
"value": {
4660+
"dash": [
4661+
20,
4662+
10
4663+
],
4664+
"fill": "dash"
4665+
}
4666+
},
4667+
{
4668+
"id": "color",
4669+
"value": {
4670+
"fixedColor": "dark-red",
4671+
"mode": "fixed"
4672+
}
4673+
}
4674+
]
4675+
}
4676+
]
4677+
},
4678+
"gridPos": {
4679+
"h": 12,
4680+
"w": 24,
4681+
"x": 0,
4682+
"y": 197
4683+
},
4684+
"id": 46,
4685+
"options": {
4686+
"legend": {
4687+
"calcs": [
4688+
"max"
4689+
],
4690+
"displayMode": "table",
4691+
"placement": "bottom",
4692+
"showLegend": true
4693+
},
4694+
"tooltip": {
4695+
"hideZeros": false,
4696+
"mode": "single",
4697+
"sort": "none"
4698+
}
4699+
},
4700+
"pluginVersion": "12.0.2",
4701+
"targets": [
4702+
{
4703+
"datasource": {
4704+
"type": "prometheus",
4705+
"uid": "P7A0D6631BB10B34F"
4706+
},
4707+
"editorMode": "code",
4708+
"expr": "pgwatch_multixact_size_members_bytes{cluster=\"$cluster_name\", node_name=\"$node_name\"}",
4709+
"legendFormat": "members",
4710+
"range": true,
4711+
"refId": "A"
4712+
},
4713+
{
4714+
"datasource": {
4715+
"type": "prometheus",
4716+
"uid": "P7A0D6631BB10B34F"
4717+
},
4718+
"editorMode": "code",
4719+
"expr": "10737418240",
4720+
"hide": false,
4721+
"instant": false,
4722+
"legendFormat": "Safe threshold",
4723+
"range": true,
4724+
"refId": "B"
4725+
}
4726+
],
4727+
"title": "Multixact members folder size",
4728+
"type": "timeseries"
4729+
},
45914730
{
45924731
"fieldConfig": {
45934732
"defaults": {},
@@ -4597,7 +4736,7 @@
45974736
"h": 3,
45984737
"w": 24,
45994738
"x": 0,
4600-
"y": 197
4739+
"y": 209
46014740
},
46024741
"id": 40,
46034742
"options": {
@@ -4683,5 +4822,5 @@
46834822
"timezone": "utc",
46844823
"title": "01. Single node performance overview (high-level)",
46854824
"uid": "f90500a0-a12e-4081-a2f0-07ed96f27915",
4686-
"version": 3
4825+
"version": 4
46874826
}

config/pgwatch-prometheus/metrics.yml

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2200,6 +2200,106 @@ metrics:
22002200
gauges:
22012201
- '*'
22022202
statement_timeout_seconds: 15
2203+
2204+
multixact_size:
2205+
sqls:
2206+
11: |
2207+
with env as (
2208+
select
2209+
exists (
2210+
select
2211+
from pg_proc p
2212+
join pg_namespace n on n.oid = p.pronamespace
2213+
where p.proname = 'pg_ls_multixactdir' and n.nspname = 'rds_tools'
2214+
) as has_rds_fn,
2215+
coalesce(pg_has_role('pg_read_server_files','usage'), false) as has_read_files,
2216+
(select rolsuper from pg_roles where rolname = current_user) as is_super,
2217+
exists (select from pg_proc where proname = 'pg_ls_dir') as has_pg_ls_dir_func,
2218+
exists (select from pg_proc where proname = 'pg_stat_file') as has_pg_stat_file_func
2219+
),
2220+
can_local as (
2221+
select (has_pg_ls_dir_func and has_pg_stat_file_func and (has_read_files or is_super)) as ok from env
2222+
),
2223+
-- Use query_to_xml to safely execute RDS-specific multixact directory listing query.
2224+
-- The XML wrapper allows the query to fail gracefully if rds_tools.pg_ls_multixactdir()
2225+
-- is unavailable or returns errors, preventing the entire metric from failing.
2226+
rds_probe_xml as (
2227+
select query_to_xml($q$
2228+
with files as (
2229+
select name, size
2230+
from rds_tools.pg_ls_multixactdir()
2231+
),
2232+
members as (
2233+
select sum(size)::bigint as sz from files where name like 'pg_multixact/members%'
2234+
),
2235+
offsets as (
2236+
select sum(size)::bigint as sz from files where name like 'pg_multixact/offsets%'
2237+
),
2238+
has_rows as (
2239+
select exists(select 1 from files where name like 'pg_multixact/%') as any_rows
2240+
)
2241+
select
2242+
case when (select any_rows from has_rows) then coalesce((select sz from members), 0) end as members_bytes,
2243+
case when (select any_rows from has_rows) then coalesce((select sz from offsets), 0) end as offsets_bytes,
2244+
case when (select any_rows from has_rows) then 0 else 1 end as status_code
2245+
$q$, true, true, '') as x
2246+
where (select has_rds_fn from env)
2247+
),
2248+
-- Use query_to_xml to safely execute standard Postgres multixact directory listing query.
2249+
-- The XML wrapper allows the query to fail gracefully if pg_stat_file() or pg_ls_dir()
2250+
-- are unavailable or return permission errors, preventing the entire metric from failing.
2251+
local_probe_xml as (
2252+
select query_to_xml($q$
2253+
with dirs as (
2254+
select
2255+
(pg_stat_file('pg_multixact/members', true)).isdir as has_members,
2256+
(pg_stat_file('pg_multixact/offsets', true)).isdir as has_offsets
2257+
),
2258+
flags as (
2259+
select ((select has_members from dirs) or (select has_offsets from dirs)) as has_any
2260+
),
2261+
members as (
2262+
select sum((pg_stat_file(format('pg_multixact/members/%s', d), true)).size)::bigint as sz
2263+
from pg_ls_dir('pg_multixact/members') as d(d)
2264+
where (select has_members from dirs)
2265+
),
2266+
offsets as (
2267+
select sum((pg_stat_file(format('pg_multixact/offsets/%s', d), true)).size)::bigint as sz
2268+
from pg_ls_dir('pg_multixact/offsets') as d(d)
2269+
where (select has_offsets from dirs)
2270+
)
2271+
select
2272+
case when (select has_any from flags) then coalesce((select sz from members), 0) end as members_bytes,
2273+
case when (select has_any from flags) then coalesce((select sz from offsets), 0) end as offsets_bytes,
2274+
case when (select has_any from flags) then 0 else 1 end as status_code
2275+
$q$, true, true, '') as x
2276+
where not (select has_rds_fn from env) and (select ok from can_local)
2277+
),
2278+
picked as (
2279+
select * from rds_probe_xml
2280+
union all
2281+
select * from local_probe_xml
2282+
limit 1
2283+
),
2284+
parsed as (
2285+
select
2286+
(xpath('//members_bytes/text()', x))[1]::text::bigint as members_bytes,
2287+
(xpath('//offsets_bytes/text()', x))[1]::text::bigint as offsets_bytes,
2288+
(xpath('//status_code/text()', x))[1]::text::int as status_code
2289+
from picked
2290+
)
2291+
select * from parsed
2292+
union all
2293+
select
2294+
null::bigint as members_bytes,
2295+
null::bigint as offsets_bytes,
2296+
2::int as status_code
2297+
where not exists (select 1 from parsed);
2298+
gauges:
2299+
- members_bytes
2300+
- offsets_bytes
2301+
- status_code
2302+
statement_timeout_seconds: 15
22032303

22042304
presets:
22052305
full:
@@ -2244,6 +2344,7 @@ presets:
22442344
stats_reset: 3600
22452345
archive_lag: 15
22462346
pg_vacuum_progress: 30
2347+
multixact_size: 300
22472348
pg_index_pilot:
22482349
metrics:
22492350
pg_index_pilot: 30

0 commit comments

Comments
 (0)