-- Copyright 2025 shaneborden
--
-- Licensed under the Apache License, Version 2.0 (the "License");
-- you may not use this file except in compliance with the License.
-- You may obtain a copy of the License at
--
-- https://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.

-- While this script has been slightly modified, the original is
-- credited to PGExperts / Citus Data.
WITH constants AS (
    -- Sizing constants used throughout the query:
    --   bs  = database block (page) size in bytes
    --   hdr = tuple header size in bytes
    --   ma  = maximum alignment
    SELECT
        current_setting('block_size')::NUMERIC AS bs,
        23 AS hdr,
        8 AS ma
),
no_stats AS (
    -- Tables having at least one column with no pg_stats entry
    -- (such as JSON columns); bloat cannot be estimated for these.
    SELECT
        table_schema,
        table_name,
        n_live_tup::NUMERIC AS est_rows,
        pg_table_size(relid)::NUMERIC AS table_size
    FROM information_schema.columns
    INNER JOIN pg_stat_user_tables AS psut
        ON table_schema = psut.schemaname
       AND table_name = psut.relname
    LEFT JOIN pg_stats
        ON table_schema = pg_stats.schemaname
       AND table_name = pg_stats.tablename
       AND column_name = attname
    WHERE attname IS NULL
      AND table_schema NOT IN ('pg_catalog', 'information_schema')
    GROUP BY table_schema, table_name, relid, n_live_tup
),
null_headers AS (
    -- Per-table NULL-bitmap header size and average row data width,
    -- omitting tables that lack complete stats (anti-joined via
    -- no_stats) and attributes which aren't visible.
    SELECT
        hdr + 1 + (SUM(CASE WHEN null_frac <> 0 THEN 1 ELSE 0 END) / 8) AS nullhdr,
        SUM((1 - null_frac) * avg_width) AS datawidth,
        MAX(null_frac) AS maxfracsum,
        schemaname,
        tablename,
        hdr, ma, bs
    FROM pg_stats
    CROSS JOIN constants
    LEFT JOIN no_stats
        ON schemaname = no_stats.table_schema
       AND tablename = no_stats.table_name
    WHERE schemaname NOT IN ('pg_catalog', 'information_schema')
      -- anti-join: keep only tables with complete statistics
      AND no_stats.table_name IS NULL
      AND EXISTS (
            SELECT 1
            FROM information_schema.columns
            WHERE schemaname = columns.table_schema
              AND tablename = columns.table_name
          )
    GROUP BY schemaname, tablename, hdr, ma, bs
),
data_headers AS (
    -- Estimate per-row header and data sizes, with alignment padding.
    SELECT
        ma, bs, hdr, schemaname, tablename,
        (datawidth + (hdr + ma - (CASE WHEN hdr % ma = 0 THEN ma ELSE hdr % ma END)))::NUMERIC AS datahdr,
        (maxfracsum * (nullhdr + ma - (CASE WHEN nullhdr % ma = 0 THEN ma ELSE nullhdr % ma END))) AS nullhdr2
    FROM null_headers
),
table_estimates AS (
    -- Estimate how large each table should be, from row count,
    -- estimated row size, and page size.
    SELECT
        schemaname, tablename, bs,
        reltuples::NUMERIC AS est_rows,
        relpages * bs AS table_bytes,
        CEIL((reltuples *
              (datahdr + nullhdr2 + 4 + ma -
               (CASE WHEN datahdr % ma = 0
                     THEN ma ELSE datahdr % ma END)
              ) / (bs - 20))) * bs AS expected_bytes,
        reltoastrelid
    FROM data_headers
    INNER JOIN pg_class
        ON tablename = relname
    INNER JOIN pg_namespace
        ON relnamespace = pg_namespace.oid
       AND schemaname = nspname
    -- ordinary (heap) tables only
    WHERE pg_class.relkind = 'r'
),
estimates_with_toast AS (
    -- Add estimated TOAST table sizes; assume 4 TOAST tuples per page
    -- because we don't have anything better to go on.
    SELECT
        schemaname, tablename,
        TRUE AS can_estimate,
        est_rows,
        table_bytes + (COALESCE(toast.relpages, 0) * bs) AS table_bytes,
        expected_bytes + (CEIL(COALESCE(toast.reltuples, 0) / 4) * bs) AS expected_bytes
    FROM table_estimates
    LEFT JOIN pg_class AS toast
        ON table_estimates.reltoastrelid = toast.oid
       AND toast.relkind = 't'
),
table_estimates_plus AS (
    -- Normalize sizes (non-positive -> NULL), compute bloat bytes,
    -- and append the tables we could not estimate (no_stats).
    SELECT
        current_database() AS databasename,
        schemaname, tablename, can_estimate, est_rows,
        CASE WHEN table_bytes > 0
             THEN table_bytes::NUMERIC
             ELSE NULL::NUMERIC END AS table_bytes,
        CASE WHEN expected_bytes > 0
             THEN expected_bytes::NUMERIC
             ELSE NULL::NUMERIC END AS expected_bytes,
        CASE WHEN expected_bytes > 0 AND table_bytes > 0
              AND expected_bytes <= table_bytes
             THEN (table_bytes - expected_bytes)::NUMERIC
             ELSE 0::NUMERIC END AS bloat_bytes
    FROM estimates_with_toast
    UNION ALL
    SELECT
        current_database() AS databasename,
        table_schema, table_name, FALSE,
        est_rows, table_size,
        NULL::NUMERIC, NULL::NUMERIC
    FROM no_stats
),
bloat_data AS (
    -- Final math and formatting.  Note: table_bytes and expected_bytes
    -- were previously emitted twice each, producing duplicate output
    -- columns; the duplicates have been removed.
    SELECT
        databasename,
        schemaname, tablename, can_estimate, est_rows,
        table_bytes,
        ROUND(table_bytes / (1024 ^ 2)::NUMERIC, 3) AS table_mb,
        expected_bytes,
        ROUND(expected_bytes / (1024 ^ 2)::NUMERIC, 3) AS expected_mb,
        ROUND(bloat_bytes * 100 / table_bytes) AS pct_bloat,
        ROUND(bloat_bytes / (1024::NUMERIC ^ 2), 2) AS mb_bloat
    FROM table_estimates_plus
)
-- Report output, one row per user table.
SELECT
    databasename, schemaname, tablename,
    can_estimate,
    est_rows,
    pct_bloat, mb_bloat,
    table_mb
FROM bloat_data
-- This WHERE clause defines which tables actually appear in the
-- bloat report.  The example below keeps tables that are either
-- at least 50% bloated with more than 10 MB of bloat, or at least
-- 25% bloated with more than 1 GB of bloat.
--WHERE ( pct_bloat >= 50 AND mb_bloat >= 10 )
--   OR ( pct_bloat >= 25 AND mb_bloat >= 1000 )
ORDER BY mb_bloat DESC NULLS LAST;