Skip to content

Commit e053ca6

Browse files
authored
Release 2.1.2 (#73)
1 parent ad57a82 commit e053ca6

File tree

4 files changed

+1007
-1
lines changed

4 files changed

+1007
-1
lines changed

CHANGELOG

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
2.1.2
2+
Miscellaneous:
3+
- Add support for PostgreSQL 18 (Georgy Shelkovy)
14
2.1.1
25
Miscellaneous:
36
- Add support for PostgreSQL 17 (Georgy Shelkovy)

pg_qualstats--2.1.1--2.1.2.sql

Lines changed: 282 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,282 @@
1+
-- complain if script is sourced in psql, rather than via CREATE EXTENSION
2+
\echo Use "ALTER EXTENSION pg_qualstats UPDATE" to load this file. \quit
3+
4+
-- pg_qualstats_index_advisor: build index suggestions from the quals
-- returned by pg_qualstats() for the current database.
--
-- Parameters:
--   min_filter      - minimum avg_filter, computed below as
--                     round(sum(nbfiltered) / sum(occurences)), a qual must
--                     reach to be considered.  NULL is replaced by 1000.
--   min_selectivity - minimum avg_selectivity, computed below as
--                     round(sum(nbfiltered) / sum(execution_count) * 100), a
--                     qual must reach to be considered.  NULL is replaced by 30.
--   forbidden_am    - access method names that must not be suggested.  NULL
--                     is replaced by an empty array.
--
-- Returns a json object with two keys:
--   'indexes'     - array of {"ddl": <CREATE INDEX statement>, "queryids": [...]}
--   'unoptimised' - array of {"qual": <deparsed qual>, "queryids": [...]} for
--                   quals whose operator matched no access method.
CREATE OR REPLACE FUNCTION @extschema@.pg_qualstats_index_advisor (
5+
min_filter integer DEFAULT 1000,
6+
min_selectivity integer DEFAULT 30,
7+
forbidden_am text[] DEFAULT '{}')
8+
RETURNS json
9+
AS $_$
10+
DECLARE
11+
-- qualnodeids that must be ignored by the next rounds: either reported as
-- unoptimisable or already covered by a generated index
v_processed bigint[] = '{}';
12+
-- accumulated {"ddl", "queryids"} objects, returned under 'indexes'
v_indexes json[] = '{}';
13+
-- accumulated {"qual", "queryids"} objects, returned under 'unoptimised'
v_unoptimised json[] = '{}';
14+
15+
rec record;
16+
-- number of candidate paths seen during the current WHILE iteration; starts
-- at 1 only so that the loop is entered at least once
v_nb_processed integer = 1;
17+
18+
v_ddl text;
19+
v_col text;
20+
v_qualnodeid bigint;
21+
v_quals_todo bigint[];
22+
v_quals_done bigint[];
23+
v_quals_col_done text[];
24+
v_queryids bigint[] = '{}';
25+
BEGIN
26+
-- sanity checks and default values: explicit NULL arguments fall back to the
-- same values as the declared defaults
27+
SELECT coalesce(min_filter, 1000), coalesce(min_selectivity, 30),
28+
coalesce(forbidden_am, '{}')
29+
INTO min_filter, min_selectivity, forbidden_am;
30+
31+
-- don't try to generate hash indexes before pg 10, as those are only WAL
32+
-- logged since pg 10 (hence the 100000 version check below).
33+
IF pg_catalog.current_setting('server_version_num')::bigint < 100000 THEN
34+
forbidden_am := array_append(forbidden_am, 'hash');
35+
END IF;
36+
37+
-- first find out unoptimizable quals, i.e. quals whose operator is not
-- attached to any access method (amname IS NULL after the LEFT JOINs).
38+
-- We need an array of json containing the per-qual info, and a single
39+
-- array containing all the underlying qualnodeids, so we need to create
40+
-- the wanted final object manually as we can't have two different grouping
41+
-- approaches.
42+
FOR rec IN WITH src AS (SELECT DISTINCT qualnodeid,
43+
(coalesce(lrelid, rrelid), coalesce(lattnum, rattnum),
44+
opno, eval_type)::@extschema@.qual AS qual,
45+
queryid
46+
FROM @extschema@.pg_qualstats() q
47+
JOIN pg_catalog.pg_database d ON q.dbid = d.oid
48+
LEFT JOIN pg_catalog.pg_operator op ON op.oid = q.opno
49+
LEFT JOIN pg_catalog.pg_amop amop ON amop.amopopr = op.oid
50+
LEFT JOIN pg_catalog.pg_am am ON am.oid = amop.amopmethod
51+
WHERE d.datname = current_database()
52+
AND eval_type = 'f'
53+
AND coalesce(lrelid, rrelid) != 0
54+
AND amname IS NULL
55+
)
56+
SELECT pg_catalog.json_build_object(
57+
'qual', @extschema@.pg_qualstats_deparse_qual(qual),
58+
-- be careful to generate an empty array if no queryid is available
59+
'queryids',
60+
coalesce(pg_catalog.array_agg(DISTINCT queryid)
61+
FILTER (WHERE queryid IS NOT NULL), '{}')
62+
) AS obj,
63+
array_agg(qualnodeid) AS qualnodeids
64+
FROM src
65+
GROUP BY qual
66+
LOOP
67+
v_unoptimised := array_append(v_unoptimised, rec.obj);
68+
-- exclude those qualnodeids from the index suggestion rounds below
v_processed := array_cat(v_processed, rec.qualnodeids);
69+
END LOOP;
70+
71+
-- The index suggestion is done in multiple iterations, by scoring for each
72+
-- relation containing interesting quals a path of possibly AND-ed quals
73+
-- that contains other possibly AND-ed quals. Only the highest score path
74+
-- will be used to create an index, so we can then compute another set of
75+
-- paths ignoring the quals that are now optimized with an index.
-- The loop ends when an iteration returns no candidate path at all
-- (v_nb_processed stays at 0).
76+
WHILE v_nb_processed > 0 LOOP
77+
v_nb_processed := 0;
78+
FOR rec IN
79+
-- first, find quals that seem worth optimizing along with the
80+
-- possible access methods, discarding any qualnodes that are marked as
81+
-- already processed. Also apply access method restriction.
82+
WITH pgqs AS (
83+
SELECT dbid, amname, qualid, qualnodeid,
84+
(coalesce(lrelid, rrelid), coalesce(lattnum, rattnum),
85+
opno, eval_type)::@extschema@.qual AS qual, queryid,
86+
round(avg(execution_count)) AS execution_count,
87+
sum(occurences) AS occurences,
88+
round(sum(nbfiltered)::numeric / sum(occurences)) AS avg_filter,
89+
CASE WHEN sum(execution_count) = 0
90+
THEN 0
91+
ELSE round(sum(nbfiltered::numeric) / sum(execution_count) * 100)
92+
END AS avg_selectivity
93+
FROM @extschema@.pg_qualstats() q
94+
JOIN pg_catalog.pg_database d ON q.dbid = d.oid
95+
JOIN pg_catalog.pg_operator op ON op.oid = q.opno
96+
JOIN pg_catalog.pg_amop amop ON amop.amopopr = op.oid
97+
JOIN pg_catalog.pg_am am ON am.oid = amop.amopmethod
98+
WHERE d.datname = current_database()
99+
AND eval_type = 'f'
100+
AND amname != ALL (forbidden_am)
101+
AND coalesce(lrelid, rrelid) != 0
102+
AND qualnodeid != ALL(v_processed)
103+
GROUP BY dbid, amname, qualid, qualnodeid, lrelid, rrelid,
104+
lattnum, rattnum, opno, eval_type, queryid
105+
),
106+
-- apply cardinality and selectivity restrictions, and group the
-- qualnodeids per relation, access method and parent qual (the qualid,
-- or the qualnodeid itself when there is no qualid).
-- NOTE(review): the weight column computed here does not appear to be
-- read by the following CTEs, which compute their own weight.
107+
filtered AS (
108+
SELECT (qual).relid, amname, coalesce(qualid, qualnodeid) AS parent,
109+
count(*) AS weight,
110+
(array_agg(DISTINCT qualnodeid),
111+
array_agg(queryid)
112+
)::@extschema@.adv_quals AS quals
113+
FROM pgqs
114+
WHERE avg_filter >= min_filter
115+
AND avg_selectivity >= min_selectivity
116+
GROUP BY (qual).relid, amname, parent
117+
),
118+
-- for each possibly AND-ed qual, build the list of included qualnodeid
119+
nodes AS (
120+
SELECT p.relid, p.amname, p.parent, p.quals,
121+
c.quals AS children
122+
FROM filtered p
123+
LEFT JOIN filtered c ON (p.quals).qualnodeids @> (c.quals).qualnodeids
124+
AND p.amname = c.amname
125+
AND p.parent != c.parent
126+
AND (p.quals).qualnodeids != (c.quals).qualnodeids
127+
),
128+
-- build the "paths", which is the list of AND-ed quals that entirely
129+
-- contains another possibly AND-ed quals, and give a score for each
130+
-- path. The scoring method used here is simply the number of
131+
-- columns in the quals.
132+
paths AS (
133+
SELECT DISTINCT *,
134+
coalesce(pg_catalog.array_length((children).qualnodeids, 1),
135+
0) AS weight
136+
FROM nodes
137+
UNION
138+
SELECT DISTINCT p.relid, p.amname, p.parent, p.quals, c.children,
139+
coalesce(pg_catalog.array_length((c.children).qualnodeids, 1),
140+
0) AS weight
141+
FROM nodes p
142+
JOIN nodes c ON (p.children).qualnodeids @> (c.quals).qualnodeids
143+
AND (c.quals).qualnodeids IS NOT NULL
144+
AND (c.quals).qualnodeids != (p.quals).qualnodeids
145+
AND p.amname = c.amname
146+
),
147+
-- compute the final paths.
148+
-- The scoring method used here is simply the sum of total
149+
-- number of columns in each possibly AND-ed quals, so that we can
150+
-- later choose to create indexes that optimize as many queries as
151+
-- possible with as few indexes as possible.
152+
-- We also compute here an access method weight, so that we can later
153+
-- choose a btree index rather than another access method if btree is
154+
-- available.
155+
computed AS (
156+
SELECT relid, amname, parent, quals,
157+
array_agg(to_json(children) ORDER BY weight)
158+
FILTER (WHERE children IS NOT NULL) AS included,
159+
pg_catalog.array_length((quals).qualnodeids, 1)
160+
+ sum(weight) AS path_weight,
161+
CASE amname WHEN 'btree' THEN 1 ELSE 2 END AS amweight
162+
FROM paths
163+
GROUP BY relid, amname, parent, quals
164+
),
165+
-- compute a rank for each final paths, per relation: highest path_weight
-- first, btree (amweight 1) preferred on equal path_weight.
166+
final AS (
167+
SELECT relid, amname, parent, quals, included, path_weight, amweight,
168+
row_number() OVER (
169+
PARTITION BY relid
170+
ORDER BY path_weight DESC, amweight) AS rownum
171+
FROM computed
172+
)
173+
-- and finally choose the highest ranked final path for each relation.
174+
SELECT relid, amname, parent,
175+
(quals).qualnodeids as quals, (quals).queryids as queryids,
176+
included, path_weight
177+
FROM final
178+
WHERE rownum = 1
179+
LOOP
180+
v_nb_processed := v_nb_processed + 1;
181+
182+
-- reset the per-index working state
v_ddl := '';
183+
v_quals_todo := '{}';
184+
v_quals_done := '{}';
185+
v_quals_col_done := '{}';
186+
187+
-- put columns from included quals, if any, first for order dependency;
-- included quals with fewer qualnodeids are handled first
188+
DECLARE
189+
v_cur json;
190+
BEGIN
191+
IF rec.included IS NOT NULL THEN
192+
FOR v_cur IN SELECT v->'qualnodeids'
193+
FROM (SELECT * FROM unnest(rec.included)) AS r(v)
194+
ORDER BY pg_catalog.json_array_length(v->'qualnodeids') ASC
195+
LOOP
196+
-- Direct cast from json to bigint is only possible since pg10
197+
FOR v_qualnodeid IN
198+
SELECT pg_catalog.json_array_elements(v_cur)::text::bigint
199+
LOOP
200+
v_quals_todo := v_quals_todo || v_qualnodeid;
201+
END LOOP;
202+
END LOOP;
203+
END IF;
204+
END;
205+
206+
-- and append qual's own columns
207+
v_quals_todo := v_quals_todo || rec.quals;
208+
209+
-- generate the index DDL: v_ddl accumulates the comma separated column
-- list, in v_quals_todo order
210+
FOREACH v_qualnodeid IN ARRAY v_quals_todo LOOP
211+
-- skip quals already present in the index
212+
CONTINUE WHEN v_quals_done @> ARRAY[v_qualnodeid];
213+
214+
-- skip other quals for the same column
215+
v_col := @extschema@.pg_qualstats_get_idx_col(v_qualnodeid, false);
216+
CONTINUE WHEN v_quals_col_done @> ARRAY[v_col];
217+
218+
-- mark this qual as present in a generated index so it's ignored at
219+
-- the next round of best quals to optimize
220+
v_processed := pg_catalog.array_append(v_processed, v_qualnodeid);
221+
222+
-- mark this qual and col as present in this index
223+
v_quals_done := v_quals_done || v_qualnodeid;
224+
v_quals_col_done := v_quals_col_done || v_col;
225+
226+
-- if underlying table has been dropped (no column could be resolved),
-- skip this qual; it stays marked as processed
227+
CONTINUE WHEN coalesce(v_col, '') = '';
228+
229+
-- append the column to the index
230+
IF v_ddl != '' THEN v_ddl := v_ddl || ', '; END IF;
231+
v_ddl := v_ddl || @extschema@.pg_qualstats_get_idx_col(v_qualnodeid, true);
232+
END LOOP;
233+
234+
-- if underlying table has been dropped, skip this (broken) index
235+
CONTINUE WHEN coalesce(v_ddl, '') = '';
236+
237+
-- generate the full CREATE INDEX ddl; v_qualnodeid still holds the last
-- element visited by the FOREACH loop above and is used to look up the
-- relation
238+
v_ddl = pg_catalog.format('CREATE INDEX ON %s USING %I (%s)',
239+
@extschema@.pg_qualstats_get_qualnode_rel(v_qualnodeid), rec.amname, v_ddl);
240+
241+
-- get the underlying queryid(s): the path's own ones plus the ones of
-- every included qual
242+
DECLARE
243+
v_queryid text;
244+
v_cur json;
245+
BEGIN
246+
v_queryids = rec.queryids;
247+
IF rec.included IS NOT NULL THEN
248+
FOREACH v_cur IN ARRAY rec.included LOOP
249+
-- Direct cast from json to bigint is only possible since pg10
250+
FOR v_queryid IN SELECT pg_catalog.json_array_elements(v_cur->'queryids')::text
251+
LOOP
252+
-- json null elements are rendered as the text 'null'
CONTINUE WHEN v_queryid = 'null';
253+
v_queryids := v_queryids || v_queryid::text::bigint;
254+
END LOOP;
255+
END LOOP;
256+
END IF;
257+
END;
258+
259+
-- remove any duplicates
260+
SELECT pg_catalog.array_agg(DISTINCT v) INTO v_queryids
261+
FROM (SELECT unnest(v_queryids)) s(v);
262+
263+
-- sanitize the queryids: always emit an array, never NULL or {NULL}
264+
IF v_queryids IS NULL OR v_queryids = '{null}' THEN
265+
v_queryids = '{}';
266+
END IF;
267+
268+
-- and finally append the index to the list of generated indexes
269+
v_indexes := pg_catalog.array_append(v_indexes,
270+
pg_catalog.json_build_object(
271+
'ddl', v_ddl,
272+
'queryids', v_queryids
273+
)
274+
);
275+
END LOOP;
276+
END LOOP;
277+
278+
RETURN pg_catalog.json_build_object(
279+
'indexes', v_indexes,
280+
'unoptimised', v_unoptimised);
281+
END;
282+
$_$ LANGUAGE plpgsql; /* end of pg_qualstats_index_advisor */

0 commit comments

Comments
 (0)