|
-- complain if script is sourced in psql, rather than via CREATE EXTENSION
\echo Use "ALTER EXTENSION pg_qualstats UPDATE" to load this file. \quit

-- pg_qualstats_index_advisor
--
-- Suggest indexes for the quals collected by pg_qualstats() in the current
-- database.
--
-- Parameters:
--   min_filter       minimum average number of filtered tuples per occurrence
--                    (sum(nbfiltered) / sum(occurences)) for a qual to be
--                    considered.  NULL is treated as 1000.
--   min_selectivity  minimum average selectivity, computed as
--                    sum(nbfiltered) / sum(execution_count) * 100, for a qual
--                    to be considered.  NULL is treated as 30.
--   forbidden_am     names of index access methods that must not be
--                    suggested.  NULL is treated as an empty array.  'hash' is
--                    always added on servers older than PostgreSQL 10.
--
-- Returns a json object with two keys:
--   'indexes'      array of {"ddl": <CREATE INDEX statement>,
--                            "queryids": [<queryid>, ...]}
--   'unoptimised'  array of {"qual": <deparsed qual>,
--                            "queryids": [<queryid>, ...]}
--                  for quals whose operator belongs to no access method.
-- The "queryids" arrays never contain NULL and are empty when no queryid is
-- known.
CREATE OR REPLACE FUNCTION @extschema@.pg_qualstats_index_advisor (
    min_filter integer DEFAULT 1000,
    min_selectivity integer DEFAULT 30,
    forbidden_am text[] DEFAULT '{}')
    RETURNS json
AS $_$
DECLARE
    -- qualnodeids that must be ignored by the next scoring rounds: either
    -- unoptimisable, or already covered by a generated index
    v_processed bigint[] = '{}';
    v_indexes json[] = '{}';
    v_unoptimised json[] = '{}';

    rec record;
    -- number of paths handled during the last round; starts at 1 so that the
    -- main loop is entered at least once
    v_nb_processed integer = 1;

    v_ddl text;
    v_col text;
    v_qualnodeid bigint;
    v_quals_todo bigint[];
    v_quals_done bigint[];
    v_quals_col_done text[];
    v_queryids bigint[] = '{}';
BEGIN
    -- sanity checks and default values
    SELECT coalesce(min_filter, 1000), coalesce(min_selectivity, 30),
        coalesce(forbidden_am, '{}')
    INTO min_filter, min_selectivity, forbidden_am;

    -- don't try to generate hash indexes before pg 10, as those are only WAL
    -- logged since pg 10.
    IF pg_catalog.current_setting('server_version_num')::bigint < 100000 THEN
        forbidden_am := array_append(forbidden_am, 'hash');
    END IF;

    -- first find out unoptimizable quals.
    -- We need an array of json containing the per-qual info, and a single
    -- array containing all the underlying qualnodeids, so we need to create
    -- the wanted final object manually as we can't have two different
    -- grouping approaches.
    FOR rec IN
        -- quals whose operator is not a member of any access method's
        -- operator family (no matching pg_amop / pg_am row)
        WITH src AS (
            SELECT DISTINCT qualnodeid,
                (coalesce(lrelid, rrelid), coalesce(lattnum, rattnum),
                    opno, eval_type)::@extschema@.qual AS qual,
                queryid
            FROM @extschema@.pg_qualstats() q
            JOIN pg_catalog.pg_database d ON q.dbid = d.oid
            LEFT JOIN pg_catalog.pg_operator op ON op.oid = q.opno
            LEFT JOIN pg_catalog.pg_amop amop ON amop.amopopr = op.oid
            LEFT JOIN pg_catalog.pg_am am ON am.oid = amop.amopmethod
            WHERE d.datname = current_database()
            AND eval_type = 'f'
            AND coalesce(lrelid, rrelid) != 0
            AND amname IS NULL
        )
        SELECT pg_catalog.json_build_object(
                'qual', @extschema@.pg_qualstats_deparse_qual(qual),
                -- be careful to generate an empty array if no queryid is
                -- available
                'queryids',
                coalesce(pg_catalog.array_agg(DISTINCT queryid)
                    FILTER (WHERE queryid IS NOT NULL), '{}')
            ) AS obj,
            array_agg(qualnodeid) AS qualnodeids
        FROM src
        GROUP BY qual
    LOOP
        v_unoptimised := array_append(v_unoptimised, rec.obj);
        v_processed := array_cat(v_processed, rec.qualnodeids);
    END LOOP;

    -- The index suggestion is done in multiple iterations, by scoring for each
    -- relation containing interesting quals a path of possibly AND-ed quals
    -- that contains other possibly AND-ed quals.  Only the highest scoring
    -- path will be used to create an index, so we can then compute another
    -- set of paths ignoring the quals that are now optimized with an index.
    -- The loop stops as soon as a round does not return any path.
    WHILE v_nb_processed > 0 LOOP
        v_nb_processed := 0;
        FOR rec IN
            -- first, find quals that seem worth optimizing along with the
            -- possible access methods, discarding any qualnode that is marked
            -- as already processed.  Also apply access method restriction.
            WITH pgqs AS (
                SELECT dbid, amname, qualid, qualnodeid,
                    (coalesce(lrelid, rrelid), coalesce(lattnum, rattnum),
                        opno, eval_type)::@extschema@.qual AS qual, queryid,
                    round(avg(execution_count)) AS execution_count,
                    sum(occurences) AS occurences,
                    round(sum(nbfiltered)::numeric / sum(occurences)) AS avg_filter,
                    CASE WHEN sum(execution_count) = 0
                        THEN 0
                        ELSE round(sum(nbfiltered::numeric) / sum(execution_count) * 100)
                    END AS avg_selectivity
                FROM @extschema@.pg_qualstats() q
                JOIN pg_catalog.pg_database d ON q.dbid = d.oid
                JOIN pg_catalog.pg_operator op ON op.oid = q.opno
                JOIN pg_catalog.pg_amop amop ON amop.amopopr = op.oid
                JOIN pg_catalog.pg_am am ON am.oid = amop.amopmethod
                WHERE d.datname = current_database()
                AND eval_type = 'f'
                AND amname != ALL (forbidden_am)
                AND coalesce(lrelid, rrelid) != 0
                AND qualnodeid != ALL(v_processed)
                GROUP BY dbid, amname, qualid, qualnodeid, lrelid, rrelid,
                    lattnum, rattnum, opno, eval_type, queryid
            ),
            -- apply cardinality and selectivity restrictions
            filtered AS (
                SELECT (qual).relid, amname, coalesce(qualid, qualnodeid) AS parent,
                    count(*) AS weight,
                    (array_agg(DISTINCT qualnodeid),
                        array_agg(queryid)
                    )::@extschema@.adv_quals AS quals
                FROM pgqs
                WHERE avg_filter >= min_filter
                AND avg_selectivity >= min_selectivity
                GROUP BY (qual).relid, amname, parent
            ),
            -- for each possibly AND-ed qual, build the list of included
            -- qualnodeid
            nodes AS (
                SELECT p.relid, p.amname, p.parent, p.quals,
                    c.quals AS children
                FROM filtered p
                LEFT JOIN filtered c ON (p.quals).qualnodeids @> (c.quals).qualnodeids
                    AND p.amname = c.amname
                    AND p.parent != c.parent
                    AND (p.quals).qualnodeids != (c.quals).qualnodeids
            ),
            -- build the "paths", which is the list of AND-ed quals that
            -- entirely contains another possibly AND-ed quals, and give a
            -- score for each path.  The scoring method used here is simply the
            -- number of columns in the quals.
            paths AS (
                SELECT DISTINCT *,
                    coalesce(pg_catalog.array_length((children).qualnodeids, 1),
                        0) AS weight
                FROM nodes
                UNION
                SELECT DISTINCT p.relid, p.amname, p.parent, p.quals, c.children,
                    coalesce(pg_catalog.array_length((c.children).qualnodeids, 1),
                        0) AS weight
                FROM nodes p
                JOIN nodes c ON (p.children).qualnodeids @> (c.quals).qualnodeids
                    AND (c.quals).qualnodeids IS NOT NULL
                    AND (c.quals).qualnodeids != (p.quals).qualnodeids
                    AND p.amname = c.amname
            ),
            -- compute the final paths.
            -- The scoring method used here is simply the sum of total
            -- number of columns in each possibly AND-ed quals, so that we can
            -- later choose to create indexes that optimize as many queries as
            -- possible with as few indexes as possible.
            -- We also compute here an access method weight, so that we can
            -- later choose a btree index rather than another access method if
            -- btree is available.
            computed AS (
                SELECT relid, amname, parent, quals,
                    array_agg(to_json(children) ORDER BY weight)
                        FILTER (WHERE children IS NOT NULL) AS included,
                    pg_catalog.array_length((quals).qualnodeids, 1)
                        + sum(weight) AS path_weight,
                    CASE amname WHEN 'btree' THEN 1 ELSE 2 END AS amweight
                FROM paths
                GROUP BY relid, amname, parent, quals
            ),
            -- compute a rank for each final path, per relation.
            final AS (
                SELECT relid, amname, parent, quals, included, path_weight, amweight,
                    row_number() OVER (
                        PARTITION BY relid
                        ORDER BY path_weight DESC, amweight) AS rownum
                FROM computed
            )
            -- and finally choose the best ranked final path for each relation.
            SELECT relid, amname, parent,
                (quals).qualnodeids as quals, (quals).queryids as queryids,
                included, path_weight
            FROM final
            WHERE rownum = 1
        LOOP
            v_nb_processed := v_nb_processed + 1;

            v_ddl := '';
            v_quals_todo := '{}';
            v_quals_done := '{}';
            v_quals_col_done := '{}';

            -- put columns from included quals, if any, first for order
            -- dependency (smallest included set of quals first)
            DECLARE
                v_cur json;
            BEGIN
                IF rec.included IS NOT NULL THEN
                    FOR v_cur IN SELECT v->'qualnodeids'
                        FROM (SELECT * FROM unnest(rec.included)) AS r(v)
                        ORDER BY pg_catalog.json_array_length(v->'qualnodeids') ASC
                    LOOP
                        -- go through text to convert the json elements to
                        -- bigint
                        FOR v_qualnodeid IN
                            SELECT pg_catalog.json_array_elements(v_cur)::text::bigint
                        LOOP
                            v_quals_todo := v_quals_todo || v_qualnodeid;
                        END LOOP;
                    END LOOP;
                END IF;
            END;

            -- and append qual's own columns
            v_quals_todo := v_quals_todo || rec.quals;

            -- generate the index DDL
            FOREACH v_qualnodeid IN ARRAY v_quals_todo LOOP
                -- skip quals already handled for this index
                CONTINUE WHEN v_quals_done @> ARRAY[v_qualnodeid];

                -- Mark this qual as handled, and as covered by a generated
                -- index so it's ignored at the next round of best quals to
                -- optimize.  This must be done before the same-column check
                -- below: a qual skipped because its column is already part of
                -- this index is covered by it too, and would otherwise come
                -- back at the next round and produce a duplicate index on the
                -- same column.
                v_processed := pg_catalog.array_append(v_processed, v_qualnodeid);
                v_quals_done := v_quals_done || v_qualnodeid;

                -- skip other quals for the same column
                v_col := @extschema@.pg_qualstats_get_idx_col(v_qualnodeid, false);
                CONTINUE WHEN v_quals_col_done @> ARRAY[v_col];

                -- mark this column as present in this index
                v_quals_col_done := v_quals_col_done || v_col;

                -- if underlying table has been dropped, stop here
                CONTINUE WHEN coalesce(v_col, '') = '';

                -- append the column to the index
                IF v_ddl != '' THEN v_ddl := v_ddl || ', '; END IF;
                v_ddl := v_ddl || @extschema@.pg_qualstats_get_idx_col(v_qualnodeid, true);
            END LOOP;

            -- if underlying table has been dropped, skip this (broken) index
            CONTINUE WHEN coalesce(v_ddl, '') = '';

            -- generate the full CREATE INDEX ddl.  v_qualnodeid still holds the
            -- last qual of the path here, which is enough to find the relation.
            v_ddl := pg_catalog.format('CREATE INDEX ON %s USING %I (%s)',
                @extschema@.pg_qualstats_get_qualnode_rel(v_qualnodeid), rec.amname, v_ddl);

            -- get the underlying queryid(s)
            DECLARE
                v_queryid text;
                v_cur json;
            BEGIN
                v_queryids := rec.queryids;
                IF rec.included IS NOT NULL THEN
                    FOREACH v_cur IN ARRAY rec.included LOOP
                        -- go through text to convert the json elements to
                        -- bigint; a json null is rendered as 'null'
                        FOR v_queryid IN SELECT pg_catalog.json_array_elements(v_cur->'queryids')::text
                        LOOP
                            CONTINUE WHEN v_queryid = 'null';
                            v_queryids := v_queryids || v_queryid::text::bigint;
                        END LOOP;
                    END LOOP;
                END IF;
            END;

            -- Remove duplicates and NULL queryids (rec.queryids is a plain
            -- array_agg and can mix NULL with real values), and make sure to
            -- end up with an empty array rather than NULL if nothing is left.
            SELECT coalesce(pg_catalog.array_agg(DISTINCT v)
                    FILTER (WHERE v IS NOT NULL), '{}')
            INTO v_queryids
            FROM (SELECT unnest(v_queryids)) s(v);

            -- and finally append the index to the list of generated indexes
            v_indexes := pg_catalog.array_append(v_indexes,
                pg_catalog.json_build_object(
                    'ddl', v_ddl,
                    'queryids', v_queryids
                )
            );
        END LOOP;
    END LOOP;

    RETURN pg_catalog.json_build_object(
        'indexes', v_indexes,
        'unoptimised', v_unoptimised);
END;
$_$ LANGUAGE plpgsql; /* end of pg_qualstats_index_advisor */
0 commit comments