From 1ff90c2bc459e88ad36d11626d8714656024770c Mon Sep 17 00:00:00 2001 From: Rex Johnston Date: Fri, 31 Oct 2025 14:33:01 +1200 Subject: [PATCH] MDEV-37936 Followup to MDEV-36321: out_rows for GROUP BY: use of item names? MDEV-36321 compared item name strings when looking for key use in items of the select list within a derived table. Here we remove this type of comparison and compare the underlying fields and their position. --- mysql-test/main/derived_opt.result | 101 ++++++++++++++++++++++++++--- mysql-test/main/derived_opt.test | 81 ++++++++++++++++++++--- sql/opt_group_by_cardinality.cc | 97 ++++++++++++++++++--------- 3 files changed, 232 insertions(+), 47 deletions(-) diff --git a/mysql-test/main/derived_opt.result b/mysql-test/main/derived_opt.result index 984c8070870d0..80dd67231579f 100644 --- a/mysql-test/main/derived_opt.result +++ b/mysql-test/main/derived_opt.result @@ -595,7 +595,7 @@ select * from t2, (select max(value), grp_id from t1 group by grp_id) DT where -t2.a= DT.grp_id; +t2.a = DT.grp_id; a max(value) grp_id 1 100 1 2 100 2 @@ -616,6 +616,30 @@ t "rec_per_key_estimate": 1 } ] +explain +with DT as +( +select max(value), 1/grp_id as fn from t1 group by 2 +) +select * from t2, DT where t2.a = DT.fn; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t2 ALL NULL NULL NULL NULL 5 Using where +1 PRIMARY ref key0 key0 4 test.t2.a 1 Using where +2 DERIVED t1 ALL NULL NULL NULL NULL 10000 Using temporary; Using filesort +select +json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t +from information_schema.optimizer_trace; +t +[ + { + "table_alias": "DT", + "key_name": "key0", + "key_parts": 1, + "select": + ["group_list_in_key"], + "rec_per_key_estimate": 1 + } +] # Same as above, but try a UNION: select * from t2, @@ -623,7 +647,7 @@ t2, union all select max(value), grp_id from t1 group by grp_id) DT where -t2.a= DT.grp_id; +t2.a = DT.grp_id; a max(value) grp_id 1 100 1 1 100 1 @@ -660,7 +684,7 @@ 
t2, union all select max(value), grp_id from t1 group by MOD(grp_id,2)) DT where -t2.a= DT.grp_id; +t2.a = DT.grp_id; id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t2 ALL NULL NULL NULL NULL 5 Using where 1 PRIMARY ref key0 key0 5 test.t2.a 101 @@ -689,7 +713,7 @@ select * from t2, (select grp_id, max(value) as maxval from v1 group by grp_id) DT where -t2.a= DT.grp_id; +t2.a = DT.grp_id; id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t2 ALL NULL NULL NULL NULL 5 Using where 1 PRIMARY ref key0 key0 5 test.t2.a 1 @@ -701,7 +725,7 @@ select * from t2, (select grp_id, max(value) as maxval from cte1 group by grp_id) DT where -t2.a= DT.grp_id; +t2.a = DT.grp_id; id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t2 ALL NULL NULL NULL NULL 5 Using where 1 PRIMARY ref key0 key0 5 test.t2.a 1 @@ -718,7 +742,7 @@ where t1.grp_id = t3.b group by grp_id ) DT where -t2.a= DT.grp_id; +t2.a = DT.grp_id; id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t2 ALL NULL NULL NULL NULL 5 Using where 1 PRIMARY ref key0 key0 5 test.t2.a 1 @@ -733,7 +757,7 @@ t2, (select max(value) as maxval, grp_id from t1 group by grp_id) DT where t2.col2=maxval and -t2.a= DT.grp_id; +t2.a = DT.grp_id; id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t2 ALL NULL NULL NULL NULL 5 Using where 1 PRIMARY ref key0 key0 10 test.t2.col2,test.t2.a 1 @@ -757,12 +781,29 @@ select * from t2, (select grp_id, max(value) as maxval from t1 group by grp_id) DT where -t2.col2=maxval and -t2.a= DT.grp_id; +t2.col2 = maxval and +t2.a = DT.grp_id; id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t2 ALL NULL NULL NULL NULL 5 Using where 1 PRIMARY ref key0 key0 10 test.t2.a,test.t2.col2 1 2 DERIVED t1 ALL grp_id NULL NULL NULL 10000 Using temporary; Using filesort +explain +select * from +t2 join +( +select max(value) as X, grp_id from t1 group by grp_id 
+union all +select distinct 'Total' as X, b as grp_id from t3 +) DT +on t2.a = DT.grp_id +where DT.X = 100; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t2 ALL NULL NULL NULL NULL 5 Using where +1 PRIMARY ref key0 key0 5 test.t2.a 2 Using where +2 DERIVED t1 ALL NULL NULL NULL NULL 10000 Using temporary; Using filesort +3 UNION NULL NULL NULL NULL NULL NULL NULL Impossible WHERE +Warnings: +Warning 1292 Truncated incorrect DECIMAL value: 'Total' delete from t1; insert into t1 select 1, a.seq from seq_1_to_10 a; analyze table t1; @@ -1060,6 +1101,48 @@ t } ] drop table t1, t2, t3, t4; +create table t1 ( +grp_id int, +value int, +index (grp_id) +); +insert into t1 select +A.seq, B.seq +from +seq_1_to_100 A, +seq_1_to_100 B; +create table t1a( +grp_id_2 int, +value int, +index (grp_id_2) +); +insert into t1a select * from t1; +create table t2 (a int); +insert into t2 select seq from seq_1_to_5; +analyze table t1,t1a, t2; +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status Table is already up to date +test.t1a analyze status Engine-independent statistics collected +test.t1a analyze status Table is already up to date +test.t2 analyze status Engine-independent statistics collected +test.t2 analyze status OK +# must have type=ref, rows=2 (not 10): +explain +select * from +t2, +( +select max(value), grp_id from t1 group by grp_id +union all +select max(value), grp_id_2 from t1a group by grp_id_2 +) DT +where t2.a= DT.grp_id; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t2 ALL NULL NULL NULL NULL 5 Using where +1 PRIMARY ref key0 key0 5 test.t2.a 2 +2 DERIVED t1 ALL NULL NULL NULL NULL 10000 Using temporary; Using filesort +3 UNION t1a ALL NULL NULL NULL NULL 10000 Using temporary; Using filesort +drop table t1, t1a, t2; # # End of 11.4 tests # diff --git a/mysql-test/main/derived_opt.test b/mysql-test/main/derived_opt.test index 
99c18a840d17e..964ddc3a2f11c 100644 --- a/mysql-test/main/derived_opt.test +++ b/mysql-test/main/derived_opt.test @@ -473,12 +473,22 @@ select * from t2, (select max(value), grp_id from t1 group by grp_id) DT where - t2.a= DT.grp_id; + t2.a = DT.grp_id; select json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t from information_schema.optimizer_trace; +explain +with DT as +( + select max(value), 1/grp_id as fn from t1 group by 2 +) +select * from t2, DT where t2.a = DT.fn; + +select + json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t +from information_schema.optimizer_trace; --echo # Same as above, but try a UNION: select * from @@ -487,7 +497,7 @@ select * from union all select max(value), grp_id from t1 group by grp_id) DT where - t2.a= DT.grp_id; + t2.a = DT.grp_id; select json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t from information_schema.optimizer_trace; @@ -500,7 +510,7 @@ select * from union all select max(value), grp_id from t1 group by MOD(grp_id,2)) DT where - t2.a= DT.grp_id; + t2.a = DT.grp_id; select json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t from information_schema.optimizer_trace; @@ -514,7 +524,7 @@ select * from t2, (select grp_id, max(value) as maxval from v1 group by grp_id) DT where - t2.a= DT.grp_id; + t2.a = DT.grp_id; drop view v1; @@ -524,7 +534,7 @@ select * from t2, (select grp_id, max(value) as maxval from cte1 group by grp_id) DT where - t2.a= DT.grp_id; + t2.a = DT.grp_id; explain select * from @@ -538,7 +548,7 @@ select * from group by grp_id ) DT where - t2.a= DT.grp_id; + t2.a = DT.grp_id; --echo # Example with equalities on GROUP BY columns and other columns --echo # Must produce {table=, ref=test.t2.col2,test.t2.a, rows=1} @@ -549,7 +559,7 @@ select * from (select max(value) as maxval, grp_id from t1 group by grp_id) DT where t2.col2=maxval and - t2.a= DT.grp_id; + t2.a = DT.grp_id; select 
json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t from information_schema.optimizer_trace; @@ -561,9 +571,21 @@ select * from t2, (select grp_id, max(value) as maxval from t1 group by grp_id) DT where - t2.col2=maxval and - t2.a= DT.grp_id; + t2.col2 = maxval and + t2.a = DT.grp_id; +# Room for improvement here: 'Total' != 100, so the 2nd select is impossible. +# Ignoring that impossible condition, we should see 2 rows per key in DT +explain +select * from + t2 join + ( + select max(value) as X, grp_id from t1 group by grp_id + union all + select distinct 'Total' as X, b as grp_id from t3 + ) DT + on t2.a = DT.grp_id + where DT.X = 100; delete from t1; insert into t1 select 1, a.seq from seq_1_to_10 a; @@ -709,6 +731,47 @@ from information_schema.optimizer_trace; drop table t1, t2, t3, t4; +## +## Tests for item names +## + +create table t1 ( + grp_id int, + value int, + index (grp_id) +); + +insert into t1 select + A.seq, B.seq +from + seq_1_to_100 A, + seq_1_to_100 B; + +create table t1a( + grp_id_2 int, + value int, + index (grp_id_2) +); +insert into t1a select * from t1; + +create table t2 (a int); +insert into t2 select seq from seq_1_to_5; + +analyze table t1,t1a, t2; + +--echo # must have type=ref, rows=2 (not 10): +explain +select * from +t2, +( + select max(value), grp_id from t1 group by grp_id + union all + select max(value), grp_id_2 from t1a group by grp_id_2 +) DT +where t2.a= DT.grp_id; + +drop table t1, t1a, t2; + --echo # --echo # End of 11.4 tests --echo # diff --git a/sql/opt_group_by_cardinality.cc b/sql/opt_group_by_cardinality.cc index 4ecb14884c286..40b5e45846903 100644 --- a/sql/opt_group_by_cardinality.cc +++ b/sql/opt_group_by_cardinality.cc @@ -384,20 +384,20 @@ double estimate_table_group_cardinality(JOIN *join, Item ***group_list, /** @brief - Return the number of keypart that matches the item, -1 if there is no match + Check if table field number $idx is a part of the index keyinfo.
+ Return TRUE if it is, FALSE otherwise */ -static int item_index_in_key(Item *item, const KEY *keyinfo, uint key_parts) +static bool item_index_in_key(field_index_t idx, + const KEY *keyinfo, + uint key_parts) { - if (item->real_item()->type() == Item::FIELD_ITEM) + for (field_index_t i= 0; i < key_parts; i++) { - for (uint i= 0; i < key_parts; i++) - { - if (!cmp(item->name, keyinfo->key_part[i].field->field_name)) - return (int)i; - } + if (idx == keyinfo->key_part[i].field->field_index) + return TRUE; } - return -1; + return FALSE; } @@ -409,11 +409,37 @@ static int item_index_in_key(Item *item, const KEY *keyinfo, uint key_parts) static bool all_list_contained_in_keyparts(const KEY *keyinfo, uint key_parts, + SELECT_LEX *sel, SQL_I_List *list) { for (ORDER *grp= list->first; grp; grp= grp->next) { - if (item_index_in_key((*grp->item), keyinfo, key_parts) < 0) + // Find this GROUP BY entry in the select list: + List_iterator it(sel->item_list); + Item *sel_item; + field_index_t idx= 0; + bool found= false; + while ((sel_item= it++)) + { + // compare the underlying fields to obtain the index + if (sel_item->eq(*grp->item, {true})) + { + found= true; + break; + } + idx++; + } + + if (!found) + return false; + + /* + The item is at position idx in the select list (the original + select list as specified in the query). + Positions in that list are also field indexes in the temporary table. + */ + + if (!item_index_in_key(idx, keyinfo, key_parts)) return FALSE; } return TRUE; @@ -474,35 +500,48 @@ void infer_derived_key_statistics(st_select_lex_unit* derived, do { bool this_select_covered= FALSE; + uint non_const_select_elements= 0; /* - This is a SELECT DISTINCT query with $key_parts elements in the - select list. This select in the union will produce one record + This is a SELECT DISTINCT query with $key_parts non-const elements in + the select list. This select in the union will produce one record per key.
@todo If we come across multiple SELECT DISTINCT selects in this union have a problem in that we do not know anything about how they might intersect */ - if (key_parts == select->item_list.elements && - select->options & SELECT_DISTINCT) + if (select->options & SELECT_DISTINCT) { - select_proc.add("distinct_in_query_block"); - this_select_covered= TRUE; - rec_per_key++; - } + List_iterator it(select->item_list); + for (Item *sel_item; (sel_item= it++); ) + { + if (!sel_item->const_item()) + non_const_select_elements++; + } - /* - This is a grouping select and the group list is a subset of our key. - Our key can have additional fields, the rows will still be unique. - */ - if (select->group_list.elements && - all_list_contained_in_keyparts(keyinfo, - key_parts, - &select->group_list)) + if (key_parts >= non_const_select_elements) + { + select_proc.add("distinct_in_query_block"); + this_select_covered= TRUE; + rec_per_key++; + } + } + else { - select_proc.add("group_list_in_key"); - this_select_covered= TRUE; - rec_per_key++; + /* + This is a grouping select and the group list is a subset of our key. + Our key can have additional fields, the rows will still be unique. + */ + if (select->group_list.elements && + all_list_contained_in_keyparts(keyinfo, + key_parts, + select, + &select->group_list)) + { + select_proc.add("group_list_in_key"); + this_select_covered= TRUE; + rec_per_key++; + } } if (!this_select_covered)