From 1ff90c2bc459e88ad36d11626d8714656024770c Mon Sep 17 00:00:00 2001
From: Rex Johnston <rex.johnston@mariadb.com>
Date: Fri, 31 Oct 2025 14:33:01 +1200
Subject: [PATCH] MDEV-37936 Followup to MDEV-36321: out_rows for GROUP BY: use
 of item names?

MDEV-36321 compared item name strings when checking whether the GROUP BY
items of a select within a derived table are covered by a derived key.
Here we remove that comparison and instead locate each GROUP BY item in the
select list and compare its position with the field indexes of the key parts.
---
 mysql-test/main/derived_opt.result | 101 ++++++++++++++++++++++++++---
 mysql-test/main/derived_opt.test   |  81 ++++++++++++++++++++---
 sql/opt_group_by_cardinality.cc    |  97 ++++++++++++++++++---------
 3 files changed, 232 insertions(+), 47 deletions(-)
diff --git a/mysql-test/main/derived_opt.result b/mysql-test/main/derived_opt.result
index 984c8070870d0..80dd67231579f 100644
--- a/mysql-test/main/derived_opt.result
+++ b/mysql-test/main/derived_opt.result
@@ -595,7 +595,7 @@ select * from
 t2,
 (select max(value), grp_id from t1 group by grp_id) DT
 where
-t2.a= DT.grp_id;
+t2.a = DT.grp_id;
 a	max(value)	grp_id
 1	100	1
 2	100	2
@@ -616,6 +616,30 @@ t
         "rec_per_key_estimate": 1
     }
 ]
+explain
+with DT as 
+(
+select max(value), 1/grp_id as fn from t1 group by 2
+)
+select * from t2, DT where t2.a = DT.fn;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	5	Using where
+1	PRIMARY	<derived2>	ref	key0	key0	4	test.t2.a	1	Using where
+2	DERIVED	t1	ALL	NULL	NULL	NULL	NULL	10000	Using temporary; Using filesort
+select
+json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t
+from information_schema.optimizer_trace;
+t
+[
+    {
+        "table_alias": "DT",
+        "key_name": "key0",
+        "key_parts": 1,
+        "select": 
+        ["group_list_in_key"],
+        "rec_per_key_estimate": 1
+    }
+]
 # Same as above, but try a UNION:
 select * from
 t2,
@@ -623,7 +647,7 @@ t2,
 union all
 select max(value), grp_id from t1 group by grp_id) DT
 where
-t2.a= DT.grp_id;
+t2.a = DT.grp_id;
 a	max(value)	grp_id
 1	100	1
 1	100	1
@@ -660,7 +684,7 @@ t2,
 union all
 select max(value), grp_id from t1 group by MOD(grp_id,2)) DT
 where
-t2.a= DT.grp_id;
+t2.a = DT.grp_id;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	5	Using where
 1	PRIMARY	<derived2>	ref	key0	key0	5	test.t2.a	101	
@@ -689,7 +713,7 @@ select * from
 t2,
 (select grp_id, max(value) as maxval from v1 group by grp_id) DT
 where
-t2.a= DT.grp_id;
+t2.a = DT.grp_id;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	5	Using where
 1	PRIMARY	<derived2>	ref	key0	key0	5	test.t2.a	1	
@@ -701,7 +725,7 @@ select * from
 t2,
 (select grp_id, max(value) as maxval from cte1 group by grp_id) DT
 where
-t2.a= DT.grp_id;
+t2.a = DT.grp_id;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	5	Using where
 1	PRIMARY	<derived3>	ref	key0	key0	5	test.t2.a	1	
@@ -718,7 +742,7 @@ where t1.grp_id = t3.b
 group by grp_id
 ) DT
 where
-t2.a= DT.grp_id;
+t2.a = DT.grp_id;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	5	Using where
 1	PRIMARY	<derived2>	ref	key0	key0	5	test.t2.a	1	
@@ -733,7 +757,7 @@ t2,
 (select max(value) as maxval, grp_id from t1 group by grp_id) DT
 where
 t2.col2=maxval and
-t2.a= DT.grp_id;
+t2.a = DT.grp_id;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	5	Using where
 1	PRIMARY	<derived2>	ref	key0	key0	10	test.t2.col2,test.t2.a	1	
@@ -757,12 +781,29 @@ select * from
 t2,
 (select grp_id, max(value) as maxval from t1 group by grp_id) DT
 where
-t2.col2=maxval and
-t2.a= DT.grp_id;
+t2.col2 = maxval and
+t2.a = DT.grp_id;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	5	Using where
 1	PRIMARY	<derived2>	ref	key0	key0	10	test.t2.a,test.t2.col2	1	
 2	DERIVED	t1	ALL	grp_id	NULL	NULL	NULL	10000	Using temporary; Using filesort
+explain
+select * from
+t2 join
+(
+select max(value) as X, grp_id from t1 group by grp_id
+union all
+select distinct 'Total' as X, b as grp_id from t3
+) DT
+on t2.a = DT.grp_id
+where DT.X = 100;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	5	Using where
+1	PRIMARY	<derived2>	ref	key0	key0	5	test.t2.a	2	Using where
+2	DERIVED	t1	ALL	NULL	NULL	NULL	NULL	10000	Using temporary; Using filesort
+3	UNION	NULL	NULL	NULL	NULL	NULL	NULL	NULL	Impossible WHERE
+Warnings:
+Warning	1292	Truncated incorrect DECIMAL value: 'Total'
 delete from t1;
 insert into t1 select 1, a.seq from seq_1_to_10 a;
 analyze table t1;
@@ -1060,6 +1101,48 @@ t
     }
 ]
 drop table t1, t2, t3, t4;
+create table t1 (
+grp_id int, 
+value int,
+index (grp_id)
+);
+insert into t1 select 
+A.seq, B.seq
+from 
+seq_1_to_100 A, 
+seq_1_to_100 B;
+create table t1a(
+grp_id_2 int, 
+value int,
+index (grp_id_2)
+);
+insert into t1a select * from t1;
+create table t2 (a int);
+insert into t2 select seq from seq_1_to_5;
+analyze table t1,t1a, t2;
+Table	Op	Msg_type	Msg_text
+test.t1	analyze	status	Engine-independent statistics collected
+test.t1	analyze	status	Table is already up to date
+test.t1a	analyze	status	Engine-independent statistics collected
+test.t1a	analyze	status	Table is already up to date
+test.t2	analyze	status	Engine-independent statistics collected
+test.t2	analyze	status	OK
+# <derived2> must have type=ref, rows=2 (not 10):
+explain
+select * from
+t2,
+(
+select max(value), grp_id from t1 group by grp_id
+union all
+select max(value), grp_id_2 from t1a group by grp_id_2
+)  DT
+where t2.a= DT.grp_id;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	5	Using where
+1	PRIMARY	<derived2>	ref	key0	key0	5	test.t2.a	2	
+2	DERIVED	t1	ALL	NULL	NULL	NULL	NULL	10000	Using temporary; Using filesort
+3	UNION	t1a	ALL	NULL	NULL	NULL	NULL	10000	Using temporary; Using filesort
+drop table t1, t1a, t2;
 #
 # End of 11.4 tests
 #
diff --git a/mysql-test/main/derived_opt.test b/mysql-test/main/derived_opt.test
index 99c18a840d17e..964ddc3a2f11c 100644
--- a/mysql-test/main/derived_opt.test
+++ b/mysql-test/main/derived_opt.test
@@ -473,12 +473,22 @@ select * from
   t2,
   (select max(value), grp_id from t1 group by grp_id) DT
 where
-  t2.a= DT.grp_id;
+  t2.a = DT.grp_id;
 
 select
   json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t
 from information_schema.optimizer_trace;
 
+explain
+with DT as 
+(
+  select max(value), 1/grp_id as fn from t1 group by 2
+)
+select * from t2, DT where t2.a = DT.fn;
+
+select
+  json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t
+from information_schema.optimizer_trace;
 
 --echo # Same as above, but try a UNION:
 select * from
@@ -487,7 +497,7 @@ select * from
    union all
    select max(value), grp_id from t1 group by grp_id) DT
 where
-  t2.a= DT.grp_id;
+  t2.a = DT.grp_id;
 select
   json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t
 from information_schema.optimizer_trace;
@@ -500,7 +510,7 @@ select * from
    union all
    select max(value), grp_id from t1 group by MOD(grp_id,2)) DT
 where
-  t2.a= DT.grp_id;
+  t2.a = DT.grp_id;
 select
   json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t
 from information_schema.optimizer_trace;
@@ -514,7 +524,7 @@ select * from
   t2,
   (select grp_id, max(value) as maxval from v1 group by grp_id) DT
 where
-  t2.a= DT.grp_id;
+  t2.a = DT.grp_id;
 
 drop view v1;
 
@@ -524,7 +534,7 @@ select * from
   t2,
   (select grp_id, max(value) as maxval from cte1 group by grp_id) DT
 where
-  t2.a= DT.grp_id;
+  t2.a = DT.grp_id;
 
 explain
 select * from
@@ -538,7 +548,7 @@ select * from
     group by grp_id
   ) DT
 where
-  t2.a= DT.grp_id;
+  t2.a = DT.grp_id;
 
 --echo # Example with equalities on GROUP BY columns and other columns
 --echo # Must produce {table=<derived2>, ref=test.t2.col2,test.t2.a, rows=1}
@@ -549,7 +559,7 @@ select * from
   (select max(value) as maxval, grp_id from t1 group by grp_id) DT
 where
   t2.col2=maxval and
-  t2.a= DT.grp_id;
+  t2.a = DT.grp_id;
 select
   json_detailed(json_extract(trace, '$**.infer_derived_key_statistics')) as t
 from information_schema.optimizer_trace;
@@ -561,9 +571,21 @@ select * from
   t2,
   (select grp_id, max(value) as maxval from t1 group by grp_id) DT
 where
-  t2.col2=maxval and
-  t2.a= DT.grp_id;
+  t2.col2 = maxval and
+  t2.a = DT.grp_id;
 
+# Room for improvement here: 'Total' != 100, so the 2nd select is impossible.
+# Ignoring that impossible condition, we expect 2 rows per key in DT.
+explain
+select * from
+  t2 join
+  (
+    select max(value) as X, grp_id from t1 group by grp_id
+    union all
+    select distinct 'Total' as X, b as grp_id from t3
+  ) DT
+  on t2.a = DT.grp_id
+  where DT.X = 100;
 
 delete from t1;
 insert into t1 select 1, a.seq from seq_1_to_10 a;
@@ -709,6 +731,47 @@ from information_schema.optimizer_trace;
 
 drop table t1, t2, t3, t4;
 
+## 
+## Tests for item names
+##
+
+create table t1 (
+  grp_id int, 
+  value int,
+  index (grp_id)
+);
+ 
+insert into t1 select 
+  A.seq, B.seq
+from 
+  seq_1_to_100 A, 
+  seq_1_to_100 B;
+
+create table t1a(
+  grp_id_2 int, 
+  value int,
+  index (grp_id_2)
+);
+insert into t1a select * from t1;
+ 
+create table t2 (a int);
+insert into t2 select seq from seq_1_to_5;
+ 
+analyze table t1,t1a, t2;
+
+--echo # <derived2> must have type=ref, rows=2 (not 10):
+explain
+select * from
+t2,
+(
+  select max(value), grp_id from t1 group by grp_id
+  union all
+  select max(value), grp_id_2 from t1a group by grp_id_2
+)  DT
+where t2.a= DT.grp_id;
+
+drop table t1, t1a, t2;
+
 --echo #
 --echo # End of 11.4 tests
 --echo #
diff --git a/sql/opt_group_by_cardinality.cc b/sql/opt_group_by_cardinality.cc
index 4ecb14884c286..40b5e45846903 100644
--- a/sql/opt_group_by_cardinality.cc
+++ b/sql/opt_group_by_cardinality.cc
@@ -384,20 +384,20 @@ double estimate_table_group_cardinality(JOIN *join, Item ***group_list,
 
 /**
   @brief
-    Return the number of keypart that matches the item, -1 if there is no match
+    Check if table field number $idx is a part of the index keyinfo.
+    Return TRUE if it is, FALSE otherwise
 */
 
-static int item_index_in_key(Item *item, const KEY *keyinfo, uint key_parts)
+static bool item_index_in_key(field_index_t idx,
+                              const KEY *keyinfo,
+                              uint key_parts)
 {
-  if (item->real_item()->type() == Item::FIELD_ITEM)
+  for (field_index_t i= 0; i < key_parts; i++)
   {
-    for (uint i= 0; i < key_parts; i++)
-    {
-      if (!cmp(item->name, keyinfo->key_part[i].field->field_name))
-        return (int)i;
-    }
+    if (idx == keyinfo->key_part[i].field->field_index)
+      return TRUE;
   }
-  return -1;
+  return FALSE;
 }
 
 
@@ -409,11 +409,37 @@ static int item_index_in_key(Item *item, const KEY *keyinfo, uint key_parts)
 static
 bool all_list_contained_in_keyparts(const KEY *keyinfo,
                                     uint key_parts,
+                                    SELECT_LEX *sel,
                                     SQL_I_List<st_order> *list)
 {
   for (ORDER *grp= list->first; grp; grp= grp->next)
   {
-    if (item_index_in_key((*grp->item), keyinfo, key_parts) < 0)
+    // Find this GROUP BY entry in the select list:
+    List_iterator it(sel->item_list);
+    Item *sel_item;
+    field_index_t idx= 0;
+    bool found= false;
+    while ((sel_item= it++))
+    {
+      // compare the underlying fields to obtain the index
+      if (sel_item->eq(*grp->item, {true}))
+      {
+        found= true;
+        break;
+      }
+      idx++;
+    }
+
+    if (!found)
+      return false;
+
+    /*
+      The item is at position idx in the select list (that is, the original
+      select list as it was specified in the query).
+      Positions in that list are also field indexes in the temporary table.
+    */
+
+    if (!item_index_in_key(idx, keyinfo, key_parts))
       return FALSE;
   }
   return TRUE;
@@ -474,35 +500,48 @@ void infer_derived_key_statistics(st_select_lex_unit* derived,
     do
     {
       bool this_select_covered= FALSE;
+      uint non_const_select_elements= 0;
       /*
-        This is a SELECT DISTINCT query with $key_parts elements in the
-        select list.  This select in the union will produce one record
+        This is a SELECT DISTINCT query with at most $key_parts non-const
+        elements in the select list.  This select in the union will produce
+        one record per key.
         @todo
         If we come across multiple SELECT DISTINCT selects in this union
         have a problem in that we do not know anything about how they might
         intersect
       */
-      if (key_parts == select->item_list.elements &&
-          select->options & SELECT_DISTINCT)
+      if (select->options & SELECT_DISTINCT)
       {
-        select_proc.add("distinct_in_query_block");
-        this_select_covered= TRUE;
-        rec_per_key++;
-      }
+        List_iterator it(select->item_list);
+        for (Item *sel_item; (sel_item= it++); )
+        {
+          if (!sel_item->const_item())
+            non_const_select_elements++;
+        }
 
-      /*
-        This is a grouping select and the group list is a subset of our key.
-        Our key can have additional fields, the rows will still be unique.
-      */
-      if (select->group_list.elements &&
-          all_list_contained_in_keyparts(keyinfo,
-                                         key_parts,
-                                         &select->group_list))
+        if (key_parts >= non_const_select_elements)
+        {
+          select_proc.add("distinct_in_query_block");
+          this_select_covered= TRUE;
+          rec_per_key++;
+        }
+      }
+      else
       {
-        select_proc.add("group_list_in_key");
-        this_select_covered= TRUE;
-        rec_per_key++;
+        /*
+          This is a grouping select and the group list is a subset of our key.
+          Our key can have additional fields, the rows will still be unique.
+        */
+        if (select->group_list.elements &&
+            all_list_contained_in_keyparts(keyinfo,
+                                           key_parts,
+                                           select,
+                                           &select->group_list))
+        {
+          select_proc.add("group_list_in_key");
+          this_select_covered= TRUE;
+          rec_per_key++;
+        }
       }
 
       if (!this_select_covered)