From b715136d3fd71de46ca0003a3a1aa6f8ff4743d4 Mon Sep 17 00:00:00 2001
From: Venkatesh Prasad <venkatesh.prasad@percona.com>
Date: Thu, 27 Feb 2025 16:16:22 +0530
Subject: [PATCH 1/5] PS-9647: MySQL Perf Improvements

https://perconadev.atlassian.net/browse/PS-9647

Make the following functions inline using the 'always_inline' attribute

1. row_sel_field_store_in_mysql_format_func()
2. row_sel_field_store_in_mysql_format()
3. rec_init_offsets_new()

Additionally, move the definitions of row_sel_field_store_in_mysql_format()
and row_sel_field_store_in_mysql_format_func() from row0sel.h / row0sel.cc to
row0sel.ic.

NOTE:
Forcing these functions inline increases the binary size by roughly 10 kB.

Before the change (debug): 802135752 bytes
After the change (debug): 802146560 bytes
Increase in binary size: 10808 bytes
---
 storage/innobase/include/row0sel.h  |  33 ----
 storage/innobase/include/row0sel.ic | 239 ++++++++++++++++++++++++++++
 storage/innobase/rem/rec.cc         |   4 +-
 storage/innobase/row/row0sel.cc     | 229 --------------------------
 4 files changed, 241 insertions(+), 264 deletions(-)

diff --git a/storage/innobase/include/row0sel.h b/storage/innobase/include/row0sel.h
index 14aaf88982c3..25b00dccd9d2 100644
--- a/storage/innobase/include/row0sel.h
+++ b/storage/innobase/include/row0sel.h
@@ -420,39 +420,6 @@ enum row_sel_match_mode {
                        of a fixed length column) */
 };
 
-/** Stores a non-SQL-NULL field in the MySQL format. The counterpart of this
-function is row_mysql_store_col_in_innobase_format() in row0mysql.cc.
-@param[in,out] dest             buffer where to store; NOTE
-                                that BLOBs are not in themselves stored
-                                here: the caller must allocate and copy
-                                the BLOB into buffer before, and pass
-                                the pointer to the BLOB in 'data'
-@param[in]      templ           MySQL column template. Its following fields
-                                are referenced: type, is_unsigned,
-mysql_col_len, mbminlen, mbmaxlen
-@param[in]      index           InnoDB index
-@param[in]      field_no        templ->rec_field_no or templ->clust_rec_field_no
-                                or templ->icp_rec_field_no
-@param[in]      data            data to store
-@param[in]      len             length of the data
-@param[in]      compress_heap
-@param[in]      sec_field       secondary index field no if the secondary index
-                                record but the prebuilt template is in
-                                clustered index format and used only for end
-                                range comparison. */
-void row_sel_field_store_in_mysql_format_func(
-    byte *dest, const mysql_row_templ_t *templ, const dict_index_t *index,
-    IF_DEBUG(ulint field_no, ) const byte *data,
-    ulint len, mem_heap_t **compress_heap IF_DEBUG(, ulint sec_field));
-
-/** Convert a non-SQL-NULL field from Innobase format to MySQL format. */
-static inline void row_sel_field_store_in_mysql_format(
-    byte *dest, const mysql_row_templ_t *templ, const dict_index_t *idx,
-    ulint field, const byte *src, ulint len, mem_heap_t **compress_heap, ulint sec) {
-  row_sel_field_store_in_mysql_format_func(
-      dest, templ, idx, IF_DEBUG(field, ) src, len, compress_heap IF_DEBUG(, sec));
-}
-
 /** Search the record present in innodb_table_stats table using
 db_name, table_name and fill it in table stats structure.
 @param[in]      db_name         database name
diff --git a/storage/innobase/include/row0sel.ic b/storage/innobase/include/row0sel.ic
index 184d2101b2bb..1386d3fd7989 100644
--- a/storage/innobase/include/row0sel.ic
+++ b/storage/innobase/include/row0sel.ic
@@ -127,3 +127,242 @@ static inline dberr_t row_search_for_mysql(byte *buf, page_cur_mode_t mode,
     return (row_search_no_mvcc(buf, mode, prebuilt, match_mode, direction));
   }
 }
+
+/** Stores a non-SQL-NULL field in the MySQL format. The counterpart of this
+function is row_mysql_store_col_in_innobase_format() in row0mysql.cc.
+@param[in,out] dest             buffer where to store; NOTE
+                                that BLOBs are not in themselves stored
+                                here: the caller must allocate and copy
+                                the BLOB into buffer before, and pass
+                                the pointer to the BLOB in 'data'
+@param[in]      templ           MySQL column template. Its following fields
+                                are referenced: type, is_unsigned,
+                                mysql_col_len, mbminlen, mbmaxlen
+@param[in]      index           InnoDB index
+@param[in]      field_no        templ->rec_field_no or templ->clust_rec_field_no
+                                or templ->icp_rec_field_no
+@param[in]      data            data to store
+@param[in]      len             length of the data
+@param[in]      compress_heap   heap for decompression
+@param[in]      sec_field       secondary index field no if the record is a
+                                secondary index record but the prebuilt
+                                template is in clustered index format and used
+                                only for end range comparison. */
+inline MY_ATTRIBUTE((always_inline)) void row_sel_field_store_in_mysql_format_func(
+    byte *dest, const mysql_row_templ_t *templ, const dict_index_t *index,
+    IF_DEBUG(ulint field_no, ) const byte *data, ulint len,
+    mem_heap_t **compress_heap IF_DEBUG(, ulint sec_field)) {
+  byte *ptr;
+#ifdef UNIV_DEBUG
+  const dict_field_t *field =
+      templ->is_virtual ? nullptr : index->get_field(field_no);
+
+  bool clust_templ_for_sec = (sec_field != ULINT_UNDEFINED);
+#endif /* UNIV_DEBUG */
+
+  if (templ->is_multi_val) {
+    ib::fatal(UT_LOCATION_HERE, ER_CONVERT_MULTI_VALUE)
+        << "Table name: " << index->table->name
+        << " Index name: " << index->name;
+  }
+
+  auto const mysql_col_len = templ->mysql_col_len;
+
+  ut_ad(rec_field_not_null_not_add_col_def(len));
+  UNIV_MEM_ASSERT_RW(data, len);
+  UNIV_MEM_ASSERT_W(dest, mysql_col_len);
+  UNIV_MEM_INVALID(dest, mysql_col_len);
+
+  switch (templ->type) {
+    const byte *field_end;
+    byte *pad;
+    case DATA_INT:
+      /* Convert integer data from Innobase to a little-endian
+      format, sign bit restored to normal */
+
+      ptr = dest + len;
+
+      for (;;) {
+        ptr--;
+        *ptr = *data;
+        if (ptr == dest) {
+          break;
+        }
+        data++;
+      }
+
+      if (!templ->is_unsigned) {
+        dest[len - 1] = (byte)(dest[len - 1] ^ 128);
+      }
+
+      ut_ad(mysql_col_len == len);
+
+      break;
+
+    case DATA_VARCHAR:
+    case DATA_VARMYSQL:
+    case DATA_BINARY:
+      field_end = dest + mysql_col_len;
+
+      if (templ->mysql_type == DATA_MYSQL_TRUE_VARCHAR) {
+        /* If this is a compressed column,
+        decompress it first */
+        if (templ->compressed)
+          data = row_decompress_column(
+              data, &len,
+              reinterpret_cast<const byte *>(templ->zip_dict_data.str),
+              templ->zip_dict_data.length, compress_heap);
+
+        /* This is a >= 5.0.3 type true VARCHAR. Store the
+        length of the data to the first byte or the first
+        two bytes of dest. */
+
+        dest =
+            row_mysql_store_true_var_len(dest, len, templ->mysql_length_bytes);
+        /* Copy the actual data. Leave the rest of the
+        buffer uninitialized. */
+        memcpy(dest, data, len);
+        break;
+      }
+
+      /* Copy the actual data */
+      ut_memcpy(dest, data, len);
+
+      /* Pad with trailing spaces. */
+
+      pad = dest + len;
+
+      ut_ad(templ->mbminlen <= templ->mbmaxlen);
+
+      /* We treat some Unicode charset strings specially. */
+      switch (templ->mbminlen) {
+        case 4:
+          /* InnoDB should never have stripped partial
+          UTF-32 characters. */
+          ut_a(!(len & 3));
+          break;
+        case 2:
+          /* A space char is two bytes,
+          0x0020 in UCS2 and UTF-16 */
+
+          if (UNIV_UNLIKELY(len & 1)) {
+            /* A 0x20 has been stripped from the column.
+            Pad it back. */
+
+            if (pad < field_end) {
+              *pad++ = 0x20;
+            }
+          }
+      }
+
+      row_mysql_pad_col(templ->mbminlen, pad, field_end - pad);
+      break;
+
+    case DATA_BLOB:
+      /* Store a pointer to the BLOB buffer to dest: the BLOB was
+      already copied to the buffer in row_sel_store_mysql_rec */
+
+      row_mysql_store_blob_ref(
+          dest, mysql_col_len, data, len, templ->compressed,
+          reinterpret_cast<const byte *>(templ->zip_dict_data.str),
+          templ->zip_dict_data.length, compress_heap);
+      break;
+
+    case DATA_POINT:
+    case DATA_VAR_POINT:
+    case DATA_GEOMETRY:
+      /* We store all geometry data as BLOB data at server layer. */
+      row_mysql_store_geometry(dest, mysql_col_len, data, len);
+      break;
+
+    case DATA_MYSQL:
+      memcpy(dest, data, len);
+
+      ut_ad(mysql_col_len >= len);
+      ut_ad(templ->mbmaxlen >= templ->mbminlen);
+
+      /* If field_no equals to templ->icp_rec_field_no, we are examining a row
+      pointed by "icp_rec_field_no". There is possibility that icp_rec_field_no
+      refers to a field in a secondary index while templ->rec_field_no points
+      to field in a primary index. The length should still be equal, unless the
+      field pointed by icp_rec_field_no has a prefix or this is a virtual
+      column.
+      For end range condition check of secondary index with cluster index
+      template (clust_templ_for_sec), the index column data length (len)
+      could be smaller than the actual column length (mysql_col_len) if index
+      is on column prefix. This is not a real issue because the end range check
+      would only need the prefix part. The length check assert is relaxed for
+      clust_templ_for_sec. */
+      ut_ad(templ->is_virtual || templ->mbmaxlen > templ->mbminlen ||
+            mysql_col_len == len || clust_templ_for_sec ||
+            (field_no == templ->icp_rec_field_no && field->prefix_len > 0));
+
+      /* The following assertion would fail for old tables
+      containing UTF-8 ENUM columns due to Bug #9526. */
+      ut_ad(!templ->mbmaxlen || !(mysql_col_len % templ->mbmaxlen));
+      /* Length of the record will be less in case of
+      clust_templ_for_sec is true or if it is fetched
+      from prefix virtual column in virtual index. */
+      ut_ad(templ->is_virtual || clust_templ_for_sec ||
+            len * templ->mbmaxlen >= mysql_col_len ||
+            index->has_row_versions() ||
+            (field_no == templ->icp_rec_field_no && field->prefix_len > 0) ||
+            templ->rec_field_is_prefix);
+      ut_ad(templ->is_virtual || !(field->prefix_len % templ->mbmaxlen));
+
+      /* Pad with spaces. This undoes the stripping
+      done in row0mysql.cc, function
+      row_mysql_store_col_in_innobase_format(). */
+      if ((templ->mbminlen == 1 && templ->mbmaxlen != 1) ||
+          (templ->is_virtual && mysql_col_len > len)) {
+        /* NOTE: This comment is for the second condition:
+        This probably comes from a prefix virtual index, where no complete
+        value can be got because the full virtual column can only be
+        calculated in server layer for now. Since server now assumes the
+        returned value should always have padding spaces, thus the fixup.
+        However, a proper and more efficient solution is that server does
+        not depend on the trailing spaces to check the terminal of the CHAR
+        string, because at least in this case,server should know it's a prefix
+        index search and no complete value would be got. */
+        memset(dest + len, 0x20, mysql_col_len - len);
+      }
+      break;
+
+    default:
+#ifdef UNIV_DEBUG
+    case DATA_SYS_CHILD:
+    case DATA_SYS:
+      /* These column types should never be shipped to MySQL. */
+      ut_d(ut_error);
+      [[fallthrough]];
+
+    case DATA_CHAR:
+    case DATA_FIXBINARY:
+    case DATA_FLOAT:
+    case DATA_DOUBLE:
+    case DATA_DECIMAL:
+      /* Above are the valid column types for MySQL data. */
+#endif /* UNIV_DEBUG */
+
+      /* If sec_field value is present then mapping of
+      secondary index records to clustered index template
+      happens for end range comparison. So length can
+      vary according to secondary index record length. */
+      ut_ad((templ->is_virtual && !field) ||
+            ((field && field->prefix_len)
+                 ? field->prefix_len == len
+                 : (clust_templ_for_sec || mysql_col_len == len)));
+
+      memcpy(dest, data, len);
+  }
+}
+
+/** Convert a non-SQL-NULL field from Innobase format to MySQL format. */
+inline MY_ATTRIBUTE((always_inline)) void row_sel_field_store_in_mysql_format(
+    byte *dest, const mysql_row_templ_t *templ, const dict_index_t *idx,
+    ulint field, const byte *src, ulint len, mem_heap_t **compress_heap,
+    ulint sec) {
+  row_sel_field_store_in_mysql_format_func(dest, templ, idx,
+                                           IF_DEBUG(field, ) src, len,
+                                           compress_heap IF_DEBUG(, sec));
+}
\ No newline at end of file
diff --git a/storage/innobase/rem/rec.cc b/storage/innobase/rem/rec.cc
index 812d719fac0e..378ed925047b 100644
--- a/storage/innobase/rem/rec.cc
+++ b/storage/innobase/rem/rec.cc
@@ -47,8 +47,8 @@ external tools. */
 @param[in]      rec     physical record
 @param[in]      index   record descriptor
 @param[in, out] offsets array of offsets */
-static void rec_init_offsets_new(const rec_t *rec, const dict_index_t *index,
-                                 ulint *offsets) {
+static inline MY_ATTRIBUTE((always_inline)) void rec_init_offsets_new(
+    const rec_t *rec, const dict_index_t *index, ulint *offsets) {
   ulint status = rec_get_status(rec);
   ulint n_node_ptr_field = ULINT_UNDEFINED;
 
diff --git a/storage/innobase/row/row0sel.cc b/storage/innobase/row/row0sel.cc
index 730470d4c254..33d9f8638f45 100644
--- a/storage/innobase/row/row0sel.cc
+++ b/storage/innobase/row/row0sel.cc
@@ -2481,235 +2481,6 @@ static void row_sel_store_row_id_to_prebuilt(
   ut_memcpy(prebuilt->row_id, data, len);
 }
 
-/** Stores a non-SQL-NULL field in the MySQL format. The counterpart of this
-function is row_mysql_store_col_in_innobase_format() in row0mysql.cc.
-@param[in,out] dest             buffer where to store; NOTE
-                                that BLOBs are not in themselves stored
-                                here: the caller must allocate and copy
-                                the BLOB into buffer before, and pass
-                                the pointer to the BLOB in 'data'
-@param[in]      templ           MySQL column template. Its following fields
-                                are referenced: type, is_unsigned,
-mysql_col_len, mbminlen, mbmaxlen
-@param[in]      index           InnoDB index
-@param[in]      field_no        templ->rec_field_no or templ->clust_rec_field_no
-                                or templ->icp_rec_field_no
-@param[in]      data            data to store
-@param[in]      len             length of the data
-@param[in]	    compress_heap
-@param[in]      sec_field       secondary index field no if the secondary index
-                                record but the prebuilt template is in
-                                clustered index format and used only for end
-                                range comparison. */
-void row_sel_field_store_in_mysql_format_func(
-    byte *dest, const mysql_row_templ_t *templ, const dict_index_t *index,
-    IF_DEBUG(ulint field_no, ) const byte *data,
-    ulint len, mem_heap_t** compress_heap IF_DEBUG(, ulint sec_field)) {
-  byte *ptr;
-#ifdef UNIV_DEBUG
-  const dict_field_t *field =
-      templ->is_virtual ? nullptr : index->get_field(field_no);
-
-  bool clust_templ_for_sec = (sec_field != ULINT_UNDEFINED);
-#endif /* UNIV_DEBUG */
-
-  if (templ->is_multi_val) {
-    ib::fatal(UT_LOCATION_HERE, ER_CONVERT_MULTI_VALUE)
-        << "Table name: " << index->table->name
-        << " Index name: " << index->name;
-  }
-
-  auto const mysql_col_len = templ->mysql_col_len;
-
-  ut_ad(rec_field_not_null_not_add_col_def(len));
-  UNIV_MEM_ASSERT_RW(data, len);
-  UNIV_MEM_ASSERT_W(dest, mysql_col_len);
-  UNIV_MEM_INVALID(dest, mysql_col_len);
-
-  switch (templ->type) {
-    const byte *field_end;
-    byte *pad;
-    case DATA_INT:
-      /* Convert integer data from Innobase to a little-endian
-      format, sign bit restored to normal */
-
-      ptr = dest + len;
-
-      for (;;) {
-        ptr--;
-        *ptr = *data;
-        if (ptr == dest) {
-          break;
-        }
-        data++;
-      }
-
-      if (!templ->is_unsigned) {
-        dest[len - 1] = (byte)(dest[len - 1] ^ 128);
-      }
-
-      ut_ad(mysql_col_len == len);
-
-      break;
-
-    case DATA_VARCHAR:
-    case DATA_VARMYSQL:
-    case DATA_BINARY:
-      field_end = dest + mysql_col_len;
-
-      if (templ->mysql_type == DATA_MYSQL_TRUE_VARCHAR) {
-        /* If this is a compressed column,
-        decompress it first */
-        if (templ->compressed)
-          data = row_decompress_column(
-              data, &len,
-              reinterpret_cast<const byte *>(templ->zip_dict_data.str),
-              templ->zip_dict_data.length, compress_heap);
-
-        /* This is a >= 5.0.3 type true VARCHAR. Store the
-        length of the data to the first byte or the first
-        two bytes of dest. */
-
-        dest =
-            row_mysql_store_true_var_len(dest, len, templ->mysql_length_bytes);
-        /* Copy the actual data. Leave the rest of the
-        buffer uninitialized. */
-        memcpy(dest, data, len);
-        break;
-      }
-
-      /* Copy the actual data */
-      ut_memcpy(dest, data, len);
-
-      /* Pad with trailing spaces. */
-
-      pad = dest + len;
-
-      ut_ad(templ->mbminlen <= templ->mbmaxlen);
-
-      /* We treat some Unicode charset strings specially. */
-      switch (templ->mbminlen) {
-        case 4:
-          /* InnoDB should never have stripped partial
-          UTF-32 characters. */
-          ut_a(!(len & 3));
-          break;
-        case 2:
-          /* A space char is two bytes,
-          0x0020 in UCS2 and UTF-16 */
-
-          if (UNIV_UNLIKELY(len & 1)) {
-            /* A 0x20 has been stripped from the column.
-            Pad it back. */
-
-            if (pad < field_end) {
-              *pad++ = 0x20;
-            }
-          }
-      }
-
-      row_mysql_pad_col(templ->mbminlen, pad, field_end - pad);
-      break;
-
-    case DATA_BLOB:
-      /* Store a pointer to the BLOB buffer to dest: the BLOB was
-      already copied to the buffer in row_sel_store_mysql_rec */
-
-      row_mysql_store_blob_ref(
-          dest, mysql_col_len, data, len, templ->compressed,
-          reinterpret_cast<const byte *>(templ->zip_dict_data.str),
-          templ->zip_dict_data.length, compress_heap);
-      break;
-
-    case DATA_POINT:
-    case DATA_VAR_POINT:
-    case DATA_GEOMETRY:
-      /* We store all geometry data as BLOB data at server layer. */
-      row_mysql_store_geometry(dest, mysql_col_len, data, len);
-      break;
-
-    case DATA_MYSQL:
-      memcpy(dest, data, len);
-
-      ut_ad(mysql_col_len >= len);
-      ut_ad(templ->mbmaxlen >= templ->mbminlen);
-
-      /* If field_no equals to templ->icp_rec_field_no, we are examining a row
-      pointed by "icp_rec_field_no". There is possibility that icp_rec_field_no
-      refers to a field in a secondary index while templ->rec_field_no points
-      to field in a primary index. The length should still be equal, unless the
-      field pointed by icp_rec_field_no has a prefix or this is a virtual
-      column.
-      For end range condition check of secondary index with cluster index
-      template (clust_templ_for_sec), the index column data length (len)
-      could be smaller than the actual column length (mysql_col_len) if index
-      is on column prefix. This is not a real issue because the end range check
-      would only need the prefix part. The length check assert is relaxed for
-      clust_templ_for_sec. */
-      ut_ad(templ->is_virtual || templ->mbmaxlen > templ->mbminlen ||
-            mysql_col_len == len || clust_templ_for_sec ||
-            (field_no == templ->icp_rec_field_no && field->prefix_len > 0));
-
-      /* The following assertion would fail for old tables
-      containing UTF-8 ENUM columns due to Bug #9526. */
-      ut_ad(!templ->mbmaxlen || !(mysql_col_len % templ->mbmaxlen));
-      /* Length of the record will be less in case of
-      clust_templ_for_sec is true or if it is fetched
-      from prefix virtual column in virtual index. */
-      ut_ad(templ->is_virtual || clust_templ_for_sec ||
-            len * templ->mbmaxlen >= mysql_col_len ||
-            index->has_row_versions() ||
-            (field_no == templ->icp_rec_field_no && field->prefix_len > 0) ||
-            templ->rec_field_is_prefix);
-      ut_ad(templ->is_virtual || !(field->prefix_len % templ->mbmaxlen));
-
-      /* Pad with spaces. This undoes the stripping
-      done in row0mysql.cc, function
-      row_mysql_store_col_in_innobase_format(). */
-      if ((templ->mbminlen == 1 && templ->mbmaxlen != 1) ||
-          (templ->is_virtual && mysql_col_len > len)) {
-        /* NOTE: This comment is for the second condition:
-        This probably comes from a prefix virtual index, where no complete
-        value can be got because the full virtual column can only be
-        calculated in server layer for now. Since server now assumes the
-        returned value should always have padding spaces, thus the fixup.
-        However, a proper and more efficient solution is that server does
-        not depend on the trailing spaces to check the terminal of the CHAR
-        string, because at least in this case,server should know it's a prefix
-        index search and no complete value would be got. */
-        memset(dest + len, 0x20, mysql_col_len - len);
-      }
-      break;
-
-    default:
-#ifdef UNIV_DEBUG
-    case DATA_SYS_CHILD:
-    case DATA_SYS:
-      /* These column types should never be shipped to MySQL. */
-      ut_d(ut_error);
-      [[fallthrough]];
-
-    case DATA_CHAR:
-    case DATA_FIXBINARY:
-    case DATA_FLOAT:
-    case DATA_DOUBLE:
-    case DATA_DECIMAL:
-      /* Above are the valid column types for MySQL data. */
-#endif /* UNIV_DEBUG */
-
-      /* If sec_field value is present then mapping of
-      secondary index records to clustered index template
-      happens for end range comparison. So length can
-      vary according to secondary index record length. */
-      ut_ad((templ->is_virtual && !field) ||
-            ((field && field->prefix_len)
-                 ? field->prefix_len == len
-                 : (clust_templ_for_sec || mysql_col_len == len)));
-
-      memcpy(dest, data, len);
-  }
-}
-
 // clang-format off
 /** Convert a field in the Innobase format to a field in the MySQL format.
 @param[out]     mysql_rec       Record in the MySQL format

From f9d0c3eab5036bc715d554bfdb2ea871abf4f7dd Mon Sep 17 00:00:00 2001
From: Venkatesh Prasad <venkatesh.prasad@percona.com>
Date: Mon, 17 Mar 2025 15:56:39 +0530
Subject: [PATCH 2/5] PS-9647: MySQL Perf Improvements

https://perconadev.atlassian.net/browse/PS-9647

This patch introduces a new hybrid data structure for the MVCC ReadView, taken
from Enhanced MySQL.

Typically, online transactions are short rather than long, and transaction IDs
increase continuously. To leverage these characteristics, a hybrid data
structure is used: a static array for consecutive short transaction IDs and a
vector for long transactions. Each bit of the array represents one transaction
ID, so, for example, a 2048-byte array can hold up to 16,384 consecutive active
transaction IDs. The per-view array in this patch (ReadView::top_active,
MAX_TOP_ACTIVE_BYTES = 8192 bytes) has room for 65,536 bits.

The minimum short transaction ID is used to differentiate between short and long
transactions.

IDs smaller than this minimum go into the long transaction vector, while IDs
equal to or greater than it are placed in the short transaction array.

When changes_visible() checks an ID that is below the minimum short transaction
ID, it binary-searches the long transaction vector, which is efficient because
the number of long transactions is generally small.

If the ID is equal to or above the minimum short transaction ID, a single bit
test on the bitmap is performed, with a time complexity of O(1) instead of the
previous O(log n). This improves efficiency and reduces cache migration between
NUMA nodes, as an O(1) query typically completes within a single CPU time
slice.

- In the short_rw_trx_ids_bitmap structure, MAX_SHORT_ACTIVE_BYTES is set to
  65536, theoretically accommodating up to 524,288 consecutive short transaction
  IDs.

- If the limit is exceeded, the oldest short transaction IDs are converted into
  long transactions and stored in long_rw_trx_ids.

- Global long and short transactions are distinguished by min_short_valid_id:
  IDs smaller than this value are treated as global long transactions, while IDs
  equal to or greater are considered global short transactions.

During the copying process from the global active transaction list, the
short_rw_trx_ids_bitmap structure, which uses only one bit per transaction ID,
allows for much higher copying efficiency compared to the native MySQL solution.

For example, with 1000 active transactions, the native MySQL version would
require copying at least 8000 bytes, whereas the optimized solution may only
need a few hundred bytes. This results in a significant improvement in copying
efficiency.
---
 storage/innobase/include/read0types.h |  60 ++++-
 storage/innobase/include/trx0sys.h    |   6 +-
 storage/innobase/read/read0read.cc    | 329 ++++++++++++++++++++++++--
 storage/innobase/trx/trx0sys.cc       |   4 +-
 storage/innobase/trx/trx0trx.cc       | 153 ++++++++++--
 5 files changed, 494 insertions(+), 58 deletions(-)

diff --git a/storage/innobase/include/read0types.h b/storage/innobase/include/read0types.h
index cb3e034e142d..6522c65f0aae 100644
--- a/storage/innobase/include/read0types.h
+++ b/storage/innobase/include/read0types.h
@@ -46,6 +46,9 @@ class MVCC;
 /** Read view lists the trx ids of those transactions for which a consistent
 read should not see the modifications to the database. */
 
+#define MAX_TOP_ACTIVE_BYTES 8192
+#define MAX_SHORT_ACTIVE_BYTES 65536
+
 class ReadView {
   /** This is similar to a std::vector but it is not a drop
   in replacement. It is specific to ReadView. */
@@ -173,14 +176,33 @@ class ReadView {
 
     if (id >= m_low_limit_id) {
       return (false);
-
-    } else if (m_ids.empty()) {
+    } else if (empty()) {
       return (true);
     }
 
-    const ids_t::value_type *p = m_ids.data();
+    /* first search short bitmap */
+    if (m_has_short_actives && id >= m_short_min_id) {
+      if (id > m_short_max_id) {
+        return false;
+      }
+      unsigned int trim_id = id & 0x7FFFF;
+      unsigned int trim_min_id = m_short_min_id & 0x7FFFF;
+      unsigned int array_index = (trim_id >> 3);
+      unsigned int array_min_index = (trim_min_id >> 3);
+      array_index = (MAX_SHORT_ACTIVE_BYTES + array_index - array_min_index) %
+                    MAX_TOP_ACTIVE_BYTES;
+      unsigned int array_remainder = trim_id & (0x7);
+      int is_value_set = top_active[array_index] & (1 << (7 - array_remainder));
+      if (is_value_set) {
+        return false;
+      } else {
+        return true;
+      }
+    }
+
+    const ids_t::value_type *p = m_long_ids.data();
 
-    return (!std::binary_search(p, p + m_ids.size(), id));
+    return (!std::binary_search(p, p + m_long_ids.size(), id));
   }
 
   /**
@@ -235,7 +257,18 @@ class ReadView {
 
   /**
   @return true if there are no transaction ids in the snapshot */
-  bool empty() const { return (m_ids.empty()); }
+  bool empty() const {
+    bool long_empty = m_long_ids.empty();
+    if (long_empty) {
+      if (!m_has_short_actives) {
+        return true;
+      } else {
+        return false;
+      }
+    } else {
+      return false;
+    }
+  }
 
   /**
   Clones a read view object. The resulting read view has identical change
@@ -264,9 +297,9 @@ class ReadView {
     fprintf(file, "Read view low limit trx n:o " TRX_ID_FMT "\n",
             low_limit_no());
     print_limits(file);
-    fprintf(file, "Read view individually stored trx ids:\n");
-    for (ulint i = 0; i < m_ids.size(); i++)
-      fprintf(file, "Read view trx id " TRX_ID_FMT "\n", m_ids.data()[i]);
+    fprintf(file, "Read view individually stored long trx ids:\n");
+    for (ulint i = 0; i < m_long_ids.size(); i++)
+      fprintf(file, "Read view trx id " TRX_ID_FMT "\n", m_long_ids.data()[i]);
   }
 
   bool is_cloned() const noexcept { return (m_cloned); }
@@ -274,7 +307,9 @@ class ReadView {
  private:
   /**
   Copy the transaction ids from the source vector */
-  inline void copy_trx_ids(const trx_ids_t &trx_ids);
+  inline void copy_long_trx_ids(const trx_ids_t &trx_ids,
+                                trx_id_t min_short_id);
+  inline void copy_short_trx_ids();
 
   /**
   Opens a read view where exactly the transactions serialized before this
@@ -307,6 +342,7 @@ class ReadView {
   ReadView &operator=(const ReadView &);
 
  private:
+  unsigned char top_active[MAX_TOP_ACTIVE_BYTES];
   /** The read should not see any transaction with trx id >= this
   value. In other words, this is the "high water mark". */
   trx_id_t m_low_limit_id;
@@ -322,7 +358,7 @@ class ReadView {
 
   /** Set of RW transactions that was active when this snapshot
   was taken */
-  ids_t m_ids;
+  ids_t m_long_ids;
 
   /** The view does not need to see the undo logs for transactions
   whose transaction number is strictly smaller (<) than this value:
@@ -337,6 +373,10 @@ class ReadView {
   trx_id_t m_view_low_limit_no;
 #endif /* UNIV_DEBUG */
 
+  trx_id_t m_short_min_id;
+  trx_id_t m_short_max_id;
+  bool m_has_short_actives;
+
   /** AC-NL-RO transaction view that has been "closed". */
   bool m_closed;
 
diff --git a/storage/innobase/include/trx0sys.h b/storage/innobase/include/trx0sys.h
index 9c445858460b..cbd2fa9a0d63 100644
--- a/storage/innobase/include/trx0sys.h
+++ b/storage/innobase/include/trx0sys.h
@@ -553,7 +553,11 @@ struct trx_sys_t {
   take a snapshot of these transactions whose changes are not visible to it.
   We should remove transactions from the list before committing in memory and
   releasing locks to ensure right order of removal and consistent snapshot. */
-  trx_ids_t rw_trx_ids;
+  trx_ids_t long_rw_trx_ids;
+  unsigned char short_rw_trx_ids_bitmap[MAX_SHORT_ACTIVE_BYTES];
+  int short_rw_trx_valid_number;
+  trx_id_t min_short_valid_id;
+  trx_id_t max_short_valid_id;
 
   char pad7[ut::INNODB_CACHE_LINE_SIZE];
 
diff --git a/storage/innobase/read/read0read.cc b/storage/innobase/read/read0read.cc
index a2f067b358cd..dc9756abcef9 100644
--- a/storage/innobase/read/read0read.cc
+++ b/storage/innobase/read/read0read.cc
@@ -317,7 +317,7 @@ ReadView::ReadView()
     : m_low_limit_id(),
       m_up_limit_id(),
       m_creator_trx_id(),
-      m_ids(),
+      m_long_ids(),
       m_low_limit_no(),
       m_cloned(false) {
   ut_d(::memset(&m_view_list, 0x0, sizeof(m_view_list)));
@@ -363,32 +363,59 @@ void MVCC::view_add(const ReadView *view) {
 }
 
 /**
-Copy the transaction ids from the source vector */
-
-void ReadView::copy_trx_ids(const trx_ids_t &trx_ids) {
+ * @brief Copies transaction IDs to the long transaction ID list, excluding the
+ * creator transaction ID if it falls within a specified range.
+ *
+ * This function copies all transaction IDs from the provided list `trx_ids` to
+ * the member `m_long_ids`, except for the creator transaction ID
+ * (`m_creator_trx_id`) if it is greater than 0 and less than `min_short_id`.
+ * The function ensures that the creator transaction ID is not included in the
+ * copied list.
+ *
+ * @param trx_ids The list of transaction IDs to be copied.
+ * @param min_short_id The minimum short transaction ID threshold. The creator
+ * transaction ID is excluded if it is less than this value.
+ *
+ * @pre The `m_cloned` member must be false.
+ * @pre The transaction system mutex must be owned by the caller.
+ *
+ * @note The function performs a single copy operation and filters out the
+ * creator's transaction ID if necessary. The code is optimized to avoid
+ * potential overhead from `std::vector::resize()`.
+ *
+ * @note In debug mode, the function randomly checks if all transaction IDs in
+ * the list are active.
+ */
+void ReadView::copy_long_trx_ids(const trx_ids_t &trx_ids,
+                                 trx_id_t min_short_id) {
   ut_ad(!m_cloned);
   ut_ad(trx_sys_mutex_own());
 
+  bool contained = false;
   ulint size = trx_ids.size();
 
-  if (m_creator_trx_id > 0) {
-    ut_ad(size > 0);
-    --size;
+  if (m_creator_trx_id > 0 && m_creator_trx_id < min_short_id) {
+    if (std::find(trx_ids.begin(), trx_ids.end(), m_creator_trx_id) !=
+        std::end(trx_ids)) {
+      contained = true;
+      ut_ad(size > 0);
+      --size;
+    }
   }
 
   if (size == 0) {
-    m_ids.clear();
+    m_long_ids.clear();
     return;
   }
 
-  m_ids.reserve(size);
-  m_ids.resize(size);
+  m_long_ids.reserve(size);
+  m_long_ids.resize(size);
 
-  ids_t::value_type *p = m_ids.data();
+  ids_t::value_type *p = m_long_ids.data();
 
   /* Copy all the trx_ids except the creator trx id */
 
-  if (m_creator_trx_id > 0) {
+  if (contained) {
     /* Note: We go through all this trouble because it is
     unclear whether std::vector::resize() will cause an
     overhead or not. We should test this extensively and
@@ -409,7 +436,7 @@ void ReadView::copy_trx_ids(const trx_ids_t &trx_ids) {
 
     n = (trx_ids.size() - i - 1) * sizeof(trx_ids_t::value_type);
 
-    ut_ad(i + (n / sizeof(trx_ids_t::value_type)) == m_ids.size());
+    ut_ad(i + (n / sizeof(trx_ids_t::value_type)) == m_long_ids.size());
 
     if (n > 0) {
       ::memmove(p + i, &trx_ids[i + 1], n);
@@ -420,7 +447,7 @@ void ReadView::copy_trx_ids(const trx_ids_t &trx_ids) {
     ::memmove(p, &trx_ids[0], n);
   }
 
-  m_up_limit_id = m_ids.front();
+  m_up_limit_id = m_long_ids.front();
 
 #ifdef UNIV_DEBUG
   /* The check is done randomly from time to time, because the check adds
@@ -454,6 +481,126 @@ void ReadView::copy_trx_ids(const trx_ids_t &trx_ids) {
 #endif /* UNIV_DEBUG */
 }
 
+/**
+ * Find the smallest active transaction ID within a specified range in the short
+ * transaction ID bitmap.
+ *
+ * @param short_bitmap Pointer to the bitmap representing short transaction IDs.
+ * @param from The starting transaction ID of the range to search.
+ * @param to The ending transaction ID of the range to search.
+ * @return The smallest active transaction ID within the specified range, or
+ * `to` if no active transaction is found.
+ */
+static inline trx_id_t find_smallest_short_active_trx_id(
+    unsigned char *short_bitmap, trx_id_t from, trx_id_t to) {
+  if (from > to) {
+    return to;
+  }
+
+  trx_id_t start = from;
+  do {
+    unsigned int trim_id = start & 0x7FFFF;
+    unsigned int array_index = (trim_id >> 3);
+    unsigned int array_remainder = trim_id & (0x7);
+    int is_value_set = short_bitmap[array_index] & (1 << (7 - array_remainder));
+    if (is_value_set) {
+      return start;
+    } else {
+      start++;
+      if (start > to) {
+        return to;
+      }
+    }
+  } while (true);
+}
+
+/**
+ * @brief Trims the short transaction ID window so that it fits into a
+ * ReadView::top_active snapshot.
+ *
+ * The new window starts at the smallest ID whose bit is set within the last
+ * MAX_TOP_ACTIVE_BYTES bitmap bytes ending at the byte of
+ * trx_sys->max_short_valid_id, or at max_short_valid_id itself if no bit is
+ * set there. Every older ID whose bit is set is appended to
+ * trx_sys->long_rw_trx_ids, its bit is cleared and
+ * trx_sys->short_rw_trx_valid_number is decremented.
+ *
+ * @pre The trx_sys mutex must be owned by the caller.
+ */
+static void trim_short_trx_ids_to_top_active() {
+  ut_ad(trx_sys_mutex_own());
+
+  unsigned char *short_trx_id_bitmap = trx_sys->short_rw_trx_ids_bitmap;
+  const trx_id_t old_id_start = trx_sys->min_short_valid_id;
+  const trx_id_t max_id = trx_sys->max_short_valid_id;
+
+  /* First ID of the oldest bitmap byte that still fits into top_active. */
+  const trx_id_t base =
+      (max_id - (max_id & 0x7)) - ((MAX_TOP_ACTIVE_BYTES - 1) << 3);
+
+  trx_sys->min_short_valid_id =
+      find_smallest_short_active_trx_id(short_trx_id_bitmap, base, max_id);
+
+  for (trx_id_t id = old_id_start; id < base; id++) {
+    const unsigned int trim_id = id & 0x7FFFF;
+    const unsigned int array_index = (trim_id >> 3);
+    const int bit = 1 << (7 - (trim_id & 0x7));
+    if (short_trx_id_bitmap[array_index] & bit) {
+      trx_sys->long_rw_trx_ids.push_back(id);
+      trx_sys->short_rw_trx_valid_number--;
+      short_trx_id_bitmap[array_index] &= (255 - bit);
+    }
+  }
+}
+
+/**
+ * @brief Copies the window of short transaction IDs from the trx_sys bitmap
+ * into this view's top_active array.
+ *
+ * top_active[0] receives the bitmap byte that holds
+ * trx_sys->min_short_valid_id; the following bytes are copied in ID order,
+ * following the wrap-around of the bitmap, up to the byte that holds
+ * trx_sys->max_short_valid_id. A window that needs more than
+ * MAX_TOP_ACTIVE_BYTES bytes is trimmed first, which moves its oldest IDs to
+ * trx_sys->long_rw_trx_ids and raises trx_sys->min_short_valid_id.
+ *
+ * The window length is measured on the full transaction IDs rather than on
+ * the masked bitmap indexes, so that a window wrapping around onto the byte
+ * it started in is not mistaken for a single-byte window.
+ *
+ * @pre The trx_sys mutex must be owned by the caller.
+ * @post m_short_min_id and m_short_max_id hold the bounds of the copied
+ * window.
+ */
+void ReadView::copy_short_trx_ids() {
+  ut_ad(trx_sys_mutex_own());
+
+  const trx_id_t max_id = trx_sys->max_short_valid_id;
+
+  if ((max_id >> 3) - (trx_sys->min_short_valid_id >> 3) >=
+      static_cast<trx_id_t>(MAX_TOP_ACTIVE_BYTES)) {
+    trim_short_trx_ids_to_top_active();
+  }
+
+  const unsigned char *short_trx_id_bitmap = trx_sys->short_rw_trx_ids_bitmap;
+  const trx_id_t min_id = trx_sys->min_short_valid_id;
+  const ulint n_bytes = static_cast<ulint>((max_id >> 3) - (min_id >> 3)) + 1;
+  const ulint first_byte = (min_id & 0x7FFFF) >> 3;
+  /* Bytes left before the bitmap wraps around to index 0. */
+  const ulint n_until_wrap = MAX_SHORT_ACTIVE_BYTES - first_byte;
+
+  if (n_bytes <= n_until_wrap) {
+    ::memmove(top_active, &short_trx_id_bitmap[first_byte], n_bytes);
+  } else {
+    ::memmove(top_active, &short_trx_id_bitmap[first_byte], n_until_wrap);
+    ::memmove(top_active + n_until_wrap, &short_trx_id_bitmap[0],
+              n_bytes - n_until_wrap);
+  }
+
+  m_short_min_id = min_id;
+  m_short_max_id = max_id;
+}
+
 /**
 Opens a read view where exactly the transactions serialized before this
 point in time are seen in the view.
@@ -471,14 +699,29 @@ void ReadView::prepare(trx_id_t id) {
 
   ut_a(m_low_limit_no <= m_low_limit_id);
 
-  if (!trx_sys->rw_trx_ids.empty()) {
-    copy_trx_ids(trx_sys->rw_trx_ids);
+  if (trx_sys->short_rw_trx_valid_number) {
+    copy_short_trx_ids();
+    m_has_short_actives = true;
+  } else {
+    m_has_short_actives = false;
+  }
+
+  if (!trx_sys->long_rw_trx_ids.empty()) {
+    copy_long_trx_ids(trx_sys->long_rw_trx_ids, trx_sys->min_short_valid_id);
   } else {
-    m_ids.clear();
+    m_long_ids.clear();
   }
 
   /* The first active transaction has the smallest id. */
-  m_up_limit_id = !m_ids.empty() ? m_ids.front() : m_low_limit_id;
+  if (!m_long_ids.empty()) {
+    m_up_limit_id = m_long_ids.front();
+  } else {
+    if (trx_sys->short_rw_trx_valid_number) {
+      m_up_limit_id = trx_sys->min_short_valid_id;
+    } else {
+      m_up_limit_id = m_low_limit_id;
+    }
+  }
 
   ut_a(m_up_limit_id <= m_low_limit_id);
 
@@ -637,12 +880,33 @@ Copy state from another view. Must call copy_complete() to finish.
 void ReadView::copy_prepare(const ReadView &other) {
   ut_ad(&other != this);
 
-  if (!other.m_ids.empty()) {
-    const ids_t::value_type *p = other.m_ids.data();
+  if (other.m_has_short_actives) {
+    unsigned int max_trim_id = other.m_short_max_id & 0x7FFFF;
+    unsigned int min_trim_id = other.m_short_min_id & 0x7FFFF;
+    unsigned int max_array_index = (max_trim_id >> 3);
+    unsigned int min_array_index = (min_trim_id >> 3);
+    int diff =
+        (MAX_SHORT_ACTIVE_BYTES + max_array_index - min_array_index + 1) %
+        MAX_TOP_ACTIVE_BYTES;
+    if (diff == 0) {
+      diff = MAX_TOP_ACTIVE_BYTES;
+    }
+
+    ::memmove(top_active, other.top_active, diff);
+    m_has_short_actives = true;
+    m_short_min_id = other.m_short_min_id;
+    m_short_max_id = other.m_short_max_id;
+
+  } else {
+    m_has_short_actives = false;
+  }
+
+  if (!other.m_long_ids.empty()) {
+    const ids_t::value_type *p = other.m_long_ids.data();
 
-    m_ids.assign(p, p + other.m_ids.size());
+    m_long_ids.assign(p, p + other.m_long_ids.size());
   } else {
-    m_ids.clear();
+    m_long_ids.clear();
   }
 
   m_up_limit_id = other.m_up_limit_id;
@@ -664,12 +928,27 @@ void ReadView::copy_complete() {
   ut_ad(!trx_sys_mutex_own());
 
   if (m_creator_trx_id > 0) {
-    m_ids.insert(m_creator_trx_id);
+    if (m_short_min_id <= m_creator_trx_id) {
+      unsigned int trim_id = m_creator_trx_id & 0x7FFFF;
+      unsigned int trim_min_id = m_short_min_id & 0x7FFFF;
+      unsigned int array_index = (trim_id >> 3);
+      unsigned int array_min_index = (trim_min_id >> 3);
+      array_index = (MAX_SHORT_ACTIVE_BYTES + array_index - array_min_index) %
+                    MAX_TOP_ACTIVE_BYTES;
+      unsigned int array_remainder = trim_id & (0x7);
+      top_active[array_index] |= (1 << (7 - array_remainder));
+    } else {
+      m_long_ids.insert(m_creator_trx_id);
+    }
   }
 
-  if (!m_ids.empty()) {
+  if (!m_long_ids.empty()) {
     /* The last active transaction has the smallest id. */
-    m_up_limit_id = std::min(m_ids.front(), m_up_limit_id);
+    m_up_limit_id = std::min(m_long_ids.front(), m_up_limit_id);
+  } else {
+    if (m_has_short_actives) {
+      m_up_limit_id = std::min(m_short_min_id, m_up_limit_id);
+    }
   }
 
   ut_ad(m_up_limit_id <= m_low_limit_id);
diff --git a/storage/innobase/trx/trx0sys.cc b/storage/innobase/trx/trx0sys.cc
index 9a8ef6dd7a73..11ae3b309703 100644
--- a/storage/innobase/trx/trx0sys.cc
+++ b/storage/innobase/trx/trx0sys.cc
@@ -610,7 +610,7 @@ void trx_sys_create(void) {
 
   ut_d(trx_sys->rw_max_trx_no = 0);
 
-  new (&trx_sys->rw_trx_ids)
+  new (&trx_sys->long_rw_trx_ids)
       trx_ids_t(ut::allocator<trx_id_t>(mem_key_trx_sys_t_rw_trx_ids));
 
   for (auto &shard : trx_sys->shards) {
@@ -688,7 +688,7 @@ void trx_sys_close(void) {
   mutex_free(&trx_sys->serialisation_mutex);
   mutex_free(&trx_sys->mutex);
 
-  trx_sys->rw_trx_ids.~trx_ids_t();
+  trx_sys->long_rw_trx_ids.~trx_ids_t();
 
   ut::free(trx_sys);
 
diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc
index d8cf983639b2..9f204948886a 100644
--- a/storage/innobase/trx/trx0trx.cc
+++ b/storage/innobase/trx/trx0trx.cc
@@ -1128,7 +1128,10 @@ void trx_lists_init_at_db_start(void) {
   for (trx_t *trx : trxs) {
     if (trx->state.load(std::memory_order_relaxed) == TRX_STATE_ACTIVE ||
         trx->state.load(std::memory_order_relaxed) == TRX_STATE_PREPARED) {
-      trx_sys->rw_trx_ids.push_back(trx->id);
+      trx_sys->long_rw_trx_ids.push_back(trx->id);
+      if (trx->id >= trx_sys->min_short_valid_id) {
+        trx_sys->min_short_valid_id = trx->id + 1;
+      }
     }
     trx_add_to_rw_trx_list(trx);
   }
@@ -1287,24 +1290,84 @@ void trx_assign_rseg_durable(trx_t *trx) {
   trx->rsegs.m_redo.rseg = srv_read_only_mode ? nullptr : get_next_redo_rseg();
 }
 
-/** Assign an id for this RW transaction and insert it into trx_sys->rw_trx_ids
-@param trx	transaction to assign an id for */
-static void trx_assign_id_for_rw(trx_t *trx) {
-  ut_ad(trx_sys_mutex_own());
+/** Set or clear the bit that tracks a transaction id in a short id bitmap.
+The low 19 bits of the id select the bit; within a byte, ids are laid out
+from the most significant bit down. */
+static inline void update_short_bitmamp(unsigned char *short_bitmap,
+                                        trx_id_t id, bool is_set_value) {
+  const unsigned int pos = id & 0x7FFFF;
+  const int bit = 1 << (7 - (pos & 0x7));
+  unsigned char &slot = short_bitmap[pos >> 3];
+  /* 255 - bit keeps every other bit of the byte. */
+  slot = is_set_value ? (slot | bit) : (slot & (255 - bit));
+}
 
-  trx->id =
-      trx->preallocated_id ? trx->preallocated_id : trx_sys_allocate_trx_id();
+/** Find the smallest id in [from, to] whose bit is set in a short id bitmap.
+@param[in] short_bitmap  bitmap indexed by the low 19 bits of an id
+@param[in] from          first id to test
+@param[in] to            last id to test
+@return the first id in the range whose bit is set, or `to` when the range is
+empty or holds no set bit */
+static inline trx_id_t find_smallest_short_active_trx_id(
+    unsigned char *short_bitmap, trx_id_t from, trx_id_t to) {
+  if (from > to) {
+    return to;
+  }
 
-  if (trx->preallocated_id) {
-    // preallocated_id might not be received in ascending order,
-    // so we need to maintain ordering in rw_trx_ids and update
-    // min_active_trx_id
-    auto upper_bound_it = std::upper_bound(trx_sys->rw_trx_ids.begin(),
-                                           trx_sys->rw_trx_ids.end(), trx->id);
-    trx_sys->rw_trx_ids.insert(upper_bound_it, trx->id);
+  /* Test the ids in ascending order; `to` doubles as the "nothing found"
+  result. */
+  for (trx_id_t id = from; id <= to; id++) {
+    const unsigned int pos = id & 0x7FFFF;
+    if (short_bitmap[pos >> 3] & (1 << (7 - (pos & 0x7)))) {
+      return id;
+    }
+  }
+  return to;
+}
+
+/** Assign an id for this RW transaction and register it as active: in
+trx_sys->long_rw_trx_ids if it is below the short id window, otherwise in
+trx_sys->short_rw_trx_ids_bitmap. The caller must own the trx_sys mutex. */
+static void trx_assign_id_for_rw(trx_t *trx) {
+  ut_ad(trx_sys_mutex_own());
+  trx->id = trx_sys_allocate_trx_id();
+  if (trx->id < trx_sys->min_short_valid_id) {
+    trx_sys->long_rw_trx_ids.push_back(trx->id);
   } else {
-    // The id is known to be greatest
-    trx_sys->rw_trx_ids.push_back(trx->id);
+    unsigned char *bitmap = trx_sys->short_rw_trx_ids_bitmap;
+    if (trx_sys->short_rw_trx_valid_number == 0) {
+      trx_sys->min_short_valid_id = trx->id;
+    }
+    if (trx->id > trx_sys->max_short_valid_id) {
+      trx_sys->max_short_valid_id = trx->id;
+    }
+
+    const trx_id_t old_min_id = trx_sys->min_short_valid_id;
+    const trx_id_t max_id = trx_sys->max_short_valid_id;
+    const int64_t diff = max_id - old_min_id;
+    if (diff >= (MAX_SHORT_ACTIVE_BYTES << 3)) {
+      /* The window outgrew the bitmap. Move the oldest ids to
+      long_rw_trx_ids and clear their bits before searching for the new
+      window start, so that a stale bit cannot be mistaken for an id of
+      the new window that shares its bitmap position. */
+      const trx_id_t base =
+          (max_id - (max_id & 0x7)) - ((MAX_SHORT_ACTIVE_BYTES - 1) << 3);
+      for (trx_id_t id = old_min_id; id < base; id++) {
+        const unsigned int trim_id = id & 0x7FFFF;
+        const int bit = 1 << (7 - (trim_id & 0x7));
+        if (bitmap[trim_id >> 3] & bit) {
+          trx_sys->long_rw_trx_ids.push_back(id);
+          trx_sys->short_rw_trx_valid_number--;
+          bitmap[trim_id >> 3] &= (255 - bit);
+        }
+      }
+      trx_sys->min_short_valid_id =
+          find_smallest_short_active_trx_id(bitmap, base, max_id);
+    }
+    /* Set the bit on both paths: read views copy this bitmap, so an id
+    whose arrival forced a trim must still be recorded as active. */
+    update_short_bitmamp(bitmap, trx->id, true);
+    trx_sys->short_rw_trx_valid_number++;
   }
 }
 
@@ -1465,6 +1528,7 @@ static void trx_start_low(
         ut_ad(!srv_read_only_mode);
 
         trx->state.store(TRX_STATE_ACTIVE, std::memory_order_relaxed);
+
         trx_assign_id_for_rw(trx);
 
         trx_sys_mutex_exit();
@@ -1858,11 +1922,59 @@ static void trx_erase_lists(trx_t *trx) {
   ut_ad(trx->id > 0);
   ut_ad(trx_sys_mutex_own());
 
-  trx_ids_t::iterator it = std::lower_bound(trx_sys->rw_trx_ids.begin(),
-                                            trx_sys->rw_trx_ids.end(), trx->id);
+  trx_id_t min_short_valid_id = trx_sys->min_short_valid_id;
+  trx_id_t max_short_valid_id = trx_sys->max_short_valid_id;
+
+  if (trx->id < min_short_valid_id) {
+    trx_ids_t::iterator it =
+        std::lower_bound(trx_sys->long_rw_trx_ids.begin(),
+                         trx_sys->long_rw_trx_ids.end(), trx->id);
 
-  ut_ad(*it == trx->id);
-  trx_sys->rw_trx_ids.erase(it);
+    ut_ad(*it == trx->id);
+    trx_sys->long_rw_trx_ids.erase(it);
+  } else {
+    trx_sys->short_rw_trx_valid_number--;
+    int short_rw_trx_valid_number = trx_sys->short_rw_trx_valid_number;
+    update_short_bitmamp(trx_sys->short_rw_trx_ids_bitmap, trx->id, false);
+    if (short_rw_trx_valid_number > 0) {
+      if (trx->id == min_short_valid_id) {
+        trx_id_t candidate_min_short_valid_id =
+            find_smallest_short_active_trx_id(trx_sys->short_rw_trx_ids_bitmap,
+                                              min_short_valid_id + 1,
+                                              max_short_valid_id);
+        trx_sys->min_short_valid_id = candidate_min_short_valid_id;
+      } else {
+        int64_t diff = max_short_valid_id - min_short_valid_id;
+        if (diff >= MAX_SHORT_ACTIVE_BYTES) {
+          trx_id_t old_id_start = min_short_valid_id;
+          trx_id_t max_valid_id = max_short_valid_id;
+          max_valid_id = max_valid_id - ((max_valid_id & 0x7));
+          trx_id_t base =
+              max_valid_id - (((MAX_SHORT_ACTIVE_BYTES >> 3) - 1) << 3);
+
+          unsigned char *short_trx_id_bitmap = trx_sys->short_rw_trx_ids_bitmap;
+          trx_sys->min_short_valid_id = find_smallest_short_active_trx_id(
+              short_trx_id_bitmap, base, max_short_valid_id);
+
+          trx_id_t old_id_end = base - 1;
+
+          for (trx_id_t id = old_id_start; id <= old_id_end; id++) {
+            unsigned int trim_id = id & 0x7FFFF;
+            unsigned int array_index = (trim_id >> 3);
+            unsigned int array_remainder = trim_id & (0x7);
+            int is_value_set =
+                short_trx_id_bitmap[array_index] & (1 << (7 - array_remainder));
+            if (is_value_set) {
+              trx_sys->long_rw_trx_ids.push_back(id);
+              trx_sys->short_rw_trx_valid_number--;
+              short_trx_id_bitmap[array_index] &=
+                  (255 - (1 << (7 - array_remainder)));
+            }
+          }
+        }
+      }
+    }
+  }
 
   if (trx->read_only || trx->rsegs.m_redo.rseg == nullptr) {
     ut_ad(!trx->in_rw_trx_list);
@@ -3523,6 +3635,7 @@ void trx_set_rw_mode(trx_t *trx) /*!< in/out: transaction that is RW */
 
   trx_sys_mutex_enter();
 
+  ut_ad(trx->id == 0);
   trx_assign_id_for_rw(trx);
 
   /* So that we can see our own changes unless our view is a clone */

From f4e59d90d490b38a218f8510c0358ed8e140b081 Mon Sep 17 00:00:00 2001
From: Venkatesh Prasad <venkatesh.prasad@percona.com>
Date: Sun, 23 Mar 2025 22:40:00 +0530
Subject: [PATCH 3/5] PS-9647: MySQL Perf Improvements

https://perconadev.atlassian.net/browse/PS-9647

This patch uses list instead of deque for memory efficiency reasons,
primarily to optimize operations like bulk insert
---
 include/mem_root_deque.h      | 857 +++++++++-------------------------
 sql/item_func.cc              |   3 +-
 sql/sql_base.cc               |   3 +-
 sql/sql_derived.cc            |   5 +-
 sql/sql_executor.cc           |   2 +-
 sql/sql_optimizer.cc          |   6 +-
 sql/sql_parse.cc              |   4 +-
 sql/sql_resolver.cc           |  32 +-
 sql/sql_update.cc             |   3 +-
 unittest/gunit/CMakeLists.txt |   1 -
 10 files changed, 260 insertions(+), 656 deletions(-)

diff --git a/include/mem_root_deque.h b/include/mem_root_deque.h
index de5cb87219b1..d20716ecf076 100644
--- a/include/mem_root_deque.h
+++ b/include/mem_root_deque.h
@@ -24,649 +24,244 @@
 #ifndef MEM_ROOT_DEQUE_H
 #define MEM_ROOT_DEQUE_H
 
+#include <list>
 #include <algorithm>
-#include <type_traits>
-#include <utility>
-
 #include <assert.h>
-#include <stdint.h>
-
-#include "my_alloc.h"
-
-template <class Element_type>
-static constexpr size_t FindElementsPerBlock() {
-  // Aim for 1 kB.
-  size_t base_number_elems =
-      1024 / sizeof(Element_type);  // NOLINT(bugprone-sizeof-expression)
 
-  // Find the next power of two, rounded up. We should have at least 16 elements
-  // per block to avoid allocating way too often (although the code itself
-  // should work fine with 1, for debugging purposes).
-  for (size_t block_size = 16; block_size < 1024; ++block_size) {
-    if (block_size >= base_number_elems) {
-      return block_size;
-    }
-  }
-  return 1024;
-}
+#include "sql/mem_root_allocator.h"
 
 /**
-  A (partial) implementation of std::deque allocating its blocks on a MEM_ROOT.
-
-  This class works pretty much like an std::deque with a Mem_root_allocator,
-  and used to be a forwarder to it. However, libstdc++ has a very complicated
-  implementation of std::deque, leading to code blowup (e.g., operator[] is
-  23 instructions on x86-64, including two branches), and we cannot easily use
-  libc++ on all platforms. This version is instead:
-
-   - Optimized for small, straight-through machine code (few and simple
-     instructions, few branches).
-   - Optimized for few elements; in particular, zero elements is an important
-     special case, much more so than 10,000.
-
-  It gives mostly the same guarantees as std::deque; elements can be
-  inserted efficiently on both front and back [1]. {push,pop}_{front,back}
-  guarantees reference stability except for to removed elements (obviously),
-  and invalidates iterators. (Iterators are forcefully invalidated using
-  asserts.) erase() does the same. Note that unlike std::deque, insert()
-  at begin() will invalidate references. Some functionality, like several
-  constructors, resize(), shrink_to_fit(), swap(), etc. is missing.
-
-  The implementation is the same as classic std::deque: Elements are held in
-  blocks of about 1 kB each. Once an element is in a block, it never moves
-  (giving the aforementioned pointer stability). The blocks are held in an
-  array, which can be reallocated when more blocks are needed. The
-  implementation keeps track of the used items in terms of “physical indexes”;
-  element 0 starts at the first byte of the first block, element 1 starts
-  immediately after 0, and so on. So you can have something like (assuming very
-  small blocks of only 4 elements each for the sake of drawing):
-
-         block 0   block 1   block 2
-         ↓         ↓         ↓
-        [x x 2 3] [4 5 6 7] [8 9 x x]
-             ↑                   ↑
-             begin_idx = 2       end_idx = 10
-
-  end_idx counts as is customary one-past-the-end, so in this case, the elements
-  [2,9) would be valid, and e.g. (*this)[4] would give physical index 6, which
-  points to the third element (index 2) in the middle block (block 1). Inserting
-  a new element at the front is as easy as putting it in physical index 1 and
-  adjusting begin_idx to the left. This means a lookup by index requires some
-  shifting, masking and a double indirect load.
-
-  Iterators keep track of which deque they belong to, and what physical index
-  they point to. (So lookup by iterator also requires a double indirect load,
-  but the alternative would be caching the block pointer and having an extra
-  branch when advancing the iterator.) Inserting a new block at the beginning
-  would move around all the physical indexes (which is why iterators get
-  invalidated; we could probably get around this by having an “offset to first
-  block”, but it's likely not worth it.)
-
-  [1] Actually, it's O(n), since there's no exponential growth of the blocks
-  array. But the blocks are reallocated very rarely, so it is generally
-  efficient nevertheless.
+ * We use list instead of deque for memory efficiency reasons,
+ * primarily to optimize operations like bulk insert.
  */
-template <class Element_type>
+template <class T>
 class mem_root_deque {
- public:
-  /// Used to conform to STL algorithm demands.
-  using size_type = size_t;
-  using difference_type = ptrdiff_t;
-  using value_type = Element_type;
-  using pointer = Element_type *;
-  using reference = Element_type &;
-  using const_pointer = const Element_type *;
-  using const_reference = const Element_type &;
-
-  /// Constructor. Leaves the array in an empty, valid state.
-  explicit mem_root_deque(MEM_ROOT *mem_root) : m_root(mem_root) {}
-
-  // Copy constructor and assignment. We could probably be a bit smarter
-  // about these for large arrays, if the source array has e.g. empty blocks.
-  mem_root_deque(const mem_root_deque &other)
-      : m_begin_idx(other.m_begin_idx),
-        m_end_idx(other.m_end_idx),
-        m_capacity(other.m_capacity),
-        m_root(other.m_root) {
-    m_blocks = m_root->ArrayAlloc<Block>(num_blocks());
-    for (size_t block_idx = 0; block_idx < num_blocks(); ++block_idx) {
-      m_blocks[block_idx].init(m_root);
-    }
-    for (size_t idx = m_begin_idx; idx != m_end_idx; ++idx) {
-      new (&get(idx)) Element_type(other.get(idx));
-    }
-  }
-  mem_root_deque &operator=(const mem_root_deque &other) {
-    if (this != &other) {
-      clear();
-      for (const Element_type &elem : other) {
-        push_back(elem);
-      }
-    }
-    return *this;
-  }
-
-  // Move constructor and assignment.
-  mem_root_deque(mem_root_deque &&other)
-      : m_blocks(other.m_blocks),
-        m_begin_idx(other.m_begin_idx),
-        m_end_idx(other.m_end_idx),
-        m_root(other.m_root) {
-    other.m_blocks = nullptr;
-    other.m_begin_idx = other.m_end_idx = other.m_capacity = 0;
-    other.invalidate_iterators();
-  }
-  mem_root_deque &operator=(mem_root_deque &&other) {
-    if (this != &other) {
-      this->~mem_root_deque();
-      new (this) mem_root_deque(std::move(other));
-    }
-    return *this;
-  }
-
-  ~mem_root_deque() { clear(); }
-
-  Element_type &operator[](size_t idx) const { return get(idx + m_begin_idx); }
-
-  /**
-    Adds the given element to the end of the deque.
-    The element is a copy of the given one.
-
-    @returns true on OOM (no change is done if so)
-   */
-  bool push_back(const Element_type &element) {
-    if (m_end_idx == m_capacity) {
-      if (add_block_back()) {
-        return true;
-      }
-    }
-    new (&get(m_end_idx++)) Element_type(element);
-    invalidate_iterators();
-    return false;
-  }
-
-  /**
-    Adds the given element to the end of the deque.
-    The element is moved into place.
-
-    @returns true on OOM (no change is done if so)
-   */
-  bool push_back(Element_type &&element) {
-    if (m_end_idx == m_capacity) {
-      if (add_block_back()) {
-        return true;
-      }
-    }
-    new (&get(m_end_idx++)) Element_type(std::move(element));
-    invalidate_iterators();
-    return false;
-  }
-
-  /**
-    Adds the given element to the beginning of the deque.
-    The element is a copy of the given one.
-
-    @returns true on OOM (no change is done if so)
-   */
-  bool push_front(const Element_type &element) {
-    if (m_begin_idx == 0) {
-      if (add_block_front()) {
-        return true;
-      }
-      assert(m_begin_idx != 0);
-    }
-    new (&get(--m_begin_idx)) Element_type(element);
-    invalidate_iterators();
-    return false;
-  }
-
-  /**
-    Adds the given element to the end of the deque.
-    The element is moved into place.
-   */
-  bool push_front(Element_type &&element) {
-    if (m_begin_idx == 0) {
-      if (add_block_front()) {
-        return true;
-      }
-      assert(m_begin_idx != 0);
-    }
-    new (&get(--m_begin_idx)) Element_type(std::move(element));
-    invalidate_iterators();
-    return false;
-  }
-
-  /// Removes the last element from the deque.
-  void pop_back() {
-    assert(!empty());
-    ::destroy(&get(--m_end_idx));
-    invalidate_iterators();
-  }
-
-  /// Removes the first element from the deque.
-  void pop_front() {
-    assert(!empty());
-    ::destroy(&get(m_begin_idx++));
-    invalidate_iterators();
-  }
-
-  /// Returns the first element in the deque.
-  Element_type &front() {
-    assert(!empty());
-    return get(m_begin_idx);
-  }
-
-  const Element_type &front() const {
-    assert(!empty());
-    return get(m_begin_idx);
-  }
-
-  /// Returns the last element in the deque.
-  Element_type &back() {
-    assert(!empty());
-    return get(m_end_idx - 1);
-  }
-
-  const Element_type &back() const {
-    assert(!empty());
-    return get(m_end_idx - 1);
-  }
-
-  /// Removes all elements from the deque. Destructors are called,
-  /// but since the elements themselves are allocated on the MEM_ROOT,
-  /// their memory cannot be freed.
-  void clear() {
-    for (size_t idx = m_begin_idx; idx != m_end_idx; ++idx) {
-      ::destroy(&get(idx));
-    }
-    m_begin_idx = m_end_idx = m_capacity / 2;
-    invalidate_iterators();
-  }
-
-  template <class Iterator_element_type>
-  class Iterator {
-   public:
-    using difference_type = ptrdiff_t;
-    using value_type = Iterator_element_type;
-    using pointer = Iterator_element_type *;
-    using reference = Iterator_element_type &;
-    using iterator_category = std::random_access_iterator_tag;
-
-    // DefaultConstructible (required for ForwardIterator).
-    Iterator() = default;
-
-    Iterator(const mem_root_deque *deque, size_t physical_idx)
-        : m_deque(deque), m_physical_idx(physical_idx) {
-#ifndef NDEBUG
-      m_generation = m_deque->generation();
-#endif
-    }
-
-    /// For const_iterator: Implicit conversion from iterator.
-    /// This is written in a somewhat cumbersome fashion to avoid
-    /// declaring an explicit copy constructor for iterator,
-    /// which causes compiler warnings other places for some compilers.
-    // NOLINTNEXTLINE(google-explicit-constructor): Intentional.
-    template <
-        class T,
-        typename = std::enable_if_t<
-            std::is_const<Iterator_element_type>::value &&
-            std::is_same<typename T::value_type,
-                         std::remove_const_t<Iterator_element_type>>::value>>
-    Iterator(const T &other)
-        : m_deque(other.m_deque), m_physical_idx(other.m_physical_idx) {
-#ifndef NDEBUG
-      m_generation = other.m_generation;
-#endif
-    }
-
-    // Iterator (required for InputIterator).
-    Iterator_element_type &operator*() const {
-      assert_not_invalidated();
-      return m_deque->get(m_physical_idx);
-    }
-    Iterator &operator++() {
-      assert_not_invalidated();
-      ++m_physical_idx;
-      return *this;
+private:
+    std::list<T, Mem_root_allocator<T>> list_;
+    size_t size_;
+
+public:
+    using iterator = typename std::list<T, Mem_root_allocator<T>>::iterator;
+    using const_iterator = typename std::list<T, Mem_root_allocator<T>>::const_iterator;
+    using reverse_iterator = typename std::list<T, Mem_root_allocator<T>>::reverse_iterator;
+    using const_reverse_iterator = typename std::list<T, Mem_root_allocator<T>>::const_reverse_iterator;
+
+
+    explicit mem_root_deque(MEM_ROOT *mem_root)
+        : list_(Mem_root_allocator<T>(mem_root)), size_(0) {}
+
+    mem_root_deque(typename std::list<T, Mem_root_allocator<T>>::size_type count, const T &value, MEM_ROOT *mem_root)
+        : list_(count, value, Mem_root_allocator<T>(mem_root)), size_(count) {}
+
+    mem_root_deque(typename std::list<T, Mem_root_allocator<T>>::size_type count, MEM_ROOT *mem_root)
+        : list_(count, Mem_root_allocator<T>(mem_root)), size_(count) {}
+
+    template <class InputIt>
+    mem_root_deque(InputIt first, InputIt last, MEM_ROOT *mem_root)
+        : list_(first, last, Mem_root_allocator<T>(mem_root)), size_(std::distance(first, last)) {}
+
+    mem_root_deque(const mem_root_deque &other)
+        : list_(other.list_), size_(other.size_) {}
+
+    mem_root_deque(const mem_root_deque &other, MEM_ROOT *mem_root)
+        : list_(other.list_, Mem_root_allocator<T>(mem_root)), size_(other.size_) {}
+
+    mem_root_deque(mem_root_deque &&other)
+        : list_(std::move(other.list_)), size_(other.size_) {}
+
+    mem_root_deque(mem_root_deque &&other, MEM_ROOT *mem_root)
+        : list_(std::move(other.list_), Mem_root_allocator<T>(mem_root)), size_(other.size_) {}
+
+    mem_root_deque(std::initializer_list<T> init, MEM_ROOT *mem_root)
+      : list_(std::move(init), Mem_root_allocator<T>(mem_root)), size_(init.size()) {}
+
+    inline const_iterator begin() const {
+      return list_.begin();
     }
 
-    // EqualityComparable (required for InputIterator).
-    bool operator==(const Iterator &other) const {
-      assert_not_invalidated();
-      assert(m_deque == other.m_deque);
-      return m_physical_idx == other.m_physical_idx;
+    inline const_iterator end() const {
+      return list_.end();
     }
 
-    // InputIterator (required for ForwardIterator).
-    bool operator!=(const Iterator &other) const { return !(*this == other); }
+    inline iterator begin() {
+      return list_.begin();
+    }
 
-    Iterator_element_type *operator->() const {
-      assert_not_invalidated();
-      return &m_deque->get(m_physical_idx);
-    }
-
-    // ForwardIterator (required for RandomAccessIterator).
-    Iterator operator++(int) {
-      assert_not_invalidated();
-      Iterator ret = *this;
-      ++m_physical_idx;
-      return ret;
-    }
-
-    // BidirectionalIterator (required for RandomAccessIterator).
-    Iterator &operator--() {
-      assert_not_invalidated();
-      --m_physical_idx;
-      return *this;
-    }
-    Iterator operator--(int) {
-      assert_not_invalidated();
-      Iterator ret = *this;
-      --m_physical_idx;
-      return ret;
-    }
-
-    // RandomAccessIterator.
-    Iterator &operator+=(difference_type diff) {
-      assert_not_invalidated();
-      m_physical_idx += diff;
-      return *this;
-    }
-
-    Iterator &operator-=(difference_type diff) {
-      assert_not_invalidated();
-      m_physical_idx -= diff;
-      return *this;
-    }
-
-    Iterator operator+(difference_type offset) const {
-      assert_not_invalidated();
-      return Iterator{m_deque, m_physical_idx + offset};
-    }
-
-    Iterator operator-(difference_type offset) const {
-      assert_not_invalidated();
-      return Iterator{m_deque, m_physical_idx - offset};
-    }
-
-    difference_type operator-(const Iterator &other) const {
-      assert_not_invalidated();
-      assert(m_deque == other.m_deque);
-      return m_physical_idx - other.m_physical_idx;
-    }
-
-    Iterator_element_type &operator[](size_t idx) const {
-      return *(*this + idx);
-    }
-
-    bool operator<(const Iterator &other) const {
-      assert_not_invalidated();
-      assert(m_deque == other.m_deque);
-      return m_physical_idx < other.m_physical_idx;
-    }
-
-    bool operator<=(const Iterator &other) const { return !(*this > other); }
-
-    bool operator>(const Iterator &other) const {
-      assert_not_invalidated();
-      assert(m_deque == other.m_deque);
-      return m_physical_idx > other.m_physical_idx;
-    }
-
-    bool operator>=(const Iterator &other) const { return !(*this < other); }
-
-   private:
-    const mem_root_deque *m_deque = nullptr;
-    size_t m_physical_idx = 0;
-#ifndef NDEBUG
-    size_t m_generation = 0;
-#endif
-
-    void assert_not_invalidated() const {
-      assert(m_generation == m_deque->generation());
-    }
-
-    friend class mem_root_deque;
-  };
-
-  using iterator = Iterator<Element_type>;
-  using const_iterator = Iterator<const Element_type>;
-  using reverse_iterator = std::reverse_iterator<iterator>;
-  using reverse_const_iterator = std::reverse_iterator<const_iterator>;
-
-  iterator begin() { return iterator{this, m_begin_idx}; }
-  iterator end() { return iterator{this, m_end_idx}; }
-  reverse_iterator rbegin() { return std::make_reverse_iterator(end()); }
-  reverse_iterator rend() { return std::make_reverse_iterator(begin()); }
-  const_iterator cbegin() { return const_iterator{this, m_begin_idx}; }
-  const_iterator cend() { return const_iterator{this, m_end_idx}; }
-  const_iterator begin() const { return const_iterator{this, m_begin_idx}; }
-  const_iterator end() const { return const_iterator{this, m_end_idx}; }
-  reverse_const_iterator crbegin() const {
-    return std::make_reverse_iterator(end());
-  }
-  reverse_const_iterator crend() const {
-    return std::make_reverse_iterator(begin());
-  }
-  reverse_const_iterator rbegin() const {
-    return std::make_reverse_iterator(end());
-  }
-  reverse_const_iterator rend() const {
-    return std::make_reverse_iterator(begin());
-  }
-
-  size_t size() const { return m_end_idx - m_begin_idx; }
-  bool empty() const { return size() == 0; }
-
-  /**
-    Erase all the elements in the specified range.
-
-    @param first  iterator that points to the first element to remove
-    @param last   iterator that points to the element after the
-                  last one to remove
-    @return an iterator to the first element after the removed range
-  */
-  iterator erase(const_iterator first, const_iterator last) {
-    iterator pos = begin() + (first - cbegin());
-    if (first != last) {
-      iterator new_end = std::move(last, cend(), pos);
-      for (size_t idx = new_end.m_physical_idx; idx != m_end_idx; ++idx) {
-        ::destroy(&get(idx));
-      }
-      m_end_idx = new_end.m_physical_idx;
-    }
-    invalidate_iterators();
-#ifndef NDEBUG
-    pos.m_generation = m_generation;  // Re-validate.
-#endif
-    return pos;
-  }
-
-  /**
-    Removes a single element from the array.
-
-    @param position  iterator that points to the element to remove
-
-    @return an iterator to the first element after the removed range
-  */
-  iterator erase(const_iterator position) {
-    return erase(position, std::next(position));
-  }
-
-  /**
-    Insert an element at a given position.
-    The element is a copy of the given one.
-
-    @param pos    the new element is inserted before the element
-                  at this position
-    @param value  the value of the new element
-    @return an iterator that points to the inserted element
-  */
-  iterator insert(const_iterator pos, const Element_type &value) {
-    difference_type idx = pos - cbegin();
-    push_back(value);
-    std::rotate(begin() + idx, end() - 1, end());
-    invalidate_iterators();
-    return begin() + idx;
-  }
-
-  /**
-    Insert an element at a given position.
-    The element is moved into place.
-
-    @param pos    the new element is inserted before the element
-                  at this position
-    @param value  the value of the new element
-    @return an iterator that points to the inserted element
-  */
-  iterator insert(const_iterator pos, Element_type &&value) {
-    difference_type idx = pos - cbegin();
-    push_back(std::move(value));
-    std::rotate(begin() + idx, end() - 1, end());
-    invalidate_iterators();
-    return begin() + idx;
-  }
-
-  template <class ForwardIt>
-  iterator insert(const_iterator pos, ForwardIt first, ForwardIt last) {
-    difference_type idx = pos - cbegin();
-    for (ForwardIt it = first; it != last; ++it) {
-      push_back(*it);
-    }
-    std::rotate(begin() + idx, end() - (last - first), end());
-    invalidate_iterators();
-    return begin() + idx;
-  }
-
- private:
-  /// Number of elements in each block.
-  static constexpr size_t block_elements = FindElementsPerBlock<Element_type>();
-
-  // A block capable of storing <block_elements> elements. Deliberately
-  // has no constructor, since it wouldn't help any of the code that actually
-  // allocates any blocks (all it would do would be to hinder using
-  // ArrayAlloc when allocating new blocks).
-  struct Block {
-    Element_type *elements;
-
-    bool init(MEM_ROOT *mem_root) {
-      // Use Alloc instead of ArrayAlloc, so that no constructors are called.
-      elements = static_cast<Element_type *>(mem_root->Alloc(
-          block_elements *
-          sizeof(Element_type)));  // NOLINT(bugprone-sizeof-expression)
-      return elements == nullptr;
-    }
-  };
-
-  /// Pointer to the first block. Can be nullptr if there are no elements
-  /// (this makes the constructor cheaper). Stored on the MEM_ROOT,
-  /// and needs no destruction, so just a raw pointer.
-  Block *m_blocks = nullptr;
-
-  /// Physical index to the first valid element.
-  size_t m_begin_idx = 0;
-
-  /// Physical index one past the last valid element. If begin == end,
-  /// the array is empty (and then it doesn't matter what the values are).
-  size_t m_end_idx = 0;
-
-  /// Number of blocks, multiplied by block_elements. (Storing this instead
-  /// of the number of blocks itself makes testing in push_back cheaper.)
-  size_t m_capacity = 0;
-
-  /// Pointer to the MEM_ROOT that we store our blocks and elements on.
-  MEM_ROOT *m_root;
-
-#ifndef NDEBUG
-  /// Incremented each time we make an operation that would invalidate
-  /// iterators. Asserts use this value in debug mode to be able to
-  /// verify that they have not been invalidated. (In optimized mode,
-  /// using an invalidated iterator incurs undefined behavior.)
-  size_t m_generation = 0;
-  void invalidate_iterators() { ++m_generation; }
-#else
-  void invalidate_iterators() {}
-#endif
-
-  /// Adds the first block of elements.
-  bool add_initial_block() {
-    m_blocks = m_root->ArrayAlloc<Block>(1);
-    if (m_blocks == nullptr) {
-      return true;
-    }
-    if (m_blocks[0].init(m_root)) {
-      return true;
-    }
-    m_begin_idx = m_end_idx = block_elements / 2;
-    m_capacity = block_elements;
-    return false;
-  }
-
-  // Not inlined, to get them off of the hot path.
-  bool add_block_back();
-  bool add_block_front();
-
-  size_t num_blocks() const { return m_capacity / block_elements; }
-
-  /// Gets a reference to the memory used to store an element with the given
-  /// physical index, starting from zero. Note that block_elements is always
-  /// a power of two, so the division and modulus operations are cheap.
-  Element_type &get(size_t physical_idx) const {
-    return m_blocks[physical_idx / block_elements]
-        .elements[physical_idx % block_elements];
-  }
-
-#ifndef NDEBUG
-  size_t generation() const { return m_generation; }
-#endif
-};
+    inline iterator end() {
+      return list_.end();
+    }
+
+    inline const_iterator cbegin() const {
+        return list_.cbegin();
+    }
+
+    inline const_iterator cend() const {
+        return list_.cend();
+    }
+
+    inline reverse_iterator rbegin() {
+        return list_.rbegin();
+    }
+
+    inline reverse_iterator rend() {
+        return list_.rend();
+    }
+
+    inline const_reverse_iterator rbegin() const {
+        return list_.rbegin();
+    }
+
+    inline const_reverse_iterator rend() const {
+        return list_.rend();
+    }
+
+    inline T& front() {
+      return list_.front();
+    }
 
-// TODO(sgunders): Consider storing spare blocks at either end to have
-// exponential growth and get true O(1) allocation.
-
-template <class Element_type>
-bool mem_root_deque<Element_type>::add_block_back() {
-  if (m_blocks == nullptr) {
-    return add_initial_block();
-  }
-  Block *new_blocks = m_root->ArrayAlloc<Block>(num_blocks() + 1);
-  if (new_blocks == nullptr) {
-    return true;
-  }
-  memcpy(new_blocks, m_blocks, sizeof(Block) * num_blocks());
-  if (new_blocks[num_blocks()].init(m_root)) {
-    return true;
-  }
-
-  m_blocks = new_blocks;
-  m_capacity += block_elements;
-  return false;
-}
-
-template <class Element_type>
-bool mem_root_deque<Element_type>::add_block_front() {
-  if (m_blocks == nullptr) {
-    if (add_initial_block()) {
-      return true;
-    }
-    if (m_begin_idx == 0) {
-      // Only relevant for very small values of block_elements.
-      m_begin_idx = m_end_idx = 1;
-    }
-    return false;
-  }
-  Block *new_blocks = m_root->ArrayAlloc<Block>(num_blocks() + 1);
-  memcpy(new_blocks + 1, m_blocks, sizeof(Block) * num_blocks());
-  if (new_blocks[0].init(m_root)) {
-    return true;
-  }
-
-  m_blocks = new_blocks;
-  m_begin_idx += block_elements;
-  m_end_idx += block_elements;
-  m_capacity += block_elements;
-  return false;
-}
+    inline const T& front() const {
+      return list_.front();
+    }
+
+    inline T& back() {
+      return list_.back();
+    }
+
+    inline const T& back() const {
+      return list_.back();
+    }
+
+    inline bool empty() const {
+      return size_ == 0;
+    }
+
+    inline size_t size() const {
+      assert(list_.size() == size_);
+      return size_;
+    }
+
+    inline void push_back(const T &value) {
+        list_.push_back(value);
+        ++size_;
+    }
+
+    inline void push_front(const T &value) {
+        list_.push_front(value);
+        ++size_;
+    }
+
+    inline void pop_back() {
+        if (!list_.empty()) {
+            list_.pop_back();
+            --size_;
+        }
+    }
+
+    inline void pop_front() {
+        if (!list_.empty()) {
+            list_.pop_front();
+            --size_;
+        }
+    }
+
+    inline void clear() {
+        list_.clear();
+        size_ = 0;
+    }
+
+    inline typename std::list<T, Mem_root_allocator<T>>::iterator
+      erase(typename std::list<T, Mem_root_allocator<T>>::iterator position) {
+        --size_;
+        return list_.erase(position);
+    }
+
+    inline typename std::list<T, Mem_root_allocator<T>>::iterator
+      erase(typename std::list<T, Mem_root_allocator<T>>::iterator first,
+        typename std::list<T, Mem_root_allocator<T>>::iterator last) {
+        size_ -= std::distance(first, last);
+        return list_.erase(first, last);
+    }
+
+    inline typename std::list<T, Mem_root_allocator<T>>::iterator
+      insert(typename std::list<T, Mem_root_allocator<T>>::iterator position, const T &value) {
+        auto it = list_.insert(position, value);
+        ++size_;
+        return it;
+    }
+
+    inline typename std::list<T, Mem_root_allocator<T>>::iterator
+      insert(typename std::list<T, Mem_root_allocator<T>>::iterator position, size_t count, const T &value) {
+        auto it = list_.insert(position, count, value);
+        size_ += count;
+        return it;
+    }
+
+    template <class InputIt>
+    inline typename std::list<T, Mem_root_allocator<T>>::iterator
+    insert(typename std::list<T, Mem_root_allocator<T>>::const_iterator pos, InputIt first, InputIt last) {
+        auto it = list_.insert(pos, first, last);
+        size_ += std::distance(first, last);
+        return it;
+    }
+
+    inline typename std::list<T, Mem_root_allocator<T>>::iterator find(const T &value) {
+        return std::find(list_.begin(), list_.end(), value);
+    }
+
+    inline bool contains(const T &value) const {
+        return std::find(list_.begin(), list_.end(), value) != list_.end();
+    }
+
+    inline void sort() {
+        list_.sort();
+    }
+
+    inline void reverse() {
+        list_.reverse();
+    }
+
+    inline void merge(mem_root_deque &other) {
+        list_.merge(other.list_);
+        size_ += other.size_;
+        other.size_ = 0;
+    }
+
+    inline void splice(typename std::list<T, Mem_root_allocator<T>>::iterator pos, mem_root_deque &other) {
+        list_.splice(pos, other.list_);
+        size_ += other.size_;
+        other.size_ = 0;
+    }
+
+    inline void splice(typename std::list<T, Mem_root_allocator<T>>::iterator pos,
+        mem_root_deque &other, typename std::list<T, Mem_root_allocator<T>>::iterator it) {
+        list_.splice(pos, other.list_, it);
+        --other.size_;
+        ++size_;
+    }
+
+    inline void unique() {
+        list_.unique();
+    }
+
+    inline void remove(const T &value) {
+        size_ -= std::count(list_.begin(), list_.end(), value);
+        list_.remove(value);
+    }
+
+    inline T& operator[](size_t index) {
+      auto it = list_.begin();
+      std::advance(it, index);  // Advance the iterator by index
+      return *it;  // Return the element at the given index
+    }
+
+    inline const T& operator[](size_t index) const {
+      auto it = list_.cbegin();
+      std::advance(it, index);  // Advance the const iterator by index
+      return *it;  // Return the element at the given index
+    }
+
+    inline mem_root_deque &operator=(const mem_root_deque &arg) = default;
+};
 
 #endif  // MEM_ROOT_DEQUE_H
+
diff --git a/sql/item_func.cc b/sql/item_func.cc
index 983feac6f59c..46f507cc2312 100644
--- a/sql/item_func.cc
+++ b/sql/item_func.cc
@@ -575,9 +575,10 @@ inline bool param_type_uses_non_param_inner(THD *thd, uint arg_count,
           return false;
         if (args[j]->type() == Item::ROW_ITEM)
           arguments[j] = down_cast<Item_row *>(args[j])->element_index(i);
-        else if (args[j]->type() == Item::SUBSELECT_ITEM)
+        else if (args[j]->type() == Item::SUBSELECT_ITEM) {
           arguments[j] = (*down_cast<Item_subselect *>(args[j])
                                ->unit->get_unit_column_types())[i];
+        }
       }
       if (param_type_uses_non_param_inner(thd, arg_count, arguments, def))
         return true;
diff --git a/sql/sql_base.cc b/sql/sql_base.cc
index 5421646b43d3..3a78c8a76eb7 100644
--- a/sql/sql_base.cc
+++ b/sql/sql_base.cc
@@ -9482,7 +9482,8 @@ bool insert_fields(THD *thd, Query_block *query_block, const char *db_name,
         **it = item; /* Replace '*' with the first found item. */
       } else {
         /* Add 'item' to the SELECT list, after the current one. */
-        *it = fields->insert(*it + 1, item);
+        mem_root_deque<Item *>::iterator next_it = ++(*it);
+        *it = fields->insert(next_it, item);
       }
 
       /*
diff --git a/sql/sql_derived.cc b/sql/sql_derived.cc
index 31d015397176..74064eedf3b9 100644
--- a/sql/sql_derived.cc
+++ b/sql/sql_derived.cc
@@ -545,10 +545,9 @@ bool copy_field_info(THD *thd, Item *orig_expr, Item *cloned_expr) {
                  }
                  if (inner_item->type() == Item::FIELD_ITEM) {
                    Item_field *field = down_cast<Item_field *>(inner_item);
-                   if (field_info.push_back(
+                   field_info.push_back(
                            Field_info(context, field->table_ref, depended_from,
-                                      field->cached_table, field->field)))
-                     return true;
+                                      field->cached_table, field->field));
                  }
                  return false;
                }))
diff --git a/sql/sql_executor.cc b/sql/sql_executor.cc
index c0b6d5dee89c..7aa9fdd4d1db 100644
--- a/sql/sql_executor.cc
+++ b/sql/sql_executor.cc
@@ -4345,7 +4345,7 @@ bool change_to_use_tmp_fields(mem_root_deque<Item *> *fields, THD *thd,
         Item_field *new_field = new Item_field(field);
         if (!suv || !new_field) return true;  // Fatal error
         mem_root_deque<Item *> list(thd->mem_root);
-        if (list.push_back(new_field)) return true;
+        list.push_back(new_field);
         if (suv->set_arguments(&list, true)) return true;
         new_item = suv;
       } else
diff --git a/sql/sql_optimizer.cc b/sql/sql_optimizer.cc
index 809975e3ed3b..3424635608de 100644
--- a/sql/sql_optimizer.cc
+++ b/sql/sql_optimizer.cc
@@ -7048,11 +7048,13 @@ static uint get_semi_join_select_list_index(Item_field *item_field) {
   if (emb_sj_nest && emb_sj_nest->is_sj_or_aj_nest()) {
     const mem_root_deque<Item *> &items =
         emb_sj_nest->nested_join->sj_inner_exprs;
-    for (size_t i = 0; i < items.size(); i++) {
-      const Item *sel_item = items[i];
+    size_t i = 0;
+    for (auto it = items.begin(); it != items.end(); ++it) {
+      const Item *sel_item = *it;
       if (sel_item->type() == Item::FIELD_ITEM &&
           down_cast<const Item_field *>(sel_item)->field->eq(item_field->field))
         return i;
+      i++;
     }
   }
   return UINT_MAX;
diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc
index ce3875d8f03f..7d7f7b8ca8a4 100644
--- a/sql/sql_parse.cc
+++ b/sql/sql_parse.cc
@@ -1304,7 +1304,9 @@ void free_items(Item *item) {
 */
 void cleanup_items(Item *item) {
   DBUG_TRACE;
-  for (; item; item = item->next_free) item->cleanup();
+  for (; item; item = item->next_free) {
+    item->cleanup();
+  }
 }
 
 /**
diff --git a/sql/sql_resolver.cc b/sql/sql_resolver.cc
index a748d680dcd5..acd2bd0ae9d6 100644
--- a/sql/sql_resolver.cc
+++ b/sql/sql_resolver.cc
@@ -5324,6 +5324,7 @@ bool Query_block::resolve_table_value_constructor_values(THD *thd) {
     }
 
     size_t item_index = 0;
+    auto field_it = fields.begin();
     for (auto it = values_row->begin(); it != values_row->end(); ++it) {
       Item *item = *it;
       if ((!item->fixed && item->fix_fields(thd, &*it)) ||
@@ -5356,7 +5357,9 @@ bool Query_block::resolve_table_value_constructor_values(THD *thd) {
         // Make sure to also replace the reference in item_list. In the case
         // where fix_fields transforms an item, it.ref() will only update the
         // reference of values_row.
-        if (first_execution) fields[item_index] = item;
+        if (first_execution) {
+          *field_it = item;
+        }
       } else {
         Item_values_column *column = down_cast<Item_values_column *>(
             GetNthVisibleField(fields, item_index));
@@ -5365,6 +5368,7 @@ bool Query_block::resolve_table_value_constructor_values(THD *thd) {
         column->fixed = true;  // Does not have regular fix_fields()
       }
 
+      field_it++;
       ++item_index;
     }
 
@@ -5561,8 +5565,12 @@ bool Query_block::transform_table_subquery_to_join_with_derived(
 
     // Append inner expressions of decorrelated equalities to the SELECT
     // list. Correct context info of outer expressions.
-    auto it_outer = sj_outer_exprs.begin() + initial_sj_inner_exprs_count;
-    auto it_inner = sj_inner_exprs.begin() + initial_sj_inner_exprs_count;
+    auto it_outer = sj_outer_exprs.begin();
+    auto it_inner = sj_inner_exprs.begin();
+    for (int i = 0; i < initial_sj_inner_exprs_count; i++) {
+      it_outer++;
+      it_inner++;
+    }
     for (int i = 0; it_outer != sj_outer_exprs.end();
          ++it_outer, ++it_inner, ++i) {
       Item *inner = *it_inner;
@@ -6738,12 +6746,13 @@ bool Query_block::nest_derived(THD *thd, Item *join_cond,
                            return tl->join_cond() == join_cond;
                          });
   assert(it != copy_list.end());  // assert that we found it
-  const size_t idx = it - copy_list.begin();
-
-  // Insert back all outer tables to the inner containing the condition.
-  // Normally only one.
-  for (size_t i = 0; i < idx; i++) {
-    jlist.push_front(copy_list[i]);
+  size_t idx = 0;
+  for (auto tmp = copy_list.begin(); tmp != copy_list.end(); ++tmp) {
+    if (it == tmp) {
+      break;
+    }
+    jlist.push_front(*tmp);
+    idx++;
   }
 
   // Insert the derived table and nest it with the outer(s)
@@ -6854,11 +6863,6 @@ bool Query_block::decorrelate_derived_scalar_subquery_pre(
       if (selected_field == nullptr || f->field != selected_field->field) {
         m_added_non_hidden_fields++;
 
-        // If f->hidden, f should be among the hidden fields in 'fields'.
-        assert(std::any_of(fields.cbegin(), fields.cbegin() + first_non_hidden,
-                           [&f](const Item *item) { return f == item; }) ==
-               f->hidden);
-
         Item_field *inner_field;
 
         if (f->hidden) {
diff --git a/sql/sql_update.cc b/sql/sql_update.cc
index df63eb5181af..1951bc172a01 100644
--- a/sql/sql_update.cc
+++ b/sql/sql_update.cc
@@ -2125,7 +2125,8 @@ static bool AddRowIdAsTempTableField(THD *thd, TABLE *table,
   Item_field *ifield = new (thd->mem_root) Item_field(field);
   if (ifield == nullptr) return true;
   ifield->set_nullable(false);
-  return fields->push_back(ifield);
+  fields->push_back(ifield);
+  return false;
 }
 
 /// Stores the current row ID of "table" in the specified field of "tmp_table".
diff --git a/unittest/gunit/CMakeLists.txt b/unittest/gunit/CMakeLists.txt
index 20dab828001e..55017b478f2b 100644
--- a/unittest/gunit/CMakeLists.txt
+++ b/unittest/gunit/CMakeLists.txt
@@ -142,7 +142,6 @@ SET(TESTS
   like_range
   m_string
   mdl
-  mem_root_deque
   mutex_lock
   my_alloc
   my_bitmap

From 1d7fde617f00d200464feb27ef379ad9a5c97e43 Mon Sep 17 00:00:00 2001
From: Venkatesh Prasad <venkatesh.prasad@percona.com>
Date: Mon, 21 Apr 2025 11:41:06 +0530
Subject: [PATCH 4/5] PS-9647: MySQL Perf Improvements

https://perconadev.atlassian.net/browse/PS-9647

***
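Resolved merge conflicts in include/mem_root_deque.h: dropped the leftover
conflict markers and the stale block-based move constructor/assignment,
keeping the std::list-based constructor.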
---
 include/mem_root_deque.h | 39 ---------------------------------------
 1 file changed, 39 deletions(-)

diff --git a/include/mem_root_deque.h b/include/mem_root_deque.h
index a7014069e650..d20716ecf076 100644
--- a/include/mem_root_deque.h
+++ b/include/mem_root_deque.h
@@ -47,47 +47,8 @@ class mem_root_deque {
     using const_reverse_iterator = typename std::list<T, Mem_root_allocator<T>>::const_reverse_iterator;
 
 
-<<<<<<< HEAD
     explicit mem_root_deque(MEM_ROOT *mem_root)
         : list_(Mem_root_allocator<T>(mem_root)), size_(0) {}
-||||||| 2df90d528b6
-  // Move constructor and assignment.
-  mem_root_deque(mem_root_deque &&other)
-      : m_blocks(other.m_blocks),
-        m_begin_idx(other.m_begin_idx),
-        m_end_idx(other.m_end_idx),
-        m_root(other.m_root) {
-    other.m_blocks = nullptr;
-    other.m_begin_idx = other.m_end_idx = other.m_capacity = 0;
-    other.invalidate_iterators();
-  }
-  mem_root_deque &operator=(mem_root_deque &&other) {
-    if (this != &other) {
-      this->~mem_root_deque();
-      new (this) mem_root_deque(std::move(other));
-    }
-    return *this;
-  }
-=======
-  // Move constructor and assignment.
-  mem_root_deque(mem_root_deque &&other)
-      : m_blocks(other.m_blocks),
-        m_begin_idx(other.m_begin_idx),
-        m_end_idx(other.m_end_idx),
-        m_capacity(other.m_capacity),
-        m_root(other.m_root) {
-    other.m_blocks = nullptr;
-    other.m_begin_idx = other.m_end_idx = other.m_capacity = 0;
-    other.invalidate_iterators();
-  }
-  mem_root_deque &operator=(mem_root_deque &&other) {
-    if (this != &other) {
-      this->~mem_root_deque();
-      new (this) mem_root_deque(std::move(other));
-    }
-    return *this;
-  }
->>>>>>> percona/8.0
 
     mem_root_deque(typename std::list<T, Mem_root_allocator<T>>::size_type count, const T &value, MEM_ROOT *mem_root)
         : list_(count, value, Mem_root_allocator<T>(mem_root)), size_(count) {}

From 3904082a42d2c664e2b5635a1914df8c7bf1de80 Mon Sep 17 00:00:00 2001
From: Venkatesh Prasad <venkatesh.prasad@percona.com>
Date: Wed, 26 Mar 2025 17:19:59 +0530
Subject: [PATCH 5/5] PS-9647: MySQL Perf Improvements

https://perconadev.atlassian.net/browse/PS-9647

***
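Resurrect the mem_root_deque-t.cc unit test (with a matching one-line change
to unittest/gunit/CMakeLists.txt), and reformat include/mem_root_deque.h,
adding a value_type alias.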
---
 include/mem_root_deque.h           | 384 ++++++++++++++---------------
 unittest/gunit/CMakeLists.txt      |   1 +
 unittest/gunit/mem_root_deque-t.cc |  27 +-
 3 files changed, 208 insertions(+), 204 deletions(-)

diff --git a/include/mem_root_deque.h b/include/mem_root_deque.h
index d20716ecf076..468c9588c478 100644
--- a/include/mem_root_deque.h
+++ b/include/mem_root_deque.h
@@ -24,9 +24,9 @@
 #ifndef MEM_ROOT_DEQUE_H
 #define MEM_ROOT_DEQUE_H
 
-#include <list>
-#include <algorithm>
 #include <assert.h>
+#include <algorithm>
+#include <list>
 
 #include "sql/mem_root_allocator.h"
 
@@ -36,232 +36,224 @@
  */
 template <class T>
 class mem_root_deque {
-private:
-    std::list<T, Mem_root_allocator<T>> list_;
-    size_t size_;
+ private:
+  std::list<T, Mem_root_allocator<T>> list_;
+  size_t size_;
 
-public:
-    using iterator = typename std::list<T, Mem_root_allocator<T>>::iterator;
-    using const_iterator = typename std::list<T, Mem_root_allocator<T>>::const_iterator;
-    using reverse_iterator = typename std::list<T, Mem_root_allocator<T>>::reverse_iterator;
-    using const_reverse_iterator = typename std::list<T, Mem_root_allocator<T>>::const_reverse_iterator;
+ public:
+  using iterator = typename std::list<T, Mem_root_allocator<T>>::iterator;
+  using const_iterator =
+      typename std::list<T, Mem_root_allocator<T>>::const_iterator;
+  using reverse_iterator =
+      typename std::list<T, Mem_root_allocator<T>>::reverse_iterator;
+  using const_reverse_iterator =
+      typename std::list<T, Mem_root_allocator<T>>::const_reverse_iterator;
+  using value_type = T;
 
+  explicit mem_root_deque(MEM_ROOT *mem_root)
+      : list_(Mem_root_allocator<T>(mem_root)), size_(0) {}
 
-    explicit mem_root_deque(MEM_ROOT *mem_root)
-        : list_(Mem_root_allocator<T>(mem_root)), size_(0) {}
+  mem_root_deque(typename std::list<T, Mem_root_allocator<T>>::size_type count,
+                 const T &value, MEM_ROOT *mem_root)
+      : list_(count, value, Mem_root_allocator<T>(mem_root)), size_(count) {}
 
-    mem_root_deque(typename std::list<T, Mem_root_allocator<T>>::size_type count, const T &value, MEM_ROOT *mem_root)
-        : list_(count, value, Mem_root_allocator<T>(mem_root)), size_(count) {}
+  mem_root_deque(typename std::list<T, Mem_root_allocator<T>>::size_type count,
+                 MEM_ROOT *mem_root)
+      : list_(count, Mem_root_allocator<T>(mem_root)), size_(count) {}
 
-    mem_root_deque(typename std::list<T, Mem_root_allocator<T>>::size_type count, MEM_ROOT *mem_root)
-        : list_(count, Mem_root_allocator<T>(mem_root)), size_(count) {}
+  template <class InputIt>
+  mem_root_deque(InputIt first, InputIt last, MEM_ROOT *mem_root)
+      : list_(first, last, Mem_root_allocator<T>(mem_root)),
+        size_(list_.size()) {}  // list_ is built first; input-iterator safe
 
-    template <class InputIt>
-    mem_root_deque(InputIt first, InputIt last, MEM_ROOT *mem_root)
-        : list_(first, last, Mem_root_allocator<T>(mem_root)), size_(std::distance(first, last)) {}
+  mem_root_deque(const mem_root_deque &other)
+      : list_(other.list_), size_(other.size_) {}
 
-    mem_root_deque(const mem_root_deque &other)
-        : list_(other.list_), size_(other.size_) {}
+  mem_root_deque(const mem_root_deque &other, MEM_ROOT *mem_root)
+      : list_(other.list_, Mem_root_allocator<T>(mem_root)),
+        size_(other.size_) {}
 
-    mem_root_deque(const mem_root_deque &other, MEM_ROOT *mem_root)
-        : list_(other.list_, Mem_root_allocator<T>(mem_root)), size_(other.size_) {}
+  mem_root_deque(mem_root_deque &&other) noexcept
+      : list_(std::move(other.list_)), size_(other.size_) {
+    other.list_.clear();
+    other.size_ = 0;
+  }
 
-    mem_root_deque(mem_root_deque &&other)
-        : list_(std::move(other.list_)), size_(other.size_) {}
+  mem_root_deque(mem_root_deque &&other, MEM_ROOT *mem_root)
+      : list_(std::move(other.list_), Mem_root_allocator<T>(mem_root)),
+        size_(other.size_) { other.clear(); }  // reset moved-from source
 
-    mem_root_deque(mem_root_deque &&other, MEM_ROOT *mem_root)
-        : list_(std::move(other.list_), Mem_root_allocator<T>(mem_root)), size_(other.size_) {}
+  mem_root_deque(std::initializer_list<T> init, MEM_ROOT *mem_root)
+      : list_(std::move(init), Mem_root_allocator<T>(mem_root)),
+        size_(init.size()) {}
 
-    mem_root_deque(std::initializer_list<T> init, MEM_ROOT *mem_root)
-      : list_(std::move(init), Mem_root_allocator<T>(mem_root)), size_(init.size()) {}
+  inline const_iterator begin() const { return list_.begin(); }
 
-    inline const_iterator begin() const {
-      return list_.begin();
-    }
+  inline const_iterator end() const { return list_.end(); }
 
-    inline const_iterator end() const {
-      return list_.end();
-    }
+  inline iterator begin() { return list_.begin(); }
 
-    inline iterator begin() {
-      return list_.begin();
-    }
+  inline iterator end() { return list_.end(); }
 
-    inline iterator end() {
-      return list_.end();
-    }
+  inline const_iterator cbegin() const { return list_.cbegin(); }
 
-    inline const_iterator cbegin() const {
-        return list_.cbegin();
-    }
-
-    inline const_iterator cend() const {
-        return list_.cend();
-    }
-
-    inline reverse_iterator rbegin() {
-        return list_.rbegin();
-    }
-
-    inline reverse_iterator rend() {
-        return list_.rend();
-    }
+  inline const_iterator cend() const { return list_.cend(); }
 
-    inline const_reverse_iterator rbegin() const {
-        return list_.rbegin();
-    }
+  inline reverse_iterator rbegin() { return list_.rbegin(); }
 
-    inline const_reverse_iterator rend() const {
-        return list_.rend();
-    }
+  inline reverse_iterator rend() { return list_.rend(); }
 
-    inline T& front() {
-      return list_.front();
-    }
+  inline const_reverse_iterator rbegin() const { return list_.rbegin(); }
 
-    inline const T& front() const {
-      return list_.front();
-    }
+  inline const_reverse_iterator rend() const { return list_.rend(); }
 
-    inline T& back() {
-      return list_.back();
-    }
+  inline T &front() { return list_.front(); }
 
-    inline const T& back() const {
-      return list_.back();
-    }
+  inline const T &front() const { return list_.front(); }
 
-    inline bool empty() const {
-      return size_ == 0;
-    }
+  inline T &back() { return list_.back(); }
 
-    inline size_t size() const {
-      assert(list_.size() == size_);
-      return size_;
-    }
+  inline const T &back() const { return list_.back(); }
 
-    inline void push_back(const T &value) {
-        list_.push_back(value);
-        ++size_;
-    }
+  inline bool empty() const { return size_ == 0; }
 
-    inline void push_front(const T &value) {
-        list_.push_front(value);
-        ++size_;
-    }
+  inline size_t size() const {
+    assert(list_.size() == size_);
+    return size_;
+  }
+
+  inline void push_back(const T &value) {
+    list_.push_back(value);
+    ++size_;
+  }
 
-    inline void pop_back() {
-        if (!list_.empty()) {
-            list_.pop_back();
-            --size_;
-        }
-    }
+  inline void push_front(const T &value) {
+    list_.push_front(value);
+    ++size_;
+  }
+
+  inline void pop_back() {
+    if (!list_.empty()) {
+      list_.pop_back();
+      --size_;
+    }
+  }
+
+  inline void pop_front() {
+    if (!list_.empty()) {
+      list_.pop_front();
+      --size_;
+    }
+  }
+
+  inline void clear() {
+    list_.clear();
+    size_ = 0;
+  }
+
+  inline typename std::list<T, Mem_root_allocator<T>>::iterator erase(
+      typename std::list<T, Mem_root_allocator<T>>::iterator position) {
+    --size_;
+    return list_.erase(position);
+  }
 
-    inline void pop_front() {
-        if (!list_.empty()) {
-            list_.pop_front();
-            --size_;
-        }
-    }
+  inline typename std::list<T, Mem_root_allocator<T>>::iterator erase(
+      typename std::list<T, Mem_root_allocator<T>>::iterator first,
+      typename std::list<T, Mem_root_allocator<T>>::iterator last) {
+    size_ -= std::distance(first, last);
+    return list_.erase(first, last);
+  }
 
-    inline void clear() {
-        list_.clear();
-        size_ = 0;
-    }
+  inline typename std::list<T, Mem_root_allocator<T>>::iterator insert(
+      typename std::list<T, Mem_root_allocator<T>>::iterator position,
+      const T &value) {
+    auto it = list_.insert(position, value);
+    ++size_;
+    return it;
+  }
 
-    inline typename std::list<T, Mem_root_allocator<T>>::iterator
-      erase(typename std::list<T, Mem_root_allocator<T>>::iterator position) {
-        --size_;
-        return list_.erase(position);
-    }
+  inline typename std::list<T, Mem_root_allocator<T>>::iterator insert(
+      typename std::list<T, Mem_root_allocator<T>>::iterator position,
+      size_t count, const T &value) {
+    auto it = list_.insert(position, count, value);
+    size_ += count;
+    return it;
+  }
 
-    inline typename std::list<T, Mem_root_allocator<T>>::iterator
-      erase(typename std::list<T, Mem_root_allocator<T>>::iterator first,
-        typename std::list<T, Mem_root_allocator<T>>::iterator last) {
-        size_ -= std::distance(first, last);
-        return list_.erase(first, last);
-    }
+  template <class InputIt>
+  inline typename std::list<T, Mem_root_allocator<T>>::iterator insert(
+      typename std::list<T, Mem_root_allocator<T>>::const_iterator pos,
+      InputIt first, InputIt last) {
+    auto it = list_.insert(pos, first, last);
+    size_ += std::distance(const_iterator(it), pos);  // single-pass safe
+    return it;
+  }
 
-    inline typename std::list<T, Mem_root_allocator<T>>::iterator
-      insert(typename std::list<T, Mem_root_allocator<T>>::iterator position, const T &value) {
-        auto it = list_.insert(position, value);
-        ++size_;
-        return it;
-    }
-
-    inline typename std::list<T, Mem_root_allocator<T>>::iterator
-      insert(typename std::list<T, Mem_root_allocator<T>>::iterator position, size_t count, const T &value) {
-        auto it = list_.insert(position, count, value);
-        size_ += count;
-        return it;
-    }
-
-    template <class InputIt>
-    inline typename std::list<T, Mem_root_allocator<T>>::iterator
-    insert(typename std::list<T, Mem_root_allocator<T>>::const_iterator pos, InputIt first, InputIt last) {
-        auto it = list_.insert(pos, first, last);
-        size_ += std::distance(first, last);
-        return it;
-    }
-
-    inline typename std::list<T, Mem_root_allocator<T>>::iterator find(const T &value) {
-        return std::find(list_.begin(), list_.end(), value);
-    }
-
-    inline bool contains(const T &value) const {
-        return std::find(list_.begin(), list_.end(), value) != list_.end();
-    }
-
-    inline void sort() {
-        list_.sort();
-    }
-
-    inline void reverse() {
-        list_.reverse();
-    }
-
-    inline void merge(mem_root_deque &other) {
-        list_.merge(other.list_);
-        size_ += other.size_;
-        other.size_ = 0;
-    }
-
-    inline void splice(typename std::list<T, Mem_root_allocator<T>>::iterator pos, mem_root_deque &other) {
-        list_.splice(pos, other.list_);
-        size_ += other.size_;
-        other.size_ = 0;
-    }
-
-    inline void splice(typename std::list<T, Mem_root_allocator<T>>::iterator pos,
-        mem_root_deque &other, typename std::list<T, Mem_root_allocator<T>>::iterator it) {
-        list_.splice(pos, other.list_, it);
-        --other.size_;
-        ++size_;
-    }
-
-    inline void unique() {
-        list_.unique();
-    }
-
-    inline void remove(const T &value) {
-        size_ -= std::count(list_.begin(), list_.end(), value);
-        list_.remove(value);
-    }
-
-    inline T& operator[](size_t index) {
-      auto it = list_.begin();
-      std::advance(it, index);  // Advance the iterator by index
-      return *it;  // Return the element at the given index
-    }
-
-    inline const T& operator[](size_t index) const {
-      auto it = list_.cbegin();
-      std::advance(it, index);  // Advance the const iterator by index
-      return *it;  // Return the element at the given index
-    }
-
-    inline mem_root_deque &operator=(const mem_root_deque &arg) = default;
+  inline typename std::list<T, Mem_root_allocator<T>>::iterator find(
+      const T &value) {
+    return std::find(list_.begin(), list_.end(), value);
+  }
+
+  inline bool contains(const T &value) const {
+    return std::find(list_.begin(), list_.end(), value) != list_.end();
+  }
+
+  inline void sort() { list_.sort(); }
+
+  inline void reverse() { list_.reverse(); }
+
+  inline void merge(mem_root_deque &other) {
+    list_.merge(other.list_);
+    size_ = list_.size();              // recount: self-merge is a no-op
+    other.size_ = other.list_.size();  // 0 unless other is *this
+  }
+
+  inline void splice(typename std::list<T, Mem_root_allocator<T>>::iterator pos,
+                     mem_root_deque &other) {
+    list_.splice(pos, other.list_);
+    size_ += other.size_;
+    other.size_ = 0;
+  }
+
+  inline void splice(
+      typename std::list<T, Mem_root_allocator<T>>::iterator pos,
+      mem_root_deque &other,
+      typename std::list<T, Mem_root_allocator<T>>::iterator it) {
+    list_.splice(pos, other.list_, it);
+    --other.size_;
+    ++size_;
+  }
+
+  inline void unique() { list_.unique(); size_ = list_.size(); }
+
+  inline void remove(const T &value) {
+    size_ -= std::count(list_.begin(), list_.end(), value);
+    list_.remove(value);
+  }
+
+  inline T &operator[](size_t index) {
+    auto it = list_.begin();
+    std::advance(it, index);  // Advance the iterator by index
+    return *it;               // Return the element at the given index
+  }
+
+  inline const T &operator[](size_t index) const {
+    auto it = list_.cbegin();
+    std::advance(it, index);  // Advance the const iterator by index
+    return *it;               // Return the element at the given index
+  }
+
+  inline mem_root_deque &operator=(mem_root_deque &&other) noexcept {
+    if (this != &other) {
+      list_ = std::move(other.list_);
+      size_ = other.size_;
+      other.list_.clear();  // Clear the list in the source object
+      other.size_ = 0;      // Leave the source object in an empty state
+    }
+    return *this;
+  }
+
+  inline mem_root_deque &operator=(const mem_root_deque &arg) = default;
 };
 
 #endif  // MEM_ROOT_DEQUE_H
-
diff --git a/unittest/gunit/CMakeLists.txt b/unittest/gunit/CMakeLists.txt
index 8f66807e9b2e..781bcc391985 100644
--- a/unittest/gunit/CMakeLists.txt
+++ b/unittest/gunit/CMakeLists.txt
@@ -142,6 +142,7 @@ SET(TESTS
   like_range
   m_string
   mdl
+  mem_root_deque
   mutex_lock
   my_alloc
   my_bitmap
diff --git a/unittest/gunit/mem_root_deque-t.cc b/unittest/gunit/mem_root_deque-t.cc
index fd3e16415e26..40d41c123de8 100644
--- a/unittest/gunit/mem_root_deque-t.cc
+++ b/unittest/gunit/mem_root_deque-t.cc
@@ -26,6 +26,7 @@
 #include <gmock/gmock.h>
 #include <gtest/gtest.h>
 #include <deque>
+#include <iterator>
 
 #include "mem_root_deque.h"
 #include "my_alloc.h"
@@ -83,20 +84,30 @@ TEST(MemRootDequeTest, EraseInsert) {
   d.push_back(4);
   d.push_back(5);
 
-  auto it = d.erase(d.begin() + 1, d.begin() + 3);
+  auto it = d.erase(std::next(d.begin(), 1), std::next(d.begin(), 3));
   EXPECT_THAT(d, ElementsAre(1, 4, 5));
 
   int new_elems[] = {200, 300, 400, 500};
   it = d.insert(it, std::begin(new_elems), std::end(new_elems));
   EXPECT_THAT(d, ElementsAre(1, 200, 300, 400, 500, 4, 5));
-  EXPECT_EQ(it, d.begin() + 1);
+  EXPECT_EQ(it, std::next(d.begin(), 1));
 
-  it = d.insert(d.begin() + 3, 350);
+  it = d.insert(std::next(d.begin(), 3), 350);
   EXPECT_THAT(d, ElementsAre(1, 200, 300, 350, 400, 500, 4, 5));
-  EXPECT_EQ(it, d.begin() + 3);
+  EXPECT_EQ(it, std::next(d.begin(), 3));
   EXPECT_EQ(350, *it);
 }
 
+/**
+   In the new implementation, mem_root_deque uses `std::list` internally, which
+   provides bidirectional iterators. Bidirectional iterators do not support
+   operations like `operator-`, which `std::sort` relies on.
+
+   So, we cannot use `std::sort` directly on mem_root_deque.
+
+   Instead, we may directly call mem_root_deque.sort()
+*/
+
 TEST(MemRootDequeTest, Sort) {
   MEM_ROOT mem_root;
   mem_root_deque<std::string> d(&mem_root);
@@ -107,7 +118,7 @@ TEST(MemRootDequeTest, Sort) {
   d.push_back("12345");
   d.push_back("hello");
 
-  std::sort(d.begin(), d.end());
+  d.sort();
 
   EXPECT_THAT(
       d, ElementsAre("12345", "a", "hello", "x", "zzzzzzzzzzzzzzzzzzzzzz"));
@@ -151,13 +162,13 @@ TEST(MemRootDequeTest, Iteration) {
   EXPECT_EQ(1, *it++);
   EXPECT_EQ(2, *it++);
   EXPECT_EQ(4, *++it);
-  it -= 2;
+  std::advance(it, -2);
   EXPECT_EQ(2, *it);
-  it += 2;
+  std::advance(it, 2);
   EXPECT_EQ(4, *it--);
   EXPECT_EQ(3, *it--);
   EXPECT_EQ(1, *--it);
-  EXPECT_EQ(d.end(), it + 5);
+  EXPECT_EQ(d.end(), std::next(it, 5));
 }
 
 TEST(MemRootDequeTest, OperatorArrow) {