PS-9391 Fixes replication break error because of HASH_SCAN

VarunNagaraju · VarunNagaraju · commit 1f26d0806136 · 2025-01-20T15:17:13.000+05:30
https://perconadev.atlassian.net/browse/PS-9391 Problem ======= When a replica's slave_rows_search_algorithms is set to HASH_SCAN, the replication may break with HA_ERR_KEY_NOT_FOUND. Analysis ======== When a replica's slave_rows_search_algorithms is set to HASH_SCAN, it prepares a unique key list for all the rows in a particular Row_event. The same unique key list will later be used to retrieve all tuples associated to each key in the list from storage engine. In the case of multiple updates targeted at the same row like how it is shown in the testcase, it may happen that this unique key list filled with entries which don't exist yet in the table. This is a problem when there is an intermediate update which changes the value of the index column to a lesser value than the original entry and that changed value is used in another update as shown in the second part of the testcase. It is an issue because the unique key list is a std::set which internally sorts it's entries. When this sorting happens, the first entry of the list could potentially be a value which doesn't exist in the table and the when it is searched in next_record_scan() method, it fails returning HA_ERR_KEY_NOT_FOUND error. Solution ======== Instead of using std::set to store the distinct keys, a combination of unordered_set and a list is used to preserve the original order of updates and avoid duplicates at the same time which prevents the side effects of sorting.
diff --git a/mysql-test/suite/rpl/r/rpl_row_multi_update_of_same_row_sort_before_apply.result b/mysql-test/suite/rpl/r/rpl_row_multi_update_of_same_row_sort_before_apply.result
@@ -0,0 +1,55 @@
+include/master-slave.inc
+Warnings:
+Note	####	Sending passwords in plain text without SSL/TLS is extremely insecure.
+Note	####	Storing MySQL user name or password information in the connection metadata repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START REPLICA; see the 'START REPLICA Syntax' in the MySQL Manual for more information.
+[connection master]
+[connection slave]
+set @prior_slave_rows_search_algorithms=@@global.slave_rows_search_algorithms;
+Warnings:
+Warning	1287	'@@slave_rows_search_algorithms' is deprecated and will be removed in a future release.
+set @@global.slave_rows_search_algorithms= 'HASH_SCAN';
+Warnings:
+Warning	1287	'@@slave_rows_search_algorithms' is deprecated and will be removed in a future release.
+[connection master]
+CREATE TABLE t1 (i INT, INDEX t1_i(i));
+CREATE FUNCTION f1 () RETURNS INT BEGIN
+UPDATE t1 SET i = 11 WHERE i = 10;
+UPDATE t1 SET i = 12 WHERE i = 11;
+RETURN 0;
+END|
+include/sync_slave_sql_with_master.inc
+[connection master]
+INSERT INTO t1 VALUES (10);
+SELECT f1();
+f1()
+0
+include/sync_slave_sql_with_master.inc
+include/assert.inc ['There is only one row in table t1']
+[connection master]
+include/diff_tables.inc [master:test.t1, slave:test.t1]
+CREATE FUNCTION f2 () RETURNS INT BEGIN
+UPDATE t1 SET i = 11 WHERE i = 12;
+UPDATE t1 SET i = 13 WHERE i = 11;
+RETURN 0;
+END|
+include/sync_slave_sql_with_master.inc
+[connection master]
+SELECT f2();
+f2()
+0
+include/sync_slave_sql_with_master.inc
+include/assert.inc ['There is only one row in table t1']
+[connection master]
+include/diff_tables.inc [master:test.t1, slave:test.t1]
+[connection master]
+SELECT * FROM t1;
+i
+13
+DROP FUNCTION f1;
+DROP FUNCTION f2;
+DROP TABLE t1;
+[connection slave]
+set @@global.slave_rows_search_algorithms= @prior_slave_rows_search_algorithms;
+Warnings:
+Warning	1287	'@@slave_rows_search_algorithms' is deprecated and will be removed in a future release.
+include/rpl_end.inc
diff --git a/mysql-test/suite/rpl/t/rpl_row_multi_update_of_same_row_sort_before_apply.test b/mysql-test/suite/rpl/t/rpl_row_multi_update_of_same_row_sort_before_apply.test
@@ -0,0 +1,84 @@
+# Bug#115741 Test whether setting slave_rows_search_algorithms to HASH_SCAN works properly
+# when multiple updates are targeted to the same row within a single update rows log event
+
+--source include/have_binlog_format_row.inc
+--source include/set_privilege_checks_user_as_system_user.inc
+--source include/master-slave.inc
+
+--source include/rpl_connection_slave.inc
+set @prior_slave_rows_search_algorithms=@@global.slave_rows_search_algorithms;
+set @@global.slave_rows_search_algorithms= 'HASH_SCAN';
+
+# Scenarios considered in this test case involve a table with no primary key but an index on the column
+# When slave_rows_search_algorithms is set to HASH_SCAN, a list "m_distinct_keys" is created internally
+# to store the unique key values which, later will be used to perform a table scan to apply the events
+# for the rows present in the table.
+--source include/rpl_connection_master.inc
+# Create a table, with no primary key and an index.
+CREATE TABLE t1 (i INT, INDEX t1_i(i));
+
+# Scenario no.1
+# Sorting the elements of m_distinct_keys doesn't alter the original order of updates
+# Create a stored function so that only one Update_rows_log_event is generated.
+--delimiter |
+CREATE FUNCTION f1 () RETURNS INT BEGIN
+  UPDATE t1 SET i = 11 WHERE i = 10;
+  UPDATE t1 SET i = 12 WHERE i = 11;
+  RETURN 0;
+END|
+--delimiter ;
+--source include/sync_slave_sql_with_master.inc
+
+--source include/rpl_connection_master.inc
+INSERT INTO t1 VALUES (10);
+SELECT f1();
+
+--source include/sync_slave_sql_with_master.inc
+--let $assert_text= 'There is only one row in table t1'
+--let $assert_cond= [SELECT COUNT(*) FROM t1] = 1
+--source include/assert.inc
+
+--source include/rpl_connection_master.inc
+
+# Verify that there is no difference between tables of master and slave.
+--let diff_tables=master:test.t1, slave:test.t1
+--source include/diff_tables.inc
+
+
+# Scenario no.2
+# Sorting the elements of m_distinct_keys ALTERs the original order of updates causing
+# the case where the key value from the list doesn't exist in the table yet until other
+# UPDATEs are executed.
+--delimiter |
+CREATE FUNCTION f2 () RETURNS INT BEGIN
+  UPDATE t1 SET i = 11 WHERE i = 12;
+  UPDATE t1 SET i = 13 WHERE i = 11;
+  RETURN 0;
+END|
+--delimiter ;
+--source include/sync_slave_sql_with_master.inc
+
+--source include/rpl_connection_master.inc
+SELECT f2();
+
+--source include/sync_slave_sql_with_master.inc
+--let $assert_text= 'There is only one row in table t1'
+--let $assert_cond= [SELECT COUNT(*) FROM t1] = 1
+--source include/assert.inc
+
+--source include/rpl_connection_master.inc
+
+# Verify that there is no difference between tables of master and slave.
+--let diff_tables=master:test.t1, slave:test.t1
+--source include/diff_tables.inc
+
+# Cleanup
+--source include/rpl_connection_master.inc
+SELECT * FROM t1;
+DROP FUNCTION f1;
+DROP FUNCTION f2;
+DROP TABLE t1;
+
+--source include/rpl_connection_slave.inc
+set @@global.slave_rows_search_algorithms= @prior_slave_rows_search_algorithms;
+--source include/rpl_end.inc
diff --git a/sql/log_event.cc b/sql/log_event.cc
@@ -7870,7 +7870,7 @@ Rows_log_event::Rows_log_event(THD *thd_arg, TABLE *tbl_arg,
       m_curr_row_end(nullptr),
       m_key(nullptr),
       m_key_info(nullptr),
-      m_distinct_keys(Key_compare(&m_key_info)),
+      m_distinct_keys(0, Key_hash(&m_key_info), Key_equal(&m_key_info)),
       m_distinct_key_spare_buf(nullptr) {
   DBUG_TRACE;
   common_header->type_code = event_type;
@@ -7961,7 +7961,7 @@ Rows_log_event::Rows_log_event(
       m_curr_row_end(nullptr),
       m_key(nullptr),
       m_key_info(nullptr),
-      m_distinct_keys(Key_compare(&m_key_info)),
+      m_distinct_keys(0, Key_hash(&m_key_info), Key_equal(&m_key_info)),
       m_distinct_key_spare_buf(nullptr)
 #endif
 {
@@ -9052,9 +9052,9 @@ int Rows_log_event::next_record_scan(bool first_read) {
           marker according to the next key value that we have in the list.
          */
         if (error) {
-          if (m_itr != m_distinct_keys.end()) {
-            m_key = *m_itr;
-            m_itr++;
+          if (m_distinct_key_idx != m_distinct_keys_original_order.size()) {
+            m_key = m_distinct_keys_original_order[m_distinct_key_idx];
+            m_distinct_key_idx++;
             first_read = true;
           } else {
             if (!is_trx_retryable_upon_engine_error(error))
@@ -9088,14 +9088,13 @@ int Rows_log_event::open_record_scan() {
 
   if (m_key_index < MAX_KEY) {
     if (m_rows_lookup_algorithm == ROW_LOOKUP_HASH_SCAN) {
-      /* initialize the iterator over the list of distinct keys that we have */
-      m_itr = m_distinct_keys.begin();
-
-      /* get the first element from the list of keys and increment the
-         iterator
+      /* Initialize the index to go over the vector of distinct keys */
+      m_distinct_key_idx = 0;
+      /* get the first element from the vector of keys and increment the
+         index
        */
-      m_key = *m_itr;
-      m_itr++;
+      m_key = m_distinct_keys_original_order[m_distinct_key_idx];
+      m_distinct_key_idx++;
     } else {
       /* this is an INDEX_SCAN we need to store the key in m_key */
       assert((m_rows_lookup_algorithm == ROW_LOOKUP_INDEX_SCAN) && m_key);
@@ -9142,9 +9141,9 @@ int Rows_log_event::add_key_to_distinct_keyset() {
   DBUG_TRACE;
   assert(m_key_index < MAX_KEY);
   key_copy(m_distinct_key_spare_buf, m_table->record[0], m_key_info, 0);
-  std::pair<std::set<uchar *, Key_compare>::iterator, bool> ret =
-      m_distinct_keys.insert(m_distinct_key_spare_buf);
+  auto ret = m_distinct_keys.insert(m_distinct_key_spare_buf);
   if (ret.second) {
+    m_distinct_keys_original_order.push_back(m_distinct_key_spare_buf);
     /* Insert is successful, so allocate a new buffer for next key */
     m_distinct_key_spare_buf = (uchar *)thd->alloc(m_key_info->key_length);
     if (!m_distinct_key_spare_buf) {
diff --git a/sql/log_event.h b/sql/log_event.h
@@ -39,9 +39,9 @@
 #include <functional>
 #include <list>
 #include <map>
-#include <set>
 #include <string>
 #include <string_view>
+#include <unordered_set>
 
 #include "my_aes.h"
 #include "libbinlogevents/include/binlog_event.h"
@@ -2882,30 +2882,51 @@ class Rows_log_event : public virtual binary_log::Rows_event, public Log_event {
   uchar *m_key;                /* Buffer to keep key value during searches */
   uint m_key_index;
   KEY *m_key_info; /* Points to description of index #m_key_index */
-  class Key_compare {
+  class Key_equal {
    public:
     /**
        @param  ki  Where to find KEY description
        @note m_distinct_keys is instantiated when Rows_log_event is constructed;
-       it stores a Key_compare object internally. However at that moment, the
+       it stores a Key_equal object internally. However at that moment, the
        index (KEY*) to use for comparisons, is not yet known. So, at
-       instantiation, we indicate the Key_compare the place where it can
+       instantiation, we indicate the Key_equal the place where it can
        find the KEY* when needed (this place is Rows_log_event::m_key_info),
-       Key_compare remembers the place in member m_key_info.
+       Key_equal remembers the place in member m_key_info.
        Before we need to do comparisons - i.e. before we need to insert
        elements, we update Rows_log_event::m_key_info once for all.
     */
-    Key_compare(KEY **ki = nullptr) : m_key_info(ki) {}
+    Key_equal(KEY **ki = nullptr) : m_key_info(ki) {}
     bool operator()(uchar *k1, uchar *k2) const {
       return key_cmp2((*m_key_info)->key_part, k1, (*m_key_info)->key_length,
-                      k2, (*m_key_info)->key_length) < 0;
+                      k2, (*m_key_info)->key_length) == 0;
     }
 
    private:
     KEY **m_key_info;
   };
-  std::set<uchar *, Key_compare> m_distinct_keys;
-  std::set<uchar *, Key_compare>::iterator m_itr;
+  class Key_hash {
+   public:
+    Key_hash(KEY **ki = nullptr) : m_key_info(ki) {}
+    size_t operator()(uchar *ptr) const {
+      size_t hash = 0;
+      if (ptr) {
+        std::string_view sv{reinterpret_cast<const char *>(ptr),
+                            (*m_key_info)->key_length};
+        return std::hash<std::string_view>{}(sv);
+      }
+      return hash;
+    }
+
+   private:
+    KEY **m_key_info;
+  };
+  std::unordered_set<uchar *, Key_hash, Key_equal> m_distinct_keys;
+
+  /**
+    A linear list to store the distinct keys preserving the insert order
+  */
+  std::vector<uchar *> m_distinct_keys_original_order;
+  std::size_t m_distinct_key_idx = 0;
   /**
     A spare buffer which will be used when saving the distinct keys
     for doing an index scan with HASH_SCAN search algorithm.