Skip to content

Commit 58c71ff

Browse files
committed
BUG#37331118 Applier prepare error retry [compare_error]
The replication applier normally retries temporary errors occurring while applying transactions. Such retry logic is not performed for transactions containing row events where the STMT_END_F flag is missing. For such events, the statement will instead be committed as an additional step while applying the subsequent COMMIT query event when it is detected that there are still locked tables. When committing the statement, temporary errors are not handled properly. This second patch fixes the handling of statement commit failure to compare the returned error code in the same way as other failures. This means that rather than attempting to commit the entire transaction (and encountering an error indicating it has been rolled back), the applier will now report the error and properly retry it as many times as configured by the transaction retry settings. In addition, the failure analysis capabilities are improved by extending the error reporting to include all the warnings from the failed statement commit, this is done in the same format and style as when failing to apply row log events. The effect of this change can be seen in the test implemented by the prior patch: - the error code that stops replication is now the error from failing to commit the statement. Its also the same error code as when same error occurs without missing STMT_END_F. - the transaction retry attempt counter shows that the transaction was retried several times. Each retry attempt output two lines to the MySQL Server log files, first line indicates that error occured when committing statement and includes all warnings, the second line only contains the actual error: <date> [Warning] [MY-010584] [Repl] Replica SQL for channel '': Worker 1 failed executing transaction 'ANONYMOUS' at source log binlog.000002, end_log_pos 711; Error in cleaning up after an event preceding the commit; Got temporary error 266 'Time-out in NDB, probably caused by deadlock' from NDB, Error_code: 1297; Lock wait timeout exceeded; try restarting transaction, Error_code: 1205; Got error 146 - 'Lock timed out; Retry transaction' during COMMIT, Error_code: 1180; the group log file/position: binlog.000002 0, Error_code: MY-000001 <date> [Warning] [MY-010584] [Repl] Replica SQL for channel '': Worker 1 failed executing transaction 'ANONYMOUS' at source log binlog.000002, end_log_pos 711; Error 'Lock wait timeout exceeded; try restarting transaction' on query. Default database: ''. Query: 'COMMIT', Error_code: MY-001205 Change-Id: I47337b8ae0841976219f70aa94d2861dd7b1ed6f
1 parent ac6d59c commit 58c71ff

File tree

3 files changed

+23
-11
lines changed

3 files changed

+23
-11
lines changed

mysql-test/suite/ndb_rpl/r/mta_prepare_error_retry.result

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -79,17 +79,17 @@ CHANGE REPLICATION SOURCE TO
7979
IGNORE_SERVER_IDS = (<last_server_id>);
8080
include/start_slave.inc
8181
# Wait for replication error
82-
include/wait_for_slave_sql_error.inc [errno=1180]
82+
include/wait_for_slave_sql_error.inc [errno=1205]
8383
SELECT LAST_ERROR_NUMBER, LAST_ERROR_MESSAGE,
8484
APPLYING_TRANSACTION_RETRIES_COUNT
8585
FROM performance_schema.replication_applier_status_by_worker
8686
WHERE LAST_ERROR_NUMBER != 0;
87-
LAST_ERROR_NUMBER 1180
88-
LAST_ERROR_MESSAGE Worker NNN failed executing transaction 'ANONYMOUS' at source log master-bin.000002, end_log_pos NNN; Error 'Got error 125 - 'Transaction has been rolled back' during COMMIT' on query. Default database: ''. Query: 'COMMIT'
89-
APPLYING_TRANSACTION_RETRIES_COUNT 0
87+
LAST_ERROR_NUMBER 1205
88+
LAST_ERROR_MESSAGE Worker NNN failed executing transaction 'ANONYMOUS' at source log master-bin.000002, end_log_pos NNN; Lock wait timeout exceeded; try restarting transaction
89+
APPLYING_TRANSACTION_RETRIES_COUNT 3
9090
SELECT LAST_ERROR_NUMBER, LAST_ERROR_MESSAGE
9191
FROM performance_schema.replication_applier_status_by_coordinator;
92-
LAST_ERROR_NUMBER 1180
92+
LAST_ERROR_NUMBER 1205
9393
LAST_ERROR_MESSAGE Coordinator stopped because there were error(s) in the worker(s). The most recent failure being: Worker NNN failed executing transaction 'ANONYMOUS' at source log master-bin.000002, end_log_pos NNN. See error log and/or performance_schema.replication_applier_status_by_worker table for more details about this failure or others, if any.
9494
SELECT id, what FROM test_multi_server_id ORDER BY id;
9595
id what

mysql-test/suite/ndb_rpl/t/mta_prepare_error_retry.test

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ eval CHANGE REPLICATION SOURCE TO
123123
--source include/start_slave.inc
124124

125125
--echo # Wait for replication error
126-
let $slave_sql_errno= 1180;
126+
let $slave_sql_errno= 1205;
127127
--source include/wait_for_slave_sql_error.inc
128128

129129
--replace_regex /end_log_pos [0-9]*/end_log_pos NNN/ /Worker [0-9]* failed/Worker NNN failed/

sql/log_event.cc

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4452,7 +4452,9 @@ static bool is_silent_error(THD *thd) {
44524452
int Query_log_event::do_apply_event(Relay_log_info const *rli,
44534453
const char *query_arg, size_t q_len_arg) {
44544454
DBUG_TRACE;
4455-
int expected_error, actual_error = 0;
4455+
DBUG_EXECUTE_IF("simulate_error_in_ddl", error_code = 1051;);
4456+
const int expected_error = error_code;
4457+
int actual_error = 0;
44564458
auto post_filters_actions_guard = create_scope_guard(
44574459
[&]() { thd->rpl_thd_ctx.post_filters_actions().clear(); });
44584460

@@ -4531,14 +4533,26 @@ int Query_log_event::do_apply_event(Relay_log_info const *rli,
45314533
char llbuff[22];
45324534
if ((error =
45334535
rows_event_stmt_cleanup(const_cast<Relay_log_info *>(rli), thd))) {
4536+
char buff[MAX_SLAVE_ERRMSG]{0};
4537+
const char *buff_end = buff + sizeof(buff);
4538+
char *slider = buff;
4539+
Diagnostics_area::Sql_condition_iterator it =
4540+
thd->get_stmt_da()->sql_conditions();
4541+
for (const Sql_condition *err = it++;
4542+
err != nullptr && slider < buff_end - 1; err = it++) {
4543+
slider += snprintf(slider, buff_end - slider, " %s, Error_code: %d;",
4544+
err->message_text(), err->mysql_errno());
4545+
}
45344546
const_cast<Relay_log_info *>(rli)->report(
45354547
ERROR_LEVEL, error,
45364548
"Error in cleaning up after an event preceding the commit; "
4537-
"the group log file/position: %s %s",
4549+
"%s the group log file/position: %s %s",
4550+
buff,
45384551
const_cast<Relay_log_info *>(rli)->get_group_master_log_name_info(),
45394552
llstr(const_cast<Relay_log_info *>(rli)
45404553
->get_group_master_log_pos_info(),
45414554
llbuff));
4555+
goto compare_errors;
45424556
}
45434557
/*
45444558
Executing a part of rli->stmt_done() logics that does not deal
@@ -4581,9 +4595,7 @@ int Query_log_event::do_apply_event(Relay_log_info const *rli,
45814595
thd->set_query_id(next_query_id());
45824596
DBUG_PRINT("query", ("%s", thd->query().str));
45834597

4584-
DBUG_EXECUTE_IF("simulate_error_in_ddl", error_code = 1051;);
4585-
4586-
if (ignored_error_code((expected_error = error_code)) ||
4598+
if (ignored_error_code((expected_error)) ||
45874599
!unexpected_error_code(expected_error)) {
45884600
if (flags2_inited)
45894601
/*

0 commit comments

Comments
 (0)