Skip to content

Commit 05658fd

Browse files
committed
Revert "[storage/innobase] PS-9214 : Alter table online results in "duplicate key" error on the primary key (only index)."
This reverts commit f9aeac3.
1 parent f285246 commit 05658fd

File tree

3 files changed

+14
-166
lines changed

3 files changed

+14
-166
lines changed

mysql-test/suite/innodb/r/percona_alter_debug.result

Lines changed: 0 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -31,27 +31,3 @@ connection default;
3131
# Cleanup.
3232
SET DEBUG_SYNC= 'RESET';
3333
DROP TABLE t1;
34-
#
35-
# Test for PS-9214 : Alter table online results in "duplicate key"
36-
# error on the primary key (only index).
37-
#
38-
CREATE TABLE t1 (pk CHAR(5) PRIMARY KEY);
39-
INSERT INTO t1 VALUES ('aaaaa'), ('bbbbb'), ('ccccc'), ('ddddd'), ('eeeee');
40-
connect con1, localhost, root,,;
41-
SET DEBUG='+d,ddl_buf_add_two';
42-
SET DEBUG_SYNC='ddl_bulk_inserter_latches_released SIGNAL latches_released WAIT_FOR go';
43-
# Send ALTER TABLE INPLACE which rebuilds table.
44-
ALTER TABLE t1 ENGINE=InnoDB, ALGORITHM=INPLACE;
45-
connection default;
46-
SET DEBUG_SYNC='now WAIT_FOR latches_released';
47-
INSERT INTO t1 VALUES ('ccaaa');
48-
SET DEBUG_SYNC='now SIGNAL go';
49-
connection con1;
50-
# Reap ALTER TABLE
51-
# Before fix it failed with duplicate key error.
52-
SET DEBUG='-d,ddl_buf_add_two';
53-
disconnect con1;
54-
connection default;
55-
# Cleanup.
56-
SET DEBUG_SYNC= 'RESET';
57-
DROP TABLE t1;
Lines changed: 1 addition & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
# Tests in this file use both debug-only facilities as well as debug sync point.
22
--source include/have_debug.inc
33
--source include/have_debug_sync.inc
4-
--enable_connect_log
54

65
--echo #
76
--echo # Test for PS-9144 : Missing rows after ALGORITHM=INPLACE ALTER under
87
--echo # same workload as PS-9092.
98
--echo #
9+
--enable_connect_log
1010
CREATE TABLE t1 (pk CHAR(5) PRIMARY KEY);
1111
INSERT INTO t1 VALUES ('aaaaa'), ('bbbbb'), ('bbbcc'), ('ccccc'), ('ddddd'), ('eeeee');
1212

@@ -41,38 +41,4 @@ SET DEBUG='-d,ddl_buf_add_two';
4141
--echo # Cleanup.
4242
SET DEBUG_SYNC= 'RESET';
4343
DROP TABLE t1;
44-
45-
--echo #
46-
--echo # Test for PS-9214 : Alter table online results in "duplicate key"
47-
--echo # error on the primary key (only index).
48-
--echo #
49-
CREATE TABLE t1 (pk CHAR(5) PRIMARY KEY);
50-
INSERT INTO t1 VALUES ('aaaaa'), ('bbbbb'), ('ccccc'), ('ddddd'), ('eeeee');
51-
52-
--connect (con1, localhost, root,,)
53-
SET DEBUG='+d,ddl_buf_add_two';
54-
SET DEBUG_SYNC='ddl_bulk_inserter_latches_released SIGNAL latches_released WAIT_FOR go';
55-
--echo # Send ALTER TABLE INPLACE which rebuilds table.
56-
--send ALTER TABLE t1 ENGINE=InnoDB, ALGORITHM=INPLACE
57-
58-
--connection default
59-
SET DEBUG_SYNC='now WAIT_FOR latches_released';
60-
INSERT INTO t1 VALUES ('ccaaa');
61-
SET DEBUG_SYNC='now SIGNAL go';
62-
63-
--connection con1
64-
--echo # Reap ALTER TABLE
65-
--echo # Before fix it failed with duplicate key error.
66-
--reap
67-
68-
SET DEBUG='-d,ddl_buf_add_two';
69-
70-
--disconnect con1
71-
--source include/wait_until_disconnected.inc
72-
73-
--connection default
74-
--echo # Cleanup.
75-
SET DEBUG_SYNC= 'RESET';
76-
DROP TABLE t1;
77-
7844
--disable_connect_log

storage/innobase/row/row0pread.cc

Lines changed: 13 additions & 107 deletions
Original file line numberDiff line numberDiff line change
@@ -252,13 +252,12 @@ class PCursor {
252252
return m_pcur->get_page_cur();
253253
}
254254

255-
/** Restore from a saved position. Sets position the next user record
256-
in the tree if it is cursor pointing to supremum that was saved.
255+
/** Restore from a saved position.
257256
@return DB_SUCCESS or error code. */
258257
[[nodiscard]] dberr_t restore_from_savepoint() noexcept;
259258

260-
/** Move to the first user rec on the next page. */
261-
[[nodiscard]] dberr_t move_to_next_block_user_rec() noexcept;
259+
/** Move to the first user rec on the restored page. */
260+
[[nodiscard]] dberr_t move_to_user_rec() noexcept;
262261

263262
/** @return true if cursor is after last on page. */
264263
[[nodiscard]] bool is_after_last_on_page() const noexcept {
@@ -278,9 +277,6 @@ class PCursor {
278277
/** Level where the cursor is positioned or need to be positioned in case of
279278
restore. */
280279
size_t m_read_level{};
281-
282-
/** Indicates that savepoint created for cursor corresponds to supremum. */
283-
bool m_supremum_saved{};
284280
};
285281

286282
buf_block_t *Parallel_reader::Scan_ctx::block_get_s_latched(
@@ -298,42 +294,9 @@ buf_block_t *Parallel_reader::Scan_ctx::block_get_s_latched(
298294
}
299295

300296
void PCursor::savepoint() noexcept {
301-
/* The below code will not work correctly in all cases if we are to take
302-
savepoint of cursor pointing to infimum.
303-
304-
If savepoint is taken for such a cursor, in optimistic restore case we
305-
might not detect situation when after mini-trancation commit/latches
306-
release concurrent merge from the previous page or concurrent update of
307-
the last record on the previous page, which doesn't fit into it, has
308-
moved records over infimum (see MySQL bugs #114248 and #115518).
309-
As result scan will revisit these records, which can cause, for example,
310-
ALTER TABLE failing with a duplicate key error.
311-
312-
Luckily, at the moment, savepoint is taken only when cursor points
313-
to a user record (when we do save/restore from Parallel_reader::Ctx::m_f
314-
callback which processes records) or to the supremum (when we do save/
315-
restore from Parallel_reader::m_finish_callback end-of-page callback or
316-
before switching to next page in move_to_next_block()). */
317-
ut_ad(!m_pcur->is_before_first_on_page());
318-
319-
/* If we are taking savepoint for cursor pointing to supremum we are doing
320-
one step back. This is necessary to prevent situation when concurrent
321-
merge from the next page, which happens after we commit mini-transaction/
322-
release latches moves records over supremum to the current page.
323-
In this case the optimistic restore of cursor pointing to supremum will
324-
result in cursor pointing to supremum, which means moved records will
325-
be incorrectly skipped by the scan. */
326-
m_supremum_saved = m_pcur->is_after_last_on_page();
327-
if (m_supremum_saved) {
328-
m_pcur->move_to_prev_on_page();
329-
// Only root page can be empty and we are not supposed to get here
330-
// in this case.
331-
ut_ad(!m_pcur->is_before_first_on_page());
332-
}
297+
/* Store the cursor position on the previous user record on the page. */
298+
m_pcur->move_to_prev_on_page();
333299

334-
/* Thanks to the above we can say that we are saving position of last user
335-
record which have processed/ which corresponds to last key value we have
336-
processed. */
337300
m_pcur->store_position(m_mtr);
338301

339302
m_mtr->commit();
@@ -345,53 +308,17 @@ void PCursor::resume() noexcept {
345308
m_mtr->set_log_mode(MTR_LOG_NO_REDO);
346309

347310
/* Restore position on the record, or its predecessor if the record
348-
was purged meanwhile. In extreme case this can be infimum record of
349-
the first page in the tree. However, this can never be a supremum
350-
record on a page, since we always move cursor one step back when
351-
savepoint() is called for infimum record.
352-
353-
We can be in either of the two situations:
354-
355-
1) We do save/restore from Parallel_reader:::Ctx::m_f callback for
356-
record processing. In this case we saved position of user-record for
357-
which callback was invoked originally, and which we already consider
358-
as processed. In theory, such record is not supposed be purged as our
359-
transaction holds read view on it. But in practice, it doesn't matter
360-
- even if the record was purged then our cursor will still point to
361-
last processed user record in the tree after its restoration.
362-
And Parallel_reader:::Ctx::m_f callback execution is always followed by
363-
page_cur_move_to_next() call, which moves our cursor to the next record
364-
and this record has not been processed yet.
365-
366-
2) We do save/restore from the Parallel_reader::m_finish_callback
367-
end-of-page callback or from move_to_next_block() before switching
368-
the page. In this case our cursor was positioned on supremum record
369-
before savepoint() call.
370-
Th savepoint() call raised m_supremum_saved flag and saved the position
371-
of the user record which preceded the supremum, i.e. the last user record
372-
which was processed.
373-
After the below call to restore_position() the cursor will point to
374-
the latter record, or, if it has been purged meanwhile, its closest
375-
non-purged predecessor (in extreme case this can be infimum of the first
376-
page in the tree!).
377-
By moving to the successor of the saved record we position the cursor
378-
either to supremum record (which means we restored original cursor
379-
position and can continue switch to the next page as usual) or to
380-
some user record which our scan have not processed yet (for example,
381-
this record might have been moved from the next page due to page
382-
merge or simply inserted to our page concurrently).
383-
The latter case is detected by caller by doing !is_after_last_on_page()
384-
check and instead of doing switch to the next page we continue processing
385-
from the restored user record. */
311+
was purged meanwhile. */
386312

387313
m_pcur->restore_position(BTR_SEARCH_LEAF, m_mtr, UT_LOCATION_HERE);
388314

389-
ut_ad(!m_pcur->is_after_last_on_page());
390-
391-
if (m_supremum_saved) m_pcur->move_to_next_on_page();
315+
if (!m_pcur->is_after_last_on_page()) {
316+
/* Move to the successor of the saved record. */
317+
m_pcur->move_to_next_on_page();
318+
}
392319
}
393320

394-
dberr_t PCursor::move_to_next_block_user_rec() noexcept {
321+
dberr_t PCursor::move_to_user_rec() noexcept {
395322
auto cur = m_pcur->get_page_cur();
396323
const auto next_page_no = btr_page_get_next(page_cur_get_page(cur), m_mtr);
397324

@@ -444,28 +371,7 @@ dberr_t PCursor::move_to_next_block_user_rec() noexcept {
444371

445372
dberr_t PCursor::restore_from_savepoint() noexcept {
446373
resume();
447-
448-
/* We should not get cursor pointing to supremum when restoring cursor
449-
which has pointed to user record from the Parallel_reader:::Ctx::m_f
450-
callback. Otherwise the below call to move_to_next_block_user_rec()
451-
will position the cursor to the first user record on the next page
452-
and then calling code will do page_cur_move_to_next() right after
453-
that. Which means that one record will be incorrectly skipped by
454-
our scan. Luckily, this always holds with current code. */
455-
ut_ad(m_supremum_saved || !m_pcur->is_after_last_on_page());
456-
457-
/* Move to the next user record in the index tree if we are at supremum.
458-
Even though this looks awkward from the proper layering point of view,
459-
and ideally should have been done on the same code level as iteration
460-
over records, doing this here, actually, a) allows to avoid scenario in
461-
which savepoint/mini-transaction commit and latch release/restore happen
462-
for the same page twice - first time in page-end callback and then in
463-
move_to_next_block() call, b) handles a hypotetical situation when
464-
the index was reduced to the empty root page while latches were released
465-
by detecting it and bailing out early (it is important for us to bail out
466-
and not try to call savepoint() method in this case). */
467-
return (!m_pcur->is_after_last_on_page()) ? DB_SUCCESS
468-
: move_to_next_block_user_rec();
374+
return m_pcur->is_on_user_rec() ? DB_SUCCESS : move_to_user_rec();
469375
}
470376

471377
dberr_t Parallel_reader::Thread_ctx::restore_from_savepoint() noexcept {
@@ -497,7 +403,7 @@ dberr_t PCursor::move_to_next_block(dict_index_t *index) {
497403

498404
err = restore_from_savepoint();
499405
} else {
500-
err = move_to_next_block_user_rec();
406+
err = move_to_user_rec();
501407
}
502408

503409
int n_retries [[maybe_unused]] = 0;

0 commit comments

Comments
 (0)