@@ -252,13 +252,12 @@ class PCursor {
252252 return m_pcur->get_page_cur ();
253253 }
254254
255- /* * Restore from a saved position. Sets position the next user record
256- in the tree if it is cursor pointing to supremum that was saved.
255+ /* * Restore from a saved position.
257256 @return DB_SUCCESS or error code. */
258257 [[nodiscard]] dberr_t restore_from_savepoint () noexcept ;
259258
260- /* * Move to the first user rec on the next page. */
261- [[nodiscard]] dberr_t move_to_next_block_user_rec () noexcept ;
259+ /* * Move to the first user rec on the restored page. */
260+ [[nodiscard]] dberr_t move_to_user_rec () noexcept ;
262261
263262 /* * @return true if cursor is after last on page. */
264263 [[nodiscard]] bool is_after_last_on_page () const noexcept {
@@ -278,9 +277,6 @@ class PCursor {
278277 /* * Level where the cursor is positioned or need to be positioned in case of
279278 restore. */
280279 size_t m_read_level{};
281-
282- /* * Indicates that savepoint created for cursor corresponds to supremum. */
283- bool m_supremum_saved{};
284280};
285281
286282buf_block_t *Parallel_reader::Scan_ctx::block_get_s_latched (
@@ -298,42 +294,9 @@ buf_block_t *Parallel_reader::Scan_ctx::block_get_s_latched(
298294}
299295
300296void PCursor::savepoint () noexcept {
301- /* The below code will not work correctly in all cases if we are to take
302- savepoint of cursor pointing to infimum.
303-
304- If savepoint is taken for such a cursor, in optimistic restore case we
305- might not detect situation when after mini-trancation commit/latches
306- release concurrent merge from the previous page or concurrent update of
307- the last record on the previous page, which doesn't fit into it, has
308- moved records over infimum (see MySQL bugs #114248 and #115518).
309- As result scan will revisit these records, which can cause, for example,
310- ALTER TABLE failing with a duplicate key error.
311-
312- Luckily, at the moment, savepoint is taken only when cursor points
313- to a user record (when we do save/restore from Parallel_reader::Ctx::m_f
314- callback which processes records) or to the supremum (when we do save/
315- restore from Parallel_reader::m_finish_callback end-of-page callback or
316- before switching to next page in move_to_next_block()). */
317- ut_ad (!m_pcur->is_before_first_on_page ());
318-
319- /* If we are taking savepoint for cursor pointing to supremum we are doing
320- one step back. This is necessary to prevent situation when concurrent
321- merge from the next page, which happens after we commit mini-transaction/
322- release latches moves records over supremum to the current page.
323- In this case the optimistic restore of cursor pointing to supremum will
324- result in cursor pointing to supremum, which means moved records will
325- be incorrectly skipped by the scan. */
326- m_supremum_saved = m_pcur->is_after_last_on_page ();
327- if (m_supremum_saved) {
328- m_pcur->move_to_prev_on_page ();
329- // Only root page can be empty and we are not supposed to get here
330- // in this case.
331- ut_ad (!m_pcur->is_before_first_on_page ());
332- }
297+ /* Store the cursor position on the previous user record on the page. */
298+ m_pcur->move_to_prev_on_page ();
333299
334- /* Thanks to the above we can say that we are saving position of last user
335- record which have processed/ which corresponds to last key value we have
336- processed. */
337300 m_pcur->store_position (m_mtr);
338301
339302 m_mtr->commit ();
@@ -345,53 +308,17 @@ void PCursor::resume() noexcept {
345308 m_mtr->set_log_mode (MTR_LOG_NO_REDO);
346309
347310 /* Restore position on the record, or its predecessor if the record
348- was purged meanwhile. In extreme case this can be infimum record of
349- the first page in the tree. However, this can never be a supremum
350- record on a page, since we always move cursor one step back when
351- savepoint() is called for infimum record.
352-
353- We can be in either of the two situations:
354-
355- 1) We do save/restore from Parallel_reader:::Ctx::m_f callback for
356- record processing. In this case we saved position of user-record for
357- which callback was invoked originally, and which we already consider
358- as processed. In theory, such record is not supposed be purged as our
359- transaction holds read view on it. But in practice, it doesn't matter
360- - even if the record was purged then our cursor will still point to
361- last processed user record in the tree after its restoration.
362- And Parallel_reader:::Ctx::m_f callback execution is always followed by
363- page_cur_move_to_next() call, which moves our cursor to the next record
364- and this record has not been processed yet.
365-
366- 2) We do save/restore from the Parallel_reader::m_finish_callback
367- end-of-page callback or from move_to_next_block() before switching
368- the page. In this case our cursor was positioned on supremum record
369- before savepoint() call.
370- Th savepoint() call raised m_supremum_saved flag and saved the position
371- of the user record which preceded the supremum, i.e. the last user record
372- which was processed.
373- After the below call to restore_position() the cursor will point to
374- the latter record, or, if it has been purged meanwhile, its closest
375- non-purged predecessor (in extreme case this can be infimum of the first
376- page in the tree!).
377- By moving to the successor of the saved record we position the cursor
378- either to supremum record (which means we restored original cursor
379- position and can continue switch to the next page as usual) or to
380- some user record which our scan have not processed yet (for example,
381- this record might have been moved from the next page due to page
382- merge or simply inserted to our page concurrently).
383- The latter case is detected by caller by doing !is_after_last_on_page()
384- check and instead of doing switch to the next page we continue processing
385- from the restored user record. */
311+ was purged meanwhile. */
386312
387313 m_pcur->restore_position (BTR_SEARCH_LEAF, m_mtr, UT_LOCATION_HERE);
388314
389- ut_ad (!m_pcur->is_after_last_on_page ());
390-
391- if (m_supremum_saved) m_pcur->move_to_next_on_page ();
315+ if (!m_pcur->is_after_last_on_page ()) {
316+ /* Move to the successor of the saved record. */
317+ m_pcur->move_to_next_on_page ();
318+ }
392319}
393320
394- dberr_t PCursor::move_to_next_block_user_rec () noexcept {
321+ dberr_t PCursor::move_to_user_rec () noexcept {
395322 auto cur = m_pcur->get_page_cur ();
396323 const auto next_page_no = btr_page_get_next (page_cur_get_page (cur), m_mtr);
397324
@@ -444,28 +371,7 @@ dberr_t PCursor::move_to_next_block_user_rec() noexcept {
444371
445372dberr_t PCursor::restore_from_savepoint () noexcept {
446373 resume ();
447-
448- /* We should not get cursor pointing to supremum when restoring cursor
449- which has pointed to user record from the Parallel_reader:::Ctx::m_f
450- callback. Otherwise the below call to move_to_next_block_user_rec()
451- will position the cursor to the first user record on the next page
452- and then calling code will do page_cur_move_to_next() right after
453- that. Which means that one record will be incorrectly skipped by
454- our scan. Luckily, this always holds with current code. */
455- ut_ad (m_supremum_saved || !m_pcur->is_after_last_on_page ());
456-
457- /* Move to the next user record in the index tree if we are at supremum.
458- Even though this looks awkward from the proper layering point of view,
459- and ideally should have been done on the same code level as iteration
460- over records, doing this here, actually, a) allows to avoid scenario in
461- which savepoint/mini-transaction commit and latch release/restore happen
462- for the same page twice - first time in page-end callback and then in
463- move_to_next_block() call, b) handles a hypotetical situation when
464- the index was reduced to the empty root page while latches were released
465- by detecting it and bailing out early (it is important for us to bail out
466- and not try to call savepoint() method in this case). */
467- return (!m_pcur->is_after_last_on_page ()) ? DB_SUCCESS
468- : move_to_next_block_user_rec ();
374+ return m_pcur->is_on_user_rec () ? DB_SUCCESS : move_to_user_rec ();
469375}
470376
471377dberr_t Parallel_reader::Thread_ctx::restore_from_savepoint () noexcept {
@@ -497,7 +403,7 @@ dberr_t PCursor::move_to_next_block(dict_index_t *index) {
497403
498404 err = restore_from_savepoint ();
499405 } else {
500- err = move_to_next_block_user_rec ();
406+ err = move_to_user_rec ();
501407 }
502408
503409 int n_retries [[maybe_unused]] = 0 ;
0 commit comments