Skip to content

Commit f3d78be

Browse files
author
Shlomi Noach
authored
Merge pull request #106 from github/remove-two-step-cut-over
Removed legacy 'safe cut-over'
2 parents a75912d + 6dbf5c3 commit f3d78be

File tree

5 files changed

+3
-292
lines changed

5 files changed

+3
-292
lines changed

build.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#!/bin/bash
22
#
33
#
4-
RELEASE_VERSION="1.0.4"
4+
RELEASE_VERSION="1.0.5"
55

66
buildpath=/tmp/gh-ost
77
target=gh-ost

go/base/context.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@ type CutOver int
3333

3434
const (
3535
CutOverAtomic CutOver = iota
36-
CutOverSafe = iota
3736
CutOverTwoStep = iota
3837
)
3938

go/cmd/gh-ost/main.go

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ func main() {
6666
flag.BoolVar(&migrationContext.OkToDropTable, "ok-to-drop-table", false, "Shall the tool drop the old table at end of operation. DROPping tables can be a long locking operation, which is why I'm not doing it by default. I'm an online tool, yes?")
6767
flag.BoolVar(&migrationContext.InitiallyDropOldTable, "initially-drop-old-table", false, "Drop a possibly existing OLD table (remains from a previous run?) before beginning operation. Default is to panic and abort if such table exists")
6868
flag.BoolVar(&migrationContext.InitiallyDropGhostTable, "initially-drop-ghost-table", false, "Drop a possibly existing Ghost table (remains from a previous run?) before beginning operation. Default is to panic and abort if such table exists")
69-
cutOver := flag.String("cut-over", "atomic", "choose cut-over type (atomic, two-step, voluntary-lock)")
69+
cutOver := flag.String("cut-over", "atomic", "choose cut-over type (default|atomic, two-step)")
7070

7171
flag.BoolVar(&migrationContext.SwitchToRowBinlogFormat, "switch-to-rbr", false, "let this tool automatically switch binary log format to 'ROW' on the replica, if needed. The format will NOT be switched back. I'm too scared to do that, and wish to protect you if you happen to execute another migration while this one is running")
7272
chunkSize := flag.Int64("chunk-size", 1000, "amount of rows to handle in each iteration (allowed range: 100-100,000)")
@@ -149,8 +149,6 @@ func main() {
149149
switch *cutOver {
150150
case "atomic", "default", "":
151151
migrationContext.CutOverType = base.CutOverAtomic
152-
case "safe":
153-
migrationContext.CutOverType = base.CutOverSafe
154152
case "two-step":
155153
migrationContext.CutOverType = base.CutOverTwoStep
156154
default:

go/logic/applier.go

Lines changed: 1 addition & 162 deletions
Original file line numberDiff line numberDiff line change
@@ -488,22 +488,6 @@ func (this *Applier) SwapTablesQuickAndBumpy() error {
488488
return nil
489489
}
490490

491-
// RenameTable makes coffee. No, wait. It renames a table.
492-
func (this *Applier) RenameTable(fromName, toName string) (err error) {
493-
query := fmt.Sprintf(`rename /* gh-ost */ table %s.%s to %s.%s`,
494-
sql.EscapeName(this.migrationContext.DatabaseName),
495-
sql.EscapeName(fromName),
496-
sql.EscapeName(this.migrationContext.DatabaseName),
497-
sql.EscapeName(toName),
498-
)
499-
log.Infof("Renaming %s to %s", fromName, toName)
500-
if _, err := sqlutils.ExecNoPrepare(this.db, query); err != nil {
501-
return log.Errore(err)
502-
}
503-
log.Infof("Table renamed")
504-
return nil
505-
}
506-
507491
// RenameTablesRollback renames back both tables: original back to ghost,
508492
// _old back to original. This is used by `--test-on-replica`
509493
func (this *Applier) RenameTablesRollback() (renameError error) {
@@ -603,151 +587,6 @@ func (this *Applier) GetSessionLockName(sessionId int64) string {
603587
return fmt.Sprintf("gh-ost.%d.lock", sessionId)
604588
}
605589

606-
// LockOriginalTableAndWait locks the original table, notifies the lock is in
607-
// place, and awaits further instruction
608-
func (this *Applier) LockOriginalTableAndWait(sessionIdChan chan int64, tableLocked chan<- error, okToUnlockTable <-chan bool, tableUnlocked chan<- error) error {
609-
tx, err := this.db.Begin()
610-
if err != nil {
611-
tableLocked <- err
612-
return err
613-
}
614-
defer func() {
615-
tx.Rollback()
616-
}()
617-
618-
var sessionId int64
619-
if err := tx.QueryRow(`select connection_id()`).Scan(&sessionId); err != nil {
620-
tableLocked <- err
621-
return err
622-
}
623-
sessionIdChan <- sessionId
624-
625-
query := `select get_lock(?, 0)`
626-
lockResult := 0
627-
lockName := this.GetSessionLockName(sessionId)
628-
log.Infof("Grabbing voluntary lock: %s", lockName)
629-
if err := tx.QueryRow(query, lockName).Scan(&lockResult); err != nil || lockResult != 1 {
630-
err := fmt.Errorf("Unable to acquire lock %s", lockName)
631-
tableLocked <- err
632-
return err
633-
}
634-
635-
tableLockTimeoutSeconds := this.migrationContext.CutOverLockTimeoutSeconds * 2
636-
log.Infof("Setting LOCK timeout as %d seconds", tableLockTimeoutSeconds)
637-
query = fmt.Sprintf(`set session lock_wait_timeout:=%d`, tableLockTimeoutSeconds)
638-
if _, err := tx.Exec(query); err != nil {
639-
tableLocked <- err
640-
return err
641-
}
642-
643-
query = fmt.Sprintf(`lock /* gh-ost */ tables %s.%s write`,
644-
sql.EscapeName(this.migrationContext.DatabaseName),
645-
sql.EscapeName(this.migrationContext.OriginalTableName),
646-
)
647-
log.Infof("Locking %s.%s",
648-
sql.EscapeName(this.migrationContext.DatabaseName),
649-
sql.EscapeName(this.migrationContext.OriginalTableName),
650-
)
651-
this.migrationContext.LockTablesStartTime = time.Now()
652-
if _, err := tx.Exec(query); err != nil {
653-
tableLocked <- err
654-
return err
655-
}
656-
log.Infof("Table locked")
657-
tableLocked <- nil // No error.
658-
659-
// The cut-over phase will proceed to apply remaining backlog onto ghost table,
660-
// and issue RENAMEs. We wait here until told to proceed.
661-
<-okToUnlockTable
662-
// Release
663-
query = `unlock tables`
664-
log.Infof("Releasing lock from %s.%s",
665-
sql.EscapeName(this.migrationContext.DatabaseName),
666-
sql.EscapeName(this.migrationContext.OriginalTableName),
667-
)
668-
if _, err := tx.Exec(query); err != nil {
669-
tableUnlocked <- err
670-
return log.Errore(err)
671-
}
672-
log.Infof("Table unlocked")
673-
tableUnlocked <- nil
674-
return nil
675-
}
676-
677-
// RenameOriginalTable will attempt renaming the original table into _old
678-
func (this *Applier) RenameOriginalTable(sessionIdChan chan int64, originalTableRenamed chan<- error) error {
679-
tx, err := this.db.Begin()
680-
if err != nil {
681-
return err
682-
}
683-
defer func() {
684-
tx.Rollback()
685-
originalTableRenamed <- nil
686-
}()
687-
var sessionId int64
688-
if err := tx.QueryRow(`select connection_id()`).Scan(&sessionId); err != nil {
689-
return err
690-
}
691-
sessionIdChan <- sessionId
692-
693-
log.Infof("Setting RENAME timeout as %d seconds", this.migrationContext.CutOverLockTimeoutSeconds)
694-
query := fmt.Sprintf(`set session lock_wait_timeout:=%d`, this.migrationContext.CutOverLockTimeoutSeconds)
695-
if _, err := tx.Exec(query); err != nil {
696-
return err
697-
}
698-
699-
query = fmt.Sprintf(`rename /* gh-ost */ table %s.%s to %s.%s`,
700-
sql.EscapeName(this.migrationContext.DatabaseName),
701-
sql.EscapeName(this.migrationContext.OriginalTableName),
702-
sql.EscapeName(this.migrationContext.DatabaseName),
703-
sql.EscapeName(this.migrationContext.GetOldTableName()),
704-
)
705-
log.Infof("Issuing and expecting this to block: %s", query)
706-
if _, err := tx.Exec(query); err != nil {
707-
return log.Errore(err)
708-
}
709-
log.Infof("Original table renamed")
710-
return nil
711-
}
712-
713-
// RenameGhostTable will attempt renaming the ghost table into original
714-
func (this *Applier) RenameGhostTable(sessionIdChan chan int64, ghostTableRenamed chan<- error) error {
715-
tx, err := this.db.Begin()
716-
if err != nil {
717-
return err
718-
}
719-
defer func() {
720-
tx.Rollback()
721-
}()
722-
var sessionId int64
723-
if err := tx.QueryRow(`select connection_id()`).Scan(&sessionId); err != nil {
724-
return err
725-
}
726-
sessionIdChan <- sessionId
727-
728-
log.Infof("Setting RENAME timeout as %d seconds", this.migrationContext.CutOverLockTimeoutSeconds)
729-
query := fmt.Sprintf(`set session lock_wait_timeout:=%d`, this.migrationContext.CutOverLockTimeoutSeconds)
730-
if _, err := tx.Exec(query); err != nil {
731-
return err
732-
}
733-
734-
query = fmt.Sprintf(`rename /* gh-ost */ table %s.%s to %s.%s`,
735-
sql.EscapeName(this.migrationContext.DatabaseName),
736-
sql.EscapeName(this.migrationContext.GetGhostTableName()),
737-
sql.EscapeName(this.migrationContext.DatabaseName),
738-
sql.EscapeName(this.migrationContext.OriginalTableName),
739-
)
740-
log.Infof("Issuing and expecting this to block: %s", query)
741-
if _, err := tx.Exec(query); err != nil {
742-
ghostTableRenamed <- err
743-
return log.Errore(err)
744-
}
745-
log.Infof("Ghost table renamed")
746-
ghostTableRenamed <- nil
747-
748-
return nil
749-
}
750-
751590
// ExpectUsedLock expects the special hint voluntary lock to exist on given session
752591
func (this *Applier) ExpectUsedLock(sessionId int64) error {
753592
var result int64
@@ -931,7 +770,7 @@ func (this *Applier) AtomicCutOverMagicLock(sessionIdChan chan int64, tableLocke
931770
return nil
932771
}
933772

934-
// RenameOriginalTable will attempt renaming the original table into _old
773+
// AtomicCutoverRename
935774
func (this *Applier) AtomicCutoverRename(sessionIdChan chan int64, tablesRenamed chan<- error) error {
936775
tx, err := this.db.Begin()
937776
if err != nil {

go/logic/migrator.go

Lines changed: 0 additions & 125 deletions
Original file line numberDiff line numberDiff line change
@@ -486,16 +486,6 @@ func (this *Migrator) cutOver() (err error) {
486486
)
487487
return err
488488
}
489-
if this.migrationContext.CutOverType == base.CutOverSafe {
490-
// Lock-based solution: we use low timeout and multiple attempts. But for
491-
// each failed attempt, we throttle until replication lag is back to normal
492-
err := this.retryOperation(
493-
func() error {
494-
return this.executeAndThrottleOnError(this.safeCutOver)
495-
},
496-
)
497-
return err
498-
}
499489
if this.migrationContext.CutOverType == base.CutOverTwoStep {
500490
err := this.retryOperation(
501491
func() error {
@@ -643,121 +633,6 @@ func (this *Migrator) atomicCutOver() (err error) {
643633
return nil
644634
}
645635

646-
// safeCutOver performs a safe cut over, where normally (no failure) the original table
647-
// is being locked until swapped, hence DML queries being locked and unaware of the cut-over.
648-
// In the worst case, there will be a minor outage, where the original table would not exist.
649-
func (this *Migrator) safeCutOver() (err error) {
650-
atomic.StoreInt64(&this.inCutOverCriticalActionFlag, 1)
651-
defer atomic.StoreInt64(&this.inCutOverCriticalActionFlag, 0)
652-
653-
okToUnlockTable := make(chan bool, 2)
654-
originalTableRenamed := make(chan error, 1)
655-
var originalTableRenameIntended int64
656-
defer func() {
657-
log.Infof("Checking to see if we need to roll back")
658-
// The following is to make sure we unlock the table no-matter-what!
659-
// There's enough buffer in the channel to support a redundant write here.
660-
okToUnlockTable <- true
661-
if atomic.LoadInt64(&originalTableRenameIntended) == 1 {
662-
log.Infof("Waiting for original table rename result")
663-
// We need to make sure we wait for the original-rename, successful or not,
664-
// so as to be able to rollback in case the ghost-rename fails.
665-
// But we only wait on this queue if there's actually going to be a rename.
666-
// As an example, what happens should the initial `lock tables` fail? We would
667-
// never proceed to rename the table, hence this queue is never written to.
668-
<-originalTableRenamed
669-
}
670-
// Rollback operation
671-
if !this.applier.tableExists(this.migrationContext.OriginalTableName) {
672-
log.Infof("Cannot find %s, rolling back", this.migrationContext.OriginalTableName)
673-
err := this.applier.RenameTable(this.migrationContext.GetOldTableName(), this.migrationContext.OriginalTableName)
674-
log.Errore(err)
675-
} else {
676-
log.Info("No need for rollback")
677-
}
678-
}()
679-
lockOriginalSessionIdChan := make(chan int64, 1)
680-
tableLocked := make(chan error, 1)
681-
tableUnlocked := make(chan error, 1)
682-
go func() {
683-
if err := this.applier.LockOriginalTableAndWait(lockOriginalSessionIdChan, tableLocked, okToUnlockTable, tableUnlocked); err != nil {
684-
log.Errore(err)
685-
}
686-
}()
687-
if err := <-tableLocked; err != nil {
688-
return log.Errore(err)
689-
}
690-
lockOriginalSessionId := <-lockOriginalSessionIdChan
691-
log.Infof("Session locking original table is %+v", lockOriginalSessionId)
692-
// At this point we know the table is locked.
693-
// We know any newly incoming DML on original table is blocked.
694-
this.waitForEventsUpToLock()
695-
696-
// Step 2
697-
// We now attempt a RENAME on the original table, and expect it to block
698-
renameOriginalSessionIdChan := make(chan int64, 1)
699-
this.migrationContext.RenameTablesStartTime = time.Now()
700-
atomic.StoreInt64(&originalTableRenameIntended, 1)
701-
702-
go func() {
703-
this.applier.RenameOriginalTable(renameOriginalSessionIdChan, originalTableRenamed)
704-
}()
705-
renameOriginalSessionId := <-renameOriginalSessionIdChan
706-
log.Infof("Session renaming original table is %+v", renameOriginalSessionId)
707-
708-
if err := this.retryOperation(
709-
func() error {
710-
return this.applier.ExpectProcess(renameOriginalSessionId, "metadata lock", "rename")
711-
}); err != nil {
712-
return err
713-
}
714-
log.Infof("Found RENAME on original table to be blocking, as expected. Double checking original is still being locked")
715-
if err := this.applier.ExpectUsedLock(lockOriginalSessionId); err != nil {
716-
// Abort operation; but make sure to unlock table!
717-
return log.Errore(err)
718-
}
719-
log.Infof("Connection holding lock on original table still exists")
720-
721-
// Now that we've found the RENAME blocking, AND the locking connection still alive,
722-
// we know it is safe to proceed to renaming ghost table.
723-
724-
// Step 3
725-
// We now attempt a RENAME on the ghost table, and expect it to block
726-
renameGhostSessionIdChan := make(chan int64, 1)
727-
ghostTableRenamed := make(chan error, 1)
728-
go func() {
729-
this.applier.RenameGhostTable(renameGhostSessionIdChan, ghostTableRenamed)
730-
}()
731-
renameGhostSessionId := <-renameGhostSessionIdChan
732-
log.Infof("Session renaming ghost table is %+v", renameGhostSessionId)
733-
734-
if err := this.retryOperation(
735-
func() error {
736-
return this.applier.ExpectProcess(renameGhostSessionId, "metadata lock", "rename")
737-
}); err != nil {
738-
return err
739-
}
740-
log.Infof("Found RENAME on ghost table to be blocking, as expected. Will next release lock on original table")
741-
742-
// Step 4
743-
okToUnlockTable <- true
744-
// BAM! original table lock is released, RENAME original->old released,
745-
// RENAME ghost->original is released, queries on original are unblocked.
746-
// (that is, assuming all went well)
747-
if err := <-tableUnlocked; err != nil {
748-
return log.Errore(err)
749-
}
750-
if err := <-ghostTableRenamed; err != nil {
751-
return log.Errore(err)
752-
}
753-
this.migrationContext.RenameTablesEndTime = time.Now()
754-
755-
// ooh nice! We're actually truly and thankfully done
756-
lockAndRenameDuration := this.migrationContext.RenameTablesEndTime.Sub(this.migrationContext.LockTablesStartTime)
757-
log.Infof("Lock & rename duration: %s. During this time, queries on %s were blocked", lockAndRenameDuration, sql.EscapeName(this.migrationContext.OriginalTableName))
758-
return nil
759-
}
760-
761636
// stopWritesAndCompleteMigrationOnReplica will stop replication IO thread, apply
762637
// what DML events are left, and that's it.
763638
// This only applies in --test-on-replica. It leaves replication stopped, with both tables

0 commit comments

Comments
 (0)