Skip to content

Commit eac6a72

Browse files
author
Shlomi Noach
authored
Merge pull request #231 from github/named-cut-over
Named cut over
2 parents 5215dd5 + 7517d48 commit eac6a72

File tree

6 files changed

+192
-165
lines changed

6 files changed

+192
-165
lines changed

build.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
#
33
#
44

5-
RELEASE_VERSION="1.0.18"
5+
RELEASE_VERSION="1.0.20"
66

77
function build {
88
osname=$1

go/base/context.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ type MigrationContext struct {
9292
criticalLoad LoadMap
9393
PostponeCutOverFlagFile string
9494
CutOverLockTimeoutSeconds int64
95+
ForceNamedCutOverCommand bool
9596
PanicFlagFile string
9697
HooksPath string
9798
HooksHintMessage string
@@ -140,6 +141,8 @@ type MigrationContext struct {
140141
CountingRowsFlag int64
141142
AllEventsUpToLockProcessedInjectedFlag int64
142143
CleanupImminentFlag int64
144+
UserCommandedUnpostponeFlag int64
145+
PanicAbort chan error
143146

144147
OriginalTableColumns *sql.ColumnList
145148
OriginalTableUniqueKeys [](*sql.UniqueKey)
@@ -192,6 +195,7 @@ func newMigrationContext() *MigrationContext {
192195
configMutex: &sync.Mutex{},
193196
pointOfInterestTimeMutex: &sync.Mutex{},
194197
ColumnRenameMap: make(map[string]string),
198+
PanicAbort: make(chan error),
195199
}
196200
}
197201

go/cmd/gh-ost/main.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ func main() {
7171
flag.BoolVar(&migrationContext.InitiallyDropOldTable, "initially-drop-old-table", false, "Drop a possibly existing OLD table (remains from a previous run?) before beginning operation. Default is to panic and abort if such table exists")
7272
flag.BoolVar(&migrationContext.InitiallyDropGhostTable, "initially-drop-ghost-table", false, "Drop a possibly existing Ghost table (remains from a previous run?) before beginning operation. Default is to panic and abort if such table exists")
7373
cutOver := flag.String("cut-over", "atomic", "choose cut-over type (default|atomic, two-step)")
74+
flag.BoolVar(&migrationContext.ForceNamedCutOverCommand, "force-named-cut-over", false, "When true, the 'unpostpone|cut-over' interactive command must name the migrated table")
7475

7576
flag.BoolVar(&migrationContext.SwitchToRowBinlogFormat, "switch-to-rbr", false, "let this tool automatically switch binary log format to 'ROW' on the replica, if needed. The format will NOT be switched back. I'm too scared to do that, and wish to protect you if you happen to execute another migration while this one is running")
7677
flag.BoolVar(&migrationContext.AssumeRBR, "assume-rbr", false, "set to 'true' when you know for certain your server uses 'ROW' binlog_format. gh-ost is unable to tell, event after reading binlog_format, whether the replication process does indeed use 'ROW', and restarts replication to be certain RBR setting is applied. Such operation requires SUPER privileges which you might not have. Setting this flag avoids restarting replication and you can proceed to use gh-ost without SUPER privileges")

go/logic/migrator.go

Lines changed: 14 additions & 154 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,11 @@
66
package logic
77

88
import (
9-
"bufio"
109
"fmt"
1110
"io"
1211
"math"
1312
"os"
1413
"os/signal"
15-
"strconv"
16-
"strings"
1714
"sync/atomic"
1815
"syscall"
1916
"time"
@@ -42,7 +39,8 @@ const (
4239
type PrintStatusRule int
4340

4441
const (
45-
HeuristicPrintStatusRule PrintStatusRule = iota
42+
NoPrintStatusRule PrintStatusRule = iota
43+
HeuristicPrintStatusRule = iota
4644
ForcePrintStatusRule = iota
4745
ForcePrintStatusOnlyRule = iota
4846
ForcePrintStatusAndHintRule = iota
@@ -63,11 +61,9 @@ type Migrator struct {
6361
tablesInPlace chan bool
6462
rowCopyComplete chan bool
6563
allEventsUpToLockProcessed chan bool
66-
panicAbort chan error
6764

6865
rowCopyCompleteFlag int64
6966
inCutOverCriticalActionFlag int64
70-
userCommandedUnpostponeFlag int64
7167
// copyRowsQueue should not be buffered; if buffered some non-damaging but
7268
// excessive work happens at the end of the iteration as new copy-jobs arrive before realizing the copy is complete
7369
copyRowsQueue chan tableWriteFunc
@@ -84,7 +80,6 @@ func NewMigrator() *Migrator {
8480
firstThrottlingCollected: make(chan bool, 1),
8581
rowCopyComplete: make(chan bool),
8682
allEventsUpToLockProcessed: make(chan bool),
87-
panicAbort: make(chan error),
8883

8984
copyRowsQueue: make(chan tableWriteFunc),
9085
applyEventsQueue: make(chan tableWriteFunc, applyEventsQueueBuffer),
@@ -148,7 +143,7 @@ func (this *Migrator) retryOperation(operation func() error, notFatalHint ...boo
148143
// there's an error. Let's try again.
149144
}
150145
if len(notFatalHint) == 0 {
151-
this.panicAbort <- err
146+
this.migrationContext.PanicAbort <- err
152147
}
153148
return err
154149
}
@@ -217,7 +212,7 @@ func (this *Migrator) onChangelogStateEvent(dmlEvent *binlog.BinlogDMLEvent) (er
217212

218213
// listenOnPanicAbort aborts on abort request
219214
func (this *Migrator) listenOnPanicAbort() {
220-
err := <-this.panicAbort
215+
err := <-this.migrationContext.PanicAbort
221216
log.Fatale(err)
222217
}
223218

@@ -385,7 +380,7 @@ func (this *Migrator) cutOver() (err error) {
385380
if this.migrationContext.PostponeCutOverFlagFile == "" {
386381
return false, nil
387382
}
388-
if atomic.LoadInt64(&this.userCommandedUnpostponeFlag) > 0 {
383+
if atomic.LoadInt64(&this.migrationContext.UserCommandedUnpostponeFlag) > 0 {
389384
return false, nil
390385
}
391386
if base.FileExists(this.migrationContext.PostponeCutOverFlagFile) {
@@ -584,150 +579,12 @@ func (this *Migrator) atomicCutOver() (err error) {
584579
return nil
585580
}
586581

587-
// onServerCommand responds to a user's interactive command
588-
func (this *Migrator) onServerCommand(command string, writer *bufio.Writer) (err error) {
589-
defer writer.Flush()
590-
591-
tokens := strings.SplitN(command, "=", 2)
592-
command = strings.TrimSpace(tokens[0])
593-
arg := ""
594-
if len(tokens) > 1 {
595-
arg = strings.TrimSpace(tokens[1])
596-
}
597-
598-
throttleHint := "# Note: you may only throttle for as long as your binary logs are not purged\n"
599-
600-
if err := this.hooksExecutor.onInteractiveCommand(command); err != nil {
601-
return err
602-
}
603-
604-
switch command {
605-
case "help":
606-
{
607-
fmt.Fprintln(writer, `available commands:
608-
status # Print a detailed status message
609-
sup # Print a short status message
610-
chunk-size=<newsize> # Set a new chunk-size
611-
nice-ratio=<ratio> # Set a new nice-ratio, immediate sleep after each row-copy operation, float (examples: 0 is agrressive, 0.7 adds 70% runtime, 1.0 doubles runtime, 2.0 triples runtime, ...)
612-
critical-load=<load> # Set a new set of max-load thresholds
613-
max-lag-millis=<max-lag> # Set a new replication lag threshold
614-
replication-lag-query=<query> # Set a new query that determines replication lag (no quotes)
615-
max-load=<load> # Set a new set of max-load thresholds
616-
throttle-query=<query> # Set a new throttle-query (no quotes)
617-
throttle-control-replicas=<replicas> # Set a new comma delimited list of throttle control replicas
618-
throttle # Force throttling
619-
no-throttle # End forced throttling (other throttling may still apply)
620-
unpostpone # Bail out a cut-over postpone; proceed to cut-over
621-
panic # panic and quit without cleanup
622-
help # This message
623-
`)
624-
}
625-
case "sup":
626-
this.printStatus(ForcePrintStatusOnlyRule, writer)
627-
case "info", "status":
628-
this.printStatus(ForcePrintStatusAndHintRule, writer)
629-
case "chunk-size":
630-
{
631-
if chunkSize, err := strconv.Atoi(arg); err != nil {
632-
fmt.Fprintf(writer, "%s\n", err.Error())
633-
return log.Errore(err)
634-
} else {
635-
this.migrationContext.SetChunkSize(int64(chunkSize))
636-
this.printStatus(ForcePrintStatusAndHintRule, writer)
637-
}
638-
}
639-
case "max-lag-millis":
640-
{
641-
if maxLagMillis, err := strconv.Atoi(arg); err != nil {
642-
fmt.Fprintf(writer, "%s\n", err.Error())
643-
return log.Errore(err)
644-
} else {
645-
this.migrationContext.SetMaxLagMillisecondsThrottleThreshold(int64(maxLagMillis))
646-
this.printStatus(ForcePrintStatusAndHintRule, writer)
647-
}
648-
}
649-
case "replication-lag-query":
650-
{
651-
this.migrationContext.SetReplicationLagQuery(arg)
652-
this.printStatus(ForcePrintStatusAndHintRule, writer)
653-
}
654-
case "nice-ratio":
655-
{
656-
if niceRatio, err := strconv.ParseFloat(arg, 64); err != nil {
657-
fmt.Fprintf(writer, "%s\n", err.Error())
658-
return log.Errore(err)
659-
} else {
660-
this.migrationContext.SetNiceRatio(niceRatio)
661-
this.printStatus(ForcePrintStatusAndHintRule, writer)
662-
}
663-
}
664-
case "max-load":
665-
{
666-
if err := this.migrationContext.ReadMaxLoad(arg); err != nil {
667-
fmt.Fprintf(writer, "%s\n", err.Error())
668-
return log.Errore(err)
669-
}
670-
this.printStatus(ForcePrintStatusAndHintRule, writer)
671-
}
672-
case "critical-load":
673-
{
674-
if err := this.migrationContext.ReadCriticalLoad(arg); err != nil {
675-
fmt.Fprintf(writer, "%s\n", err.Error())
676-
return log.Errore(err)
677-
}
678-
this.printStatus(ForcePrintStatusAndHintRule, writer)
679-
}
680-
case "throttle-query":
681-
{
682-
this.migrationContext.SetThrottleQuery(arg)
683-
fmt.Fprintf(writer, throttleHint)
684-
this.printStatus(ForcePrintStatusAndHintRule, writer)
685-
}
686-
case "throttle-control-replicas":
687-
{
688-
if err := this.migrationContext.ReadThrottleControlReplicaKeys(arg); err != nil {
689-
fmt.Fprintf(writer, "%s\n", err.Error())
690-
return log.Errore(err)
691-
}
692-
fmt.Fprintf(writer, "%s\n", this.migrationContext.GetThrottleControlReplicaKeys().ToCommaDelimitedList())
693-
this.printStatus(ForcePrintStatusAndHintRule, writer)
694-
}
695-
case "throttle", "pause", "suspend":
696-
{
697-
atomic.StoreInt64(&this.migrationContext.ThrottleCommandedByUser, 1)
698-
fmt.Fprintf(writer, throttleHint)
699-
this.printStatus(ForcePrintStatusAndHintRule, writer)
700-
}
701-
case "no-throttle", "unthrottle", "resume", "continue":
702-
{
703-
atomic.StoreInt64(&this.migrationContext.ThrottleCommandedByUser, 0)
704-
}
705-
case "unpostpone", "no-postpone", "cut-over":
706-
{
707-
if atomic.LoadInt64(&this.migrationContext.IsPostponingCutOver) > 0 {
708-
atomic.StoreInt64(&this.userCommandedUnpostponeFlag, 1)
709-
fmt.Fprintf(writer, "Unpostponed\n")
710-
} else {
711-
fmt.Fprintf(writer, "You may only invoke this when gh-ost is actively postponing migration. At this time it is not.\n")
712-
}
713-
}
714-
case "panic":
715-
{
716-
err := fmt.Errorf("User commanded 'panic'. I will now panic, without cleanup. PANIC!")
717-
fmt.Fprintf(writer, "%s\n", err.Error())
718-
this.panicAbort <- err
719-
}
720-
default:
721-
err = fmt.Errorf("Unknown command: %s", command)
722-
fmt.Fprintf(writer, "%s\n", err.Error())
723-
return err
724-
}
725-
return nil
726-
}
727-
728582
// initiateServer begins listening on unix socket/tcp for incoming interactive commands
729583
func (this *Migrator) initiateServer() (err error) {
730-
this.server = NewServer(this.onServerCommand)
584+
var f printStatusFunc = func(rule PrintStatusRule, writer io.Writer) {
585+
this.printStatus(rule, writer)
586+
}
587+
this.server = NewServer(this.hooksExecutor, f)
731588
if err := this.server.BindSocketFile(); err != nil {
732589
return err
733590
}
@@ -887,6 +744,9 @@ func (this *Migrator) printMigrationStatusHint(writers ...io.Writer) {
887744
// By default the status is written to standard output, but other writers can
888745
// be used as well.
889746
func (this *Migrator) printStatus(rule PrintStatusRule, writers ...io.Writer) {
747+
if rule == NoPrintStatusRule {
748+
return
749+
}
890750
writers = append(writers, os.Stdout)
891751

892752
elapsedTime := this.migrationContext.ElapsedTime()
@@ -1007,7 +867,7 @@ func (this *Migrator) initiateStreaming() error {
1007867
log.Debugf("Beginning streaming")
1008868
err := this.eventsStreamer.StreamEvents(this.canStopStreaming)
1009869
if err != nil {
1010-
this.panicAbort <- err
870+
this.migrationContext.PanicAbort <- err
1011871
}
1012872
log.Debugf("Done streaming")
1013873
}()
@@ -1035,7 +895,7 @@ func (this *Migrator) addDMLEventsListener() error {
1035895

1036896
// initiateThrottler kicks in the throttling collection and the throttling checks.
1037897
func (this *Migrator) initiateThrottler() error {
1038-
this.throttler = NewThrottler(this.applier, this.inspector, this.panicAbort)
898+
this.throttler = NewThrottler(this.applier, this.inspector)
1039899

1040900
go this.throttler.initiateThrottlerCollection(this.firstThrottlingCollected)
1041901
log.Infof("Waiting for first throttle metrics to be collected")

0 commit comments

Comments
 (0)