Skip to content

Commit 48cb9ab

Browse files
Refine wait_timeout override to be at cut-over only (#1406)
Signed-off-by: Tim Vaillancourt <[email protected]>
1 parent 1e1fbcb commit 48cb9ab

File tree

6 files changed

+37
-18
lines changed

6 files changed

+37
-18
lines changed

doc/command-line-flags.md

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -202,10 +202,6 @@ List of metrics and threshold values; topping the threshold of any will cause th
202202

203203
Typically `gh-ost` is used to migrate tables on a master. If you wish to only perform the migration in full on a replica, connect `gh-ost` to said replica and pass `--migrate-on-replica`. `gh-ost` will briefly connect to the master but otherwise will make no changes on the master. Migration will be fully executed on the replica, while making sure to maintain a small replication lag.
204204

205-
### mysql-wait-timeout
206-
207-
If set to a value greater than zero, causes `gh-ost` to set a provided [MySQL `wait_timeout`](https://dev.mysql.com/doc/refman/8.0/en/server-system-variables.html#sysvar_wait_timeout) for MySQL sessions opened by `gh-ost`, specified in seconds.
208-
209205
### postpone-cut-over-flag-file
210206

211207
Indicate a file name, such that the final [cut-over](cut-over.md) step does not take place as long as the file exists.

go/base/context.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,7 @@ type MigrationContext struct {
164164
Hostname string
165165
AssumeMasterHostname string
166166
ApplierTimeZone string
167+
ApplierWaitTimeout int64
167168
TableEngine string
168169
RowsEstimate int64
169170
RowsDeltaEstimate int64

go/cmd/gh-ost/main.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,6 @@ func main() {
5151
flag.StringVar(&migrationContext.AssumeMasterHostname, "assume-master-host", "", "(optional) explicitly tell gh-ost the identity of the master. Format: some.host.com[:port] This is useful in master-master setups where you wish to pick an explicit master, or in a tungsten-replicator where gh-ost is unable to determine the master")
5252
flag.IntVar(&migrationContext.InspectorConnectionConfig.Key.Port, "port", 3306, "MySQL port (preferably a replica, not the master)")
5353
flag.Float64Var(&migrationContext.InspectorConnectionConfig.Timeout, "mysql-timeout", 0.0, "Connect, read and write timeout for MySQL")
54-
flag.Float64Var(&migrationContext.InspectorConnectionConfig.WaitTimeout, "mysql-wait-timeout", 0.0, "wait_timeout for MySQL sessions")
5554
flag.StringVar(&migrationContext.CliUser, "user", "", "MySQL user")
5655
flag.StringVar(&migrationContext.CliPassword, "password", "", "MySQL password")
5756
flag.StringVar(&migrationContext.CliMasterUser, "master-user", "", "MySQL user on master, if different from that on replica. Requires --assume-master-host")

go/logic/applier.go

Lines changed: 35 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ func (this *Applier) InitDBConnections() (err error) {
8989
return err
9090
}
9191
this.migrationContext.ApplierMySQLVersion = version
92-
if err := this.validateAndReadTimeZone(); err != nil {
92+
if err := this.validateAndReadGlobalVariables(); err != nil {
9393
return err
9494
}
9595
if !this.migrationContext.AliyunRDS && !this.migrationContext.GoogleCloudPlatform && !this.migrationContext.AzureMySQL {
@@ -106,10 +106,13 @@ func (this *Applier) InitDBConnections() (err error) {
106106
return nil
107107
}
108108

109-
// validateAndReadTimeZone potentially reads server time-zone
110-
func (this *Applier) validateAndReadTimeZone() error {
111-
query := `select /* gh-ost */ @@global.time_zone`
112-
if err := this.db.QueryRow(query).Scan(&this.migrationContext.ApplierTimeZone); err != nil {
109+
// validateAndReadGlobalVariables potentially reads server global variables, such as the time_zone and wait_timeout.
110+
func (this *Applier) validateAndReadGlobalVariables() error {
111+
query := `select /* gh-ost */ @@global.time_zone, @@global.wait_timeout`
112+
if err := this.db.QueryRow(query).Scan(
113+
&this.migrationContext.ApplierTimeZone,
114+
&this.migrationContext.ApplierWaitTimeout,
115+
); err != nil {
113116
return err
114117
}
115118

@@ -934,6 +937,27 @@ func (this *Applier) CreateAtomicCutOverSentryTable() error {
934937
return nil
935938
}
936939

940+
// InitAtomicCutOverWaitTimeout sets the cut-over session wait_timeout in order to reduce the
941+
// time an unresponsive (but still connected) gh-ost process can hold the cut-over lock.
942+
func (this *Applier) InitAtomicCutOverWaitTimeout(tx *gosql.Tx) error {
943+
cutOverWaitTimeoutSeconds := this.migrationContext.CutOverLockTimeoutSeconds * 3
944+
this.migrationContext.Log.Infof("Setting cut-over idle timeout as %d seconds", cutOverWaitTimeoutSeconds)
945+
query := fmt.Sprintf(`set /* gh-ost */ session wait_timeout:=%d`, cutOverWaitTimeoutSeconds)
946+
_, err := tx.Exec(query)
947+
return err
948+
}
949+
950+
// RevertAtomicCutOverWaitTimeout restores the original wait_timeout for the applier session post-cut-over.
951+
func (this *Applier) RevertAtomicCutOverWaitTimeout() {
952+
this.migrationContext.Log.Infof("Reverting cut-over idle timeout to %d seconds", this.migrationContext.ApplierWaitTimeout)
953+
query := fmt.Sprintf(`set /* gh-ost */ session wait_timeout:=%d`, this.migrationContext.ApplierWaitTimeout)
954+
if _, err := sqlutils.ExecNoPrepare(this.db, query); err != nil {
955+
this.migrationContext.Log.Errorf("Failed to restore applier wait_timeout to %d seconds: %v",
956+
this.migrationContext.ApplierWaitTimeout, err,
957+
)
958+
}
959+
}
960+
937961
// AtomicCutOverMagicLock
938962
func (this *Applier) AtomicCutOverMagicLock(sessionIdChan chan int64, tableLocked chan<- error, okToUnlockTable <-chan bool, tableUnlocked chan<- error) error {
939963
tx, err := this.db.Begin()
@@ -979,6 +1003,12 @@ func (this *Applier) AtomicCutOverMagicLock(sessionIdChan chan int64, tableLocke
9791003
return err
9801004
}
9811005

1006+
if err := this.InitAtomicCutOverWaitTimeout(tx); err != nil {
1007+
tableLocked <- err
1008+
return err
1009+
}
1010+
defer this.RevertAtomicCutOverWaitTimeout()
1011+
9821012
query = fmt.Sprintf(`lock /* gh-ost */ tables %s.%s write, %s.%s write`,
9831013
sql.EscapeName(this.migrationContext.DatabaseName),
9841014
sql.EscapeName(this.migrationContext.OriginalTableName),

go/mysql/connection.go

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@ type ConnectionConfig struct {
3131
Timeout float64
3232
TransactionIsolation string
3333
Charset string
34-
WaitTimeout float64
3534
}
3635

3736
func NewConnectionConfig() *ConnectionConfig {
@@ -52,7 +51,6 @@ func (this *ConnectionConfig) DuplicateCredentials(key InstanceKey) *ConnectionC
5251
Timeout: this.Timeout,
5352
TransactionIsolation: this.TransactionIsolation,
5453
Charset: this.Charset,
55-
WaitTimeout: this.WaitTimeout,
5654
}
5755
config.ImpliedKey = &config.Key
5856
return config
@@ -141,9 +139,6 @@ func (this *ConnectionConfig) GetDBUri(databaseName string) string {
141139
fmt.Sprintf("readTimeout=%fs", this.Timeout),
142140
fmt.Sprintf("writeTimeout=%fs", this.Timeout),
143141
}
144-
if this.WaitTimeout > 0 {
145-
connectionParams = append(connectionParams, fmt.Sprintf("wait_timeout=%fs", this.WaitTimeout))
146-
}
147142

148143
return fmt.Sprintf("%s:%s@tcp(%s:%d)/%s?%s", this.User, this.Password, hostname, this.Key.Port, databaseName, strings.Join(connectionParams, "&"))
149144
}

go/mysql/connection_test.go

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,6 @@ func TestGetDBUri(t *testing.T) {
8282
c.User = "gromit"
8383
c.Password = "penguin"
8484
c.Timeout = 1.2345
85-
c.WaitTimeout = 0 // should be ignored
8685
c.TransactionIsolation = transactionIsolation
8786
c.Charset = "utf8mb4,utf8,latin1"
8887

@@ -96,11 +95,10 @@ func TestGetDBUriWithTLSSetup(t *testing.T) {
9695
c.User = "gromit"
9796
c.Password = "penguin"
9897
c.Timeout = 1.2345
99-
c.WaitTimeout = 60
10098
c.tlsConfig = &tls.Config{}
10199
c.TransactionIsolation = transactionIsolation
102100
c.Charset = "utf8mb4_general_ci,utf8_general_ci,latin1"
103101

104102
uri := c.GetDBUri("test")
105-
test.S(t).ExpectEquals(uri, `gromit:penguin@tcp(myhost:3306)/test?autocommit=true&interpolateParams=true&charset=utf8mb4_general_ci,utf8_general_ci,latin1&tls=ghost&transaction_isolation="REPEATABLE-READ"&timeout=1.234500s&readTimeout=1.234500s&writeTimeout=1.234500s&wait_timeout=60.000000s`)
103+
test.S(t).ExpectEquals(uri, `gromit:penguin@tcp(myhost:3306)/test?autocommit=true&interpolateParams=true&charset=utf8mb4_general_ci,utf8_general_ci,latin1&tls=ghost&transaction_isolation="REPEATABLE-READ"&timeout=1.234500s&readTimeout=1.234500s&writeTimeout=1.234500s`)
106104
}

0 commit comments

Comments
 (0)