
Commit 67979fa

Merge pull request #92 from github/dynamic-max-lag

max-lag-millis is dynamically controllable

Authored by Shlomi Noach
2 parents 6824447 + 8e46b4c, commit 67979fa

File tree

5 files changed: +27 -6 lines changed


build.sh

Lines changed: 1 addition & 1 deletion
@@ -1,7 +1,7 @@
 #!/bin/bash
 #
 #
-RELEASE_VERSION="1.0.1"
+RELEASE_VERSION="1.0.2"

 buildpath=/tmp/gh-ost
 target=gh-ost

doc/interactive-commands.md

Lines changed: 1 addition & 0 deletions
@@ -18,6 +18,7 @@ Both interfaces may serve at the same time. Both respond to simple text command,
 - `status`: returns a status summary of migration progress and configuration
 replication lag on to determine throttling
 - `chunk-size=<newsize>`: modify the `chunk-size`; applies on next running copy-iteration
+- `max-lag-millis=<max-lag>`: modify the maximum replication lag threshold (milliseconds, minimum value is `1000`, i.e. 1 second)
 - `max-load=<max-load-thresholds>`: modify the `max-load` config; applies on next running copy-iteration
 The `max-load` format must be: `some_status=<numeric-threshold>[,some_status=<numeric-threshold>...]`. For example: `Threads_running=50,threads_connected=1000`, and you would then write/echo `max-load=Threads_running=50,threads_connected=1000` to the socket.
 - `critical-load=<load>`: change critical load setting (exceeding given thresholds causes panic and abort)
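
For orientation (not part of this commit): interactive commands such as the new `max-lag-millis` are plain text written to gh-ost's serve socket. A minimal Go client sketch is below; the socket path follows the default `/tmp/gh-ost.<schema>.<table>.sock` pattern visible in go/cmd/gh-ost/main.go, and `test.sample_table` is only a placeholder.

package main

import (
	"fmt"
	"io"
	"net"
	"os"
	"time"
)

func main() {
	// Hypothetical socket path; the default follows /tmp/gh-ost.<schema>.<table>.sock.
	sock := "/tmp/gh-ost.test.sample_table.sock"

	conn, err := net.Dial("unix", sock)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	defer conn.Close()

	// Raise the throttle threshold to 2.5 seconds of replication lag.
	fmt.Fprintln(conn, "max-lag-millis=2500")

	// Print whatever the migrator writes back (a status summary is expected);
	// the deadline keeps this sketch from blocking if the connection stays open.
	conn.SetReadDeadline(time.Now().Add(2 * time.Second))
	io.Copy(os.Stdout, conn)
}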

go/base/context.go

Lines changed: 8 additions & 1 deletion
@@ -148,7 +148,7 @@ func newMigrationContext() *MigrationContext {
 		ChunkSize: 1000,
 		InspectorConnectionConfig: mysql.NewConnectionConfig(),
 		ApplierConnectionConfig: mysql.NewConnectionConfig(),
-		MaxLagMillisecondsThrottleThreshold: 1000,
+		MaxLagMillisecondsThrottleThreshold: 1500,
 		CutOverLockTimeoutSeconds: 3,
 		maxLoad: NewLoadMap(),
 		criticalLoad: NewLoadMap(),
@@ -298,6 +298,13 @@ func (this *MigrationContext) TimeSincePointOfInterest() time.Duration {
 	return time.Now().Sub(this.pointOfInterestTime)
 }

+func (this *MigrationContext) SetMaxLagMillisecondsThrottleThreshold(maxLagMillisecondsThrottleThreshold int64) {
+	if maxLagMillisecondsThrottleThreshold < 1000 {
+		maxLagMillisecondsThrottleThreshold = 1000
+	}
+	atomic.StoreInt64(&this.MaxLagMillisecondsThrottleThreshold, maxLagMillisecondsThrottleThreshold)
+}
+
 func (this *MigrationContext) SetChunkSize(chunkSize int64) {
 	if chunkSize < 100 {
 		chunkSize = 100
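
The point of routing writes through this setter is that the threshold can now change while a migration is running: the interactive command stores the value with `sync/atomic` (with a 1000ms floor) and the throttle loop reads it the same way. Below is a minimal stand-alone sketch of that pattern, not gh-ost code; the names are placeholders.

package main

import (
	"fmt"
	"sync/atomic"
	"time"
)

// threshold stands in for MigrationContext.MaxLagMillisecondsThrottleThreshold.
var threshold int64 = 1500

// setThreshold mirrors the new setter: clamp to the 1000ms floor, then store
// atomically so concurrent readers never race with the write.
func setThreshold(v int64) {
	if v < 1000 {
		v = 1000
	}
	atomic.StoreInt64(&threshold, v)
}

func main() {
	done := make(chan struct{})

	// Reader: roughly what the throttle check does, once per iteration.
	go func() {
		defer close(done)
		for i := 0; i < 5; i++ {
			fmt.Println("max-lag-millis now:", atomic.LoadInt64(&threshold))
			time.Sleep(20 * time.Millisecond)
		}
	}()

	// Writer: what the interactive max-lag-millis command does mid-migration.
	time.Sleep(50 * time.Millisecond)
	setThreshold(500) // below the floor, becomes 1000

	<-done
}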

go/cmd/gh-ost/main.go

Lines changed: 2 additions & 1 deletion
@@ -74,7 +74,7 @@ func main() {
 	cutOverLockTimeoutSeconds := flag.Int64("cut-over-lock-timeout-seconds", 3, "Max number of seconds to hold locks on tables while attempting to cut-over (retry attempted when lock exceeds timeout)")
 	flag.Int64Var(&migrationContext.NiceRatio, "nice-ratio", 0, "force being 'nice', imply sleep time per chunk time. Example values: 0 is aggressive. 3: for every ms spend in a rowcopy chunk, spend 3ms sleeping immediately after")

-	flag.Int64Var(&migrationContext.MaxLagMillisecondsThrottleThreshold, "max-lag-millis", 1500, "replication lag at which to throttle operation")
+	maxLagMillis := flag.Int64("max-lag-millis", 1500, "replication lag at which to throttle operation")
 	flag.StringVar(&migrationContext.ReplictionLagQuery, "replication-lag-query", "", "Query that detects replication lag in seconds. Result can be a floating point (by default gh-ost issues SHOW SLAVE STATUS and reads Seconds_behind_master). If you're using pt-heartbeat, query would be something like: SELECT ROUND(UNIX_TIMESTAMP() - MAX(UNIX_TIMESTAMP(ts))) AS delay FROM my_schema.heartbeat")
 	throttleControlReplicas := flag.String("throttle-control-replicas", "", "List of replicas on which to check for lag; comma delimited. Example: myhost1.com:3306,myhost2.com,myhost3.com:3307")
 	flag.StringVar(&migrationContext.ThrottleQuery, "throttle-query", "", "when given, issued (every second) to check if operation should throttle. Expecting to return zero for no-throttle, >0 for throttle. Query is issued on the migrated server. Make sure this query is lightweight")
@@ -171,6 +171,7 @@ func main() {
 		migrationContext.ServeSocketFile = fmt.Sprintf("/tmp/gh-ost.%s.%s.sock", migrationContext.DatabaseName, migrationContext.OriginalTableName)
 	}
 	migrationContext.SetChunkSize(*chunkSize)
+	migrationContext.SetMaxLagMillisecondsThrottleThreshold(*maxLagMillis)
 	migrationContext.SetDefaultNumRetries(*defaultRetries)
 	migrationContext.ApplyCredentials()
 	if err := migrationContext.SetCutOverLockTimeoutSeconds(*cutOverLockTimeoutSeconds); err != nil {

go/logic/migrator.go

Lines changed: 15 additions & 3 deletions
@@ -148,16 +148,17 @@ func (this *Migrator) shouldThrottle() (result bool, reason string) {
 		}
 	}
 	// Replication lag throttle
+	maxLagMillisecondsThrottleThreshold := atomic.LoadInt64(&this.migrationContext.MaxLagMillisecondsThrottleThreshold)
 	lag := atomic.LoadInt64(&this.migrationContext.CurrentLag)
-	if time.Duration(lag) > time.Duration(this.migrationContext.MaxLagMillisecondsThrottleThreshold)*time.Millisecond {
+	if time.Duration(lag) > time.Duration(maxLagMillisecondsThrottleThreshold)*time.Millisecond {
 		return true, fmt.Sprintf("lag=%fs", time.Duration(lag).Seconds())
 	}
 	if (this.migrationContext.TestOnReplica || this.migrationContext.MigrateOnReplica) && (atomic.LoadInt64(&this.allEventsUpToLockProcessedInjectedFlag) == 0) {
 		replicationLag, err := mysql.GetMaxReplicationLag(this.migrationContext.InspectorConnectionConfig, this.migrationContext.ThrottleControlReplicaKeys, this.migrationContext.ReplictionLagQuery)
 		if err != nil {
 			return true, err.Error()
 		}
-		if replicationLag > time.Duration(this.migrationContext.MaxLagMillisecondsThrottleThreshold)*time.Millisecond {
+		if replicationLag > time.Duration(maxLagMillisecondsThrottleThreshold)*time.Millisecond {
 			return true, fmt.Sprintf("replica-lag=%fs", replicationLag.Seconds())
 		}
 	}
@@ -792,6 +793,7 @@ status # Print a status message
 chunk-size=<newsize> # Set a new chunk-size
 nice-ratio=<ratio> # Set a new nice-ratio, integer (0 is agrressive)
 critical-load=<load> # Set a new set of max-load thresholds
+max-lag-millis=<max-lag> # Set a new replication lag threshold
 max-load=<load> # Set a new set of max-load thresholds
 throttle-query=<query> # Set a new throttle-query
 throttle-control-replicas=<replicas> #
@@ -814,6 +816,16 @@ help # This message
 				this.printStatus(ForcePrintStatusAndHint, writer)
 			}
 		}
+	case "max-lag-millis":
+		{
+			if maxLagMillis, err := strconv.Atoi(arg); err != nil {
+				fmt.Fprintf(writer, "%s\n", err.Error())
+				return log.Errore(err)
+			} else {
+				this.migrationContext.SetMaxLagMillisecondsThrottleThreshold(int64(maxLagMillis))
+				this.printStatus(ForcePrintStatusAndHint, writer)
+			}
+		}
 	case "nice-ratio":
 		{
 			if niceRatio, err := strconv.Atoi(arg); err != nil {
@@ -974,7 +986,7 @@ func (this *Migrator) printMigrationStatusHint(writers ...io.Writer) {
 	))
 	maxLoad := this.migrationContext.GetMaxLoad()
 	criticalLoad := this.migrationContext.GetCriticalLoad()
-	fmt.Fprintln(w, fmt.Sprintf("# chunk-size: %+v; max lag: %+vms; max-load: %s; critical-load: %s; nice-ratio: %d",
+	fmt.Fprintln(w, fmt.Sprintf("# chunk-size: %+v; max-lag-millis: %+vms; max-load: %s; critical-load: %s; nice-ratio: %d",
 		atomic.LoadInt64(&this.migrationContext.ChunkSize),
 		atomic.LoadInt64(&this.migrationContext.MaxLagMillisecondsThrottleThreshold),
 		maxLoad.String(),
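
As an illustration of the comparison in shouldThrottle() above (assumed equivalent logic, not code from this commit): the stored threshold is an integer number of milliseconds, so it is scaled to a `time.Duration` before being compared against the observed lag.

package main

import (
	"fmt"
	"time"
)

// shouldThrottleForLag mirrors the comparison in shouldThrottle():
// maxLagMillis is the (atomically loaded) threshold in milliseconds.
func shouldThrottleForLag(lag time.Duration, maxLagMillis int64) bool {
	return lag > time.Duration(maxLagMillis)*time.Millisecond
}

func main() {
	fmt.Println(shouldThrottleForLag(1200*time.Millisecond, 1500)) // false: under threshold
	fmt.Println(shouldThrottleForLag(3*time.Second, 1500))         // true: throttle
}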
