Skip to content

Commit 15f4ddf

Browse files
author
Shlomi Noach
committed
support for --critical-load-interval-millis
this optional flag gives --critical-load a second chance. When configured to positive value, meeting with critical-load spawns a timer. When this timer expires a second check for critical-load is made; if still met, gh-ost bails out. By default the interval is zero, in which case gh-ost bails out immediately on meeting critical load.
1 parent c749edc commit 15f4ddf

File tree

3 files changed

+33
-9
lines changed

3 files changed

+33
-9
lines changed

go/base/context.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ type MigrationContext struct {
9090
ThrottleCommandedByUser int64
9191
maxLoad LoadMap
9292
criticalLoad LoadMap
93+
CriticalLoadIntervalMilliseconds int64
9394
PostponeCutOverFlagFile string
9495
CutOverLockTimeoutSeconds int64
9596
ForceNamedCutOverCommand bool

go/cmd/gh-ost/main.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ func main() {
9999

100100
maxLoad := flag.String("max-load", "", "Comma delimited status-name=threshold. e.g: 'Threads_running=100,Threads_connected=500'. When status exceeds threshold, app throttles writes")
101101
criticalLoad := flag.String("critical-load", "", "Comma delimited status-name=threshold, same format as `--max-load`. When status exceeds threshold, app panics and quits")
102+
flag.Int64Var(&migrationContext.CriticalLoadIntervalMilliseconds, "critical-load-interval-millis", 0, "When 0, migration bails out upon meeting critical-load immediately. When non-zero, a second check is done after given interval, and migration only bails out if 2nd check still meets critical load")
102103
quiet := flag.Bool("quiet", false, "quiet")
103104
verbose := flag.Bool("verbose", false, "verbose")
104105
debug := flag.Bool("debug", false, "debug mode (very verbose)")

go/logic/throttler.go

Lines changed: 31 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,20 @@ func (this *Throttler) collectControlReplicasLag() {
130130
}
131131
}
132132

133+
func (this *Throttler) criticalLoadIsMet() (met bool, variableName string, value int64, threshold int64, err error) {
134+
criticalLoad := this.migrationContext.GetCriticalLoad()
135+
for variableName, threshold = range criticalLoad {
136+
value, err = this.applier.ShowStatusVariable(variableName)
137+
if err != nil {
138+
return false, variableName, value, threshold, err
139+
}
140+
if value >= threshold {
141+
return true, variableName, value, threshold, nil
142+
}
143+
}
144+
return false, variableName, value, threshold, nil
145+
}
146+
133147
// collectGeneralThrottleMetrics reads the once-per-sec metrics, and stores them onto this.migrationContext
134148
func (this *Throttler) collectGeneralThrottleMetrics() error {
135149

@@ -144,15 +158,23 @@ func (this *Throttler) collectGeneralThrottleMetrics() error {
144158
this.migrationContext.PanicAbort <- fmt.Errorf("Found panic-file %s. Aborting without cleanup", this.migrationContext.PanicFlagFile)
145159
}
146160
}
147-
criticalLoad := this.migrationContext.GetCriticalLoad()
148-
for variableName, threshold := range criticalLoad {
149-
value, err := this.applier.ShowStatusVariable(variableName)
150-
if err != nil {
151-
return setThrottle(true, fmt.Sprintf("%s %s", variableName, err))
152-
}
153-
if value >= threshold {
154-
this.migrationContext.PanicAbort <- fmt.Errorf("critical-load met: %s=%d, >=%d", variableName, value, threshold)
155-
}
161+
162+
criticalLoadMet, variableName, value, threshold, err := this.criticalLoadIsMet()
163+
if err != nil {
164+
return setThrottle(true, fmt.Sprintf("%s %s", variableName, err))
165+
}
166+
if criticalLoadMet && this.migrationContext.CriticalLoadIntervalMilliseconds == 0 {
167+
this.migrationContext.PanicAbort <- fmt.Errorf("critical-load met: %s=%d, >=%d", variableName, value, threshold)
168+
}
169+
if criticalLoadMet && this.migrationContext.CriticalLoadIntervalMilliseconds > 0 {
170+
log.Errorf("critical-load met once: %s=%d, >=%d. Will check again in %d millis", variableName, value, threshold, this.migrationContext.CriticalLoadIntervalMilliseconds)
171+
go func() {
172+
timer := time.NewTimer(time.Millisecond * time.Duration(this.migrationContext.CriticalLoadIntervalMilliseconds))
173+
<-timer.C
174+
if criticalLoadMetAgain, variableName, value, threshold, _ := this.criticalLoadIsMet(); criticalLoadMetAgain {
175+
this.migrationContext.PanicAbort <- fmt.Errorf("critical-load met again after %d millis: %s=%d, >=%d", this.migrationContext.CriticalLoadIntervalMilliseconds, variableName, value, threshold)
176+
}
177+
}()
156178
}
157179

158180
// Back to throttle considerations

0 commit comments

Comments
 (0)