Skip to content

Commit 3e72f1b

Browse files
ajm188timvaillancourtdm-2
committed
Cancel any row count queries before attempting to cut over (#846)
* Cancel any row count queries before attempting to cut over Closes #830. Switches from using `QueryRow` to `QueryRowContext`, and stores a context.CancelFunc in the migration context, which is called to halt any running row count query before beginning the cut over. * Make it threadsafe * Kill the count query on the database side as well * Explicitly grab a connection to run the count, store its connection id * When the query context is canceled, run a `KILL QUERY ?` on that connection id * Rewrite these to use the threadsafe functions, stop exporting the cancel func * Update logger * Update logger Co-authored-by: Tim Vaillancourt <[email protected]> Co-authored-by: Tim Vaillancourt <[email protected]> Co-authored-by: dm-2 <[email protected]>
1 parent 614b379 commit 3e72f1b

File tree

3 files changed

+80
-7
lines changed

3 files changed

+80
-7
lines changed

go/base/context.go

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,8 @@ type MigrationContext struct {
8383
AlterStatement string
8484
AlterStatementOptions string // anything following the 'ALTER TABLE [schema.]table' from AlterStatement
8585

86+
countMutex sync.Mutex
87+
countTableRowsCancelFunc func()
8688
CountTableRows bool
8789
ConcurrentCountTableRows bool
8890
AllowedRunningOnMaster bool
@@ -429,6 +431,36 @@ func (this *MigrationContext) IsTransactionalTable() bool {
429431
return false
430432
}
431433

434+
// SetCountTableRowsCancelFunc sets the cancel function for the CountTableRows query context
435+
func (this *MigrationContext) SetCountTableRowsCancelFunc(f func()) {
436+
this.countMutex.Lock()
437+
defer this.countMutex.Unlock()
438+
439+
this.countTableRowsCancelFunc = f
440+
}
441+
442+
// IsCountingTableRows returns true if the migration has a table count query running
443+
func (this *MigrationContext) IsCountingTableRows() bool {
444+
this.countMutex.Lock()
445+
defer this.countMutex.Unlock()
446+
447+
return this.countTableRowsCancelFunc != nil
448+
}
449+
450+
// CancelTableRowsCount cancels the CountTableRows query context. It is safe to
451+
// call function even when IsCountingTableRows is false.
452+
func (this *MigrationContext) CancelTableRowsCount() {
453+
this.countMutex.Lock()
454+
defer this.countMutex.Unlock()
455+
456+
if this.countTableRowsCancelFunc == nil {
457+
return
458+
}
459+
460+
this.countTableRowsCancelFunc()
461+
this.countTableRowsCancelFunc = nil
462+
}
463+
432464
// ElapsedTime returns time since very beginning of the process
433465
func (this *MigrationContext) ElapsedTime() time.Duration {
434466
return time.Since(this.StartTime)

go/logic/inspect.go

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
package logic
77

88
import (
9+
"context"
910
gosql "database/sql"
1011
"fmt"
1112
"reflect"
@@ -533,18 +534,48 @@ func (this *Inspector) estimateTableRowsViaExplain() error {
533534
return nil
534535
}
535536

537+
// Kill asks the server to abort the statement currently running on the
// connection identified by connectionID, by executing `KILL QUERY <id>` on db.
//
// The id is formatted into the statement text with fmt.Sprintf. `%s` is a fmt
// verb, not a database/sql placeholder, so handing connectionID to Exec as a
// bind argument would send the literal text `KILL QUERY %s` to the server and
// the kill would never take effect. Because the id becomes part of the SQL
// text, callers must pass only a server-provided connection id (for example
// the result of CONNECTION_ID()), never untrusted input.
//
// It returns the error reported by db.Exec, if any.
//   - @amason: this should go somewhere _other_ than `logic`, but I couldn't decide
//     between `base`, `sql`, or `mysql`.
func Kill(db *gosql.DB, connectionID string) error {
	_, err := db.Exec(fmt.Sprintf(`KILL QUERY %s`, connectionID))
	return err
}
544+
536545
// CountTableRows counts exact number of rows on the original table
537-
func (this *Inspector) CountTableRows() error {
546+
func (this *Inspector) CountTableRows(ctx context.Context) error {
538547
atomic.StoreInt64(&this.migrationContext.CountingRowsFlag, 1)
539548
defer atomic.StoreInt64(&this.migrationContext.CountingRowsFlag, 0)
540549

541550
this.migrationContext.Log.Infof("As instructed, I'm issuing a SELECT COUNT(*) on the table. This may take a while")
542551

552+
conn, err := this.db.Conn(ctx)
553+
if err != nil {
554+
return err
555+
}
556+
defer conn.Close()
557+
558+
var connectionID string
559+
if err := conn.QueryRowContext(ctx, `SELECT /* gh-ost */ CONNECTION_ID()`).Scan(&connectionID); err != nil {
560+
return err
561+
}
562+
543563
query := fmt.Sprintf(`select /* gh-ost */ count(*) as count_rows from %s.%s`, sql.EscapeName(this.migrationContext.DatabaseName), sql.EscapeName(this.migrationContext.OriginalTableName))
544564
var rowsEstimate int64
545-
if err := this.db.QueryRow(query).Scan(&rowsEstimate); err != nil {
546-
return err
565+
if err := conn.QueryRowContext(ctx, query).Scan(&rowsEstimate); err != nil {
566+
switch err {
567+
case context.Canceled, context.DeadlineExceeded:
568+
this.migrationContext.Log.Infof("exact row count cancelled (%s), likely because I'm about to cut over. I'm going to kill that query.", ctx.Err())
569+
return Kill(this.db, connectionID)
570+
default:
571+
return err
572+
}
547573
}
574+
575+
// row count query finished. nil out the cancel func, so the main migration thread
576+
// doesn't bother calling it after row copy is done.
577+
this.migrationContext.SetCountTableRowsCancelFunc(nil)
578+
548579
atomic.StoreInt64(&this.migrationContext.RowsEstimate, rowsEstimate)
549580
this.migrationContext.UsedRowsEstimateMethod = base.CountRowsEstimate
550581

go/logic/migrator.go

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
package logic
77

88
import (
9+
"context"
910
"fmt"
1011
"io"
1112
"math"
@@ -295,8 +296,8 @@ func (this *Migrator) countTableRows() (err error) {
295296
return nil
296297
}
297298

298-
countRowsFunc := func() error {
299-
if err := this.inspector.CountTableRows(); err != nil {
299+
countRowsFunc := func(ctx context.Context) error {
300+
if err := this.inspector.CountTableRows(ctx); err != nil {
300301
return err
301302
}
302303
if err := this.hooksExecutor.onRowCountComplete(); err != nil {
@@ -306,12 +307,17 @@ func (this *Migrator) countTableRows() (err error) {
306307
}
307308

308309
if this.migrationContext.ConcurrentCountTableRows {
310+
// store a cancel func so we can stop this query before a cut over
311+
rowCountContext, rowCountCancel := context.WithCancel(context.Background())
312+
this.migrationContext.SetCountTableRowsCancelFunc(rowCountCancel)
313+
309314
this.migrationContext.Log.Infof("As instructed, counting rows in the background; meanwhile I will use an estimated count, and will update it later on")
310-
go countRowsFunc()
315+
go countRowsFunc(rowCountContext)
316+
311317
// and we ignore errors, because this turns to be a background job
312318
return nil
313319
}
314-
return countRowsFunc()
320+
return countRowsFunc(context.Background())
315321
}
316322

317323
func (this *Migrator) createFlagFiles() (err error) {
@@ -415,6 +421,10 @@ func (this *Migrator) Migrate() (err error) {
415421
}
416422
this.printStatus(ForcePrintStatusRule)
417423

424+
if this.migrationContext.IsCountingTableRows() {
425+
this.migrationContext.Log.Info("stopping query for exact row count, because that can accidentally lock out the cut over")
426+
this.migrationContext.CancelTableRowsCount()
427+
}
418428
if err := this.hooksExecutor.onBeforeCutOver(); err != nil {
419429
return err
420430
}

0 commit comments

Comments
 (0)