Skip to content

Commit 0abb2ef

Browse files
author
Guillaume Lefranc
committed
Post-switchover hook must happen before slave switch
1 parent 367cc18 commit 0abb2ef

File tree

1 file changed

+48
-42
lines changed

1 file changed

+48
-42
lines changed

mariadb-repmgr/repmgr.go

Lines changed: 48 additions & 42 deletions
Original file line number | Diff line number | Diff line change
@@ -17,27 +17,27 @@ import (
1717
"time"
1818
)
1919

20-
const repmgrVersion string = "0.4.0"
20+
const repmgrVersion string = "0.4.1"
2121

2222
var (
23-
hostList []string
24-
hhdls []*ServerMonitor
25-
slave []*ServerMonitor
26-
master *ServerMonitor
27-
exit bool
28-
vy int
29-
dbUser string
30-
dbPass string
31-
rplUser string
32-
rplPass string
23+
hostList []string
24+
hhdls []*ServerMonitor
25+
slave []*ServerMonitor
26+
master *ServerMonitor
27+
exit bool
28+
vy int
29+
dbUser string
30+
dbPass string
31+
rplUser string
32+
rplPass string
3333
)
3434

3535
var (
36-
version = flag.Bool("version", false, "Return version")
37-
user = flag.String("user", "", "User for MariaDB login, specified in the [user]:[password] format")
38-
hosts = flag.String("hosts", "", "List of MariaDB hosts IP and port (optional), specified in the host:[port] format and separated by commas")
39-
socket = flag.String("socket", "/var/run/mysqld/mysqld.sock", "Path of MariaDB unix socket")
40-
rpluser = flag.String("rpluser", "", "Replication user in the [user]:[password] format")
36+
version = flag.Bool("version", false, "Return version")
37+
user = flag.String("user", "", "User for MariaDB login, specified in the [user]:[password] format")
38+
hosts = flag.String("hosts", "", "List of MariaDB hosts IP and port (optional), specified in the host:[port] format and separated by commas")
39+
socket = flag.String("socket", "/var/run/mysqld/mysqld.sock", "Path of MariaDB unix socket")
40+
rpluser = flag.String("rpluser", "", "Replication user in the [user]:[password] format")
4141
// command-specific options
4242
interactive = flag.Bool("interactive", true, "Runs the MariaDB monitor in interactive mode")
4343
verbose = flag.Bool("verbose", false, "Print detailed execution info")
@@ -90,7 +90,7 @@ func main() {
9090
log.Fatal("ERROR: No replication user/pair specified.")
9191
}
9292
rplUser, rplPass = splitPair(*rpluser)
93-
93+
9494
// Create a connection to each host.
9595
hostCount := len(hostList)
9696
hhdls = make([]*ServerMonitor, hostCount)
@@ -108,25 +108,25 @@ func main() {
108108
continue
109109
}
110110
log.Fatalln("ERROR: Error when establishing initial connection to host", err)
111-
}
111+
}
112112
defer hhdls[k].Conn.Close()
113113
if *verbose {
114114
log.Printf("DEBUG: Checking if server %s is slave", hhdls[k].URL)
115115
}
116-
ss, err := dbhelper.GetSlaveStatus(hhdls[k].Conn)
117-
if ss.Master_Host != "" {
116+
ss, err := dbhelper.GetSlaveStatus(hhdls[k].Conn)
117+
if ss.Master_Host != "" {
118118
log.Printf("INFO : Server %s is configured as a slave", hhdls[k].URL)
119119
slave = append(slave, hhdls[k])
120120
slaveCount++
121121
} else {
122122
log.Printf("INFO : Server %s is not a slave. Assuming master status.", hhdls[k].URL)
123-
master = hhdls[k]
123+
master = hhdls[k]
124124
}
125125
}
126126
if (hostCount - slaveCount) == 0 {
127127
log.Fatalln("ERROR: Multi-master topologies are not yet supported.")
128128
}
129-
129+
130130
for _, sl := range slave {
131131
if *verbose {
132132
log.Printf("DEBUG: Checking if server %s is a slave of server %s", sl.Host, master.Host)
@@ -148,7 +148,7 @@ func main() {
148148
if ret() == false && *prefMaster != "" {
149149
log.Fatal("ERROR: Preferred master is not included in the hosts option")
150150
}
151-
151+
152152
// Do failover or switchover interactively, else start the interactive monitor.
153153
if *state == "dead" {
154154
master.failover()
@@ -225,7 +225,7 @@ func main() {
225225
time.Sleep(5 * time.Second)
226226
exit = false
227227
goto MainLoop
228-
}
228+
}
229229
}
230230
}
231231

@@ -294,6 +294,7 @@ func (sm *ServerMonitor) healthCheck() string {
294294
/* Triggers a master switchover. Returns the new master's URL */
295295
func (master *ServerMonitor) switchover() (string, int) {
296296
log.Println("INFO : Starting switchover")
297+
// Phase 1: Cleanup and election
297298
log.Printf("INFO : Flushing tables on %s (master)", master.URL)
298299
err := dbhelper.FlushTablesNoLog(master.Conn)
299300
if err != nil {
@@ -319,8 +320,9 @@ func (master *ServerMonitor) switchover() (string, int) {
319320
if err != nil {
320321
log.Println("ERROR:", err)
321322
}
322-
log.Println("INFO : Post-failover script complete:", string(out))
323+
log.Println("INFO : Pre-failover script complete:", string(out))
323324
}
325+
// Phase 2: Reject updates and sync slaves
324326
master.freeze()
325327
log.Printf("INFO : Rejecting updates on %s (old master)", master.URL)
326328
err = dbhelper.FlushTablesWithReadLock(master.Conn)
@@ -339,11 +341,31 @@ func (master *ServerMonitor) switchover() (string, int) {
339341
log.Println("DEBUG: MASTER_POS_WAIT executed.")
340342
newMaster.log()
341343
}
344+
// Phase 3: Prepare new master
342345
log.Println("INFO: Stopping slave thread on new master")
343346
err = dbhelper.StopSlave(newMaster.Conn)
344347
if err != nil {
345348
log.Println("WARN : Stopping slave failed on new master")
346349
}
350+
// Call post-failover script before unlocking the old master.
351+
if *postScript != "" {
352+
log.Printf("INFO : Calling post-failover script")
353+
out, err := exec.Command(*postScript, master.Host, newMaster.Host).CombinedOutput()
354+
if err != nil {
355+
log.Println("ERROR:", err)
356+
}
357+
log.Println("INFO : Post-failover script complete", string(out))
358+
}
359+
log.Println("INFO : Resetting slave on new master and set read/write mode on")
360+
err = dbhelper.ResetSlave(newMaster.Conn, true)
361+
if err != nil {
362+
log.Println("WARN : Reset slave failed on new master")
363+
}
364+
// Phase 4: Set new master read-write, then demote old master to slave
365+
err = dbhelper.SetReadOnly(newMaster.Conn, false)
366+
if err != nil {
367+
log.Println("ERROR: Could not set new master as read-write")
368+
}
347369
cm := "CHANGE MASTER TO master_host='" + newMaster.IP + "', master_port=" + newMaster.Port + ", master_user='" + rplUser + "', master_password='" + rplPass + "'"
348370
log.Println("INFO : Switching old master as a slave")
349371
err = dbhelper.UnlockTables(master.Conn)
@@ -364,15 +386,7 @@ func (master *ServerMonitor) switchover() (string, int) {
364386
log.Printf("ERROR: Could not set old master as read-only, %s", err)
365387
}
366388
}
367-
log.Println("INFO : Resetting slave on new master and set read/write mode on")
368-
err = dbhelper.ResetSlave(newMaster.Conn, true)
369-
if err != nil {
370-
log.Println("WARN : Reset slave failed on new master")
371-
}
372-
err = dbhelper.SetReadOnly(newMaster.Conn, false)
373-
if err != nil {
374-
log.Println("ERROR: Could not set new master as read-write")
375-
}
389+
// Phase 5: Switch slaves to new master
376390
log.Println("INFO : Switching other slaves to the new master")
377391
var oldMasterKey int
378392
for k, sl := range slave {
@@ -409,14 +423,6 @@ func (master *ServerMonitor) switchover() (string, int) {
409423
}
410424
}
411425
}
412-
if *postScript != "" {
413-
log.Printf("INFO : Calling post-failover script")
414-
out, err := exec.Command(*postScript, master.Host, newMaster.Host).CombinedOutput()
415-
if err != nil {
416-
log.Println("ERROR:", err)
417-
}
418-
log.Println("INFO : Post-failover script complete", string(out))
419-
}
420426
log.Println("INFO : Switchover complete")
421427
return newMaster.URL, oldMasterKey
422428
}

0 commit comments

Comments
 (0)