@@ -69,6 +69,30 @@ func (this InstancesByCountReplicas) Less(i, j int) bool {
6969 return len (this [i ].Replicas ) < len (this [j ].Replicas )
7070}
7171
72+ // InstancesByDc is a sortable type for Instance
73+ // 1. Instances are sorted by DC
74+ // 2. Within DC group instances are sorted by replicas count
75+ // 3. Within ReplicasCount group insances are:
76+ // a) not sorted if ReplicasCount > 0
77+ // b) sorted by replication lag if ReplicasCount == 0
78+ //
79+ // DC1 < DC2
80+ // if DC1 == DC2 => len(Replicas1) < len (Replicas2)
81+ // if Replicas.cnt == 0 => replicationLag1 < replicatonLag2
82+ type InstancesByDc [](* Instance )
83+
84+ func (this InstancesByDc ) Len () int { return len (this ) }
85+ func (this InstancesByDc ) Swap (i , j int ) { this [i ], this [j ] = this [j ], this [i ] }
86+ func (this InstancesByDc ) Less (i , j int ) bool {
87+ if this [i ].DataCenter == this [j ].DataCenter {
88+ if len (this [i ].Replicas ) == 0 && len (this [j ].Replicas ) == 0 {
89+ return this [i ].ReplicationLagSeconds .Int64 < this [j ].ReplicationLagSeconds .Int64
90+ }
91+ return len (this [i ].Replicas ) < len (this [j ].Replicas )
92+ }
93+ return (this [i ].DataCenter < this [j ].DataCenter )
94+ }
95+
7296// Constant strings for Group Replication information
7397// See https://dev.mysql.com/doc/refman/8.0/en/replication-group-members-table.html for additional information.
7498const (
@@ -1718,62 +1742,91 @@ func filterOSCInstances(instances [](*Instance)) [](*Instance) {
17181742 return result
17191743}
17201744
1745+ // Get two busiest instances per DC
1746+ func getTwoBusiestPerDC (all [](* Instance )) [](* Instance ) {
1747+ result := [](* Instance ){}
1748+
1749+ // sort by DC and replicas count
1750+ sort .Sort (sort .Reverse (InstancesByDc (all )))
1751+
1752+ currentDCInstances := 0
1753+ var currentDC * string = nil
1754+
1755+ for _ , im := range all {
1756+ if currentDC == nil || * currentDC != im .DataCenter {
1757+ currentDCInstances = 0
1758+ currentDC = & im .DataCenter
1759+ }
1760+ if currentDCInstances > 1 {
1761+ continue
1762+ }
1763+ currentDCInstances ++
1764+ result = append (result , im )
1765+ }
1766+ return result
1767+ }
1768+
17211769// GetClusterOSCReplicas returns a heuristic list of replicas which are fit as control replicas for an OSC operation.
17221770// These would be intermediate masters
17231771func GetClusterOSCReplicas (clusterName string ) ([](* Instance ), error ) {
1724- intermediateMasters := [](* Instance ){}
1725- result := [](* Instance ){}
1726- var err error
1727- if strings .Index (clusterName , "'" ) >= 0 {
1772+ if strings .Contains (clusterName , "'" ) {
17281773 return [](* Instance ){}, log .Errorf ("Invalid cluster name: %s" , clusterName )
17291774 }
1775+
1776+ result := [](* Instance ){}
1777+ // Stage 1: 1st tier servers.
1778+ // We get up to two 1st tier servers from each DC in the following order:
1779+ // 1. Busiest IMs
1780+ // 2. Most lagging leaf nodes
1781+ // Examples:
1782+ // 1. If there are N > 1 IMs in the DC, we will use 2 busiest ones
1783+ // (having the highest number of replicas)
1784+ // 2. If there is only 1 IM in the DC, but there are some leaf nodes,
1785+ // we will use IM + most lagging leaf node
1786+ // 3. If there are no IMs in the DC, but there are leaf nodes, we will use
1787+ // up to two most lagging leaf nodes
1788+ //
1789+ // So this stage will collect at most 2 servers per DC
17301790 {
1731- // Pick up to two busiest IMs
17321791 condition := `
17331792 replication_depth = 1
1734- and num_slave_hosts > 0
17351793 and cluster_name = ?
17361794 `
1737- intermediateMasters , err = readInstancesByCondition (condition , sqlutils .Args (clusterName ), "" )
1795+ firstTierServers , err : = readInstancesByCondition (condition , sqlutils .Args (clusterName ), "" )
17381796 if err != nil {
17391797 return result , err
17401798 }
1741- sort .Sort (sort .Reverse (InstancesByCountReplicas (intermediateMasters )))
1742- intermediateMasters = filterOSCInstances (intermediateMasters )
1743- intermediateMasters = intermediateMasters [0 :math .MinInt (2 , len (intermediateMasters ))]
1744- result = append (result , intermediateMasters ... )
1799+
1800+ firstTierServers = filterOSCInstances (firstTierServers )
1801+ result = append (result , getTwoBusiestPerDC (firstTierServers )... )
17451802 }
1803+
1804+ // Stage 2: 2nd tier servers
1805+ // Examine all selected 1st tier servers, and if they are IMs, get at most
1806+ // two of their busiest replicas (2nd tier servers).
1807+ // So this stage will collect at most 2 replicas per IM. If we collected 2 IMs
1808+ // per DC in the 1st stage, here we will get 4 servers per DC
17461809 {
1747- // Get 2 replicas of found IMs, if possible
1748- if len (intermediateMasters ) == 1 {
1749- // Pick 2 replicas for this IM
1750- replicas , err := ReadReplicaInstances (& (intermediateMasters [0 ].Key ))
1810+ // Get at most 2 replicas of found IMs
1811+ for _ , im := range result {
1812+ if len (im .Replicas ) == 0 {
1813+ // this is 1st tier leaf
1814+ continue
1815+ }
1816+ replicas , err := ReadReplicaInstances (& im .Key )
17511817 if err != nil {
17521818 return result , err
17531819 }
17541820 sort .Sort (sort .Reverse (InstancesByCountReplicas (replicas )))
17551821 replicas = filterOSCInstances (replicas )
17561822 replicas = replicas [0 :math .MinInt (2 , len (replicas ))]
17571823 result = append (result , replicas ... )
1758-
1759- }
1760- if len (intermediateMasters ) == 2 {
1761- // Pick one replica from each IM (should be possible)
1762- for _ , im := range intermediateMasters {
1763- replicas , err := ReadReplicaInstances (& im .Key )
1764- if err != nil {
1765- return result , err
1766- }
1767- sort .Sort (sort .Reverse (InstancesByCountReplicas (replicas )))
1768- replicas = filterOSCInstances (replicas )
1769- if len (replicas ) > 0 {
1770- result = append (result , replicas [0 ])
1771- }
1772- }
17731824 }
17741825 }
1826+
1827+ // Stage 3: 3rd tier servers
1828+ // Get 2 busiest 3rd tier replicas per DC
17751829 {
1776- // Get 2 3rd tier replicas, if possible
17771830 condition := `
17781831 replication_depth = 3
17791832 and cluster_name = ?
@@ -1782,25 +1835,8 @@ func GetClusterOSCReplicas(clusterName string) ([](*Instance), error) {
17821835 if err != nil {
17831836 return result , err
17841837 }
1785- sort .Sort (sort .Reverse (InstancesByCountReplicas (replicas )))
1786- replicas = filterOSCInstances (replicas )
1787- replicas = replicas [0 :math .MinInt (2 , len (replicas ))]
1788- result = append (result , replicas ... )
1789- }
1790- {
1791- // Get 2 1st tier leaf replicas, if possible
1792- condition := `
1793- replication_depth = 1
1794- and num_slave_hosts = 0
1795- and cluster_name = ?
1796- `
1797- replicas , err := readInstancesByCondition (condition , sqlutils .Args (clusterName ), "" )
1798- if err != nil {
1799- return result , err
1800- }
18011838 replicas = filterOSCInstances (replicas )
1802- replicas = replicas [0 :math .MinInt (2 , len (replicas ))]
1803- result = append (result , replicas ... )
1839+ result = append (result , getTwoBusiestPerDC (replicas )... )
18041840 }
18051841
18061842 return result , nil
0 commit comments