Allow for bad replicas to be scaled down

jkatz · Jonathan S. Katz · commit 83dcde3197d4 · 2020-09-25T13:01:58.000-04:00
Previously, the scaledown command would only list out healthy
replicas, not replicas that were in a bad state. However, these
may be the exact replicas that one would want to scale down,
given they are unhealthy.

Issue: [ch9253]
diff --git a/apiserver/clusterservice/scaleimpl.go b/apiserver/clusterservice/scaleimpl.go
@@ -217,7 +217,7 @@ func ScaleQuery(name, ns string) msgs.ScaleQueryResponse {
 		ClusterName: name,
 	}
 
-	replicationStatusResponse, err := util.ReplicationStatus(replicationStatusRequest, false)
+	replicationStatusResponse, err := util.ReplicationStatus(replicationStatusRequest, false, true)
 
 	// if an error is return, log the message, and return the response
 	if err != nil {
@@ -292,10 +292,10 @@ func ScaleDown(deleteData bool, clusterName, replicaName, ns string) msgs.ScaleD
 		return response
 	}
 
-	// selector in the format "pg-cluster=<cluster-name>,pg-ha-scope=<cluster-name>"
-	// which will grab the primary and any/all replicas
-	selector := fmt.Sprintf("%s=%s,%s=%s", config.LABEL_PG_CLUSTER, clusterName,
-		config.LABEL_PGHA_ROLE, config.LABEL_PGHA_ROLE_REPLICA)
+	// selector in the format "pg-cluster=<cluster-name>,pgo-pg-database,role!=config.LABEL_PGHA_ROLE_PRIMARY"
+	// which will grab all the replicas
+	selector := fmt.Sprintf("%s=%s,%s,%s!=%s", config.LABEL_PG_CLUSTER, clusterName,
+		config.LABEL_PG_DATABASE, config.LABEL_PGHA_ROLE, config.LABEL_PGHA_ROLE_PRIMARY)
 	replicaList, err := kubeapi.GetPods(apiserver.Clientset, selector, ns)
 	if err != nil {
 		response.Status.Code = msgs.Error
diff --git a/apiserver/failoverservice/failoverimpl.go b/apiserver/failoverservice/failoverimpl.go
@@ -137,7 +137,7 @@ func QueryFailover(name, ns string) msgs.QueryFailoverResponse {
 		ClusterName: name,
 	}
 
-	replicationStatusResponse, err := util.ReplicationStatus(replicationStatusRequest, false)
+	replicationStatusResponse, err := util.ReplicationStatus(replicationStatusRequest, false, false)
 
 	// if an error is return, log the message, and return the response
 	if err != nil {
diff --git a/apiserver/restartservice/restartimpl.go b/apiserver/restartservice/restartimpl.go
@@ -123,7 +123,9 @@ func QueryRestart(clusterName, namespace string) msgs.QueryRestartResponse {
 		ClusterName: clusterName,
 	}
 
-	replicationStatusResponse, err := util.ReplicationStatus(replicationStatusRequest, true)
+	// get a list of all the Pods...note that we can include "busted" pods, as
+	// by including the primary, we're getting all of the database pods anyway.
+	replicationStatusResponse, err := util.ReplicationStatus(replicationStatusRequest, true, true)
 	if err != nil {
 		log.Error(err.Error())
 		resp.Status.Code = msgs.Error
diff --git a/pgo/cmd/restart.go b/pgo/cmd/restart.go
@@ -32,13 +32,13 @@ var restartCmd = &cobra.Command{
 	Use:   "restart",
 	Short: "Restarts the PostgrSQL database within a PostgreSQL cluster",
 	Long: `Restarts one or more PostgreSQL databases within a PostgreSQL cluster.
-	
+
 	For example, to restart the primary and all replicas:
 	pgo restart mycluster
 
 	Or target a specific instance within the cluster:
 	pgo restart mycluster --target=mycluster-abcd
-	
+
 	And use the 'query' flag obtain a list of all instances within the cluster:
 	pgo restart mycluster --query`,
 	Run: func(cmd *cobra.Command, args []string) {
@@ -169,9 +169,15 @@ func queryRestart(args []string, namespace string) {
 
 			log.Debugf("postgresql instance: %v", instance)
 
-			fmt.Printf("%-20s\t%-10s\t%-10s\t%-10s\t%12d %-7s\t%15t\n",
-				instance.Name, instance.Role, instance.Status, instance.Node, instance.ReplicationLag, "MB",
-				instance.PendingRestart)
+			if instance.ReplicationLag != -1 {
+				fmt.Printf("%-20s\t%-10s\t%-10s\t%-10s\t%12d %-7s\t%15t\n",
+					instance.Name, instance.Role, instance.Status, instance.Node, instance.ReplicationLag, "MB",
+					instance.PendingRestart)
+			} else {
+				fmt.Printf("%-20s\t%-10s\t%-10s\t%-10s\t%15s\t%23t\n",
+					instance.Name, instance.Role, instance.Status, instance.Node, "unknown",
+					instance.PendingRestart)
+			}
 		}
 	}
 }
diff --git a/pgo/cmd/scaledown.go b/pgo/cmd/scaledown.go
@@ -120,9 +120,15 @@ func queryCluster(args []string, ns string) {
 
 			log.Debugf("postgresql instance: %v", instance)
 
-			fmt.Printf("%-20s\t%-10s\t%-10s\t%12d %-7s\t%15t\n",
-				instance.Name, instance.Status, instance.Node, instance.ReplicationLag, "MB",
-				instance.PendingRestart)
+			if instance.ReplicationLag != -1 {
+				fmt.Printf("%-20s\t%-10s\t%-10s\t%12d %-7s\t%15t\n",
+					instance.Name, instance.Status, instance.Node, instance.ReplicationLag, "MB",
+					instance.PendingRestart)
+			} else {
+				fmt.Printf("%-20s\t%-10s\t%-10s\t%15s\t%23t\n",
+					instance.Name, instance.Status, instance.Node, "unknown",
+					instance.PendingRestart)
+			}
 		}
 	}
 }
diff --git a/util/failover.go b/util/failover.go
@@ -78,6 +78,14 @@ const (
 	// instanceReplicationInfoTypePrimaryStandby is the label used by Patroni to indicate that an
 	// instance is indeed a primary PostgreSQL instance, specifically within a standby cluster
 	instanceReplicationInfoTypePrimaryStandby = "Standby Leader"
+	// instanceRolePrimary indicates that an instance is a primary
+	instanceRolePrimary = "primary"
+	// instanceRoleReplica indicates that an instance is a replica
+	instanceRoleReplica = "replica"
+	// instanceRoleUnknown indicates that an instance is of an unknown type
+	instanceRoleUnknown = "unknown"
+	// instanceStatusUnavailable indicates an instance is unavailable
+	instanceStatusUnavailable = "unavailable"
 )
 
 var (
@@ -136,20 +144,34 @@ func GetPod(clientset *kubernetes.Clientset, deploymentName, namespace string) (
 // By default information is only returned for replicas within the cluster.  However,
 // if primary information is also needed, the inlcudePrimary flag can set set to true
 // and primary information will will also be included in the ReplicationStatusResponse.
-func ReplicationStatus(request ReplicationStatusRequest, includePrimary bool) (ReplicationStatusResponse, error) {
+//
+// Unless includeBusted is set, "busted" instances (e.g. a Pod in an unhappy
+// phase, possibly lacking a "role" label) are skipped. Busted instances are
+// reported as "unavailable" with replication lag and timeline set to -1.
+func ReplicationStatus(request ReplicationStatusRequest, includePrimary, includeBusted bool) (ReplicationStatusResponse, error) {
 	response := ReplicationStatusResponse{
 		Instances: make([]InstanceReplicationInfo, 0),
 	}
 
-	// First, get replica pods using selector pg-cluster=clusterName,role=replica if not including the primary,
-	// or pg-cluster=clusterName,pg-database if including the primary
-	var roleSelector string
+	// Build up the selector. First, create the base, which restricts to the
+	// current cluster
+	// pg-cluster=clusterName,pgo-pg-database
+	selector := fmt.Sprintf("%s=%s,%s",
+		config.LABEL_PG_CLUSTER, request.ClusterName, config.LABEL_PG_DATABASE)
+
+	// if we are not including the primary, determine if we are including busted
+	// replicas or not
 	if !includePrimary {
-		roleSelector = fmt.Sprintf("%s=%s", config.LABEL_PGHA_ROLE, config.LABEL_PGHA_ROLE_REPLICA)
-	} else {
-		roleSelector = config.LABEL_PG_DATABASE
+		if includeBusted {
+			// include all Pods that identify as a database, but **not** a primary
+			// pg-cluster=clusterName,pgo-pg-database,role!=config.LABEL_PGHA_ROLE_PRIMARY
+			selector += fmt.Sprintf(",%s!=%s", config.LABEL_PGHA_ROLE, config.LABEL_PGHA_ROLE_PRIMARY)
+		} else {
+			// include all Pods that identify as a database and have a replica label
+			// pg-cluster=clusterName,pgo-pg-database,role=replica
+			selector += fmt.Sprintf(",%s=%s", config.LABEL_PGHA_ROLE, config.LABEL_PGHA_ROLE_REPLICA)
+		}
 	}
-	selector := fmt.Sprintf("%s=%s,%s", config.LABEL_PG_CLUSTER, request.ClusterName, roleSelector)
 
 	log.Debugf(`searching for pods with "%s"`, selector)
 	pods, err := kubeapi.GetPods(request.Clientset, selector, request.Namespace)
@@ -175,8 +197,36 @@ func ReplicationStatus(request ReplicationStatusRequest, includePrimary bool) (R
 	// Now get the statistics about the current state of the replicas, which we
 	// can delegate to Patroni vis-a-vis the information that it collects
 	// We can get the statistics about the current state of the managed instance
-	// From executing and running a command in the first pod
-	pod := pods.Items[0]
+	// From executing and running a command in the first active pod
+	var pod *v1.Pod
+
+	for _, p := range pods.Items {
+		if p.Status.Phase == v1.PodRunning {
+			pod = &p
+			break
+		}
+	}
+
+	// if no active Pod can be found, we can only assume that all of the instances
+	// are unavailable, and we should indicate as such
+	if pod == nil {
+		for _, p := range pods.Items {
+			// set up the instance that will be returned
+			instance := InstanceReplicationInfo{
+				Name:           instanceInfoMap[p.Name].name,
+				Node:           instanceInfoMap[p.Name].node,
+				ReplicationLag: -1,
+				Role:           instanceRoleUnknown,
+				Status:         instanceStatusUnavailable,
+				Timeline:       -1,
+			}
+
+			// append this newly created instance to the list that will be returned
+			response.Instances = append(response.Instances, instance)
+		}
+
+		return response, nil
+	}
 
 	// Execute the command that will retrieve the replica information from Patroni
 	commandStdOut, _, err := kubeapi.ExecToPodThroughAPI(
@@ -197,17 +247,25 @@ func ReplicationStatus(request ReplicationStatusRequest, includePrimary bool) (R
 	// We need to iterate through this list to format the information for the
 	// response
 	for _, rawInstance := range rawInstances {
-
 		var role string
+
 		// skip the primary unless explicitly enabled
-		if rawInstance.Type == instanceReplicationInfoTypePrimary ||
-			rawInstance.Type == instanceReplicationInfoTypePrimaryStandby {
-			if !includePrimary {
-				continue
-			}
-			role = "primary"
-		} else {
-			role = "replica"
+		if !includePrimary && (rawInstance.Type == instanceReplicationInfoTypePrimary ||
+			rawInstance.Type == instanceReplicationInfoTypePrimaryStandby) {
+			continue
+		}
+
+		// if this is a busted instance and we are not including it, skip
+		if !includeBusted && rawInstance.State == "" {
+			continue
+		}
+
+		// determine the role of the instance
+		switch rawInstance.Type {
+		default:
+			role = instanceRoleReplica
+		case instanceReplicationInfoTypePrimary, instanceReplicationInfoTypePrimaryStandby:
+			role = instanceRolePrimary
 		}
 
 		// set up the instance that will be returned
@@ -218,11 +276,14 @@ func ReplicationStatus(request ReplicationStatusRequest, includePrimary bool) (R
 			Role:           role,
 			Name:           instanceInfoMap[rawInstance.PodName].name,
 			Node:           instanceInfoMap[rawInstance.PodName].node,
+			PendingRestart: rawInstance.PendingRestart == "*",
 		}
 
-		// indicate whether or not the instance has a pending restart
-		if rawInstance.PendingRestart == "*" {
-			instance.PendingRestart = true
+		// update the instance info if the instance is busted
+		if rawInstance.State == "" {
+			instance.Status = instanceStatusUnavailable
+			instance.ReplicationLag = -1
+			instance.Timeline = -1
 		}
 
 		// append this newly created instance to the list that will be returned

Original file line number	Diff line number	Diff line change
`@@ -137,7 +137,7 @@ func QueryFailover(name, ns string) msgs.QueryFailoverResponse {`
`137`	`137`	`ClusterName: name,`
`138`	`138`	`}`
`139`	`139`
`140`		`- replicationStatusResponse, err := util.ReplicationStatus(replicationStatusRequest, false)`
	`140`	`+ replicationStatusResponse, err := util.ReplicationStatus(replicationStatusRequest, false, false)`
`141`	`141`
`142`	`142`	`// if an error is return, log the message, and return the response`
`143`	`143`	`if err != nil {`
Original file line number	Diff line number	Diff line change
`@@ -120,9 +120,15 @@ func queryCluster(args []string, ns string) {`
`120`	`120`
`121`	`121`	`log.Debugf("postgresql instance: %v", instance)`
`122`	`122`
`123`		`- fmt.Printf("%-20s\t%-10s\t%-10s\t%12d %-7s\t%15t\n",`
`124`		`- instance.Name, instance.Status, instance.Node, instance.ReplicationLag, "MB",`
`125`		`- instance.PendingRestart)`
	`123`	`+ if instance.ReplicationLag != -1 {`
	`124`	`+ fmt.Printf("%-20s\t%-10s\t%-10s\t%12d %-7s\t%15t\n",`
	`125`	`+ instance.Name, instance.Status, instance.Node, instance.ReplicationLag, "MB",`
	`126`	`+ instance.PendingRestart)`
	`127`	`+ } else {`
	`128`	`+ fmt.Printf("%-20s\t%-10s\t%-10s\t%15s\t%23t\n",`
	`129`	`+ instance.Name, instance.Status, instance.Node, "unknown",`
	`130`	`+ instance.PendingRestart)`
	`131`	`+ }`
`126`	`132`	`}`
`127`	`133`	`}`
`128`	`134`	`}`