Merge pull request #20 from xataio/fix-switchover-support

eminano · web-flow · commit 0f081bc1607c · 2025-09-04T12:52:44.000+02:00
Fix switchover support
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -16,3 +16,9 @@ repos:
     hooks:
       - id: golangci-lint-full
         args: ["--timeout=10m", "--config=.golangci.yml"]
+  - repo: local
+    hooks:
+      - id: generate-manifest
+        language: system
+        name: generate manifest
+        entry: make manifest
diff --git a/README.md b/README.md
@@ -4,15 +4,16 @@ A [CNPG-I](https://github.com/cloudnative-pg/cnpg-i) plugin that automatically h
 
 ## Overview
 
-This plugin monitors PostgreSQL database activity and automatically scales clusters down to zero replicas when they've been inactive for a configurable period. It injects a monitoring sidecar into the primary PostgreSQL pod that tracks database connections and query activity, then hibernates the cluster by setting the `cnpg.io/hibernation` annotation when the inactivity threshold is reached.
+This plugin monitors PostgreSQL database activity and automatically scales clusters down to zero replicas when they've been inactive for a configurable period. It injects a monitoring sidecar into all pods of the PostgreSQL cluster. Only the primary pod actively monitors database connections and manages hibernation, while replica pods run the sidecar in passive mode until promoted to primary.
 
 ### How It Works
 
-1. **Sidecar Injection**: Automatically adds a monitoring sidecar to the primary PostgreSQL pod
-2. **Activity Monitoring**: The sidecar periodically checks for active database connections and recent queries
-3. **Automatic Hibernation**: When the cluster is inactive for the configured duration, it sets the hibernation annotation
-4. **Scheduled Backup Management**: Automatically pauses scheduled backups when the cluster is hibernated to prevent backup failures
-5. **Resource Optimization**: Inactive clusters are scaled to zero, freeing up cluster resources
+1. **Sidecar Injection**: Automatically adds a monitoring sidecar to all PostgreSQL pods in the cluster
+2. **Primary-Only Monitoring**: Only the primary pod actively monitors database connections and query activity
+3. **Passive Replicas**: Replica pods run the sidecar container but remain in passive mode (no monitoring)
+4. **Automatic Hibernation**: When the cluster is inactive for the configured duration, the primary sidecar sets the hibernation annotation
+5. **Scheduled Backup Management**: The primary pod automatically pauses scheduled backups when the cluster is hibernated to prevent backup failures
+6. **Switchover Handling**: During switchovers, the new primary automatically takes over monitoring duties while the old primary becomes passive
 
 ## Installation
 
@@ -176,7 +177,8 @@ These resource configurations apply to all sidecar containers injected by the pl
 The plugin provides logging to help monitor its operation:
 
 - Sidecar injection events are logged during pod creation
-- Activity monitoring status is logged at each check interval
+- Activity monitoring status is logged at each check interval (primary pod only)
+- Primary/replica role transitions are logged when pods change status
 - Hibernation events are logged when clusters are scaled down
 - Scheduled backup pause operations are logged
 
@@ -189,9 +191,15 @@ kubectl logs -n cnpg-system deployment/cnpg-i-scale-to-zero-plugin
 And monitor the sidecar logs in the PostgreSQL pods:
 
 ```shell
-kubectl logs <pod-name> -c scale-to-zero
+# View logs from the primary pod's sidecar (active monitoring)
+kubectl logs <primary-pod-name> -c scale-to-zero
+
+# View logs from replica pods' sidecars (passive mode)
+kubectl logs <replica-pod-name> -c scale-to-zero
 ```
 
+**Note**: The primary pod's sidecar shows active monitoring logs, while replica pod sidecars only log, at each check interval, that they are skipping activity monitoring.
+
 ## Development
 
 For local development and building from source:
@@ -214,3 +222,17 @@ make kind-deploy-dev
 This plugin uses the [pluginhelper](https://github.com/cloudnative-pg/cnpg-i-machinery/tree/main/pkg/pluginhelper) from [`cnpg-i-machinery`](https://github.com/cloudnative-pg/cnpg-i-machinery) to simplify the plugin's implementation.
 
 For additional details on the plugin implementation, refer to the [development documentation](doc/development.md).
+
+## Limitations
+
+### Primary-Only Activity Tracking
+
+Currently, the plugin only monitors database activity on the **primary instance**. This means:
+
+- **Read-only workloads on replicas are not tracked** - If your application connects directly to replica instances for read queries, this activity will not prevent hibernation
+- **Replica-only traffic** - Clusters with active read traffic exclusively on replicas may be hibernated despite being in use
+- **Connection pooling to replicas** - Applications using connection poolers that direct read traffic to replicas will not be detected as active
+
+**Workaround**: Ensure critical read workloads also maintain at least one connection to the primary instance, or configure longer inactivity periods to account for replica-only usage patterns.
+
+**Future Enhancement**: Replica activity monitoring may be added in future versions to provide more comprehensive activity detection across the entire cluster.
diff --git a/doc/development.md b/doc/development.md
@@ -87,8 +87,9 @@ The `OperatorLifecycleServer` interface requires several methods:
 The scale-to-zero plugin specifically:
 
 - Monitors Pod creation events
-- Injects a sidecar container into the primary PostgreSQL pod only
-- The sidecar monitors database activity and hibernates inactive clusters
+- Injects a sidecar container into all PostgreSQL cluster pods
+- The sidecar on the primary monitors database activity and hibernates inactive clusters
+- The sidecar on each replica remains passive until that replica is promoted to primary
 - Manages scheduled backups by pausing them during hibernation
 
 ### Sidecar Implementation
@@ -108,25 +109,22 @@ The sidecar manager handles the startup and configuration of the sidecar process
 
 #### Scale-to-Zero Logic (`scale_to_zero.go`)
 
-The main scale-to-zero functionality monitors database activity and hibernates
-inactive clusters:
+The main scale-to-zero functionality monitors database activity and hibernates inactive clusters:
 
-- **Activity Monitoring**: Connects to PostgreSQL to check for active connections
-  and recent query activity
+- **Activity Monitoring**: Connects to PostgreSQL to check for open connections
+- **Switchover Handling**: Automatically detects primary changes and transfers monitoring responsibility
 - **Configurable Inactivity Threshold**: Uses the `xata.io/scale-to-zero-inactivity-minutes`
   annotation to determine when a cluster should be hibernated (defaults to 30 minutes)
 - **Hibernation**: Sets the `cnpg.io/hibernation` annotation to scale the cluster to zero
-- **Scheduled Backup Management**: Automatically pauses scheduled backups when hibernating
-  clusters to prevent backup failures on inactive clusters
-- **Primary-Only Operation**: Only runs on the primary PostgreSQL instance
+- **Scheduled Backup Management**: Automatically pauses scheduled backups when hibernating clusters to prevent backup failures on inactive clusters
 
 Key features:
 
 - Periodic checks at configurable intervals (default: 1 minute)
 - PostgreSQL connection pooling for activity monitoring
 - Graceful shutdown on context cancellation
-- Error handling for replica instances (stops monitoring if not primary)
 - Automatic scheduled backup pause operations
+- Switchover support (the new primary's sidecar takes over monitoring automatically)
 
 #### Environment Variables
 
@@ -166,16 +164,13 @@ are inactive for a specified period. Here's how it operates:
 1. **Sidecar Injection**: When a PostgreSQL pod is created, the plugin injects a
    sidecar container that monitors database activity.
 
-2. **Primary Pod Only**: The sidecar only runs monitoring on the primary PostgreSQL
-   instance to avoid conflicts and ensure consistent behavior.
+2. **Activity Monitoring**: The sidecar on the primary periodically connects to PostgreSQL to check for open database connections; sidecars on replicas skip this check.
 
-3. **Activity Monitoring**: The sidecar periodically connects to PostgreSQL to check open database connections.
-
-4. **Hibernation**: When the cluster has been inactive for the configured duration,
-   the sidecar sets the `cnpg.io/hibernation` annotation on the cluster, causing
+3. **Hibernation**: When the cluster has been inactive for the configured duration,
+   the primary sidecar sets the `cnpg.io/hibernation` annotation on the cluster, causing
    CloudNativePG to scale it down to zero replicas.
 
-5. **Scheduled Backup Management**: After hibernating a cluster, the sidecar automatically
+4. **Scheduled Backup Management**: After hibernating a cluster, the sidecar automatically
    pauses any associated scheduled backups to prevent backup operations from failing
    on hibernated clusters.
 
diff --git a/internal/plugin/lifecycle/lifecycle.go b/internal/plugin/lifecycle/lifecycle.go
@@ -101,11 +101,6 @@ func (impl Implementation) reconcileMetadata(
 		return nil, err
 	}
 
-	if cluster.Status.CurrentPrimary != "" && pod.Name != cluster.Status.CurrentPrimary {
-		logger.Info("pod is not the current primary, skipping sidecar injection", "pod", pod.Name, "primary", cluster.Status.CurrentPrimary)
-		return &lifecycle.OperatorLifecycleResponse{}, nil
-	}
-
 	mutatedPod := pod.DeepCopy()
 
 	sidecarContainer := &corev1.Container{
diff --git a/internal/sidecar/scale_to_zero.go b/internal/sidecar/scale_to_zero.go
@@ -90,12 +90,23 @@ func (s *scaleToZero) Start(ctx context.Context) error {
 		case <-ctx.Done():
 			return nil
 		case <-ticker.C:
-			scaleToZeroConfig, err := s.getClusterScaleToZeroConfig(ctx)
+			cluster, err := s.client.getCluster(ctx, doNotForceUpdate)
 			if err != nil {
-				contextLogger.Error(err, "failed to get scale to zero configuration")
+				contextLogger.Error(err, "failed to get cluster")
 				continue
 			}
 
+			// only the primary keeps track of activity and hibernation
+			if !s.isPrimary(cluster) {
+				// reset the last active time while this pod is not the primary so
+				// that, after a switchover, the new primary starts from a clean state
+				s.lastActive = time.Time{}
+				contextLogger.Info("running on non-primary pod, skipping activity monitoring", "primary", cluster.Status.CurrentPrimary)
+				continue
+			}
+
+			scaleToZeroConfig := s.getClusterScaleToZeroConfig(ctx, cluster)
+
 			if !scaleToZeroConfig.enabled {
 				// reset last active time if scale to zero is disabled. This
 				// prevents old activity tracking from kicking in when scale to
@@ -110,6 +121,7 @@ func (s *scaleToZero) Start(ctx context.Context) error {
 				contextLogger.Error(err, "failed to check cluster activity")
 				continue
 			}
+
 			if !isActive {
 				if err := s.hibernate(ctx); err != nil {
 					contextLogger.Error(err, "hibernation failed")
@@ -154,6 +166,13 @@ func (s *scaleToZero) initQuerier(ctx context.Context) error {
 	return err
 }
 
+func (s *scaleToZero) isPrimary(cluster *cnpgv1.Cluster) bool {
+	// when the cluster is first initialised, the current primary might not be
+	// set yet. Treat this pod as the primary in that case to avoid blocking
+	// the scale to zero checks.
+	return cluster.Status.CurrentPrimary == "" || (cluster.Status.CurrentPrimary == s.currentPodName)
+}
+
 // isClusterActive checks if the cluster has any open connections.
 func (s *scaleToZero) isClusterActive(ctx context.Context, inactivityMinutes int) (bool, error) {
 	openConns, err := s.openConnections(ctx)
@@ -241,12 +260,7 @@ func (s *scaleToZero) hibernate(ctx context.Context) error {
 // getClusterScaleToZeroConfig retrieves the scale to zero configuration from
 // the cluster annotations. It returns the enabled status and inactivity
 // minutes. If the annotation is not set, it uses default values.
-func (s *scaleToZero) getClusterScaleToZeroConfig(ctx context.Context) (*scaleToZeroConfig, error) {
-	cluster, err := s.client.getCluster(ctx, doNotForceUpdate)
-	if err != nil {
-		return nil, fmt.Errorf("failed to get cluster: %w", err)
-	}
-
+func (s *scaleToZero) getClusterScaleToZeroConfig(ctx context.Context, cluster *cnpgv1.Cluster) *scaleToZeroConfig {
 	enabled := false
 	inactivityMinutes := defaultInactivityMinutes
 
@@ -266,7 +280,7 @@ func (s *scaleToZero) getClusterScaleToZeroConfig(ctx context.Context) (*scaleTo
 	return &scaleToZeroConfig{
 		enabled:           enabled,
 		inactivityMinutes: inactivityMinutes,
-	}, nil
+	}
 }
 
 func (s *scaleToZero) pauseScheduledBackup(ctx context.Context) error {
diff --git a/internal/sidecar/scale_to_zero_test.go b/internal/sidecar/scale_to_zero_test.go