pkg/operator/podsecurityreadinesscontroller/README.md (new file, 113 additions)
# Pod Security Readiness Controller

The Pod Security Readiness Controller evaluates namespace compatibility with Pod Security Admission (PSA) enforcement in clusters.

## Purpose

This controller performs dry-run PSA evaluations to determine which namespaces would experience pod creation failures if PSA enforcement labels were applied.

The controller generates telemetry data for `ClusterFleetEvaluation` and helps us understand PSA compatibility before enabling enforcement.

## Implementation

The controller follows this evaluation algorithm; a simplified sketch of the loop appears after the list:

1. **Namespace Discovery** - Find namespaces without PSA enforcement
2. **PSA Level Determination** - Predict what enforcement level would be applied
3. **Dry-Run Evaluation** - Test namespace against predicted PSA level
4. **Violation Classification** - Categorize any violations found for telemetry
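
A simplified sketch of how these steps might compose into the controller's sync loop. This is illustrative only: the helpers `determineTargetLevel`, `isNamespaceViolating`, `updateConditions`, and the condition methods are assumptions, not the controller's actual code.

```go
// Illustrative sketch only; helper names are hypothetical.
func (c *PodSecurityReadinessController) sync(ctx context.Context) error {
	conditions := podSecurityOperatorConditions{}

	// 1. Namespace Discovery: namespaces without a PSA enforce label.
	namespaces, err := c.kubeClient.CoreV1().Namespaces().List(ctx, metav1.ListOptions{
		LabelSelector: "!pod-security.kubernetes.io/enforce",
	})
	if err != nil {
		return err
	}

	for i := range namespaces.Items {
		ns := &namespaces.Items[i]

		// 2. PSA Level Determination.
		enforceLevel := determineTargetLevel(ns)

		// 3. Dry-Run Evaluation: would the enforce label trigger PSA warnings?
		violating, err := c.isNamespaceViolating(ctx, ns, enforceLevel)
		if err != nil {
			conditions.addInconclusive(ns)
			continue
		}

		// 4. Violation Classification for telemetry.
		if violating {
			if err := c.classifyViolatingNamespace(ctx, &conditions, ns, enforceLevel); err != nil {
				conditions.addInconclusive(ns)
			}
		}
	}

	return c.updateConditions(ctx, &conditions)
}
```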

### Namespace Discovery

Selects namespaces without PSA enforcement labels:

```go
selector := "!pod-security.kubernetes.io/enforce"
```
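
A rough sketch of how the selector might be used with a client-go list call (illustrative; `kubeClient` and `ctx` are assumed to be in scope):

```go
// Illustrative: list namespaces that do not yet carry a PSA enforce label.
namespaces, err := kubeClient.CoreV1().Namespaces().List(ctx, metav1.ListOptions{
	LabelSelector: "!pod-security.kubernetes.io/enforce",
})
if err != nil {
	return err
}
```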

### PSA Level Determination

The controller determines the effective PSA enforcement level using this precedence (sketched below the list):

1. `security.openshift.io/MinimallySufficientPodSecurityStandard` annotation
2. Most restrictive of existing `pod-security.kubernetes.io/warn` or `pod-security.kubernetes.io/audit` labels, if owned by the PSA label syncer
3. Kube API server's future global default: `restricted`
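
A minimal sketch of that precedence (illustrative; `psapi` is `k8s.io/pod-security-admission/api`, and `mostRestrictiveSyncerLabel` is a hypothetical helper, not part of the actual code):

```go
// Illustrative sketch of the precedence; helper names are hypothetical.
func determineTargetLevel(ns *corev1.Namespace) psapi.Level {
	// 1. Explicit hint left by the PSA label syncer.
	if level, ok := ns.Annotations["security.openshift.io/MinimallySufficientPodSecurityStandard"]; ok {
		return psapi.Level(level)
	}

	// 2. Most restrictive of the syncer-owned warn/audit labels.
	if level, ok := mostRestrictiveSyncerLabel(ns); ok {
		return level
	}

	// 3. Fall back to the future global default.
	return psapi.LevelRestricted
}
```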

### Dry-Run Evaluation

The controller performs the equivalent of this `oc` command:

```bash
oc label --dry-run=server --overwrite namespace $NAMESPACE_NAME \
pod-security.kubernetes.io/enforce=$POD_SECURITY_STANDARD
```
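
In client-go terms, the dry-run might look roughly like this (illustrative; the real controller may use a patch or apply instead of an update, and the PSA warnings are surfaced through the client's warning handler):

```go
// Illustrative sketch: server-side dry-run of applying the enforce label.
nsCopy := ns.DeepCopy()
if nsCopy.Labels == nil {
	nsCopy.Labels = map[string]string{}
}
nsCopy.Labels["pod-security.kubernetes.io/enforce"] = string(enforceLevel)

_, err := kubeClient.CoreV1().Namespaces().Update(ctx, nsCopy, metav1.UpdateOptions{
	DryRun: []string{metav1.DryRunAll},
})
```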

PSA warnings during dry-run indicate the namespace contains violating workloads.

### Violation Classification

Violating namespaces are categorized for telemetry analysis:

| Classification | Criteria | Purpose |
|------------------|-----------------------------------------------------------------|----------------------------------------|
| `runLevelZero` | Core namespaces: `kube-system`, `default`, `kube-public` | Platform infrastructure tracking |
| `openshift` | Namespaces with `openshift-` prefix | OpenShift component tracking |
| `disabledSyncer` | Label `security.openshift.io/scc.podSecurityLabelSync: "false"` | Intentionally excluded namespaces |
| `userSCC` | Contains user workloads that violate PSA | SCC vs PSA policy conflicts |
| `customer` | All other violating namespaces | Customer workload compatibility issues |
| `inconclusive` | Evaluation failed due to API errors | Operational problems |

#### User SCC Detection

The PSA label syncer bases its evaluation exclusively on a ServiceAccount's SCCs, ignoring a user's SCCs.
When a pod's SCC assignment comes from user permissions rather than its ServiceAccount, the syncer's predicted PSA level may be incorrect.
Therefore we need to evaluate the affected pods (if any) against the target PSA level.

### Inconclusive Handling

When the evaluation process fails, namespaces are marked as `inconclusive`.

Common causes for inconclusive results:

- **API server unavailable** - Network timeouts, etcd issues
- **Resource conflicts** - Concurrent namespace modifications
- **Invalid PSA levels** - Malformed enforcement level strings
- **Pod listing failures** - RBAC issues or resource pressure

High rates of inconclusive results across the fleet may indicate systematic issues that require investigation.

## Output

The controller updates `OperatorStatus` conditions for each violation type:

```go
type podSecurityOperatorConditions struct {
violatingRunLevelZeroNamespaces []string
violatingOpenShiftNamespaces []string
violatingDisabledSyncerNamespaces []string
violatingCustomerNamespaces []string
userSCCViolationNamespaces []string
inconclusiveNamespaces []string
}
```

Conditions follow this pattern (a construction sketch follows the list):

- `PodSecurity{Type}EvaluationConditionsDetected`
- Status: `True` (violations found) / `False` (no violations)
- Message includes violating namespace list
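
A sketch of how one such condition could be built (illustrative; assumes `operatorv1` is `github.com/openshift/api/operator/v1`, and the reason strings are placeholders):

```go
// Illustrative only: building one condition following the documented pattern.
func makeCondition(violationType string, namespaces []string) operatorv1.OperatorCondition {
	conditionType := "PodSecurity" + violationType + "EvaluationConditionsDetected"
	if len(namespaces) == 0 {
		return operatorv1.OperatorCondition{
			Type:   conditionType,
			Status: operatorv1.ConditionFalse,
			Reason: "ExpectedReason",
		}
	}
	return operatorv1.OperatorCondition{
		Type:    conditionType,
		Status:  operatorv1.ConditionTrue,
		Reason:  "PodSecurityViolationsDetected",
		Message: "violations detected in namespaces: " + strings.Join(namespaces, ", "),
	}
}
```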

## Configuration

The controller runs with a configurable interval (default: 4 hours) and uses rate limiting to avoid overwhelming the API server:

```go
kubeClientCopy.QPS = 2
kubeClientCopy.Burst = 2
```
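
A sketch of how those limits might be applied to a dedicated copy of the client configuration (illustrative; variable names are assumptions):

```go
// Illustrative: copy the rest.Config so the reduced rate limits only affect
// this controller's client.
kubeClientCopy := rest.CopyConfig(kubeConfig)
kubeClientCopy.QPS = 2
kubeClientCopy.Burst = 2

client, err := kubernetes.NewForConfig(kubeClientCopy)
if err != nil {
	return err
}
```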

## Integration Points

- **PSA Label Syncer**: Reads syncer-managed PSA labels to predict enforcement levels
- **Cluster Operator**: Reports status through standard operator conditions
- **Telemetry**: Violation data feeds into cluster fleet analysis systems
pkg/operator/podsecurityreadinesscontroller/classification.go (new file, 101 additions)
package podsecurityreadinesscontroller

import (
"context"
"errors"
"strings"

securityv1 "github.com/openshift/api/security/v1"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/klog/v2"
psapi "k8s.io/pod-security-admission/api"
"k8s.io/pod-security-admission/policy"
)

var (
runLevelZeroNamespaces = sets.New[string](
"default",
"kube-system",
"kube-public",
"kube-node-lease",
)
errNoViolatingPods = errors.New("no violating pods in violating namespace")
)

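// classifyViolatingNamespace attributes a violating namespace to one of the
// telemetry buckets: run-level zero, openshift-prefixed, and syncer-disabled
// namespaces are classified by name or label alone; for everything else the
// pods are evaluated individually to separate user-SCC-induced violations
// from the rest.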
func (c *PodSecurityReadinessController) classifyViolatingNamespace(
ctx context.Context,
conditions *podSecurityOperatorConditions,
ns *corev1.Namespace,
enforceLevel psapi.Level,
) error {
if runLevelZeroNamespaces.Has(ns.Name) {
conditions.addViolatingRunLevelZero(ns)
return nil
}
if strings.HasPrefix(ns.Name, "openshift") {
conditions.addViolatingOpenShift(ns)
return nil
}
if ns.Labels[labelSyncControlLabel] == "false" {
conditions.addViolatingDisabledSyncer(ns)
return nil
}

Comment on lines +33 to +44

Reviewer: Is it possible that any of these can be true and the evaluations further below also apply? I.e. is it possible to have a user-created Pod running in a violating run-level zero namespace, openshift namespace, or namespace where the syncer is disabled? Would it be valuable to still continue classification even if one of these is true?

Author: The cases above are ones where someone consciously did something, while below we are in the space of exploration, figuring out what the root cause is. Run-level zero is mostly excluded a layer above, so this actually doesn't happen anymore; it occurred up until 4.14 or so. If an openshift namespace is flagged, the owning team needs to evaluate what they did and set the PSA level for their namespace accordingly. If a customer disables the syncer, we simply don't care and assume they take ownership.

Reviewer: Do we prevent users from creating workloads in these namespaces somehow? If not, I could imagine getting reports from a cluster where an openshift-* namespace has violations from a user-created workload, but because we don't root-cause these violations we are oblivious to that fact. If this is something we consider extremely rare and unlikely to affect our metrics in any significant way, I'm fine with this as-is, but it is worth noting.

Author: We don't prevent users from creating openshift- namespaces; some Red Hat departments even suggest creating openshift-prefixed namespaces when backing up etcd 😄 In most cases, flagged openshift- namespaces belong to teams that haven't yet adjusted to PSA enforcement.

// Evaluate by individual pod.
allPods, err := c.kubeClient.CoreV1().Pods(ns.Name).List(ctx, metav1.ListOptions{})
if err != nil {
// Will end up in inconclusive as we couldn't diagnose the violation root
// cause.
klog.V(2).ErrorS(err, "Failed to list pods in namespace", "namespace", ns.Name)
return err
}

isViolating := createPodViolationEvaluator(c.psaEvaluator, enforceLevel)
violatingPods := []corev1.Pod{}
for _, pod := range allPods.Items {
if isViolating(pod) {
violatingPods = append(violatingPods, pod)
}
}
if len(violatingPods) == 0 {
klog.V(2).ErrorS(errNoViolatingPods, "failed to find violating pod", "namespace", ns.Name)
return errNoViolatingPods
}

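	// Violating pods that were admitted via the requesting user's SCC (rather
	// than the ServiceAccount's) are counted separately, since the PSA label
	// syncer cannot account for user SCCs.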
violatingUserSCCPods := []corev1.Pod{}
for _, pod := range violatingPods {
if pod.Annotations[securityv1.ValidatedSCCSubjectTypeAnnotation] == "user" {
violatingUserSCCPods = append(violatingUserSCCPods, pod)
}
}
if len(violatingUserSCCPods) > 0 {
conditions.addViolatingUserSCC(ns)
}
if len(violatingUserSCCPods) != len(violatingPods) {
conditions.addUnclassifiedIssue(ns)
}

return nil
}

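// createPodViolationEvaluator returns a predicate that reports whether a pod
// would be rejected by PSA at the given enforcement level (latest policy version).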
func createPodViolationEvaluator(evaluator policy.Evaluator, enforcement psapi.Level) func(pod corev1.Pod) bool {
return func(pod corev1.Pod) bool {
results := evaluator.EvaluatePod(
psapi.LevelVersion{
Level: enforcement,
Version: psapi.LatestVersion(),
},
&pod.ObjectMeta,
&pod.Spec,
)

for _, result := range results {
if !result.Allowed {
return true
}
}
return false
}
}