Skip to content

Commit 480b2a7

Browse files
authored
CMP-3895: Improve rescan wait logic for e2e tests (#835)
The e2e tests have been failing with what appears to be a transient race condition when a test rescans the environment, then waits for the suite or scan to get to a terminal state, like DONE and NON-COMPLIANT. The problem is that the test does something to initiate a rescan (deleting a suite, creating a scan setting binding, etc), and then it immediately starts looking for the ComplianceSuite status to see if it's done. In the case of rescans, the data from the last scan might still be lingering around and if the test is faster than the reconcile loops of the ComplianceScan controller, it could breeze through the wait logic and think the scan is already complete. This commit attempts to address that by using the rescan annotation. By using the annotation to rescan (instead of deleting a scan suite entirely), we can update the function that polls the suite status to also check for the annotations. If the wait function sees the annotation, it should continue polling since that's a clear signal that the ComplianceScan controller has not updated the scan data or reset the states back to PENDING. If none of the scans in the suite have the annotation, then it's probably safe to move forward and assume the status for each scan is updated and accurate, since the ComplianceScan controller will remove the rescan annotation when it restarts the scan.
1 parent 1276cdd commit 480b2a7

File tree

2 files changed

+96
-6
lines changed

2 files changed

+96
-6
lines changed

tests/e2e/framework/common.go

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1176,6 +1176,40 @@ func (f *Framework) WaitForSuiteScansStatus(namespace, name string, targetStatus
11761176
return false, nil
11771177
}
11781178

1179+
// Loop through each scan to make sure it doesn't have the
1180+
// rescan annotation. This makes it safer to poll for suite
1181+
// status because we're making sure the ComplianceScan
1182+
// controller has the opportunity to update the scan status for
1183+
// each scan and remove the annotation before we look at the
1184+
// overall suite status. Without this, it's possible to rerun a
1185+
// scan and immediately pass through the
1186+
// WaitForSuiteScansStatus function because the ComplianceScan
1187+
// objects are still referencing the old scan data and haven't
1188+
// been reset to "PENDING".
1189+
for _, scanStatus := range suite.Status.ScanStatuses {
1190+
key := types.NamespacedName{Name: scanStatus.Name, Namespace: namespace}
1191+
scan := &compv1alpha1.ComplianceScan{}
1192+
err := f.Client.Get(context.TODO(), key, scan)
1193+
if err != nil {
1194+
if apierrors.IsNotFound(err) {
1195+
log.Printf("Waiting for scan %s to appear in suite: %s", scanStatus.Name, name)
1196+
return false, nil
1197+
}
1198+
return false, fmt.Errorf("failed to get scan %s: %s", scanStatus.Name, err)
1199+
}
1200+
if _, exists := scan.Annotations[compv1alpha1.ComplianceScanRescanAnnotation]; !exists {
1201+
// Move along, the rescan annotation has been
1202+
// removed (if the caller did use it to perform
1203+
// a rescan) by the ComplianceScan controller
1204+
// so we know we're not looking at stale
1205+
// results from a previous run.
1206+
continue
1207+
}
1208+
1209+
log.Printf("Waiting for ComplianceScan controller to remove rescan annotation for %s", scan.Name)
1210+
return false, nil
1211+
}
1212+
11791213
if suite.Status.Phase != targetStatus {
11801214
log.Printf("waiting until suite %s reaches target status '%s'. Current status: %s", suite.Name, targetStatus, suite.Status.Phase)
11811215
return false, nil
@@ -1228,6 +1262,62 @@ func (f *Framework) WaitForSuiteScansStatus(namespace, name string, targetStatus
12281262
return nil
12291263
}
12301264

1265+
func (f *Framework) RescanSuite(suiteName, namespace string) error {
1266+
scanList := &compv1alpha1.ComplianceScanList{}
1267+
labelSelector, err := labels.Parse(compv1alpha1.SuiteLabel + "=" + suiteName)
1268+
if err != nil {
1269+
return fmt.Errorf("Failed to parse label selector: %s", err)
1270+
}
1271+
1272+
opts := &client.ListOptions{
1273+
LabelSelector: labelSelector,
1274+
}
1275+
err = f.Client.List(context.TODO(), scanList, opts)
1276+
if err != nil {
1277+
return fmt.Errorf("Failed to get scans for suite %s", suiteName)
1278+
}
1279+
1280+
// Add rescan annotation to each scan
1281+
for i := range scanList.Items {
1282+
scan := &scanList.Items[i]
1283+
err := f.RescanScan(scan.Name, scan.Namespace)
1284+
if err != nil {
1285+
return fmt.Errorf("Failed to apply rescan annotation to scan %s: %s", scan.Name, err)
1286+
}
1287+
}
1288+
return nil
1289+
}
1290+
1291+
func (f *Framework) RescanScan(scanName, namespace string) error {
1292+
key := types.NamespacedName{Name: scanName, Namespace: namespace}
1293+
1294+
interval := 5 * time.Second
1295+
retries := uint64(30)
1296+
bo := backoff.WithMaxRetries(backoff.NewConstantBackOff(interval), retries)
1297+
err := backoff.RetryNotify(func() error {
1298+
s := &compv1alpha1.ComplianceScan{}
1299+
err := f.Client.Get(context.TODO(), key, s)
1300+
if err != nil {
1301+
return fmt.Errorf("failed to get scan %s: %s", scanName, err)
1302+
}
1303+
1304+
// Add rescan annotation
1305+
if s.Annotations == nil {
1306+
s.Annotations = make(map[string]string)
1307+
}
1308+
s.Annotations[compv1alpha1.ComplianceScanRescanAnnotation] = ""
1309+
1310+
return f.Client.Update(context.TODO(), s)
1311+
}, bo, func(err error, d time.Duration) {
1312+
log.Printf("Failed to add rescan annotation to scan %s after %s: %s", scanName, d.String(), err)
1313+
})
1314+
if err != nil {
1315+
return fmt.Errorf("Failed to trigger rescan for scan %s: %s", scanName, err)
1316+
}
1317+
log.Printf("Triggered rescan for scan %s", scanName)
1318+
return nil
1319+
}
1320+
12311321
func (f *Framework) logContainerOutput(namespace, name string) {
12321322
logContainerOutputEnv := os.Getenv("LOG_CONTAINER_OUTPUT")
12331323
if logContainerOutputEnv == "" {

tests/e2e/parallel/main_test.go

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2546,10 +2546,10 @@ func TestCustomRuleTailoredProfile(t *testing.T) {
25462546
if err := f.Client.Get(context.TODO(), key, suite); err != nil {
25472547
t.Fatal(err)
25482548
}
2549-
// let's rescans and expect the check to be non compliant by deleting the suite
2550-
err = f.Client.Delete(context.TODO(), suite)
2549+
// Annotate all ComplianceScans in the suite to generate fresh results
2550+
err = f.RescanSuite(suiteName, testNamespace)
25512551
if err != nil {
2552-
t.Fatalf("Failed to delete suite: %v", err)
2552+
t.Fatalf("Failed to rescan suite: %s", err)
25532553
}
25542554
err = f.WaitForSuiteScansStatus(testNamespace, suiteName, compv1alpha1.PhaseDone, compv1alpha1.ResultNonCompliant)
25552555
if err != nil {
@@ -3459,10 +3459,10 @@ func TestCustomRuleFailureReasonInCheckResult(t *testing.T) {
34593459
t.Fatalf("Failed to get ComplianceSuite: %v", err)
34603460
}
34613461

3462-
// Delete and recreate the suite to trigger a new scan
3463-
err = f.Client.Delete(context.TODO(), suite)
3462+
// Annotate all ComplianceScans in the suite to generate fresh results
3463+
err = f.RescanSuite(suiteName, testNamespace)
34643464
if err != nil {
3465-
t.Fatalf("Failed to delete ComplianceSuite: %v", err)
3465+
t.Fatalf("Failed to rescan ComplianceSuite: %v", err)
34663466
}
34673467

34683468
// Wait for the new scan to complete

0 commit comments

Comments
 (0)