|
52 | 52 | import org.junit.jupiter.params.provider.Arguments; |
53 | 53 | import org.junit.jupiter.params.provider.MethodSource; |
54 | 54 |
|
| 55 | +import java.io.IOException; |
55 | 56 | import java.time.Instant; |
56 | 57 | import java.util.HashMap; |
57 | 58 | import java.util.List; |
@@ -149,11 +150,32 @@ public void verifyBasicTransition( |
149 | 150 | private TestingFlinkBlueGreenDeploymentController.BlueGreenReconciliationResult handleSavepoint( |
150 | 151 | TestingFlinkBlueGreenDeploymentController.BlueGreenReconciliationResult rs) |
151 | 152 | throws Exception { |
| 153 | + return handleSavepointWithFailure(rs, null); |
| 154 | + } |
| 155 | + |
| 156 | + @NotNull |
| 157 | + private TestingFlinkBlueGreenDeploymentController.BlueGreenReconciliationResult |
| 158 | + handleSavepointWithFailure( |
| 159 | + TestingFlinkBlueGreenDeploymentController.BlueGreenReconciliationResult rs, |
| 160 | + Exception expectedException) |
| 161 | + throws Exception { |
| 162 | + |
| 163 | + if (expectedException != null) { |
| 164 | + flinkService.setSavepointTriggerException(expectedException); |
| 165 | + } |
| 166 | + |
152 | 167 | var triggers = flinkService.getSavepointTriggers(); |
153 | 168 | triggers.clear(); |
154 | 169 |
|
155 | 170 | rs = reconcile(rs.deployment); |
156 | 171 |
|
| 172 | + if (expectedException != null) { |
| 173 | + // Should fail immediately without entering savepointing state |
| 174 | + assertFailingJobStatus(rs); |
| 175 | + return rs; |
| 176 | + } |
| 177 | + |
| 178 | + // Continue with existing successful savepoint logic... |
157 | 179 | // Simulating a pending savepoint |
158 | 180 | triggers.put(rs.deployment.getStatus().getSavepointTriggerId(), false); |
159 | 181 |
|
@@ -433,8 +455,103 @@ public void verifyPatchScenario(FlinkVersion flinkVersion, PatchTestCase testCas |
433 | 455 | } |
434 | 456 | } |
435 | 457 |
|
| 458 | + @ParameterizedTest |
| 459 | + @MethodSource("org.apache.flink.kubernetes.operator.TestUtils#flinkVersions") |
| 460 | + public void verifySavepointFailureRecovery(FlinkVersion flinkVersion) throws Exception { |
| 461 | + var blueGreenDeployment = |
| 462 | + buildSessionCluster( |
| 463 | + TEST_DEPLOYMENT_NAME, |
| 464 | + TEST_NAMESPACE, |
| 465 | + flinkVersion, |
| 466 | + null, |
| 467 | + UpgradeMode.LAST_STATE); |
| 468 | + |
| 469 | + var rs = executeBasicDeployment(flinkVersion, blueGreenDeployment, false, null); |
| 470 | + |
| 471 | + // First attempt: Configure service to throw exception |
| 472 | + flinkService.setSavepointTriggerException( |
| 473 | + new IllegalStateException("Job not in valid state for savepoint")); |
| 474 | + |
| 475 | + String customValue = UUID.randomUUID().toString(); |
| 476 | + simulateChangeInSpec(rs.deployment, customValue, 0, null); |
| 477 | + |
| 478 | + // Should fail with savepoint error |
| 479 | + rs = reconcile(rs.deployment); |
| 480 | + assertFailingJobStatus(rs); |
| 481 | + assertTrue(rs.reconciledStatus.getError().contains("Job not in valid state for savepoint")); |
| 482 | + |
| 483 | + // Recovery: Clear the exception and try again with new spec change |
| 484 | + flinkService.clearSavepointTriggerException(); |
| 485 | + customValue = UUID.randomUUID().toString() + "_recovery"; |
| 486 | + simulateChangeInSpec(rs.deployment, customValue, ALT_DELETION_DELAY_VALUE, null); |
| 487 | + |
| 488 | + // Should now succeed and trigger savepoint properly |
| 489 | + rs = handleSavepoint(rs); |
| 490 | + |
| 491 | + // Continue with successful transition |
| 492 | + testTransitionToGreen(rs, customValue, "savepoint_1"); |
| 493 | + } |
| 494 | + |
| 495 | + @ParameterizedTest |
| 496 | + @MethodSource("savepointExceptionProvider") |
| 497 | + public void verifySavepointFailureWithDifferentExceptionTypes( |
| 498 | + FlinkVersion flinkVersion, Exception savepointException, String expectedErrorFragment) |
| 499 | + throws Exception { |
| 500 | + |
| 501 | + var blueGreenDeployment = |
| 502 | + buildSessionCluster( |
| 503 | + TEST_DEPLOYMENT_NAME, |
| 504 | + TEST_NAMESPACE, |
| 505 | + flinkVersion, |
| 506 | + null, |
| 507 | + UpgradeMode.SAVEPOINT); |
| 508 | + var rs = executeBasicDeployment(flinkVersion, blueGreenDeployment, false, null); |
| 509 | + |
| 510 | + flinkService.setSavepointTriggerException(savepointException); |
| 511 | + simulateChangeInSpec(rs.deployment, UUID.randomUUID().toString(), 0, null); |
| 512 | + |
| 513 | + rs = reconcile(rs.deployment); |
| 514 | + |
| 515 | + assertFailingJobStatus(rs); |
| 516 | + assertTrue(rs.reconciledStatus.getError().contains("Could not trigger Savepoint")); |
| 517 | + assertTrue(rs.reconciledStatus.getError().contains(expectedErrorFragment)); |
| 518 | + |
| 519 | + // Should remain in ACTIVE_BLUE state (no transition started) |
| 520 | + assertEquals( |
| 521 | + FlinkBlueGreenDeploymentState.ACTIVE_BLUE, rs.reconciledStatus.getBlueGreenState()); |
| 522 | + |
| 523 | + // Verify only Blue deployment exists (Green was never created) |
| 524 | + var flinkDeployments = getFlinkDeployments(); |
| 525 | + assertEquals(1, flinkDeployments.size()); |
| 526 | + } |
| 527 | + |
436 | 528 | // ==================== Parameterized Test Inputs ==================== |
437 | 529 |
|
| 530 | + static Stream<Arguments> savepointExceptionProvider() { |
| 531 | + return TestUtils.flinkVersions() |
| 532 | + .flatMap( |
| 533 | + flinkVersionArgs -> { |
| 534 | + FlinkVersion version = (FlinkVersion) flinkVersionArgs.get()[0]; |
| 535 | + return Stream.of( |
| 536 | + Arguments.of( |
| 537 | + version, |
| 538 | + new IOException("Network timeout"), |
| 539 | + "Network timeout"), |
| 540 | + Arguments.of( |
| 541 | + version, |
| 542 | + new IllegalStateException("Job not running"), |
| 543 | + "Job not running"), |
| 544 | + Arguments.of( |
| 545 | + version, |
| 546 | + new RuntimeException("Service unavailable"), |
| 547 | + "Service unavailable"), |
| 548 | + Arguments.of( |
| 549 | + version, |
| 550 | + new Exception("Generic error"), |
| 551 | + "Generic error")); |
| 552 | + }); |
| 553 | + } |
| 554 | + |
438 | 555 | static Stream<Arguments> patchScenarioProvider() { |
439 | 556 | // Extract FlinkVersions from TestUtils and combine with PatchTypes |
440 | 557 | return TestUtils.flinkVersions() |
|
0 commit comments