Skip to content

Commit 379c690

Browse files
committed
[FLINK-37229] Add/record DELETING/DELETED lifecycle states
1 parent d5d027e commit 379c690

File tree

12 files changed

+81
-35
lines changed

12 files changed

+81
-35
lines changed

flink-kubernetes-operator-api/src/main/java/org/apache/flink/kubernetes/operator/api/lifecycle/ResourceLifecycleState.java

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,9 @@ public enum ResourceLifecycleState {
3535
STABLE(true, "The resource deployment is considered to be stable and won’t be rolled back"),
3636
ROLLING_BACK(false, "The resource is being rolled back to the last stable spec"),
3737
ROLLED_BACK(true, "The resource is deployed with the last stable spec"),
38-
FAILED(true, "The job terminally failed");
38+
FAILED(true, "The job terminally failed"),
39+
DELETING(false, "The resource is being deleted"),
40+
DELETED(true, "The resource is deleted");
3941

4042
@JsonIgnore private final boolean terminal;
4143
@JsonIgnore @Getter private final String description;

flink-kubernetes-operator-api/src/main/java/org/apache/flink/kubernetes/operator/api/status/CommonStatus.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,11 @@ public abstract class CommonStatus<SPEC extends AbstractFlinkSpec> {
6060
public abstract ReconciliationStatus<SPEC> getReconciliationStatus();
6161

6262
public ResourceLifecycleState getLifecycleState() {
63+
if (ResourceLifecycleState.DELETING == lifecycleState
64+
|| ResourceLifecycleState.DELETED == lifecycleState) {
65+
return lifecycleState;
66+
}
67+
6368
var reconciliationStatus = getReconciliationStatus();
6469

6570
if (reconciliationStatus.isBeforeFirstDeployment()) {

flink-kubernetes-operator/src/main/java/org/apache/flink/kubernetes/operator/controller/FlinkDeploymentController.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import org.apache.flink.api.common.JobStatus;
2121
import org.apache.flink.kubernetes.operator.api.FlinkDeployment;
2222
import org.apache.flink.kubernetes.operator.api.FlinkStateSnapshot;
23+
import org.apache.flink.kubernetes.operator.api.lifecycle.ResourceLifecycleState;
2324
import org.apache.flink.kubernetes.operator.api.status.FlinkDeploymentStatus;
2425
import org.apache.flink.kubernetes.operator.api.status.JobManagerDeploymentStatus;
2526
import org.apache.flink.kubernetes.operator.exception.DeploymentFailedException;
@@ -104,6 +105,7 @@ public DeleteControl cleanup(FlinkDeployment flinkApp, Context josdkContext) {
104105
"Cleaning up FlinkDeployment",
105106
josdkContext.getClient());
106107
statusRecorder.updateStatusFromCache(flinkApp);
108+
flinkApp.getStatus().setLifecycleState(ResourceLifecycleState.DELETING);
107109
var ctx = ctxFactory.getResourceContext(flinkApp, josdkContext);
108110
try {
109111
observerFactory.getOrCreate(flinkApp).observe(ctx);
@@ -113,7 +115,8 @@ public DeleteControl cleanup(FlinkDeployment flinkApp, Context josdkContext) {
113115

114116
var deleteControl = reconcilerFactory.getOrCreate(flinkApp).cleanup(ctx);
115117
if (deleteControl.isRemoveFinalizer()) {
116-
statusRecorder.removeCachedStatus(flinkApp);
118+
flinkApp.getStatus().setLifecycleState(ResourceLifecycleState.DELETED);
119+
statusRecorder.cleanupForDeletion(flinkApp);
117120
ctxFactory.cleanup(flinkApp);
118121
} else {
119122
statusRecorder.patchAndCacheStatus(flinkApp, ctx.getKubernetesClient());

flink-kubernetes-operator/src/main/java/org/apache/flink/kubernetes/operator/controller/FlinkSessionJobController.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import org.apache.flink.kubernetes.operator.api.FlinkDeployment;
2121
import org.apache.flink.kubernetes.operator.api.FlinkSessionJob;
2222
import org.apache.flink.kubernetes.operator.api.FlinkStateSnapshot;
23+
import org.apache.flink.kubernetes.operator.api.lifecycle.ResourceLifecycleState;
2324
import org.apache.flink.kubernetes.operator.api.status.FlinkSessionJobStatus;
2425
import org.apache.flink.kubernetes.operator.exception.ReconciliationException;
2526
import org.apache.flink.kubernetes.operator.health.CanaryResourceManager;
@@ -145,6 +146,8 @@ public DeleteControl cleanup(FlinkSessionJob sessionJob, Context josdkContext) {
145146
EventRecorder.Component.Operator,
146147
"Cleaning up FlinkSessionJob",
147148
josdkContext.getClient());
149+
statusRecorder.updateStatusFromCache(sessionJob);
150+
sessionJob.getStatus().setLifecycleState(ResourceLifecycleState.DELETING);
148151
var ctx = ctxFactory.getResourceContext(sessionJob, josdkContext);
149152
try {
150153
observer.observe(ctx);
@@ -154,8 +157,9 @@ public DeleteControl cleanup(FlinkSessionJob sessionJob, Context josdkContext) {
154157

155158
var deleteControl = reconciler.cleanup(ctx);
156159
if (deleteControl.isRemoveFinalizer()) {
160+
sessionJob.getStatus().setLifecycleState(ResourceLifecycleState.DELETED);
157161
ctxFactory.cleanup(sessionJob);
158-
statusRecorder.removeCachedStatus(sessionJob);
162+
statusRecorder.cleanupForDeletion(sessionJob);
159163
} else {
160164
statusRecorder.patchAndCacheStatus(sessionJob, ctx.getKubernetesClient());
161165
}

flink-kubernetes-operator/src/main/java/org/apache/flink/kubernetes/operator/utils/StatusRecorder.java

Lines changed: 21 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -98,19 +98,7 @@ public void patchAndCacheStatus(CR resource, KubernetesClient client) {
9898
return;
9999
}
100100

101-
Class<?> statusClass;
102-
if (resource instanceof FlinkDeployment) {
103-
statusClass = FlinkDeploymentStatus.class;
104-
} else if (resource instanceof FlinkSessionJob) {
105-
statusClass = FlinkSessionJobStatus.class;
106-
} else if (resource instanceof FlinkStateSnapshot) {
107-
statusClass = FlinkStateSnapshotStatus.class;
108-
} else {
109-
throw new RuntimeException(
110-
String.format("Resource is unknown class: %s", resource.getClass()));
111-
}
112-
113-
var prevStatus = (STATUS) objectMapper.convertValue(previousStatusNode, statusClass);
101+
var prevStatus = convertPreviousStatus(resource, previousStatusNode);
114102

115103
Exception err = null;
116104
for (int i = 0; i < 3; i++) {
@@ -134,6 +122,21 @@ public void patchAndCacheStatus(CR resource, KubernetesClient client) {
134122
metricManager.onUpdate(resource);
135123
}
136124

125+
private STATUS convertPreviousStatus(CR resource, ObjectNode previousStatusNode) {
126+
Class<?> statusClass;
127+
if (resource instanceof FlinkDeployment) {
128+
statusClass = FlinkDeploymentStatus.class;
129+
} else if (resource instanceof FlinkSessionJob) {
130+
statusClass = FlinkSessionJobStatus.class;
131+
} else if (resource instanceof FlinkStateSnapshot) {
132+
statusClass = FlinkStateSnapshotStatus.class;
133+
} else {
134+
throw new RuntimeException(
135+
String.format("Resource is unknown class: %s", resource.getClass()));
136+
}
137+
return (STATUS) objectMapper.convertValue(previousStatusNode, statusClass);
138+
}
139+
137140
private void replaceStatus(CR resource, STATUS prevStatus, KubernetesClient client)
138141
throws JsonProcessingException {
139142
int retries = 0;
@@ -240,13 +243,15 @@ public void updateStatusFromCache(CR resource) {
240243
}
241244

242245
/**
243-
* Remove cached status for Flink resource.
246+
* Clean up resource after deletion and send a last status update.
244247
*
245248
* @param resource Flink resource.
246249
*/
247-
public void removeCachedStatus(CR resource) {
248-
statusCache.remove(ResourceID.fromResource(resource));
250+
public void cleanupForDeletion(CR resource) {
251+
var prevJson = statusCache.remove(ResourceID.fromResource(resource));
252+
var prevStatus = convertPreviousStatus(resource, prevJson);
249253
metricManager.onRemove(resource);
254+
statusUpdateListener.accept(resource, prevStatus);
250255
}
251256

252257
public static <S extends CommonStatus<?>, CR extends AbstractFlinkResource<?, S>>

flink-kubernetes-operator/src/test/java/org/apache/flink/kubernetes/operator/config/FlinkConfigBuilderTest.java

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,6 @@
6363
import java.util.Collections;
6464
import java.util.List;
6565
import java.util.Map;
66-
import java.util.stream.Stream;
6766

6867
import static org.apache.flink.configuration.DeploymentOptions.SHUTDOWN_ON_APPLICATION_FINISH;
6968
import static org.apache.flink.kubernetes.operator.api.utils.BaseTestUtils.IMAGE;
@@ -934,13 +933,4 @@ private PodTemplateSpec createTestPodWithContainers() {
934933
TestUtils.getTestPodTemplate("hostname", List.of(mainContainer, sideCarContainer));
935934
return pod;
936935
}
937-
938-
private static Stream<KubernetesConfigOptions.ServiceExposedType> serviceExposedTypes() {
939-
return Stream.of(
940-
null,
941-
KubernetesConfigOptions.ServiceExposedType.ClusterIP,
942-
KubernetesConfigOptions.ServiceExposedType.LoadBalancer,
943-
KubernetesConfigOptions.ServiceExposedType.Headless_ClusterIP,
944-
KubernetesConfigOptions.ServiceExposedType.NodePort);
945-
}
946936
}

flink-kubernetes-operator/src/test/java/org/apache/flink/kubernetes/operator/controller/FlinkDeploymentControllerTest.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,17 @@ public void verifyBasicReconcileLoop(FlinkVersion flinkVersion) throws Exception
153153
assertEquals(
154154
appCluster.getStatus().getReconciliationStatus().getLastReconciledSpec(),
155155
appCluster.getStatus().getReconciliationStatus().getLastStableSpec());
156+
157+
testController.cleanup(appCluster, context);
158+
// Make sure status is recorded and sent out at the end of cleanup
159+
assertEquals(
160+
ResourceLifecycleState.DELETED,
161+
testController
162+
.getStatusUpdateCounter()
163+
.currentResource
164+
.getStatus()
165+
.getLifecycleState());
166+
assertEquals(ResourceLifecycleState.DELETED, appCluster.getStatus().getLifecycleState());
156167
}
157168

158169
@ParameterizedTest

flink-kubernetes-operator/src/test/java/org/apache/flink/kubernetes/operator/controller/FlinkSessionJobControllerTest.java

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import org.apache.flink.kubernetes.operator.TestingFlinkService;
2424
import org.apache.flink.kubernetes.operator.api.FlinkDeployment;
2525
import org.apache.flink.kubernetes.operator.api.FlinkSessionJob;
26+
import org.apache.flink.kubernetes.operator.api.lifecycle.ResourceLifecycleState;
2627
import org.apache.flink.kubernetes.operator.api.spec.FlinkVersion;
2728
import org.apache.flink.kubernetes.operator.api.spec.JobState;
2829
import org.apache.flink.kubernetes.operator.api.spec.UpgradeMode;
@@ -652,6 +653,14 @@ public void testCancelJobNotFound() throws Exception {
652653

653654
assertEquals(CANCELLING, sessionJob.getStatus().getJobStatus().getState());
654655
assertFalse(deleteControl.isRemoveFinalizer());
656+
assertEquals(
657+
ResourceLifecycleState.DELETING,
658+
testController
659+
.getStatusUpdateCounter()
660+
.currentResource
661+
.getStatus()
662+
.getLifecycleState());
663+
assertEquals(ResourceLifecycleState.DELETING, sessionJob.getStatus().getLifecycleState());
655664
assertEquals(
656665
configManager.getOperatorConfiguration().getProgressCheckInterval().toMillis(),
657666
deleteControl.getScheduleDelay().get());
@@ -660,6 +669,14 @@ public void testCancelJobNotFound() throws Exception {
660669
flinkService.setFlinkJobNotFound(true);
661670
deleteControl = testController.cleanup(sessionJob, context);
662671
assertTrue(deleteControl.isRemoveFinalizer());
672+
assertEquals(
673+
ResourceLifecycleState.DELETED,
674+
testController
675+
.getStatusUpdateCounter()
676+
.currentResource
677+
.getStatus()
678+
.getLifecycleState());
679+
assertEquals(ResourceLifecycleState.DELETED, sessionJob.getStatus().getLifecycleState());
663680
}
664681

665682
private void verifyReconcileInitialSuspendedDeployment(FlinkSessionJob sessionJob)

flink-kubernetes-operator/src/test/java/org/apache/flink/kubernetes/operator/controller/TestingFlinkDeploymentController.java

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ public class TestingFlinkDeploymentController
6969

7070
@Getter private ReconcilerFactory reconcilerFactory;
7171
private FlinkDeploymentController flinkDeploymentController;
72-
private StatusUpdateCounter statusUpdateCounter = new StatusUpdateCounter();
72+
@Getter private StatusUpdateCounter statusUpdateCounter = new StatusUpdateCounter();
7373
private FlinkResourceEventCollector flinkResourceEventCollector =
7474
new FlinkResourceEventCollector();
7575

@@ -174,11 +174,12 @@ public Queue<Event> flinkResourceEvents() {
174174
return flinkResourceEventCollector.events;
175175
}
176176

177-
private static class StatusUpdateCounter
177+
/** Test status consumer. */
178+
protected static class StatusUpdateCounter
178179
implements BiConsumer<FlinkDeployment, FlinkDeploymentStatus> {
179180

180-
private FlinkDeployment currentResource;
181-
private int counter;
181+
FlinkDeployment currentResource;
182+
int counter;
182183

183184
@Override
184185
public void accept(

flink-kubernetes-operator/src/test/java/org/apache/flink/kubernetes/operator/controller/TestingFlinkSessionJobController.java

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,11 @@ public class TestingFlinkSessionJobController
6565

6666
@Getter private CanaryResourceManager<FlinkSessionJob> canaryResourceManager;
6767
private FlinkSessionJobController flinkSessionJobController;
68+
69+
@Getter
6870
private TestingFlinkSessionJobController.StatusUpdateCounter statusUpdateCounter =
6971
new TestingFlinkSessionJobController.StatusUpdateCounter();
72+
7073
private FlinkResourceEventCollector flinkResourceEventCollector =
7174
new FlinkResourceEventCollector();
7275
private EventRecorder eventRecorder;
@@ -161,10 +164,11 @@ public Queue<Event> events() {
161164
return flinkResourceEventCollector.events;
162165
}
163166

164-
private static class StatusUpdateCounter
167+
/** Test status consumer. */
168+
protected static class StatusUpdateCounter
165169
implements BiConsumer<FlinkSessionJob, FlinkSessionJobStatus> {
166170

167-
private FlinkSessionJob currentResource;
171+
FlinkSessionJob currentResource;
168172
private int counter;
169173

170174
@Override

0 commit comments

Comments
 (0)