@@ -101,8 +101,7 @@ public UpdateControl<FlinkBlueGreenDeployment> reconcile(
101101
102102 if (deploymentStatus == null ) {
103103 deploymentStatus = new FlinkBlueGreenDeploymentStatus ();
104- deploymentStatus .setLastReconciledSpec (
105- SpecUtils .serializeObject (flinkBlueGreenDeployment .getSpec (), "spec" ));
104+ setLastReconciledSpec (flinkBlueGreenDeployment , deploymentStatus );
106105 return initiateDeployment (
107106 flinkBlueGreenDeployment ,
108107 deploymentStatus ,
@@ -115,12 +114,6 @@ public UpdateControl<FlinkBlueGreenDeployment> reconcile(
115114 FlinkBlueGreenDeployments deployments =
116115 FlinkBlueGreenDeployments .fromSecondaryResources (josdkContext );
117116
118- // TODO: if a new deployment request comes while in the middle of a transition it's
119- // currently ignored, but the new spec remains changed, should we roll it back?
120- // TODO: if we choose to leave a previously failed deployment 'running' for debug
121- // purposes,
122- // we should flag it somehow as 'ROLLED_BACK' to signal that it can be overridden by a
123- // new deployment attempt.
124117 switch (deploymentStatus .getBlueGreenState ()) {
125118 case ACTIVE_BLUE :
126119 return checkAndInitiateDeployment (
@@ -161,7 +154,21 @@ private UpdateControl<FlinkBlueGreenDeployment> monitorTransition(
161154 FlinkBlueGreenDeployments deployments ,
162155 FlinkBlueGreenDeploymentStatus deploymentStatus ,
163156 DeploymentType currentDeploymentType ,
164- Context <FlinkBlueGreenDeployment > josdkContext ) {
157+ Context <FlinkBlueGreenDeployment > josdkContext )
158+ throws JsonProcessingException {
159+
160+ if (hasSpecChanged (bgDeployment .getSpec (), deploymentStatus , currentDeploymentType )) {
161+ // The spec was changed while a transition is in progress: discard the new
162+ // change, restore the last reconciled spec and log a warning.
163+ bgDeployment .setSpec (
164+ SpecUtils .deserializeObject (
165+ deploymentStatus .getLastReconciledSpec (),
166+ "spec" ,
167+ FlinkBlueGreenDeploymentSpec .class ));
168+ josdkContext .getClient ().resource (bgDeployment ).replace ();
169+ LOG .warn (
170+ "Blue/Green Spec change detected during transition, ignored and reverted to the last reconciled spec" );
171+ }
165172
166173 var nextState = FlinkBlueGreenDeploymentState .ACTIVE_BLUE ;
167174 FlinkDeployment currentDeployment ;
@@ -185,18 +192,8 @@ private UpdateControl<FlinkBlueGreenDeployment> monitorTransition(
185192
186193 if (isDeploymentReady (nextDeployment , josdkContext , deploymentStatus )) {
187194 return deleteAndFinalize (
188- bgDeployment ,
189- deploymentStatus ,
190- currentDeploymentType ,
191- josdkContext ,
192- currentDeployment ,
193- nextState );
195+ bgDeployment , deploymentStatus , josdkContext , currentDeployment , nextState );
194196 } else {
195- // This phase requires rescheduling the reconciliation because the pod initialization
196- // could get stuck
197- // (e.g. waiting for resources)
198- // TODO: figure out the course of action for error/failure cases
199-
200197 int maxNumRetries = bgDeployment .getSpec ().getTemplate ().getMaxNumRetries ();
201198 if (maxNumRetries <= 0 ) {
202199 maxNumRetries = DEFAULT_MAX_NUM_RETRIES ;
@@ -242,7 +239,6 @@ private static int getReconciliationReschedInterval(FlinkBlueGreenDeployment bgD
242239 private UpdateControl <FlinkBlueGreenDeployment > deleteAndFinalize (
243240 FlinkBlueGreenDeployment bgDeployment ,
244241 FlinkBlueGreenDeploymentStatus deploymentStatus ,
245- DeploymentType currentDeploymentType ,
246242 Context <FlinkBlueGreenDeployment > josdkContext ,
247243 FlinkDeployment currentDeployment ,
248244 FlinkBlueGreenDeploymentState nextState ) {
@@ -251,11 +247,6 @@ private UpdateControl<FlinkBlueGreenDeployment> deleteAndFinalize(
251247 deleteDeployment (currentDeployment , josdkContext );
252248 return UpdateControl .noUpdate ();
253249 } else {
254- deploymentStatus .setLastReconciledSpec (
255- SpecUtils .serializeObject (bgDeployment .getSpec (), "spec" ));
256-
257- // TODO: Set the new child job STATUS to RUNNING too
258-
259250 return patchStatusUpdateControl (
260251 bgDeployment , deploymentStatus , nextState , JobStatus .RUNNING , false );
261252 }
@@ -272,6 +263,9 @@ private UpdateControl<FlinkBlueGreenDeployment> checkAndInitiateDeployment(
272263 if (hasSpecChanged (
273264 flinkBlueGreenDeployment .getSpec (), deploymentStatus , currentDeploymentType )) {
274265
266+ // Acknowledge the spec change by recording it as the last reconciled spec
267+ setLastReconciledSpec (flinkBlueGreenDeployment , deploymentStatus );
268+
275269 FlinkDeployment currentFlinkDeployment =
276270 DeploymentType .BLUE == currentDeploymentType
277271 ? deployments .getFlinkDeploymentBlue ()
@@ -286,10 +280,6 @@ private UpdateControl<FlinkBlueGreenDeployment> checkAndInitiateDeployment(
286280 FlinkResourceContext <FlinkDeployment > resourceContext =
287281 ctxFactory .getResourceContext (currentFlinkDeployment , josdkContext );
288282
289- // TODO: this operation is already done by hasSpecChanged() above, dedup later
290- String serializedSpec =
291- SpecUtils .serializeObject (flinkBlueGreenDeployment .getSpec (), "spec" );
292-
293283 // Updating status
294284 if (DeploymentType .BLUE == currentDeploymentType ) {
295285 nextState = FlinkBlueGreenDeploymentState .TRANSITIONING_TO_GREEN ;
@@ -323,6 +313,14 @@ private UpdateControl<FlinkBlueGreenDeployment> checkAndInitiateDeployment(
323313 return UpdateControl .noUpdate ();
324314 }
325315
316+ private static void setLastReconciledSpec (
317+ FlinkBlueGreenDeployment flinkBlueGreenDeployment ,
318+ FlinkBlueGreenDeploymentStatus deploymentStatus ) {
319+ deploymentStatus .setLastReconciledSpec (
320+ SpecUtils .serializeObject (flinkBlueGreenDeployment .getSpec (), "spec" ));
321+ deploymentStatus .setLastReconciledTimestamp (System .currentTimeMillis ());
322+ }
323+
326324 public void logPotentialWarnings (
327325 FlinkDeployment flinkDeployment ,
328326 Context <FlinkBlueGreenDeployment > josdkContext ,
@@ -345,7 +343,7 @@ public void logPotentialWarnings(
345343 LOG .warn ("Deployment not healthy, some Pods have the following status: " + podPhases );
346344 }
347345
348- List <Event > badEvents =
346+ List <Event > abnormalEvents =
349347 josdkContext
350348 .getClient ()
351349 .v1 ()
@@ -373,8 +371,8 @@ public void logPotentialWarnings(
373371 .contains (p )))
374372 .collect (Collectors .toList ());
375373
376- if (!badEvents .isEmpty ()) {
377- LOG .warn ("Bad events detected: " + badEvents );
374+ if (!abnormalEvents .isEmpty ()) {
375+ LOG .warn ("Abnormal events detected: " + abnormalEvents );
378376 }
379377 }
380378
@@ -393,9 +391,7 @@ private static Savepoint configureSavepoint(
393391 .getJobId ()),
394392 resourceContext .getObserveConfig ());
395393
396- // TODO 1: check the last CP age with the logic from
397- // AbstractJobReconciler.changeLastStateIfCheckpointTooOld
398- // TODO 2: if no checkpoint is available, take a savepoint? throw error?
394+ // TODO: alternative action if no checkpoint is available?
399395 if (lastCheckpoint .isEmpty ()) {
400396 throw new IllegalStateException (
401397 "Last Checkpoint for Job "
@@ -422,8 +418,6 @@ private UpdateControl<FlinkBlueGreenDeployment> initiateDeployment(
422418 josdkContext ,
423419 isFirstDeployment );
424420
425- // TODO: set child job status to JobStatus.INITIALIZING
426-
427421 return patchStatusUpdateControl (
428422 flinkBlueGreenDeployment ,
429423 deploymentStatus ,
@@ -439,8 +433,7 @@ private boolean isDeploymentReady(
439433 FlinkBlueGreenDeploymentStatus deploymentStatus ) {
440434 if (ResourceLifecycleState .STABLE == deployment .getStatus ().getLifecycleState ()
441435 && JobStatus .RUNNING == deployment .getStatus ().getJobStatus ().getState ()) {
442- // TODO: verify, e.g. will pods be "pending" after the FlinkDeployment is RUNNING and
443- // STABLE?
436+ // TODO: checking for running pods seems to be redundant, check if this can be removed
444437 int notRunningPods =
445438 (int )
446439 getDeploymentPods (josdkContext , deployment )
@@ -501,7 +494,6 @@ private UpdateControl<FlinkBlueGreenDeployment> patchStatusUpdateControl(
501494 deploymentStatus .getJobStatus ().setState (jobState );
502495 }
503496
504- deploymentStatus .setLastReconciledTimestamp (System .currentTimeMillis ());
505497 flinkBlueGreenDeployment .setStatus (deploymentStatus );
506498 return UpdateControl .patchStatus (flinkBlueGreenDeployment );
507499 }
@@ -553,9 +545,6 @@ private void deploy(
553545
554546 private static void deleteDeployment (
555547 FlinkDeployment currentDeployment , Context <FlinkBlueGreenDeployment > josdkContext ) {
556- // TODO: This gets called multiple times, check to see if it's already in a TERMINATING
557- // state
558- // (or only execute if RUNNING)
559548 List <StatusDetails > deletedStatus =
560549 josdkContext
561550 .getClient ()
0 commit comments