3030
3131import io .fabric8 .kubernetes .api .model .HasMetadata ;
3232import io .fabric8 .kubernetes .api .model .Pod ;
33- import lombok .AllArgsConstructor ;
3433import lombok .NoArgsConstructor ;
3534import lombok .extern .slf4j .Slf4j ;
3635import org .apache .commons .lang3 .StringUtils ;
4746import org .apache .spark .k8s .operator .status .ApplicationStatus ;
4847import org .apache .spark .k8s .operator .utils .ReconcilerUtils ;
4948import org .apache .spark .k8s .operator .utils .SparkAppStatusRecorder ;
49+ import org .apache .spark .k8s .operator .utils .SparkAppStatusUtils ;
5050
5151/**
5252 * Cleanup all secondary resources when application is deleted, or at the end of each attempt.
5353 * Update Application status to indicate whether another attempt would be made.
5454 */
55- @ AllArgsConstructor
5655@ NoArgsConstructor
5756@ Slf4j
5857public class AppCleanUpStep extends AppReconcileStep {
5958 private Supplier <ApplicationState > onDemandCleanUpReason ;
59+ private String stateUpdateMessage ;
60+
/**
 * Creates a clean up step bound to an on-demand clean up reason.
 *
 * @param onDemandCleanUpReason supplier of the {@link ApplicationState} to record when
 *     resources are released; when non-null, {@link #isOnDemandCleanup()} reports true
 */
public AppCleanUpStep(Supplier<ApplicationState> onDemandCleanUpReason) {
  // The no-arg superclass constructor is invoked implicitly.
  this.onDemandCleanUpReason = onDemandCleanUpReason;
}
6065
6166 /**
6267 * Cleanup secondary resources for an application if needed and updates application status
@@ -88,42 +93,37 @@ public ReconcileProgress reconcile(
8893 ApplicationStatus currentStatus = application .getStatus ();
8994 ApplicationState currentState = currentStatus .getCurrentState ();
9095 ApplicationTolerations tolerations = application .getSpec ().getApplicationTolerations ();
91- if (ApplicationStateSummary .ResourceReleased .equals (currentState .getCurrentStateSummary ())) {
92- statusRecorder .removeCachedStatus (application );
93- return ReconcileProgress .completeAndNoRequeue ();
94- }
95- String stateMessage = null ;
96- if (isOnDemandCleanup ()) {
97- log .info ("Cleaning up application resources on demand" );
98- } else {
99- if (ApplicationStateSummary .TerminatedWithoutReleaseResources .equals (
100- currentState .getCurrentStateSummary ())) {
101- statusRecorder .removeCachedStatus (application );
102- return ReconcileProgress .completeAndNoRequeue ();
103- } else if (currentState .getCurrentStateSummary ().isStopping ()) {
104- if (retainReleaseResourceForPolicyAndState (
105- tolerations .getResourceRetainPolicy (), currentState )) {
106- if (tolerations .getRestartConfig () != null
107- && !RestartPolicy .Never .equals (tolerations .getRestartConfig ().getRestartPolicy ())) {
108- stateMessage =
109- "Application is configured to restart, resources created in current "
110- + "attempt would be force released." ;
111- log .warn (stateMessage );
112- } else {
113- ApplicationState terminationState =
114- new ApplicationState (
115- ApplicationStateSummary .TerminatedWithoutReleaseResources ,
116- "Application is terminated without releasing resources as configured." );
117- long requeueAfterMillis =
118- tolerations .getApplicationTimeoutConfig ().getTerminationRequeuePeriodMillis ();
119- return appendStateAndRequeueAfter (
120- context , statusRecorder , terminationState , Duration .ofMillis (requeueAfterMillis ));
121- }
96+ if (currentState .getCurrentStateSummary ().isTerminated ()) {
97+ Optional <ReconcileProgress > terminatedAppProgress =
98+ checkEarlyExitForTerminatedApp (application , statusRecorder );
99+ if (terminatedAppProgress .isPresent ()) {
100+ return terminatedAppProgress .get ();
101+ }
102+ } else if (isOnDemandCleanup ()) {
103+ log .info ("Releasing secondary resources for application on demand." );
104+ } else if (currentState .getCurrentStateSummary ().isStopping ()) {
105+ if (retainReleaseResourceForPolicyAndState (
106+ tolerations .getResourceRetainPolicy (), currentState )) {
107+ if (tolerations .getRestartConfig () != null
108+ && !RestartPolicy .Never .equals (tolerations .getRestartConfig ().getRestartPolicy ())) {
109+ stateUpdateMessage =
110+ "Application is configured to restart, resources created in current "
111+ + "attempt would be force released." ;
112+ log .warn (stateUpdateMessage );
113+ } else {
114+ ApplicationState terminationState =
115+ new ApplicationState (
116+ ApplicationStateSummary .TerminatedWithoutReleaseResources ,
117+ "Application is terminated without releasing resources as configured." );
118+ long requeueAfterMillis =
119+ tolerations .getApplicationTimeoutConfig ().getTerminationRequeuePeriodMillis ();
120+ return appendStateAndRequeueAfter (
121+ context , statusRecorder , terminationState , Duration .ofMillis (requeueAfterMillis ));
122122 }
123- } else {
124- log .debug ("Clean up is not expected for app, proceeding to next step." );
125- return ReconcileProgress .proceed ();
126123 }
124+ } else {
125+ log .debug ("Clean up is not expected for app, proceeding to next step." );
126+ return ReconcileProgress .proceed ();
127127 }
128128
129129 List <HasMetadata > resourcesToRemove = new ArrayList <>();
@@ -159,8 +159,8 @@ public ReconcileProgress reconcile(
159159 ApplicationStatus updatedStatus ;
160160 if (onDemandCleanUpReason != null ) {
161161 ApplicationState state = onDemandCleanUpReason .get ();
162- if (StringUtils .isNotEmpty (stateMessage )) {
163- state .setMessage (stateMessage );
162+ if (StringUtils .isNotEmpty (stateUpdateMessage )) {
163+ state .setMessage (stateUpdateMessage );
164164 }
165165 long requeueAfterMillis =
166166 tolerations .getApplicationTimeoutConfig ().getTerminationRequeuePeriodMillis ();
@@ -171,7 +171,7 @@ public ReconcileProgress reconcile(
171171 currentStatus .terminateOrRestart (
172172 tolerations .getRestartConfig (),
173173 tolerations .getResourceRetainPolicy (),
174- stateMessage ,
174+ stateUpdateMessage ,
175175 SparkOperatorConf .TRIM_ATTEMPT_STATE_TRANSITION_HISTORY .getValue ());
176176 long requeueAfterMillis =
177177 tolerations .getApplicationTimeoutConfig ().getTerminationRequeuePeriodMillis ();
@@ -184,6 +184,41 @@ public ReconcileProgress reconcile(
184184 }
185185 }
186186
187+ protected Optional <ReconcileProgress > checkEarlyExitForTerminatedApp (
188+ final SparkApplication application , final SparkAppStatusRecorder statusRecorder ) {
189+ ApplicationStatus currentStatus = application .getStatus ();
190+ ApplicationState currentState = currentStatus .getCurrentState ();
191+ ApplicationTolerations tolerations = application .getSpec ().getApplicationTolerations ();
192+ if (ApplicationStateSummary .ResourceReleased .equals (currentState .getCurrentStateSummary ())) {
193+ statusRecorder .removeCachedStatus (application );
194+ return Optional .of (ReconcileProgress .completeAndNoRequeue ());
195+ }
196+ if (isOnDemandCleanup ()) {
197+ return Optional .empty ();
198+ }
199+ if (ApplicationStateSummary .TerminatedWithoutReleaseResources .equals (
200+ currentState .getCurrentStateSummary ())) {
201+ if (tolerations .isRetainDurationEnabled ()) {
202+ Instant now = Instant .now ();
203+ if (tolerations .exceedRetainDurationAtInstant (currentState , now )) {
204+ onDemandCleanUpReason = SparkAppStatusUtils ::appExceededRetainDuration ;
205+ return Optional .empty ();
206+ } else {
207+ Duration nextCheckDuration =
208+ Duration .between (
209+ Instant .now (),
210+ Instant .parse (currentState .getLastTransitionTime ())
211+ .plusMillis (tolerations .getResourceRetainDurationMillis ()));
212+ return Optional .of (ReconcileProgress .completeAndRequeueAfter (nextCheckDuration ));
213+ }
214+ } else {
215+ statusRecorder .removeCachedStatus (application );
216+ return Optional .of (ReconcileProgress .completeAndNoRequeue ());
217+ }
218+ }
219+ return Optional .empty ();
220+ }
221+
187222 protected boolean isOnDemandCleanup () {
188223 return onDemandCleanUpReason != null ;
189224 }
0 commit comments