|
32 | 32 | import org.slf4j.Logger;
|
33 | 33 | import org.slf4j.LoggerFactory;
|
34 | 34 |
|
| 35 | +import static org.apache.flink.configuration.WebOptions.CHECKPOINTS_HISTORY_SIZE; |
35 | 36 | import static org.apache.flink.kubernetes.operator.api.status.FlinkStateSnapshotStatus.State.IN_PROGRESS;
|
36 | 37 |
|
37 | 38 | /** The observer of {@link org.apache.flink.kubernetes.operator.api.FlinkStateSnapshot}. */
|
@@ -142,12 +143,29 @@ private void handleCheckpoint(
|
142 | 143 |
|
143 | 144 | if (checkpointStatsResult.isPending()) {
|
144 | 145 | return;
|
145 |
| - } else if (checkpointStatsResult.getError() != null) { |
146 |
| - throw new ReconciliationException(checkpointStatsResult.getError()); |
147 | 146 | }
|
148 | 147 |
|
149 |
| - LOG.info("Checkpoint {} successful: {}", resourceName, checkpointStatsResult.getPath()); |
150 |
| - FlinkStateSnapshotUtils.snapshotSuccessful( |
151 |
| - resource, checkpointStatsResult.getPath(), false); |
| 148 | + String path = checkpointStatsResult.getPath(); |
| 149 | + // The checkpoint is assumed to be complete at this point, so if fetching its path failed |
| 150 | + // we still mark the snapshot as successful, with an empty path, and trigger a warning event. |
| 151 | + if (checkpointStatsResult.getError() != null) { |
| 152 | + path = ""; |
| 153 | + var error = |
| 154 | + String.format( |
| 155 | + "Checkpoint %s was successful, but failed to fetch path. Flink webserver stores only a limited amount of checkpoints in its cache, try increasing '%s' config for this job.\n%s", |
| 156 | + resourceName, |
| 157 | + CHECKPOINTS_HISTORY_SIZE.key(), |
| 158 | + checkpointStatsResult.getError()); |
| 159 | + eventRecorder.triggerSnapshotEvent( |
| 160 | + resource, |
| 161 | + EventRecorder.Type.Warning, |
| 162 | + EventRecorder.Reason.CheckpointError, |
| 163 | + EventRecorder.Component.Snapshot, |
| 164 | + error, |
| 165 | + ctx.getKubernetesClient()); |
| 166 | + } |
| 167 | + |
| 168 | + LOG.info("Checkpoint {} successful: {}", resourceName, path); |
| 169 | + FlinkStateSnapshotUtils.snapshotSuccessful(resource, path, false); |
152 | 170 | }
|
153 | 171 | }
|
0 commit comments