|
35 | 35 | import org.apache.cloudstack.framework.jobs.AsyncJobManager; |
36 | 36 | import org.apache.cloudstack.framework.jobs.dao.AsyncJobDao; |
37 | 37 | import org.apache.cloudstack.framework.jobs.impl.AsyncJobVO; |
| 38 | +import org.apache.cloudstack.jobs.JobInfo; |
38 | 39 | import org.apache.cloudstack.managed.context.ManagedContextTimerTask; |
39 | 40 | import org.springframework.stereotype.Component; |
40 | 41 |
|
|
47 | 48 | import com.cloud.storage.Snapshot; |
48 | 49 | import com.cloud.storage.SnapshotPolicyVO; |
49 | 50 | import com.cloud.storage.SnapshotScheduleVO; |
50 | | -import com.cloud.storage.SnapshotVO; |
51 | 51 | import com.cloud.storage.VolumeVO; |
52 | 52 | import com.cloud.storage.dao.SnapshotDao; |
53 | 53 | import com.cloud.storage.dao.SnapshotPolicyDao; |
|
64 | 64 | import com.cloud.utils.concurrency.TestClock; |
65 | 65 | import com.cloud.utils.db.DB; |
66 | 66 | import com.cloud.utils.db.GlobalLock; |
67 | | -import com.cloud.utils.db.SearchCriteria; |
68 | 67 | import com.cloud.utils.db.TransactionLegacy; |
69 | 68 | import com.cloud.vm.snapshot.VMSnapshotManager; |
70 | 69 | import com.cloud.vm.snapshot.VMSnapshotVO; |
@@ -144,7 +143,7 @@ public void poll(final Date currentTimestamp) { |
144 | 143 | try { |
145 | 144 | if (scanLock.lock(ACQUIRE_GLOBAL_LOCK_TIMEOUT_FOR_COOPERATION)) { |
146 | 145 | try { |
147 | | - checkStatusOfCurrentlyExecutingSnapshots(); |
| 146 | + scheduleNextSnapshotJobsIfNecessary(); |
148 | 147 | } finally { |
149 | 148 | scanLock.unlock(); |
150 | 149 | } |
@@ -174,68 +173,37 @@ public void poll(final Date currentTimestamp) { |
174 | 173 | } |
175 | 174 | } |
176 | 175 |
|
177 | | - private void checkStatusOfCurrentlyExecutingSnapshots() { |
178 | | - final SearchCriteria<SnapshotScheduleVO> sc = _snapshotScheduleDao.createSearchCriteria(); |
179 | | - sc.addAnd("asyncJobId", SearchCriteria.Op.NNULL); |
180 | | - final List<SnapshotScheduleVO> snapshotSchedules = _snapshotScheduleDao.search(sc, null); |
181 | | - for (final SnapshotScheduleVO snapshotSchedule : snapshotSchedules) { |
182 | | - final Long asyncJobId = snapshotSchedule.getAsyncJobId(); |
183 | | - final AsyncJobVO asyncJob = _asyncJobDao.findByIdIncludingRemoved(asyncJobId); |
184 | | - switch (asyncJob.getStatus()) { |
185 | | - case SUCCEEDED: |
186 | | - // The snapshot has been successfully backed up. |
187 | | - // The snapshot state has also been cleaned up. |
188 | | - // We can schedule the next job for this snapshot. |
189 | | - // Remove the existing entry in the snapshot_schedule table. |
190 | | - scheduleNextSnapshotJob(snapshotSchedule); |
191 | | - break; |
192 | | - case FAILED: |
193 | | - // Check the snapshot status. |
194 | | - final Long snapshotId = snapshotSchedule.getSnapshotId(); |
195 | | - if (snapshotId == null) { |
196 | | - // createSnapshotAsync exited, successfully or unsuccessfully, |
197 | | - // even before creating a snapshot record |
198 | | - // No cleanup needs to be done. |
199 | | - // Schedule the next snapshot. |
200 | | - scheduleNextSnapshotJob(snapshotSchedule); |
201 | | - } else { |
202 | | - final SnapshotVO snapshot = _snapshotDao.findById(snapshotId); |
203 | | - if (snapshot == null || snapshot.getRemoved() != null) { |
204 | | - // This snapshot has been deleted successfully from the primary storage |
205 | | - // Again no cleanup needs to be done. |
206 | | - // Schedule the next snapshot. |
207 | | - // There's very little probability that the code reaches this point. |
208 | | - // The snapshotId is a foreign key for the snapshot_schedule table |
209 | | - // set to ON DELETE CASCADE. So if the snapshot entry is deleted, the snapshot_schedule entry will be too. |
210 | | - // But what if it has only been marked as removed? |
211 | | - scheduleNextSnapshotJob(snapshotSchedule); |
212 | | - } else { |
213 | | - // The management server executing this snapshot job appears to have crashed |
214 | | - // while creating the snapshot on primary storage/or backing it up. |
215 | | - // We have no idea whether the snapshot was successfully taken on the primary or not. |
216 | | - // Schedule the next snapshot job. |
217 | | - // The ValidatePreviousSnapshotCommand will take appropriate action on this snapshot |
218 | | - // If the snapshot was taken successfully on primary, it will retry backing it up. |
219 | | - // and cleanup the previous snapshot |
220 | | - // Set the userId to that of system. |
221 | | - //_snapshotManager.validateSnapshot(1L, snapshot); |
222 | | - // In all cases, schedule the next snapshot job |
223 | | - scheduleNextSnapshotJob(snapshotSchedule); |
224 | | - } |
225 | | - } |
| 176 | + private void scheduleNextSnapshotJobsIfNecessary() { |
| 177 | + List<SnapshotScheduleVO> snapshotSchedules = _snapshotScheduleDao.getSchedulesAssignedWithAsyncJob(); |
| 178 | + logger.info("Verifying the current state of [{}] snapshot schedules and scheduling next jobs, if necessary.", snapshotSchedules.size()); |
| 179 | + for (SnapshotScheduleVO snapshotSchedule : snapshotSchedules) { |
| 180 | + scheduleNextSnapshotJobIfNecessary(snapshotSchedule); |
| 181 | + } |
| 182 | + } |
226 | 183 |
|
227 | | - break; |
228 | | - case IN_PROGRESS: |
229 | | - // There is no way of knowing from here whether |
230 | | - // 1) Another management server is processing this snapshot job |
231 | | - // 2) The management server has crashed and this snapshot is lying |
232 | | - // around in an inconsistent state. |
233 | | - // Hopefully, this can be resolved at the backend when the current snapshot gets executed. |
234 | | - // But if it remains in this state, the current snapshot will not get executed. |
235 | | - // And it will remain in stasis. |
236 | | - break; |
237 | | - } |
| 184 | + protected void scheduleNextSnapshotJobIfNecessary(SnapshotScheduleVO snapshotSchedule) { |
| 185 | + Long asyncJobId = snapshotSchedule.getAsyncJobId(); |
| 186 | + AsyncJobVO asyncJob = _asyncJobDao.findByIdIncludingRemoved(asyncJobId); |
| 187 | + |
| 188 | + if (asyncJob == null) { |
| 189 | + logger.debug("The async job [{}] of snapshot schedule [{}] does not exist anymore. Considering it as finished and scheduling the next snapshot job.", |
| 190 | + asyncJobId, snapshotSchedule); |
| 191 | + scheduleNextSnapshotJob(snapshotSchedule); |
| 192 | + return; |
238 | 193 | } |
| 194 | + |
| 195 | + JobInfo.Status status = asyncJob.getStatus(); |
| 196 | + |
| 197 | + if (JobInfo.Status.SUCCEEDED.equals(status)) { |
| 198 | + logger.debug("Last job of schedule [{}] succeeded; scheduling the next snapshot job.", snapshotSchedule); |
| 199 | + } else if (JobInfo.Status.FAILED.equals(status)) { |
| 200 | + logger.debug("Last job of schedule [{}] failed with [{}]; scheduling a new snapshot job.", snapshotSchedule, asyncJob.getResult()); |
| 201 | + } else { |
| 202 | + logger.debug("Schedule [{}] is still in progress, skipping next job scheduling.", snapshotSchedule); |
| 203 | + return; |
| 204 | + } |
| 205 | + |
| 206 | + scheduleNextSnapshotJob(snapshotSchedule); |
239 | 207 | } |
240 | 208 |
|
241 | 209 | @DB |
|
0 commit comments