Skip to content

Commit 64720cc

Browse files
netomi and janbro authored
Let DB errors propagate. Handle enqueue failures gracefully. Add explicit failure modes to watchdog (#1606)
Co-authored-by: Alejandro Munoz <[email protected]>
1 parent e622538 commit 64720cc

File tree

2 files changed

+54
-63
lines changed

2 files changed

+54
-63
lines changed

server/src/main/java/org/eclipse/openvsx/scanning/ExtensionScanJobRecoveryService.java

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -263,16 +263,25 @@ private RecoveryResult recoverValidatingScan(ExtensionScan scan) {
263263
return RecoveryResult.ERRORED;
264264
}
265265

266-
boolean scannersSubmitted = scanService.submitScannerJobs(scan, extVersion);
267-
if (!scannersSubmitted) {
268-
logger.info("Scan {} ({}.{}.{}) has no scanners, activating extension",
266+
try {
267+
boolean submitted = scanService.submitScannerJobs(scan, extVersion);
268+
if (!submitted) {
269+
// No scanners configured — safe to activate
270+
logger.info("Scan {} ({}.{}.{}) has no scanners, activating extension",
271+
scan.getId(), scan.getNamespaceName(), scan.getExtensionName(),
272+
scan.getExtensionVersion());
273+
publishService.activateExtension(extVersion, extensionService);
274+
scanService.markScanPassed(scan);
275+
}
276+
return RecoveryResult.RECOVERED;
277+
} catch (Exception e) {
278+
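// Submission threw (e.g. a DB error while transitioning the scan or saving ScanJob records;
// enqueue failures are caught inside submitScannerJobs) — do NOT activate. Mark as errored.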
279+
logger.error("Scan {} ({}.{}.{}) failed to submit scanner jobs during recovery",
269280
scan.getId(), scan.getNamespaceName(), scan.getExtensionName(),
270-
scan.getExtensionVersion());
271-
publishService.activateExtension(extVersion, extensionService);
272-
scanService.markScanPassed(scan);
281+
scan.getExtensionVersion(), e);
282+
persistenceService.markAsErrored(scan, "Recovery failed to submit scanner jobs: " + e.getMessage());
283+
return RecoveryResult.ERRORED;
273284
}
274-
275-
return RecoveryResult.RECOVERED;
276285
}
277286

278287
private ExtensionVersion findExtensionVersion(ExtensionScan scan) {
@@ -410,6 +419,7 @@ private void recoverStuckQueuedJobs() {
410419

411420
if (job.getCreatedAt().isBefore(oneHourAgo)) {
412421
markFailed(job, "Stuck in QUEUED for over 1 hour");
422+
completionService.checkCompletionSafely(job.getScanId());
413423
} else {
414424
try {
415425
job.setRecoveryInProgress(true);
@@ -423,6 +433,7 @@ private void recoverStuckQueuedJobs() {
423433
logger.error("Failed to re-enqueue job {}", job.getScanId());
424434
job.setRecoveryInProgress(false);
425435
markFailed(job, "Recovery failed: " + e.getMessage());
436+
completionService.checkCompletionSafely(job.getScanId());
426437
}
427438
}
428439
}
@@ -447,6 +458,7 @@ private void checkTimeouts() {
447458

448459
if (ageMinutes >= timeoutMinutes) {
449460
markFailed(job, String.format("Timeout: exceeded %d min limit (age: %d min)", timeoutMinutes, ageMinutes));
461+
completionService.checkCompletionSafely(job.getScanId());
450462
timedOut++;
451463
logger.error("Job {} timed out: scanner={}, age={}min", job.getId(), job.getScannerType(), ageMinutes);
452464
}

server/src/main/java/org/eclipse/openvsx/scanning/ExtensionScanService.java

Lines changed: 34 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131

3232
/**
3333
* Service for managing extension scans.
34-
*
34+
* <p>
3535
* Owns scan lifecycle (STARTED → VALIDATING → SCANNING → PASSED/QUARANTINED),
3636
* runs validation checks, and submits scanner jobs via JobRunr.
3737
*/
@@ -181,23 +181,10 @@ public void runValidation(
181181

182182
/**
183183
* Submit long-running scanner jobs for an extension version.
184-
*
185-
* This method:
186-
* 1. Gets all registered scanners from the registry
187-
* 2. Creates ScanJob records in QUEUED status
188-
* 3. Enqueues each scanner invocation to JobRunr for parallel execution
189-
* 4. Transitions the ExtensionScan to SCANNING status
190-
* 5. Returns the scan ID for tracking
191-
*
192-
* The scan ID is the ExtensionScan.id which links
193-
* the high-level scan record with individual ScanJob records.
194-
*
195-
* IMPORTANT: ScanJob records are created BEFORE JobRunr jobs are enqueued.
196-
* This avoids a race condition where AsyncScanCompletionService checks for
197-
* scan jobs before the JobRunr handler has created them.
198-
*
199-
* JobRunr handles parallel execution, automatic retry, and persistence.
200-
* AsyncScanCompletionService will activate extensions when all scans complete.
184+
* <p>
185+
* Flow: transition scan to SCANNING, then for each scanner create a ScanJob (QUEUED)
186+
* and try to enqueue a JobRunr request. An enqueue failure is only logged, since the watchdog
187+
* will re-enqueue the affected job for retry.
201188
*/
202189
public boolean submitScannerJobs(@Nonnull ExtensionScan scan, @Nonnull ExtensionVersion extVersion) {
203190
if (!config.isEnabled()) {
@@ -225,54 +212,46 @@ public boolean submitScannerJobs(@Nonnull ExtensionScan scan, @Nonnull Extension
225212
// Transition to SCANNING status before submitting jobs
226213
transitionTo(scan, ScanStatus.SCANNING);
227214

228-
// Create ScanJob records and enqueue JobRunr jobs
229215
int enqueuedCount = 0;
230-
for (Scanner scannerDef : scanners) {
216+
217+
for (Scanner scanner : scanners) {
218+
String scannerType = scanner.getScannerType();
219+
220+
ScannerJob job = new ScannerJob();
221+
job.setScanId(scanId);
222+
job.setScannerType(scannerType);
223+
job.setExtensionVersionId(extensionVersionId);
224+
job.setStatus(ScannerJob.JobStatus.QUEUED);
225+
job.setCreatedAt(LocalDateTime.now());
226+
job.setUpdatedAt(LocalDateTime.now());
227+
job.setPollLeaseUntil(null);
228+
job.setPollAttempts(0);
229+
job.setRecoveryInProgress(false);
230+
scanJobRepository.save(job);
231+
232+
logger.debug("Created ScanJob record: {} for {} (scanId={})",
233+
scannerType, NamingUtil.toLogFormat(extVersion), scanId);
234+
231235
try {
232-
String scannerType = scannerDef.getScannerType();
233-
234-
// Create ScanJob record FIRST (before enqueuing to JobRunr)
235-
// This ensures AsyncScanCompletionService can find the job records
236-
// even if it runs before the JobRunr handler executes
237-
ScannerJob job = new ScannerJob();
238-
job.setScanId(scanId);
239-
job.setScannerType(scannerType);
240-
job.setExtensionVersionId(extensionVersionId);
241-
job.setStatus(ScannerJob.JobStatus.QUEUED);
242-
job.setCreatedAt(LocalDateTime.now());
243-
job.setUpdatedAt(LocalDateTime.now());
244-
job.setPollLeaseUntil(null);
245-
job.setPollAttempts(0);
246-
job.setRecoveryInProgress(false);
247-
scanJobRepository.save(job);
248-
249-
logger.debug("Created ScanJob record: {} for {} (scanId={})",
250-
scannerType, NamingUtil.toLogFormat(extVersion), scanId);
251-
252-
// Now enqueue to JobRunr - the handler will find the existing ScanJob
253236
ScannerInvocationRequest jobRequest = new ScannerInvocationRequest(
254-
scannerType,
255-
extensionVersionId,
256-
scanId
257-
);
258-
237+
scannerType, extensionVersionId, scanId);
238+
259239
jobScheduler.enqueue(jobRequest);
260240
enqueuedCount++;
261-
262-
logger.debug("Enqueued scanner job: {} for {} (scanId={})",
241+
242+
logger.debug("Enqueued scanner job: {} for {} (scanId={})",
263243
scannerType, NamingUtil.toLogFormat(extVersion), scanId);
264-
244+
265245
} catch (Exception e) {
266-
logger.error("Failed to enqueue scanner {} for scanId={}",
267-
scannerDef.getScannerType(), scanId, e);
268-
// Continue with other scanners even if one fails to enqueue
246+
logger.error("Failed to enqueue scanner {} for scanId={}: {}. ",
247+
scannerType, scanId, e.getMessage(), e);
269248
}
270249
}
271-
250+
272251
logger.debug("Enqueued {} of {} scanner jobs for: {} (scanId={})",
273252
enqueuedCount, scanners.size(), NamingUtil.toLogFormat(extVersion), scanId);
274-
275-
return enqueuedCount > 0;
253+
254+
return true;
276255
}
277256

278257
/**

0 commit comments

Comments
 (0)