Skip to content

Commit bdb8819

Browse files
joschi, GitHub Copilot, and Claude Haiku 4.5
committed
validate: read from all.json and use explicit checksum file fields
Simplify validation to read from a single all.json file instead of iterating individual metadata files:

- Read docs/metadata/all.json containing a list of metadata objects
- Validate the 'url' field for each metadata entry
- Use the 'md5_file', 'sha1_file', 'sha256_file', and 'sha512_file' fields to identify checksum files
- Delete checksum files using the explicit filenames from metadata plus the vendor directory
- No longer read individual metadata JSON files in the hierarchy

Benefits:

- Single source of truth for validation
- More efficient (no directory walk required)
- Explicit checksum file references eliminate guessing
- Cleaner implementation

All tests pass. No breaking changes.

Co-authored-by: GitHub Copilot <[email protected]>
Co-authored-by: Claude Haiku 4.5 <[email protected]>
1 parent e248469 commit bdb8819

File tree

1 file changed

+53
-65
lines changed

1 file changed

+53
-65
lines changed

cmd/java-metadata/main.go

Lines changed: 53 additions & 65 deletions
Original file line number | Diff line number | Diff line change
@@ -453,31 +453,25 @@ func runValidate(parentCtx context.Context, metadataDir string, concurrency int,
453453
logger.Warn(parentCtx, "delete mode enabled: files with failed URLs will be deleted")
454454
}
455455

456-
// Find all metadata JSON files
457-
var metadataFiles []string
458-
err := filepath.Walk(metadataDir, func(path string, info os.FileInfo, err error) error {
459-
if err != nil {
460-
return err
461-
}
462-
// Respect cancellation during directory walk
463-
if cerr := parentCtx.Err(); cerr != nil {
464-
return cerr
465-
}
466-
if !info.IsDir() && filepath.Ext(path) == ".json" && filepath.Base(path) != "all.json" {
467-
metadataFiles = append(metadataFiles, path)
468-
}
469-
return nil
470-
})
456+
// Read all.json file
457+
allJsonPath := filepath.Join(metadataDir, "all.json")
458+
data, err := os.ReadFile(allJsonPath)
471459
if err != nil {
472-
return fmt.Errorf("failed to walk metadata directory: %w", err)
460+
return fmt.Errorf("failed to read all.json: %w", err)
473461
}
474462

475-
if len(metadataFiles) == 0 {
476-
logger.Info(parentCtx, "no metadata files found")
463+
// Parse metadata list
464+
var allMetadata []models.Metadata
465+
if err := json.Unmarshal(data, &allMetadata); err != nil {
466+
return fmt.Errorf("failed to parse all.json: %w", err)
467+
}
468+
469+
if len(allMetadata) == 0 {
470+
logger.Info(parentCtx, "no metadata entries found in all.json")
477471
return nil
478472
}
479473

480-
logger.Info(parentCtx, "found metadata files to validate", "count", len(metadataFiles))
474+
logger.Info(parentCtx, "found metadata entries to validate", "count", len(allMetadata))
481475

482476
// Create downloader for URL checking
483477
dl := downloader.NewDownloader(downloader.WithProgress(false))
@@ -486,12 +480,12 @@ func runValidate(parentCtx context.Context, metadataDir string, concurrency int,
486480
var wg sync.WaitGroup
487481
var checked, failed int64
488482
semaphore := make(chan struct{}, concurrency)
489-
failedFilesChan := make(chan string, len(metadataFiles))
483+
failedFilesChan := make(chan models.Metadata, len(allMetadata))
490484

491485
startTime := time.Now()
492-
for _, file := range metadataFiles {
486+
for _, metadata := range allMetadata {
493487
wg.Add(1)
494-
go func(metadataFile string) {
488+
go func(m models.Metadata) {
495489
defer wg.Done()
496490

497491
// Acquire semaphore
@@ -503,50 +497,30 @@ func runValidate(parentCtx context.Context, metadataDir string, concurrency int,
503497
return
504498
}
505499

506-
// Read metadata file
507-
data, err := os.ReadFile(metadataFile)
508-
if err != nil {
509-
logger.Error(context.Background(), "failed to read metadata file", "file", metadataFile, "error", err)
510-
atomic.AddInt64(&failed, 1)
511-
return
512-
}
513-
514-
// Parse metadata
515-
var metadata models.Metadata
516-
if err := json.Unmarshal(data, &metadata); err != nil {
517-
logger.Error(context.Background(), "failed to parse metadata file", "file", metadataFile, "error", err)
518-
atomic.AddInt64(&failed, 1)
519-
return
520-
}
521-
522500
// Check URL
523-
if err := parentCtx.Err(); err != nil {
524-
return
525-
}
526-
527-
if err := dl.CheckURLExists(parentCtx, metadata.URL); err != nil {
528-
logger.Debug(context.Background(), "URL not accessible", "file", metadataFile, "url", metadata.URL, "error", err)
529-
failedFilesChan <- metadataFile
501+
if err := dl.CheckURLExists(parentCtx, m.URL); err != nil {
502+
logger.Info(context.Background(), "URL not accessible", "filename", m.Filename, "url", m.URL, "error", err)
503+
failedFilesChan <- m
530504
atomic.AddInt64(&failed, 1)
531505
}
532506

533507
atomic.AddInt64(&checked, 1)
534508

535509
// Progress indicator
536510
if c := atomic.LoadInt64(&checked); c%100 == 0 {
537-
logger.Info(context.Background(), "validation progress", "checked", c, "total", len(metadataFiles))
511+
logger.Info(context.Background(), "validation progress", "checked", c, "total", len(allMetadata))
538512
}
539-
}(file)
513+
}(metadata)
540514
}
541515

542516
// Wait for all validations to complete
543517
wg.Wait()
544518
close(failedFilesChan)
545519

546-
// Collect failed files
547-
var failedFiles []string
548-
for file := range failedFilesChan {
549-
failedFiles = append(failedFiles, file)
520+
// Collect failed entries
521+
var failedEntries []models.Metadata
522+
for m := range failedFilesChan {
523+
failedEntries = append(failedEntries, m)
550524
}
551525

552526
duration := time.Since(startTime)
@@ -559,28 +533,42 @@ func runValidate(parentCtx context.Context, metadataDir string, concurrency int,
559533
"duration", duration,
560534
)
561535

562-
if len(failedFiles) > 0 {
563-
logger.Warn(parentCtx, "found inaccessible URLs", "count", len(failedFiles))
564-
for _, file := range failedFiles {
565-
logger.Warn(parentCtx, "inaccessible file", "path", file)
536+
if len(failedEntries) > 0 {
537+
logger.Warn(parentCtx, "found inaccessible URLs", "count", len(failedEntries))
538+
for _, m := range failedEntries {
539+
logger.Warn(parentCtx, "inaccessible file", "filename", m.Filename, "vendor", m.Vendor)
566540
}
567541

568542
// Delete files if requested
569543
if deleteOnFailure {
570-
logger.Info(parentCtx, "deleting failed files", "count", len(failedFiles))
571-
var deletedCount, deleteFailedCount int
572-
for _, file := range failedFiles {
573-
if err := os.Remove(file); err != nil {
574-
logger.Error(parentCtx, "failed to delete file", "file", file, "error", err)
575-
deleteFailedCount++
576-
} else {
577-
deletedCount++
544+
logger.Info(parentCtx, "deleting failed entries", "count", len(failedEntries))
545+
checksumBase := filepath.Join(filepath.Dir(metadataDir), "checksums")
546+
var deletedChecksumCount, deleteFailedCount int
547+
548+
for _, m := range failedEntries {
549+
// Delete checksum files using the explicit file paths from metadata
550+
checksumFiles := []string{m.MD5File, m.SHA1File, m.SHA256File, m.SHA512File}
551+
for _, checksumFile := range checksumFiles {
552+
if checksumFile == "" {
553+
continue
554+
}
555+
checksumPath := filepath.Join(checksumBase, m.Vendor, checksumFile)
556+
if err := os.Remove(checksumPath); err != nil {
557+
// Only log if file exists and deletion failed; ignore missing files
558+
if _, statErr := os.Stat(checksumPath); statErr == nil {
559+
logger.Error(parentCtx, "failed to delete checksum file", "file", checksumPath, "error", err)
560+
deleteFailedCount++
561+
}
562+
} else {
563+
deletedChecksumCount++
564+
}
578565
}
579566
}
580-
logger.Info(parentCtx, "deletion complete", "deleted", deletedCount, "failed", deleteFailedCount)
567+
568+
logger.Info(parentCtx, "deletion complete", "checksumsDeleted", deletedChecksumCount, "failed", deleteFailedCount)
581569
}
582570

583-
return fmt.Errorf("%d URLs are not accessible", len(failedFiles))
571+
return fmt.Errorf("%d URLs are not accessible", len(failedEntries))
584572
}
585573

586574
logger.Info(parentCtx, "all URLs are accessible")

0 commit comments

Comments (0)