Skip to content

Commit 5c24db5

Browse files
authored
feat: add --skip-existing flag to seed script (#43)
Connects to the database to check which packages have already been scanned and filters them out before pushing to the queue, avoiding redundant scan jobs when re-running the seeder.
1 parent 8064a2d commit 5c24db5

File tree

1 file changed

+29
-1
lines changed

1 file changed

+29
-1
lines changed

crates/seed/src/main.rs

Lines changed: 29 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -53,9 +53,17 @@ struct Args {
5353
#[arg(long)]
5454
dry_run: bool,
5555

56+
/// Skip packages that already exist in the database
57+
#[arg(long)]
58+
skip_existing: bool,
59+
5660
/// Redis URL (can also use REDIS_URL env var)
5761
#[arg(long, env = "REDIS_URL")]
5862
redis_url: String,
63+
64+
/// Database URL (can also use DATABASE_URL env var). Required when --skip-existing is set.
65+
#[arg(long, env = "DATABASE_URL")]
66+
database_url: Option<String>,
5967
}
6068

6169
/// OSV vulnerability response
@@ -352,13 +360,33 @@ async fn main() -> Result<()> {
352360
}
353361

354362
// Deduplicate and report
355-
let packages: Vec<String> = packages.into_iter().collect();
363+
let mut packages: Vec<String> = packages.into_iter().collect();
356364
println!(
357365
"\n📊 Total unique {} packages to seed: {}",
358366
registry_name,
359367
packages.len()
360368
);
361369

370+
// Filter out packages already in the database
371+
if args.skip_existing {
372+
let db_url = args
373+
.database_url
374+
.as_deref()
375+
.ok_or_else(|| anyhow::anyhow!("--skip-existing requires DATABASE_URL to be set"))?;
376+
println!("\n🔍 Checking database for existing packages...");
377+
let db = common::Database::new(db_url).await?;
378+
let existing_names = db.get_package_names_by_registry(registry).await?;
379+
let existing_set: HashSet<String> = existing_names.into_iter().collect();
380+
let before = packages.len();
381+
packages.retain(|p| !existing_set.contains(p));
382+
let skipped = before - packages.len();
383+
println!(
384+
" Skipped {} already-scanned packages, {} remaining",
385+
skipped,
386+
packages.len()
387+
);
388+
}
389+
362390
if args.dry_run {
363391
println!("\n🔍 Dry run - packages that would be queued:");
364392
for (i, pkg) in packages.iter().enumerate().take(20) {

0 commit comments

Comments
 (0)