@@ -326,7 +326,10 @@ def pretty_name_from_artifact_name(artifact_name: str) -> str:
 
 
 def download_dca_databases(
-    experiment_name: str, pat: str, projects: List[Project]
+    language: str,
+    experiment_name: str,
+    pat: str,
+    projects: List[Project],
 ) -> List[tuple[Project, str | None]]:
     """
     Download databases from a DCA experiment.
@@ -337,7 +340,6 @@ def download_dca_databases(
     Returns:
         List of (project_name, database_dir) pairs, where database_dir is None if the download failed.
     """
-    database_results = {}
     print("\n=== Finding projects ===")
     response = get_json_from_github(
         f"https://raw.githubusercontent.com/github/codeql-dca-main/data/{experiment_name}/reports/downloads.json",
@@ -363,7 +365,7 @@ def download_dca_databases(
 
         artifact_map[pretty_name] = analyzed_database
 
-    def download(item: tuple[str, dict]) -> str:
+    def download_and_extract(item: tuple[str, dict]) -> str:
         pretty_name, analyzed_database = item
         artifact_name = analyzed_database["artifact_name"]
         repository = analyzed_database["repository"]
@@ -391,16 +393,19 @@ def download(item: tuple[str, dict]) -> str:
             # And then we extract it to build_dir/artifact_name
             zip_ref.extractall(artifact_unzipped_location)
             # And then we iterate over the contents of the extracted directory
-            # and extract the tar.gz files inside it
-            for entry in os.listdir(artifact_unzipped_location):
-                artifact_tar_location = os.path.join(artifact_unzipped_location, entry)
-                with tarfile.open(artifact_tar_location, "r:gz") as tar_ref:
-                    # And we just untar it to the same directory as the zip file
-                    tar_ref.extractall(artifact_unzipped_location)
-            return os.path.join(artifact_unzipped_location, remove_extension(entry))
+            # and extract the language tar.gz file inside it
+            artifact_tar_location = os.path.join(
+                artifact_unzipped_location, f"{language}.tar.gz"
+            )
+            with tarfile.open(artifact_tar_location, "r:gz") as tar_ref:
+                # And we just untar it to the same directory as the zip file
+                tar_ref.extractall(artifact_unzipped_location)
+            ret = os.path.join(artifact_unzipped_location, language)
+            print(f"Extraction complete: {ret}")
+            return ret
 
     results = run_in_parallel(
-        download,
+        download_and_extract,
         list(artifact_map.items()),
         on_error=lambda item, exc: print(
             f"ERROR: Failed to download database for {item[0]}: {exc}"
@@ -410,7 +415,7 @@ def download(item: tuple[str, dict]) -> str:
         ),
     )
 
-    print(f"\n=== Extracted {len(database_results)} databases ===")
+    print(f"\n=== Extracted {len(results)} databases ===")
 
     return [(project_map[n], r) for n, r in zip(artifact_map, results)]
 
@@ -463,7 +468,9 @@ def main(config, args) -> None:
         case "repo":
             extractor_options = config.get("extractor_options", [])
             database_results = build_databases_from_projects(
-                language, extractor_options, projects
+                language,
+                extractor_options,
+                projects,
             )
         case "dca":
             experiment_name = args.dca
@@ -480,7 +487,10 @@ def main(config, args) -> None:
         with open(args.pat, "r") as f:
             pat = f.read().strip()
         database_results = download_dca_databases(
-            experiment_name, pat, projects
+            language,
+            experiment_name,
+            pat,
+            projects,
         )
 
     # Generate models for all projects