88#
99from datetime import datetime
1010
11- from minecode_pipelines .pipes import fetch_checkpoint_from_github
11+ from minecode_pipelines .pipes import get_checkpoint_from_file
1212from minecode_pipelines .pipes import get_commit_at_distance_ahead
1313from minecode_pipelines .pipes import update_checkpoints_in_github
14- from minecode_pipelines .pipes import MINECODE_PIPELINES_CONFIG_REPO
1514from minecode_pipelines .pipes import get_changed_files
1615from minecode_pipelines .pipes .cargo import store_cargo_packages
1716from scanpipe .pipes .federatedcode import commit_changes
@@ -38,15 +37,19 @@ def process_cargo_packages(cargo_index_repo, cloned_data_repo, config_repo, logg
3837 base_path = Path (cargo_index_repo .working_tree_dir )
3938
4039 while True :
41- cargo_checkpoints = fetch_checkpoint_from_github (
42- config_repo = MINECODE_PIPELINES_CONFIG_REPO , checkpoint_path = CARGO_CHECKPOINT_PATH
40+ cargo_checkpoints = get_checkpoint_from_file (
41+ cloned_repo = config_repo , path = CARGO_CHECKPOINT_PATH
4342 )
4443
4544 checkpoints_last_commit = cargo_checkpoints .get ("last_commit" )
4645
47- next_commit = get_commit_at_distance_ahead (
48- cargo_index_repo , checkpoints_last_commit , num_commits_ahead = 10 , branch_name = "master"
49- )
46+ try :
47+ next_commit = get_commit_at_distance_ahead (
48+ cargo_index_repo , checkpoints_last_commit , num_commits_ahead = COMMIT_BATCH_SIZE , branch_name = "master"
49+ )
50+ except ValueError as e :
51+ logger (str (e ))
52+ break
5053
5154 if next_commit == checkpoints_last_commit :
5255 logger ("No new commits to mine" )
@@ -75,19 +78,25 @@ def process_cargo_packages(cargo_index_repo, cloned_data_repo, config_repo, logg
7578 with open (file_path , encoding = "utf-8" ) as f :
7679 for line in f :
7780 if line .strip ():
78- packages .append (json .loads (line ))
81+ try :
82+ packages .append (json .loads (line ))
83+ except json .JSONDecodeError as e :
84+ logger (f"Skipping invalid JSON in { file_path } : { e } " )
7985
8086 file_counter += 1
8187
8288 # Commit and push after each full batch or when processing the last file
8389 commit_and_push = (file_counter % PACKAGE_BATCH_SIZE == 0 ) or (
84- idx == len (changed_files )
90+ idx == len (changed_files ) - 1
8591 )
86- purl_file , base_purl = store_cargo_packages (packages , cloned_data_repo )
87- logger (f"writing packageURLs for package: { base_purl } at: { purl_file } " )
8892
89- purl_files .append (purl_file )
90- purls .append (str (base_purl ))
93+ result_store = store_cargo_packages (packages , cloned_data_repo )
94+ if result_store :
95+ purl_file , base_purl = result_store
96+ logger (f"writing packageURLs for package: { base_purl } at: { purl_file } " )
97+
98+ purl_files .append (purl_file )
99+ purls .append (str (base_purl ))
91100
92101 if not commit_and_push :
93102 continue
0 commit comments