99from datetime import datetime
1010
1111from minecode_pipelines .pipes import fetch_checkpoint_from_github
12+ from minecode_pipelines .pipes import get_commit_at_distance_ahead
1213from minecode_pipelines .pipes import update_checkpoints_in_github
1314from minecode_pipelines .pipes import MINECODE_PIPELINES_CONFIG_REPO
1415from minecode_pipelines .pipes import get_changed_files
2021import json
2122from pathlib import Path
2223
23- from minecode_pipelines .utils import get_next_x_commit
2424
2525PACKAGE_BATCH_SIZE = 500
26+ COMMIT_BATCH_SIZE = 10
27+
2628CARGO_CHECKPOINT_PATH = "cargo/checkpoints.json"
2729
2830
@@ -36,14 +38,14 @@ def process_cargo_packages(cargo_index_repo, cloned_data_repo, config_repo, logg
3638 base_path = Path (cargo_index_repo .working_tree_dir )
3739
3840 while True :
39- cargo_checkpoints = (
40- fetch_checkpoint_from_github (MINECODE_PIPELINES_CONFIG_REPO , CARGO_CHECKPOINT_PATH )
41- or {}
41+ cargo_checkpoints = fetch_checkpoint_from_github (
42+ config_repo = MINECODE_PIPELINES_CONFIG_REPO , checkpoint_path = CARGO_CHECKPOINT_PATH
4243 )
44+
4345 checkpoints_last_commit = cargo_checkpoints .get ("last_commit" )
4446
45- next_commit = get_next_x_commit (
46- cargo_index_repo , checkpoints_last_commit , x = 10 , branch = "master"
47+ next_commit = get_commit_at_distance_ahead (
48+ cargo_index_repo , checkpoints_last_commit , num_commits_ahead = 10 , branch_name = "master"
4749 )
4850
4951 if next_commit == checkpoints_last_commit :
@@ -62,10 +64,11 @@ def process_cargo_packages(cargo_index_repo, cloned_data_repo, config_repo, logg
6264 file_path = base_path / rel_path
6365 logger (f"Found { file_path } ." )
6466
65- if not file_path .is_file ():
66- continue
67-
68- if file_path .name in {"config.json" , "README.md" , "update-dl-url.yml" }:
67+ if not file_path .is_file () or file_path .name in {
68+ "config.json" ,
69+ "README.md" ,
70+ "update-dl-url.yml" ,
71+ }:
6972 continue
7073
7174 packages = []
@@ -75,6 +78,8 @@ def process_cargo_packages(cargo_index_repo, cloned_data_repo, config_repo, logg
7578 packages .append (json .loads (line ))
7679
7780 file_counter += 1
81+
82+ # Commit and push after each full batch or when processing the last file
7883 commit_and_push = (file_counter % PACKAGE_BATCH_SIZE == 0 ) or (
7984 idx == len (changed_files )
8085 )
@@ -83,6 +88,7 @@ def process_cargo_packages(cargo_index_repo, cloned_data_repo, config_repo, logg
8388
8489 purl_files .append (purl_file )
8590 purls .append (str (base_purl ))
91+
8692 if not commit_and_push :
8793 continue
8894
@@ -91,11 +97,10 @@ def process_cargo_packages(cargo_index_repo, cloned_data_repo, config_repo, logg
9197 files_to_commit = purl_files ,
9298 purls = purls ,
9399 mine_type = "packageURL" ,
94- tool_name = "pkg:cargo /minecode-pipelines" ,
100+ tool_name = "pkg:pypi /minecode-pipelines" ,
95101 tool_version = VERSION ,
96102 )
97103
98- # Push changes to remote repository
99104 push_changes (repo = cloned_data_repo )
100105 purl_files = []
101106 purls = []
@@ -105,15 +110,16 @@ def process_cargo_packages(cargo_index_repo, cloned_data_repo, config_repo, logg
105110 f"Updating checkpoint at: { CARGO_CHECKPOINT_PATH } with last commit: { checkpoints_last_commit } "
106111 )
107112
108- settings_data = {
109- "date" : str (datetime .now ()),
110- "last_commit" : next_commit ,
111- }
113+ if next_commit != checkpoints_last_commit :
114+ settings_data = {
115+ "date" : str (datetime .now ()),
116+ "last_commit" : next_commit ,
117+ }
112118
113- update_checkpoints_in_github (
114- checkpoint = settings_data ,
115- cloned_repo = config_repo ,
116- path = CARGO_CHECKPOINT_PATH ,
117- )
119+ update_checkpoints_in_github (
120+ checkpoint = settings_data ,
121+ cloned_repo = config_repo ,
122+ path = CARGO_CHECKPOINT_PATH ,
123+ )
118124
119125 logger (f"Pushed batch for commit range { checkpoints_last_commit } :{ next_commit } ." )
0 commit comments