|
6 | 6 | # See https://github.com/aboutcode-org/purldb for support or download. |
7 | 7 | # See https://aboutcode.org for more information about nexB OSS projects. |
8 | 8 | # |
9 | | -from minecode_pipelines.pipes import get_last_commit, get_changed_files, update_last_commit |
| 9 | +from minecode_pipelines.pipes import get_last_commit |
| 10 | +from minecode_pipelines.pipes import get_changed_files |
| 11 | +from minecode_pipelines.pipes import update_last_commit |
10 | 12 | from minecode_pipelines.pipes.cargo import store_cargo_packages |
11 | 13 | import json |
12 | 14 | from pathlib import Path |
13 | 15 |
|
| 16 | +from minecode_pipelines.utils import get_next_x_commit |
| 17 | + |
| 18 | + |
# Hash used as the diff base when no checkpoint has been stored yet
# (git's well-known empty tree object).
EMPTY_TREE_SHA = "4b825dc642cb6eb9a060e54bf8d69288fbee4904"

# File names that are skipped when scanning changed files of the index.
IGNORED_INDEX_FILENAMES = frozenset({"config.json", "README.md", "update-dl-url.yml"})


def _load_json_lines(file_path):
    """Return the JSON documents parsed from the non-blank lines of `file_path`."""
    with open(file_path, encoding="utf-8") as f:
        return [json.loads(line) for line in f if line.strip()]


def process_cargo_packages(
    cargo_repo,
    fed_repo,
    fed_conf_repo,
    logger,
    *,
    commit_batch_size=1000,
    branch="master",
):
    """
    Process Cargo index files batch by batch, starting from the stored checkpoint.

    On each iteration:
    - read the "cargo" checkpoint from `fed_conf_repo` (falling back to the
      empty tree hash when there is none),
    - ask for the commit up to `commit_batch_size` commits ahead on `branch`,
    - read every changed file that still exists in the `cargo_repo` working
      tree and hand its JSON-lines content to `store_cargo_packages`,
    - store the new checkpoint in `fed_conf_repo`.

    `push_commit=True` is passed to `store_cargo_packages` only for the last
    file actually processed in a batch. The loop stops when no commit newer
    than the checkpoint is returned.

    `logger` is a callable that accepts a single message string.
    """
    base_path = Path(cargo_repo.working_tree_dir)

    while True:
        setting_last_commit = get_last_commit(fed_conf_repo, "cargo")
        if setting_last_commit is None:
            setting_last_commit = EMPTY_TREE_SHA

        next_commit = get_next_x_commit(
            cargo_repo, setting_last_commit, x=commit_batch_size, branch=branch
        )
        if next_commit == setting_last_commit:
            logger("No new commits to mine")
            break

        changed_files = get_changed_files(
            cargo_repo, commit_x=setting_last_commit, commit_y=next_commit
        )
        logger(f"Found {len(changed_files)} changed files in Cargo index.")

        # Filter first so the "last file" used for the push flag is a file
        # that is really processed, not an entry skipped by a guard below.
        target_files = []
        for rel_path in changed_files:
            file_path = base_path / rel_path
            logger(f"Found {file_path}.")

            # Changed paths may no longer exist in the working tree.
            if not file_path.is_file():
                continue
            if file_path.name in IGNORED_INDEX_FILENAMES:
                continue
            target_files.append(file_path)

        total = len(target_files)
        # 1-based index so that `idx == total` is True exactly once, on the
        # final file of the batch.
        for idx, file_path in enumerate(target_files, start=1):
            packages = _load_json_lines(file_path)
            push_commit = idx == total
            store_cargo_packages(packages, fed_repo, push_commit)

        # Advance the checkpoint only after the whole batch has been handed over.
        update_last_commit(next_commit, fed_conf_repo, "cargo")
        logger(f"Pushed batch for commit range {setting_last_commit}:{next_commit}.")
0 commit comments