2121# Visit https://github.com/aboutcode-org/scancode.io for support and download.
2222
2323import json
24- import os
25- from datetime import datetime
2624from pathlib import Path
27- from aboutcode import hashid
2825from packageurl import PackageURL
2926
3027from minecode_pipelines .miners .swift import fetch_git_tags_raw
3128from minecode_pipelines .miners .swift import get_tags_and_commits_from_git_output
3229from minecode_pipelines .miners .swift import split_org_repo
30+ from minecode_pipelines .utils import cycle_from_index , grouper
3331
34- from minecode_pipelines .pipes import update_checkpoints_in_github
35- from minecode_pipelines .pipes import MINECODE_PIPELINES_CONFIG_REPO
36- from minecode_pipelines .pipes import write_data_to_yaml_file
32+ PACKAGE_BATCH_SIZE = 100
3733
38- from minecode_pipelines .pipes import get_checkpoint_from_file
39- from scanpipe .pipes .federatedcode import clone_repository
4034
41- from scanpipe .pipes .federatedcode import commit_and_push_changes
42- from minecode_pipelines .utils import cycle_from_index
def mine_swift_packageurls(packages_urls, start_index, logger):
    """
    Yield mined PackageURL data for each Swift package repository URL.

    ``packages_urls`` is walked through ``cycle_from_index`` beginning at
    ``start_index`` and consumed in groups of ``PACKAGE_BATCH_SIZE`` via
    ``grouper``. For every repository URL the raw git tag listing is fetched
    with ``fetch_git_tags_raw`` (``logger`` is passed through to it) and
    parsed with ``get_tags_and_commits_from_git_output``.

    An entry is skipped when it is falsy, when no git output could be
    fetched, or when no tags and commits could be extracted from the output.

    Each yielded value is the return value of ``generate_package_urls`` for
    one repository.
    """
    packages_iter = cycle_from_index(packages_urls, start_index)
    for package_batch in grouper(n=PACKAGE_BATCH_SIZE, iterable=packages_iter):
        for package_repo_url in package_batch:
            # NOTE(review): falsy entries are presumably the padding that
            # ``grouper`` adds to a short final batch -- confirm.
            if not package_repo_url:
                continue

            # NOTE(review): 60 is presumably a timeout in seconds -- confirm
            # against ``fetch_git_tags_raw``.
            git_ls_remote = fetch_git_tags_raw(package_repo_url, 60, logger)
            if not git_ls_remote:
                continue

            tags_and_commits = get_tags_and_commits_from_git_output(git_ls_remote)
            if not tags_and_commits:
                continue

            yield generate_package_urls(
                package_repo_url=package_repo_url,
                tags_and_commits=tags_and_commits,
            )
4357
44- PACKAGE_BATCH_SIZE = 1000
45- SWIFT_CHECKPOINT_PATH = "swift/checkpoints.json"
4658
47- MINECODE_DATA_SWIFT_REPO = os .environ .get (
48- "MINECODE_DATA_SWIFT_REPO" , "https://github.com/aboutcode-data/minecode-data-swift-test"
49- )
50- MINECODE_SWIFT_INDEX_REPO = "https://github.com/SwiftPackageIndex/PackageList"
def load_swift_package_urls(swift_index_repo):
    """
    Return the parsed JSON content of ``packages.json`` found at the root of
    the ``swift_index_repo`` working directory.

    ``swift_index_repo`` must expose a ``working_dir`` attribute pointing to
    the directory that contains ``packages.json``.

    Raise ``FileNotFoundError`` if the file is missing and
    ``json.JSONDecodeError`` if it is not valid JSON.
    """
    packages_path = Path(swift_index_repo.working_dir) / "packages.json"
    # JSON text is UTF-8; read it explicitly as such instead of relying on
    # the platform default encoding.
    return json.loads(packages_path.read_text(encoding="utf-8"))
5164
5265
53- def store_swift_packages (package_repo_url , tags_and_commits , cloned_data_repo ):
54- """Collect Swift package versions into purls and write them to the repo."""
66+ def generate_package_urls (package_repo_url , tags_and_commits ):
5567 org , name = split_org_repo (package_repo_url )
5668 org = "github.com/" + org
5769 base_purl = PackageURL (type = "swift" , namespace = org , name = name )
@@ -70,102 +82,4 @@ def store_swift_packages(package_repo_url, tags_and_commits, cloned_data_repo):
7082 if purl :
7183 updated_purls .append (purl )
7284
73- purl_yaml_path = cloned_data_repo .working_dir / hashid .get_package_purls_yml_file_path (
74- base_purl
75- )
76- write_data_to_yaml_file (path = purl_yaml_path , data = updated_purls )
77- return purl_yaml_path , base_purl
78-
79-
80- def mine_and_publish_swift_packageurls (logger ):
81- """
82- Clone Swift-related repositories, process Swift packages, and publish their
83- Package URLs (purls) to the data repository.
84-
85- This function:
86- 1. Clones the Swift index, data, and pipelines config repositories.
87- 2. Loads the list of Swift package repositories from `packages.json`.
88- 3. Iterates over each package, fetching tags/commits and generating purls.
89- 4. Commits and pushes purl files to the data repository in batches.
90- 5. Updates checkpoint information in the config repository to track progress.
91-
92- logger (callable): Optional logging function for status updates.
93- Returns: list: A list of cloned repository objects in the order:
94- [swift_index_repo, cloned_data_repo, cloned_config_repo]
95- """
96-
97- swift_index_repo = clone_repository (MINECODE_SWIFT_INDEX_REPO )
98- cloned_data_repo = clone_repository (MINECODE_DATA_SWIFT_REPO )
99- cloned_config_repo = clone_repository (MINECODE_PIPELINES_CONFIG_REPO )
100-
101- if logger :
102- logger (f"{ MINECODE_SWIFT_INDEX_REPO } repo cloned at: { swift_index_repo .working_dir } " )
103- logger (f"{ MINECODE_DATA_SWIFT_REPO } repo cloned at: { cloned_data_repo .working_dir } " )
104- logger (f"{ MINECODE_PIPELINES_CONFIG_REPO } repo cloned at: { cloned_config_repo .working_dir } " )
105-
106- packages_path = Path (swift_index_repo .working_dir ) / "packages.json"
107- with open (packages_path ) as f :
108- packages_urls = json .load (f )
109-
110- counter = 0
111- purl_files = []
112- purls = []
113-
114- swift_checkpoint = get_checkpoint_from_file (
115- cloned_repo = cloned_config_repo , path = SWIFT_CHECKPOINT_PATH
116- )
117-
118- start_index = swift_checkpoint .get ("start_index" , 0 )
119-
120- if logger :
121- logger (f"Processing total files: { len (packages_urls )} " )
122-
123- for idx , package_repo_url in enumerate (cycle_from_index (packages_urls , start_index )):
124- git_ls_remote = fetch_git_tags_raw (package_repo_url , 60 , logger )
125- if not git_ls_remote :
126- continue
127-
128- tags_and_commits = get_tags_and_commits_from_git_output (git_ls_remote )
129- if not tags_and_commits :
130- continue
131-
132- purl_file , base_purl = store_swift_packages (
133- package_repo_url , tags_and_commits , cloned_data_repo
134- )
135-
136- purl_files .append (purl_file )
137- purls .append (str (base_purl ))
138- counter += 1
139-
140- if counter >= PACKAGE_BATCH_SIZE :
141- if purls and purl_files :
142- logger (f"Committing packageURLs: { ', ' .join (purls )} " )
143- commit_and_push_changes (
144- repo = cloned_data_repo , files_to_commit = purl_files , purls = purls , logger = logger
145- )
146-
147- purl_files = []
148- purls = []
149- counter = 0
150-
151- if start_index == idx :
152- continue
153-
154- settings_data = {
155- "date" : str (datetime .now ()),
156- "start_index" : idx ,
157- }
158-
159- update_checkpoints_in_github (
160- checkpoint = settings_data ,
161- cloned_repo = cloned_config_repo ,
162- path = SWIFT_CHECKPOINT_PATH ,
163- )
164-
165- if purls and purl_files :
166- logger (f"Committing packageURLs: { ', ' .join (purls )} " )
167- commit_and_push_changes (
168- repo = cloned_data_repo , files_to_commit = purl_files , purls = purls , logger = logger
169- )
170-
171- return [swift_index_repo , cloned_data_repo , cloned_config_repo ]
85+ return base_purl , updated_purls
0 commit comments