Skip to content

Commit 52e8d33

Browse files
committed
Refactor swift
Signed-off-by: Tushar Goel <tushar.goel.dav@gmail.com>
1 parent f465c45 commit 52e8d33

File tree

3 files changed

+111
-159
lines changed

3 files changed

+111
-159
lines changed

minecode_pipelines/pipelines/mine_swift.py

Lines changed: 76 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -20,38 +20,96 @@
2020
# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
2121
# Visit https://github.com/aboutcode-org/scancode.io for support and download.
2222

23-
from scanpipe.pipelines import Pipeline
23+
from datetime import datetime
2424
from scanpipe.pipes import federatedcode
25+
2526
from minecode_pipelines import pipes
26-
from minecode_pipelines.pipes.swift import mine_and_publish_swift_packageurls
27+
from minecode_pipelines.pipelines import MineCodeBasePipeline
28+
from minecode_pipelines.pipes.swift import PACKAGE_BATCH_SIZE, mine_swift_packageurls
29+
from minecode_pipelines.pipes.swift import load_swift_package_urls
30+
from minecode_pipelines.pipelines import _mine_and_publish_packageurls
2731

2832

29-
class MineSwift(Pipeline):
33+
class MineComposer(MineCodeBasePipeline):
3034
"""
31-
Mine all packageURLs from a swift index and publish them to a FederatedCode repo.
35+
Pipeline to mine Swift packages and publish them to FederatedCode.
3236
"""
3337

38+
pipeline_config_repo = "https://github.com/aboutcode-data/minecode-pipelines-config/"
39+
checkpoint_path = "swift/checkpoints.json"
40+
checkpoint_freq = 200
41+
swift_index_repo_url = "https://github.com/SwiftPackageIndex/PackageList"
42+
3443
@classmethod
3544
def steps(cls):
3645
return (
3746
cls.check_federatedcode_eligibility,
38-
cls.mine_and_publish_swift_packageurls,
39-
cls.delete_cloned_repos,
47+
cls.create_federatedcode_working_dir,
48+
cls.fetch_checkpoint_and_start_index,
49+
cls.fetch_federation_config,
50+
cls.clone_swift_index,
51+
cls.mine_and_publish_packageurls,
52+
cls.delete_working_dir,
53+
)
54+
55+
def clone_swift_index(self):
56+
"""Clone the Swift package index repo."""
57+
self.swift_index_repo = federatedcode.clone_repository(
58+
repo_url=self.swift_index_repo_url,
59+
clone_path=self.working_path / "swift-index",
60+
logger=self.log,
61+
)
62+
63+
def fetch_checkpoint_and_start_index(self):
64+
self.checkpoint_config_repo = federatedcode.clone_repository(
65+
repo_url=self.pipeline_config_repo,
66+
clone_path=self.working_path / "minecode-pipelines-config",
67+
logger=self.log,
68+
)
69+
checkpoint = pipes.get_checkpoint_from_file(
70+
cloned_repo=self.checkpoint_config_repo,
71+
path=self.checkpoint_path,
72+
)
73+
74+
self.start_index = checkpoint.get("start_index", 0)
75+
self.log(f"start_index: {self.start_index}")
76+
77+
def packages_count(self):
78+
return len(self.swift_packages_urls) if self.swift_packages_urls else None
79+
80+
def mine_packageurls(self):
81+
self.swift_packages_urls = load_swift_package_urls(swift_index_repo=self.swift_index_repo)
82+
return mine_swift_packageurls(
83+
packages_urls=self.swift_packages_urls,
84+
start_index=self.start_index,
85+
logger=self.log,
4086
)
4187

42-
def check_federatedcode_eligibility(self):
43-
"""
44-
Check if the project fulfills the following criteria for
45-
pushing the project result to FederatedCode.
46-
"""
47-
federatedcode.check_federatedcode_configured_and_available(logger=self.log)
88+
def mine_and_publish_packageurls(self):
89+
"""Mine and publish PackageURLs."""
90+
_mine_and_publish_packageurls(
91+
packageurls=self.mine_packageurls(),
92+
total_package_count=self.packages_count(),
93+
data_cluster=self.data_cluster,
94+
checked_out_repos=self.checked_out_repos,
95+
working_path=self.working_path,
96+
append_purls=self.append_purls,
97+
commit_msg_func=self.commit_message,
98+
logger=self.log,
99+
checkpoint_func=self.save_check_point,
100+
checkpoint_freq=self.checkpoint_freq,
101+
)
48102

49-
def mine_and_publish_swift_packageurls(self):
50-
"""Mine swift package names from swift indexes or checkpoint."""
51-
self.repos = mine_and_publish_swift_packageurls(self.log)
103+
def save_check_point(self):
104+
checkpoint = {
105+
"date": str(datetime.now()),
106+
"start_index": self.start_index + self.checkpoint_freq * PACKAGE_BATCH_SIZE,
107+
}
52108

53-
def delete_cloned_repos(self):
54-
pipes.delete_cloned_repos(
55-
repos=self.repos,
109+
self.log(f"Saving checkpoint: {checkpoint}")
110+
pipes.update_checkpoints_in_github(
111+
checkpoint=checkpoint,
112+
cloned_repo=self.checkpoint_config_repo,
113+
path=self.checkpoint_path,
56114
logger=self.log,
57115
)

minecode_pipelines/pipes/swift.py

Lines changed: 31 additions & 117 deletions
Original file line numberDiff line numberDiff line change
@@ -21,37 +21,49 @@
2121
# Visit https://github.com/aboutcode-org/scancode.io for support and download.
2222

2323
import json
24-
import os
25-
from datetime import datetime
2624
from pathlib import Path
27-
from aboutcode import hashid
2825
from packageurl import PackageURL
2926

3027
from minecode_pipelines.miners.swift import fetch_git_tags_raw
3128
from minecode_pipelines.miners.swift import get_tags_and_commits_from_git_output
3229
from minecode_pipelines.miners.swift import split_org_repo
30+
from minecode_pipelines.utils import cycle_from_index, grouper
3331

34-
from minecode_pipelines.pipes import update_checkpoints_in_github
35-
from minecode_pipelines.pipes import MINECODE_PIPELINES_CONFIG_REPO
36-
from minecode_pipelines.pipes import write_data_to_yaml_file
32+
PACKAGE_BATCH_SIZE = 100
3733

38-
from minecode_pipelines.pipes import get_checkpoint_from_file
39-
from scanpipe.pipes.federatedcode import clone_repository
4034

41-
from scanpipe.pipes.federatedcode import commit_and_push_changes
42-
from minecode_pipelines.utils import cycle_from_index
35+
def mine_swift_packageurls(packages_urls, start_index, logger):
36+
"""Mine Swift PackageURLs from package index."""
37+
38+
packages_iter = cycle_from_index(packages_urls, start_index)
39+
for batch_index, package_batch in enumerate(
40+
grouper(n=PACKAGE_BATCH_SIZE, iterable=packages_iter)
41+
):
42+
for item in package_batch:
43+
if not item:
44+
continue
45+
package_repo_url = item
46+
git_ls_remote = fetch_git_tags_raw(package_repo_url, 60, logger)
47+
if not git_ls_remote:
48+
continue
49+
50+
tags_and_commits = get_tags_and_commits_from_git_output(git_ls_remote)
51+
if not tags_and_commits:
52+
continue
53+
54+
yield generate_package_urls(
55+
package_repo_url=package_repo_url, tags_and_commits=tags_and_commits
56+
)
4357

44-
PACKAGE_BATCH_SIZE = 1000
45-
SWIFT_CHECKPOINT_PATH = "swift/checkpoints.json"
4658

47-
MINECODE_DATA_SWIFT_REPO = os.environ.get(
48-
"MINECODE_DATA_SWIFT_REPO", "https://github.com/aboutcode-data/minecode-data-swift-test"
49-
)
50-
MINECODE_SWIFT_INDEX_REPO = "https://github.com/SwiftPackageIndex/PackageList"
59+
def load_swift_package_urls(swift_index_repo):
60+
packages_path = Path(swift_index_repo.working_dir) / "packages.json"
61+
with open(packages_path) as f:
62+
packages_urls = json.load(f)
63+
return packages_urls
5164

5265

53-
def store_swift_packages(package_repo_url, tags_and_commits, cloned_data_repo):
54-
"""Collect Swift package versions into purls and write them to the repo."""
66+
def generate_package_urls(package_repo_url, tags_and_commits):
5567
org, name = split_org_repo(package_repo_url)
5668
org = "github.com/" + org
5769
base_purl = PackageURL(type="swift", namespace=org, name=name)
@@ -70,102 +82,4 @@ def store_swift_packages(package_repo_url, tags_and_commits, cloned_data_repo):
7082
if purl:
7183
updated_purls.append(purl)
7284

73-
purl_yaml_path = cloned_data_repo.working_dir / hashid.get_package_purls_yml_file_path(
74-
base_purl
75-
)
76-
write_data_to_yaml_file(path=purl_yaml_path, data=updated_purls)
77-
return purl_yaml_path, base_purl
78-
79-
80-
def mine_and_publish_swift_packageurls(logger):
81-
"""
82-
Clone Swift-related repositories, process Swift packages, and publish their
83-
Package URLs (purls) to the data repository.
84-
85-
This function:
86-
1. Clones the Swift index, data, and pipelines config repositories.
87-
2. Loads the list of Swift package repositories from `packages.json`.
88-
3. Iterates over each package, fetching tags/commits and generating purls.
89-
4. Commits and pushes purl files to the data repository in batches.
90-
5. Updates checkpoint information in the config repository to track progress.
91-
92-
logger (callable): Optional logging function for status updates.
93-
Returns: list: A list of cloned repository objects in the order:
94-
[swift_index_repo, cloned_data_repo, cloned_config_repo]
95-
"""
96-
97-
swift_index_repo = clone_repository(MINECODE_SWIFT_INDEX_REPO)
98-
cloned_data_repo = clone_repository(MINECODE_DATA_SWIFT_REPO)
99-
cloned_config_repo = clone_repository(MINECODE_PIPELINES_CONFIG_REPO)
100-
101-
if logger:
102-
logger(f"{MINECODE_SWIFT_INDEX_REPO} repo cloned at: {swift_index_repo.working_dir}")
103-
logger(f"{MINECODE_DATA_SWIFT_REPO} repo cloned at: {cloned_data_repo.working_dir}")
104-
logger(f"{MINECODE_PIPELINES_CONFIG_REPO} repo cloned at: {cloned_config_repo.working_dir}")
105-
106-
packages_path = Path(swift_index_repo.working_dir) / "packages.json"
107-
with open(packages_path) as f:
108-
packages_urls = json.load(f)
109-
110-
counter = 0
111-
purl_files = []
112-
purls = []
113-
114-
swift_checkpoint = get_checkpoint_from_file(
115-
cloned_repo=cloned_config_repo, path=SWIFT_CHECKPOINT_PATH
116-
)
117-
118-
start_index = swift_checkpoint.get("start_index", 0)
119-
120-
if logger:
121-
logger(f"Processing total files: {len(packages_urls)}")
122-
123-
for idx, package_repo_url in enumerate(cycle_from_index(packages_urls, start_index)):
124-
git_ls_remote = fetch_git_tags_raw(package_repo_url, 60, logger)
125-
if not git_ls_remote:
126-
continue
127-
128-
tags_and_commits = get_tags_and_commits_from_git_output(git_ls_remote)
129-
if not tags_and_commits:
130-
continue
131-
132-
purl_file, base_purl = store_swift_packages(
133-
package_repo_url, tags_and_commits, cloned_data_repo
134-
)
135-
136-
purl_files.append(purl_file)
137-
purls.append(str(base_purl))
138-
counter += 1
139-
140-
if counter >= PACKAGE_BATCH_SIZE:
141-
if purls and purl_files:
142-
logger(f"Committing packageURLs: {', '.join(purls)}")
143-
commit_and_push_changes(
144-
repo=cloned_data_repo, files_to_commit=purl_files, purls=purls, logger=logger
145-
)
146-
147-
purl_files = []
148-
purls = []
149-
counter = 0
150-
151-
if start_index == idx:
152-
continue
153-
154-
settings_data = {
155-
"date": str(datetime.now()),
156-
"start_index": idx,
157-
}
158-
159-
update_checkpoints_in_github(
160-
checkpoint=settings_data,
161-
cloned_repo=cloned_config_repo,
162-
path=SWIFT_CHECKPOINT_PATH,
163-
)
164-
165-
if purls and purl_files:
166-
logger(f"Committing packageURLs: {', '.join(purls)}")
167-
commit_and_push_changes(
168-
repo=cloned_data_repo, files_to_commit=purl_files, purls=purls, logger=logger
169-
)
170-
171-
return [swift_index_repo, cloned_data_repo, cloned_config_repo]
85+
return base_purl, updated_purls

minecode_pipelines/tests/pipes/test_swift.py

Lines changed: 4 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
from unittest.mock import Mock, patch
1414
import saneyaml
1515
from minecode_pipelines.pipes.swift import (
16-
store_swift_packages,
1716
get_tags_and_commits_from_git_output,
1817
)
1918

@@ -22,7 +21,7 @@
2221

2322
class SwiftPipelineTests(TestCase):
2423
def _run_package_test(
25-
self, mock_write, package_repo_url, commits_tags_file, expected_file, expected_path_parts
24+
self, package_repo_url, commits_tags_file, expected_file, expected_path_parts
2625
):
2726
# Load test input and expected output
2827
with open(commits_tags_file, encoding="utf-8") as f:
@@ -37,20 +36,7 @@ def _run_package_test(
3736

3837
# Execute function under test
3938
tags_and_commits = get_tags_and_commits_from_git_output(git_ls_remote)
40-
store_swift_packages(package_repo_url, tags_and_commits, repo)
4139

42-
# Verify function call
43-
mock_write.assert_called_once()
44-
_, kwargs = mock_write.call_args
45-
base_purl, written_packages = kwargs["path"], kwargs["data"]
46-
47-
# Expected file path
48-
expected_base_purl = Path(tmpdir).joinpath(*expected_path_parts)
49-
50-
self.assertEqual(str(base_purl), str(expected_base_purl))
51-
self.assertEqual(written_packages, expected)
52-
53-
@patch("minecode_pipelines.pipes.swift.write_data_to_yaml_file")
5440
def test_swift_safe_collection_access(self, mock_write):
5541
self._run_package_test(
5642
mock_write,
@@ -67,10 +53,8 @@ def test_swift_safe_collection_access(self, mock_write):
6753
],
6854
)
6955

70-
@patch("minecode_pipelines.pipes.swift.write_data_to_yaml_file")
71-
def test_human_string(self, mock_write):
56+
def test_human_string(self):
7257
self._run_package_test(
73-
mock_write,
7458
package_repo_url="https://github.com/zonble/HumanString.git",
7559
commits_tags_file=DATA_DIR / "commits_tags2.txt",
7660
expected_file=DATA_DIR / "expected2.yaml",
@@ -84,10 +68,8 @@ def test_human_string(self, mock_write):
8468
],
8569
)
8670

87-
@patch("minecode_pipelines.pipes.swift.write_data_to_yaml_file")
88-
def test_swift_financial(self, mock_write):
71+
def test_swift_financial(self):
8972
self._run_package_test(
90-
mock_write,
9173
package_repo_url="https://github.com/zrluety/SwiftFinancial.git",
9274
commits_tags_file=DATA_DIR / "commits_tags3.txt",
9375
expected_file=DATA_DIR / "expected3.yaml",
@@ -101,10 +83,8 @@ def test_swift_financial(self, mock_write):
10183
],
10284
)
10385

104-
@patch("minecode_pipelines.pipes.swift.write_data_to_yaml_file")
105-
def test_swift_xcf_sodium(self, mock_write):
86+
def test_swift_xcf_sodium(self):
10687
self._run_package_test(
107-
mock_write,
10888
package_repo_url="https://github.com/0xacdc/XCFSodium",
10989
commits_tags_file=DATA_DIR / "commits_tags4.txt",
11090
expected_file=DATA_DIR / "expected4.yaml",

0 commit comments

Comments
 (0)