Skip to content

Commit 5648cf1

Browse files
committed
Remove checkpoint from swift mining
Signed-off-by: Tushar Goel <tushar.goel.dav@gmail.com>
1 parent d945195 commit 5648cf1

File tree

2 files changed: 15 additions and 75 deletions.

minecode_pipelines/pipelines/mine_swift.py

Lines changed: 1 addition & 52 deletions
Original file line number | Diff line number | Diff line change
@@ -20,32 +20,25 @@
2020
# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
2121
# Visit https://github.com/aboutcode-org/scancode.io for support and download.
2222

23-
from datetime import datetime
2423
from scanpipe.pipes import federatedcode
2524

26-
from minecode_pipelines import pipes
2725
from minecode_pipelines.pipelines import MineCodeBasePipeline
28-
from minecode_pipelines.pipes.swift import PACKAGE_BATCH_SIZE, mine_swift_packageurls
26+
from minecode_pipelines.pipes.swift import mine_swift_packageurls
2927
from minecode_pipelines.pipes.swift import load_swift_package_urls
30-
from minecode_pipelines.pipelines import _mine_and_publish_packageurls
3128

3229

3330
class MineSwift(MineCodeBasePipeline):
3431
"""
3532
Pipeline to mine Swift packages and publish them to FederatedCode.
3633
"""
3734

38-
pipeline_config_repo = "https://github.com/aboutcode-data/minecode-pipelines-config/"
39-
checkpoint_path = "swift/checkpoints.json"
40-
checkpoint_freq = 30
4135
swift_index_repo_url = "https://github.com/SwiftPackageIndex/PackageList"
4236

4337
@classmethod
4438
def steps(cls):
4539
return (
4640
cls.check_federatedcode_eligibility,
4741
cls.create_federatedcode_working_dir,
48-
cls.fetch_checkpoint_and_start_index,
4942
cls.fetch_federation_config,
5043
cls.clone_swift_index,
5144
cls.mine_and_publish_packageurls,
@@ -60,20 +53,6 @@ def clone_swift_index(self):
6053
logger=self.log,
6154
)
6255

63-
def fetch_checkpoint_and_start_index(self):
64-
self.checkpoint_config_repo = federatedcode.clone_repository(
65-
repo_url=self.pipeline_config_repo,
66-
clone_path=self.working_path / "minecode-pipelines-config",
67-
logger=self.log,
68-
)
69-
checkpoint = pipes.get_checkpoint_from_file(
70-
cloned_repo=self.checkpoint_config_repo,
71-
path=self.checkpoint_path,
72-
)
73-
74-
self.start_index = checkpoint.get("start_index", 0)
75-
self.log(f"start_index: {self.start_index}")
76-
7756
def packages_count(self):
7857
return len(self.swift_packages_urls) if self.swift_packages_urls else None
7958

@@ -82,35 +61,5 @@ def mine_packageurls(self):
8261
self.log(f"Total Swift packages to process: {len(self.swift_packages_urls)}")
8362
return mine_swift_packageurls(
8463
packages_urls=self.swift_packages_urls,
85-
start_index=self.start_index,
86-
logger=self.log,
87-
)
88-
89-
def mine_and_publish_packageurls(self):
90-
"""Mine and publish PackageURLs."""
91-
_mine_and_publish_packageurls(
92-
packageurls=self.mine_packageurls(),
93-
total_package_count=self.packages_count(),
94-
data_cluster=self.data_cluster,
95-
checked_out_repos=self.checked_out_repos,
96-
working_path=self.working_path,
97-
append_purls=self.append_purls,
98-
commit_msg_func=self.commit_message,
99-
logger=self.log,
100-
checkpoint_func=self.save_check_point,
101-
checkpoint_freq=self.checkpoint_freq,
102-
)
103-
104-
def save_check_point(self):
105-
checkpoint = {
106-
"date": str(datetime.now()),
107-
"start_index": self.start_index + self.checkpoint_freq * PACKAGE_BATCH_SIZE,
108-
}
109-
110-
self.log(f"Saving checkpoint: {checkpoint}")
111-
pipes.update_checkpoints_in_github(
112-
checkpoint=checkpoint,
113-
cloned_repo=self.checkpoint_config_repo,
114-
path=self.checkpoint_path,
11564
logger=self.log,
11665
)

minecode_pipelines/pipes/swift.py

Lines changed: 14 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -24,36 +24,27 @@
2424
from pathlib import Path
2525
from packageurl import PackageURL
2626

27-
from minecode_pipelines.utils import cycle_from_index, grouper
2827
import shutil
2928
import subprocess
3029
from urllib.parse import urlparse
3130

32-
PACKAGE_BATCH_SIZE = 100
3331

34-
35-
def mine_swift_packageurls(packages_urls, start_index, logger):
32+
def mine_swift_packageurls(packages_urls, logger):
3633
"""Mine Swift PackageURLs from package index."""
3734

38-
packages_iter = cycle_from_index(packages_urls, start_index)
39-
for batch_index, package_batch in enumerate(
40-
grouper(n=PACKAGE_BATCH_SIZE, iterable=packages_iter)
41-
):
42-
for package_repo_url in package_batch:
43-
if not package_repo_url:
44-
continue
45-
logger(f"Processing package repo URL: {package_repo_url}")
46-
git_ls_remote = fetch_git_tags_raw(package_repo_url, 60, logger)
47-
if not git_ls_remote:
48-
continue
49-
50-
tags_and_commits = get_tags_and_commits_from_git_output(git_ls_remote)
51-
if not tags_and_commits:
52-
continue
53-
54-
yield generate_package_urls(
55-
package_repo_url=package_repo_url, tags_and_commits=tags_and_commits, logger=logger
56-
)
35+
for package_repo_url in packages_urls:
36+
logger(f"Processing package repo URL: {package_repo_url}")
37+
git_ls_remote = fetch_git_tags_raw(package_repo_url, 60, logger)
38+
if not git_ls_remote:
39+
continue
40+
41+
tags_and_commits = get_tags_and_commits_from_git_output(git_ls_remote)
42+
if not tags_and_commits:
43+
continue
44+
45+
yield generate_package_urls(
46+
package_repo_url=package_repo_url, tags_and_commits=tags_and_commits, logger=logger
47+
)
5748

5849

5950
def load_swift_package_urls(swift_index_repo):

0 commit comments

Comments
 (0)