Skip to content

Commit 26a8982

Browse files
authored
Add utilities to create/init FederatedCode data repo (#1896)
* Add utilities to create/init FederatedCode data repo Signed-off-by: Keshav Priyadarshi <[email protected]> * Add docstring to repo initialization utilities Signed-off-by: Keshav Priyadarshi <[email protected]> * Fix repo_url Signed-off-by: Keshav Priyadarshi <[email protected]> --------- Signed-off-by: Keshav Priyadarshi <[email protected]>
1 parent 23b94ad commit 26a8982

File tree

3 files changed

+125
-21
lines changed

3 files changed

+125
-21
lines changed

scanpipe/pipelines/publish_to_federatedcode.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121
# Visit https://github.com/aboutcode-org/scancode.io for support and download.
2222

2323

24+
import shutil
25+
2426
from scanpipe.pipelines import Pipeline
2527
from scanpipe.pipes import federatedcode
2628

@@ -41,11 +43,12 @@ class PublishToFederatedCode(Pipeline):
4143
def steps(cls):
4244
return (
4345
cls.check_federatedcode_eligibility,
46+
cls.create_federatedcode_working_dir,
4447
cls.get_package_repository,
4548
cls.clone_repository,
4649
cls.add_scan_result,
4750
cls.commit_and_push_changes,
48-
cls.delete_local_clone,
51+
cls.delete_working_dir,
4952
)
5053

5154
def check_federatedcode_eligibility(self):
@@ -55,9 +58,12 @@ def check_federatedcode_eligibility(self):
5558
"""
5659
federatedcode.check_federatedcode_eligibility(project=self.project)
5760

61+
def create_federatedcode_working_dir(self):
    """Create the FederatedCode working dir and store its path on the pipeline."""
    working_path = federatedcode.create_federatedcode_working_dir()
    self.working_path = working_path
63+
5864
def get_package_repository(self):
5965
"""Get the Git repository URL and scan path for a given package."""
60-
self.package_git_repo, self.package_scan_file = (
66+
self.package_repo_name, self.package_git_repo, self.package_scan_file = (
6167
federatedcode.get_package_repository(
6268
project_purl=self.project.purl, logger=self.log
6369
)
@@ -67,6 +73,7 @@ def clone_repository(self):
6773
"""Clone repository to local_path."""
6874
self.repo = federatedcode.clone_repository(
6975
repo_url=self.package_git_repo,
76+
clone_path=self.working_path / self.package_repo_name,
7077
logger=self.log,
7178
)
7279

@@ -91,6 +98,6 @@ def commit_and_push_changes(self):
9198
f"Scan result for '{self.project.purl}' pushed to '{self.package_git_repo}'"
9299
)
93100

94-
def delete_local_clone(self):
95-
"""Remove local clone."""
96-
federatedcode.delete_local_clone(repo=self.repo)
101+
def delete_working_dir(self):
    """
    Remove the temporary working dir and everything cloned inside it.

    The directory is the one stored in ``self.working_path`` by the
    ``create_federatedcode_working_dir`` step.
    """
    # The pipeline only ever sets `working_path`; there is no `working_dir`
    # attribute, so reading it would raise AttributeError and leak the dir.
    shutil.rmtree(self.working_path)

scanpipe/pipes/federatedcode.py

Lines changed: 110 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import textwrap
2828
from pathlib import Path
2929
from urllib.parse import urljoin
30+
from urllib.parse import urlparse
3031

3132
from django.conf import settings
3233

@@ -43,6 +44,21 @@
4344
logger = logging.getLogger(__name__)
4445

4546

47+
def url_exists(url, timeout=5):
    """
    Send a HEAD request to `url`, waiting at most `timeout` seconds.

    Return True only when the response status is 200. Any request failure or
    HTTP error status is logged at debug level and reported as False.
    """
    try:
        head_response = requests.head(url, timeout=timeout)
        head_response.raise_for_status()
    except requests.exceptions.RequestException as request_exception:
        logger.debug(f"Error while checking {url}: {request_exception}")
        return False
    else:
        return head_response.status_code == requests.codes.ok
60+
61+
4662
def is_configured():
4763
"""Return True if the required FederatedCode settings have been set."""
4864
if all(
@@ -57,19 +73,17 @@ def is_configured():
5773
return False
5874

5975

76+
def create_federatedcode_working_dir():
    """
    Create a new temporary directory for cloning federatedcode repositories
    and return its location as a ``Path``.
    """
    working_dir = tempfile.mkdtemp()
    return Path(working_dir)
79+
80+
6081
def is_available():
    """
    Return True if FederatedCode is configured and the configured Git account
    URL answers a HEAD request successfully, else False.
    """
    if is_configured():
        return url_exists(settings.FEDERATEDCODE_GIT_ACCOUNT_URL)

    return False
7387

7488

7589
def get_package_repository(project_purl, logger=None):
@@ -85,7 +99,7 @@ def get_package_repository(project_purl, logger=None):
8599
)
86100
package_git_repo_url = urljoin(git_account_url, f"{package_repo_name}.git")
87101

88-
return package_git_repo_url, package_scan_path
102+
return package_repo_name, package_git_repo_url, package_scan_path
89103

90104

91105
def check_federatedcode_eligibility(project):
@@ -146,27 +160,108 @@ def check_federatedcode_configured_and_available(logger=None):
146160
logger("Federatedcode repositories are configured and available.")
147161

148162

149-
def clone_repository(repo_url, logger=None):
150-
"""Clone repository to local_path."""
151-
local_dir = tempfile.mkdtemp()
163+
def clone_repository(repo_url, clone_path, logger, shallow_clone=True):
    """
    Clone `repo_url` into `clone_path` and return the resulting ``Repo``.

    The FederatedCode service token from the settings is embedded in the
    HTTPS URL used for the clone. When `shallow_clone` is true, the clone is
    limited to a depth of 1. The repository-level Git user name and email are
    then set from the FederatedCode service settings.
    """
    logger(f"Cloning repository {repo_url}")

    token_prefix = f"https://{settings.FEDERATEDCODE_GIT_SERVICE_TOKEN}@"
    authenticated_repo_url = repo_url.replace("https://", token_prefix)

    # Only pass `depth` when a shallow clone was requested.
    depth_option = {"depth": 1} if shallow_clone else {}
    repo = Repo.clone_from(
        url=authenticated_repo_url,
        to_path=clone_path,
        **depth_option,
    )

    name_writer = repo.config_writer(config_level="repository")
    name_writer.set_value(
        "user", "name", settings.FEDERATEDCODE_GIT_SERVICE_NAME
    ).release()

    email_writer = repo.config_writer(config_level="repository")
    email_writer.set_value(
        "user", "email", settings.FEDERATEDCODE_GIT_SERVICE_EMAIL
    ).release()

    return repo
168187

169188

189+
def get_github_org(url):
    """
    Return the org username from a GitHub account URL.

    This is the first segment of the URL path, or an empty string when the
    URL has no path.
    """
    url_path = urlparse(url).path
    first_segment, _, _ = url_path.lstrip("/").partition("/")
    return first_segment
195+
196+
197+
def create_repository(repo_name, clone_path, logger, shallow_clone=True):
    """
    Create and initialize the remote FederatedCode `repo_name` repository,
    clone it to `clone_path`, and return the local checkout.

    The repository is created through the GitHub organization repositories
    API as a public, auto-initialized repository with a CC-BY-4.0 license.
    The organization is taken from ``settings.FEDERATEDCODE_GIT_ACCOUNT_URL``.
    `logger` and `shallow_clone` are passed on to ``clone_repository``.

    Raise a ``requests`` exception if the creation request fails or returns
    an error status.
    """
    # Strip any trailing slash from the setting first, so the joined
    # repository URL never contains a doubled slash.
    account_url = f'{settings.FEDERATEDCODE_GIT_ACCOUNT_URL.rstrip("/")}/'
    repo_url = urljoin(account_url, repo_name)

    headers = {
        "Authorization": f"token {settings.FEDERATEDCODE_GIT_SERVICE_TOKEN}",
        "Accept": "application/vnd.github+json",
    }

    data = {
        "name": repo_name,
        "private": False,
        "auto_init": True,
        # The GitHub API reads the license keyword from `license_template`;
        # an unknown key is not a license setting and leaves the new
        # repository unlicensed.
        "license_template": "cc-by-4.0",
    }
    org_name = get_github_org(account_url)
    create_repo_api = f"https://api.github.com/orgs/{org_name}/repos"
    response = requests.post(
        create_repo_api,
        headers=headers,
        json=data,
        timeout=5,
    )
    response.raise_for_status()
    return clone_repository(
        repo_url=repo_url,
        clone_path=clone_path,
        shallow_clone=shallow_clone,
        logger=logger,
    )
231+
232+
233+
def get_or_create_repository(repo_name, working_path, logger, shallow_clone=True):
    """
    Return a ``(created, repo)`` tuple for the FederatedCode `repo_name`
    repository, where `repo` is its local checkout in `working_path` and
    `created` is True only when the remote repository had to be created.

    - If local checkout for `repo_name` already exists in `working_path`,
      return ``(False, repo)`` for that checkout.
    - If no local checkout exists but the remote repository `repo_name` exists,
      clone it locally and return ``(False, repo)``.
    - If the remote repository does not exist, create and initialize `repo_name`
      repository, perform local checkout, and return ``(True, repo)``.
    """
    # Strip any trailing slash from the setting first, so the joined
    # repository URL never contains a doubled slash.
    account_url = f'{settings.FEDERATEDCODE_GIT_ACCOUNT_URL.rstrip("/")}/'
    repo_url = urljoin(account_url, repo_name)
    clone_path = working_path / repo_name

    if clone_path.exists():
        return False, Repo(clone_path)

    if url_exists(repo_url):
        return False, clone_repository(
            repo_url=repo_url,
            clone_path=clone_path,
            logger=logger,
            shallow_clone=shallow_clone,
        )

    return True, create_repository(
        repo_name=repo_name,
        clone_path=clone_path,
        logger=logger,
        shallow_clone=shallow_clone,
    )
263+
264+
170265
def add_scan_result(project, repo, package_scan_file, logger=None):
171266
"""Add package scan result to the local Git repository."""
172267
relative_scan_file_path = Path(*package_scan_file.parts[1:])

scanpipe/tests/pipes/test_federatedcode.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,14 +50,16 @@ def test_scanpipe_pipes_federatedcode_get_package_repository(self):
5050
version="v.1.2.3",
5151
)
5252
project_purl = "pkg:npm/[email protected]"
53+
expected_repo_name = "aboutcode-packages-npm-3f1"
5354
expected_git_repo = "https://github.com/test/aboutcode-packages-npm-3f1.git"
5455
expected_scan_path = (
5556
"aboutcode-packages-npm-3f1/npm/foobar/v1.2.3/scancodeio.json"
5657
)
57-
git_repo, scan_path = federatedcode.get_package_repository(
58+
repo_name, git_repo, scan_path = federatedcode.get_package_repository(
5859
project_purl=project_purl
5960
)
6061

62+
self.assertEqual(expected_repo_name, repo_name)
6163
self.assertEqual(expected_git_repo, git_repo)
6264
self.assertEqual(expected_scan_path, str(scan_path))
6365

0 commit comments

Comments
 (0)