Skip to content

Commit d81904c

Browse files
authored
Improving download performance when cloning based on specific branches or tags (#190)
Signed-off-by: MoonJeWoong <[email protected]>
1 parent 573b432 commit d81904c

File tree

4 files changed

+132
-29
lines changed

4 files changed

+132
-29
lines changed

requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,5 @@ setuptools>=65.5.1 # not directly required, pinned by Snyk to avoid a vulnerabil
1313
numpy; python_version < '3.8'
1414
numpy>=1.22.2; python_version >= '3.8'
1515
npm
16-
requests
16+
requests
17+
GitPython

src/fosslight_util/download.py

Lines changed: 25 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
import logging
1111
import argparse
1212
import shutil
13-
import pygit2 as git
13+
from git import Repo, GitCommandError
1414
import bz2
1515
import contextlib
1616
from datetime import datetime
@@ -230,14 +230,10 @@ def get_github_token(git_url):
230230

231231

232232
def download_git_clone(git_url, target_dir, checkout_to="", tag="", branch=""):
233-
ref_to_checkout = decide_checkout(checkout_to, tag, branch)
234-
msg = ""
235233
oss_name = get_github_ossname(git_url)
236-
oss_version = ""
237-
github_token = get_github_token(git_url)
238-
callbacks = None
239-
if github_token != "":
240-
callbacks = git.RemoteCallbacks(credentials=git.UserPass("foo", github_token)) # username is not used, so set to dummy
234+
refs_to_checkout = decide_checkout(checkout_to, tag, branch)
235+
clone_default_branch_flag = False
236+
msg = ""
241237

242238
try:
243239
if platform.system() != "Windows":
@@ -248,30 +244,35 @@ def download_git_clone(git_url, target_dir, checkout_to="", tag="", branch=""):
248244
alarm.start()
249245

250246
Path(target_dir).mkdir(parents=True, exist_ok=True)
251-
repo = git.clone_repository(git_url, target_dir,
252-
bare=False, repository=None,
253-
remote=None, callbacks=callbacks)
247+
if refs_to_checkout != "":
248+
try:
249+
# GitPython takes the same `branch` argument whether the ref to check out is a branch or a tag.
250+
repo = Repo.clone_from(git_url, target_dir, branch=refs_to_checkout)
251+
except GitCommandError as error:
252+
error_msg = error.args[2].decode("utf-8")
253+
if "Remote branch " + refs_to_checkout + " not found in upstream origin" in error_msg:
254+
# Fall back to cloning the default branch when the requested branch or tag does not exist.
255+
repo = Repo.clone_from(git_url, target_dir)
256+
clone_default_branch_flag = True
257+
else:
258+
repo = Repo.clone_from(git_url, target_dir)
259+
clone_default_branch_flag = True
260+
261+
if refs_to_checkout != tag or clone_default_branch_flag:
262+
oss_version = repo.active_branch.name
263+
else:
264+
oss_version = repo.git.describe('--tags')
265+
logger.info(f"git checkout: {oss_version}")
266+
254267
if platform.system() != "Windows":
255268
signal.alarm(0)
256269
else:
257270
del alarm
258271
except Exception as error:
259272
logger.warning(f"git clone - failed: {error}")
260273
msg = str(error)
261-
return False, msg, oss_name, oss_version
262-
try:
263-
if ref_to_checkout != "":
264-
ref_list = [x for x in repo.references]
265-
ref_to_checkout = get_ref_to_checkout(ref_to_checkout, ref_list)
266-
logger.info(f"git checkout: {ref_to_checkout}")
267-
repo.checkout(ref_to_checkout)
274+
return False, msg, oss_name, refs_to_checkout
268275

269-
for prefix_ref in prefix_refs:
270-
if ref_to_checkout.startswith(prefix_ref):
271-
oss_version = ref_to_checkout[len(prefix_ref):]
272-
273-
except Exception as error:
274-
logger.warning(f"git checkout to {ref_to_checkout} - failed: {error}")
275276
return True, msg, oss_name, oss_version
276277

277278

tests/test_download.py

Lines changed: 104 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,34 @@
44
import os
55
import pytest
66

7-
from fosslight_util.download import cli_download_and_extract
7+
from fosslight_util.download import cli_download_and_extract, download_git_clone
88
from tests import constants
99

1010

1111
def test_download_from_github():
12+
# given
13+
git_url = "https://github.com/LGE-OSS/example"
14+
target_dir = os.path.join(constants.TEST_RESULT_DIR, "download/example")
15+
log_dir = "test_result/download_log/example"
16+
1217
# when
18+
success, _, _, _ = cli_download_and_extract(git_url, target_dir, log_dir)
19+
20+
# then
21+
assert success is True
22+
assert len(os.listdir(target_dir)) > 0
23+
24+
25+
@pytest.mark.parametrize("git_url",
26+
["git://git.kernel.org/pub/scm/utils/kernel/kmod/kmod.git;protocol=git;branch=ci-test",
27+
"git://git.kernel.org/pub/scm/utils/kernel/kmod/kmod.git;protocol=git;tag=v32"])
28+
def test_download_from_github_with_branch_or_tag(git_url):
29+
# given
1330
target_dir = os.path.join(constants.TEST_RESULT_DIR, "download/example")
14-
success, _, _, _ = cli_download_and_extract("https://github.com/LGE-OSS/example",
15-
target_dir,
16-
"test_result/download_log/example")
31+
log_dir = "test_result/download_log/example"
32+
33+
# when
34+
success, _, _, _ = cli_download_and_extract(git_url, target_dir, log_dir)
1735

1836
# then
1937
assert success is True
@@ -38,3 +56,85 @@ def test_download_from_wget(project_name, project_url):
3856
# then
3957
assert success is True
4058
assert len(os.listdir(target_dir)) > 0
59+
60+
61+
def test_download_git_clone_with_branch():
62+
# given
63+
git_url = "git://git.kernel.org/pub/scm/utils/kernel/kmod/kmod.git"
64+
target_dir = os.path.join(constants.TEST_RESULT_DIR, "download/example")
65+
branch_name = "ci-test"
66+
67+
# when
68+
success, _, oss_name, oss_version = download_git_clone(git_url, target_dir, branch=branch_name)
69+
70+
# then
71+
assert success is True
72+
assert len(os.listdir(target_dir)) > 0
73+
assert oss_name == ''
74+
assert oss_version == branch_name
75+
76+
77+
def test_download_git_clone_with_tag():
78+
# given
79+
git_url = "git://git.kernel.org/pub/scm/utils/kernel/kmod/kmod.git"
80+
target_dir = os.path.join(constants.TEST_RESULT_DIR, "download/example")
81+
tag_name = "v32"
82+
83+
# when
84+
success, _, oss_name, oss_version = download_git_clone(git_url, target_dir, tag=tag_name)
85+
86+
# then
87+
assert success is True
88+
assert len(os.listdir(target_dir)) > 0
89+
assert oss_name == ''
90+
assert oss_version == tag_name
91+
92+
93+
def test_download_main_branch_when_any_branch_or_tag_not_entered():
94+
# given
95+
git_url = "https://github.com/LGE-OSS/example"
96+
target_dir = os.path.join(constants.TEST_RESULT_DIR, "download/example")
97+
expected_oss_name = "main"
98+
99+
# when
100+
success, _, oss_name, oss_version = download_git_clone(git_url, target_dir)
101+
102+
# then
103+
assert success is True
104+
assert len(os.listdir(target_dir)) > 0
105+
assert oss_name == 'LGE-OSS-example'
106+
assert oss_version == expected_oss_name
107+
108+
109+
def test_download_main_branch_when_non_existent_branch_entered():
110+
# given
111+
git_url = "https://github.com/LGE-OSS/example"
112+
target_dir = os.path.join(constants.TEST_RESULT_DIR, "download/example")
113+
branch_name = "non-existent-branch"
114+
expected_oss_name = "main"
115+
116+
# when
117+
success, _, oss_name, oss_version = download_git_clone(git_url, target_dir, branch=branch_name)
118+
119+
# then
120+
assert success is True
121+
assert len(os.listdir(target_dir)) > 0
122+
assert oss_name == 'LGE-OSS-example'
123+
assert oss_version == expected_oss_name
124+
125+
126+
def test_download_main_branch_when_non_existent_tag_entered():
127+
# given
128+
git_url = "https://github.com/LGE-OSS/example"
129+
target_dir = os.path.join(constants.TEST_RESULT_DIR, "download/example")
130+
tag_name = "non-existent-tag"
131+
expected_oss_name = "main"
132+
133+
# when
134+
success, _, oss_name, oss_version = download_git_clone(git_url, target_dir, tag=tag_name)
135+
136+
# then
137+
assert success is True
138+
assert len(os.listdir(target_dir)) > 0
139+
assert oss_name == 'LGE-OSS-example'
140+
assert oss_version == expected_oss_name

tox.ini

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ exclude = .tox/*
2020
filterwarnings = ignore::DeprecationWarning
2121
norecursedirs = test_result/* tests/legacy
2222

23+
2324
[testenv:test_run]
2425
deps =
2526
-r{toxinidir}/requirements-dev.txt

0 commit comments

Comments
 (0)