Skip to content

Commit 82ad61d

Browse files
authored
[Benchmarks] Add option for shallow clones (#20137)
To speed up cloning big repositories, add an option for a shallow clone.
1 parent b76f12e commit 82ad61d

File tree

2 files changed

+93
-20
lines changed

2 files changed

+93
-20
lines changed

devops/scripts/benchmarks/git_project.py

Lines changed: 89 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,16 @@ def __init__(
2020
name: str,
2121
force_rebuild: bool = False,
2222
no_suffix_src: bool = False,
23+
shallow_clone: bool = True,
2324
) -> None:
2425
self._url = url
2526
self._ref = ref
2627
self._directory = directory
2728
self._name = name
2829
self._force_rebuild = force_rebuild
2930
self._no_suffix_src = no_suffix_src
30-
self._rebuild_needed = self._git_clone()
31+
self._shallow_clone = shallow_clone
32+
self._rebuild_needed = self._setup_repo()
3133

3234
@property
3335
def src_dir(self) -> Path:
@@ -128,42 +130,109 @@ def install(self) -> None:
128130
"""Installs the project."""
129131
run(f"cmake --install {self.build_dir}")
130132

131-
def _git_clone(self) -> bool:
133+
def _can_shallow_clone_ref(self, ref: str) -> bool:
    """Check whether ``ref`` is a remote branch or tag that can be shallow-cloned.

    The remote at ``self._url`` is queried with ``git ls-remote --heads --tags``.
    ``git ls-remote`` treats the trailing argument as a pattern matched against
    the *tail* of each ref name, so ``main`` would also match
    ``refs/heads/feature/main``. To avoid such false positives the output is
    parsed and only an exact ``refs/heads/<ref>`` or ``refs/tags/<ref>`` entry
    counts as a match.

    Args:
        ref: Branch name, tag name, or commit SHA to look up on the remote.

    Returns:
        True if the remote advertises a branch or tag named exactly ``ref``;
        False otherwise, including when the lookup itself fails.
    """
    try:
        # Only the remote query is guarded: any failure here is treated as
        # "cannot shallow clone" so the caller falls back to a full clone.
        result = run(f"git ls-remote --heads --tags {self._url} {ref}")
        output = result.stdout.decode().strip()
    except Exception as e:
        log.debug(
            f"Could not check ref {ref} via ls-remote: {e}, assuming SHA commit"
        )
        return False

    wanted = {f"refs/heads/{ref}", f"refs/tags/{ref}"}

    # Each output line has the form "<sha>\t<refname>"; keep the ref names only.
    advertised = set()
    for line in output.splitlines():
        fields = line.split(None, 1)
        if len(fields) == 2:
            advertised.add(fields[1].strip())

    if not wanted.isdisjoint(advertised):
        # Found the ref as a branch or tag
        log.debug(f"Ref {ref} found as branch/tag via ls-remote, can shallow clone")
        return True

    # Not found as branch/tag, likely a SHA commit
    log.debug(f"Ref {ref} not found as branch/tag via ls-remote, likely SHA commit")
    return False
156+
157+
def _git_clone(self) -> None:
    """Clone ``self._url`` into ``self.src_dir`` and check out ``self._ref``.

    When shallow cloning is enabled and the ref is a remote branch or tag
    (as reported by ``_can_shallow_clone_ref``), the clone is limited to
    ``--depth 1 --branch <ref>``. Otherwise a full recursive clone is made.
    In both cases ``git checkout <ref>`` is run afterwards in the source
    directory.

    Raises:
        Exception: Any error from the underlying commands is logged and
            re-raised unchanged.
    """
    try:
        log.debug(f"Cloning {self._url} into {self.src_dir} at commit {self._ref}")

        # The ref is only probed on the remote when a shallow clone was requested.
        use_shallow = self._shallow_clone and self._can_shallow_clone_ref(self._ref)
        if use_shallow:
            # Shallow clone for branches and tags only
            clone_command = f"git clone --recursive --depth 1 --branch {self._ref} {self._url} {self.src_dir}"
        else:
            if self._shallow_clone:
                log.debug(f"Cannot shallow clone SHA {self._ref}, using full clone")
            clone_command = f"git clone --recursive {self._url} {self.src_dir}"

        run(clone_command)
        run(f"git checkout {self._ref}", cwd=self.src_dir)
        log.debug(f"Cloned {self._url} into {self.src_dir} at commit {self._ref}")
    except Exception as e:
        log.error(f"Failed to clone repository {self._url}: {e}")
        raise
175+
176+
def _git_fetch(self) -> None:
    """Fetch from the remote and check out ``self._ref`` in the existing checkout.

    Runs ``git fetch``, ``git reset --hard`` and ``git checkout <ref>`` in
    ``self.src_dir``, in that order.

    Raises:
        Exception: Any error from the underlying commands is logged and
            re-raised unchanged.
    """
    try:
        log.debug(f"Fetching latest changes for {self._url} in {self.src_dir}")
        # Order matters: local modifications are discarded before switching refs.
        git_steps = (
            "git fetch",
            "git reset --hard",
            f"git checkout {self._ref}",
        )
        for step in git_steps:
            run(step, cwd=self.src_dir)
        log.debug(f"Fetched latest changes for {self._url} in {self.src_dir}")
    except Exception as e:
        log.error(f"Failed to fetch updates for repository {self._url}: {e}")
        raise
187+
188+
def _setup_repo(self) -> bool:
    """Ensure the source directory holds the repository checked out at ``self._ref``.

    Behaviour by case:
      * Source directory missing: clone it.
      * Source directory is a git checkout: compare ``HEAD`` with the target
        ref (both peeled to commits) and check out the ref if they differ.
        If resolving or checking out the ref fails, either re-clone from
        scratch (shallow mode) or fetch and retry the checkout.
      * Source directory exists without ``.git``: raise.

    Returns:
        bool: True if the repository was cloned or updated, False if it was already up-to-date.

    Raises:
        Exception: If the directory exists but is not a git repository, or if
            a clone/fetch performed here fails.
    """
    if not self.src_dir.exists():
        self._git_clone()
        return True

    if not Path(self.src_dir, ".git").exists():
        raise Exception(
            f"The directory {self.src_dir} exists but is not a git repository."
        )

    log.debug(
        f"Repository {self._url} already exists at {self.src_dir}, checking for updates."
    )
    current_commit = (
        run("git rev-parse HEAD^{commit}", cwd=self.src_dir).stdout.decode().strip()
    )
    try:
        # "^{commit}" peels annotated tags so both sides compare as commit ids.
        target_commit = (
            run(f"git rev-parse {self._ref}^{{commit}}", cwd=self.src_dir)
            .stdout.decode()
            .strip()
        )
        if current_commit != target_commit:
            log.debug(
                f"Current commit {current_commit} does not match target {target_commit}, checking out {self._ref}."
            )
            run("git reset --hard", cwd=self.src_dir)
            run(f"git checkout {self._ref}", cwd=self.src_dir)
            return True
    except Exception as e:
        # The ref is not available locally (or the checkout failed); keep the
        # cause in the log so the fallback below can be diagnosed.
        log.error(
            f"Failed to resolve target commit {self._ref}: {e}. Fetching updates."
        )
        if self._shallow_clone:
            # NOTE(review): this re-clones whenever shallow mode is requested,
            # even if the existing checkout was a full clone of a SHA ref —
            # confirm that a fetch would not be sufficient in that case.
            log.debug("Cloning a clean shallow copy.")
            shutil.rmtree(self.src_dir)
            self._git_clone()
        else:
            self._git_fetch()
        return True

    log.debug(
        f"Current commit {current_commit} matches target {target_commit}, no update needed."
    )
    return False

devops/scripts/benchmarks/utils/compute_runtime.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,12 +90,14 @@ def build_igc(self, repo, commit) -> tuple[Path, bool]:
9090
rebuilt = False
9191
if igc_project.needs_rebuild(check_install=True):
9292
# Clone igc dependencies by creating a GitProject instance for each dependency.
93+
# Repos with commit hashes as refs can't be cloned shallowly.
9394
GitProject(
9495
"https://github.com/intel/vc-intrinsics",
9596
"9d255266e1df8f1dc5d11e1fbb03213acfaa4fc7",
9697
Path(options.workdir),
9798
"vc-intrinsics",
9899
no_suffix_src=True,
100+
shallow_clone=False,
99101
)
100102
llvm_project = GitProject(
101103
"https://github.com/llvm/llvm-project",
@@ -125,13 +127,15 @@ def build_igc(self, repo, commit) -> tuple[Path, bool]:
125127
Path(options.workdir),
126128
"SPIRV-Tools",
127129
no_suffix_src=True,
130+
shallow_clone=False,
128131
)
129132
GitProject(
130133
"https://github.com/KhronosGroup/SPIRV-Headers.git",
131134
"0e710677989b4326ac974fd80c5308191ed80965",
132135
Path(options.workdir),
133136
"SPIRV-Headers",
134137
no_suffix_src=True,
138+
shallow_clone=False,
135139
)
136140

137141
configure_args = [

0 commit comments

Comments
 (0)