Skip to content

Commit 69df132

Browse files
committed
perf: only consider tracked files when building toolchain hashes
Issue: #663
1 parent 02a58db commit 69df132

File tree

3 files changed

+48 -8 lines changed

3 files changed

+48 -8 lines changed

src/taskgraph/util/hash.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,10 @@
44

55
import functools
66
import hashlib
7-
from pathlib import Path
7+
import os
88

99
from taskgraph.util import path as mozpath
10+
from taskgraph.util.vcs import get_repository
1011

1112

1213
@functools.lru_cache(maxsize=None)
@@ -52,8 +53,5 @@ def _find_matching_files(base_path, pattern):
5253

5354
@functools.lru_cache(maxsize=None)
5455
def _get_all_files(base_path):
55-
return [
56-
mozpath.normsep(str(path))
57-
for path in Path(base_path).rglob("*")
58-
if path.is_file()
59-
]
56+
repo = get_repository(os.getcwd())
57+
return repo.get_tracked_files(base_path)

src/taskgraph/util/vcs.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ def base_rev(self) -> str:
6969

7070
@property
7171
@abstractmethod
72-
def branch(self) -> str | None:
72+
def branch(self) -> Optional[str]:
7373
"""Current branch or bookmark the checkout has active."""
7474

7575
@property
@@ -125,6 +125,15 @@ def get_url(self, remote: Optional[str]) -> str:
125125
def get_commit_message(self, revision: Optional[str]) -> str:
126126
"""Commit message of specified revision or current commit."""
127127

128+
@abstractmethod
129+
def get_tracked_files(self, *paths: str, rev: Optional[str] = None) -> List[str]:
130+
"""Return list of tracked files.
131+
132+
``*paths`` are path specifiers to limit results to.
133+
``rev`` is a revision specifier at which to retrieve the files.
134+
Defaults to the parent of the working copy if unspecified.
135+
"""
136+
128137
@abstractmethod
129138
def get_changed_files(
130139
self,
@@ -280,6 +289,10 @@ def _files_template(self, diff_filter):
280289
template += "{file_mods % '{file}\\n'}"
281290
return template
282291

292+
def get_tracked_files(self, *paths, rev=None):
293+
rev = rev or "."
294+
return self.run("files", "-r", rev, *paths).splitlines()
295+
283296
def get_changed_files(self, diff_filter=None, mode=None, rev=None, base_rev=None):
284297
diff_filter = diff_filter or "ADM"
285298
if rev is None:
@@ -462,6 +475,10 @@ def get_commit_message(self, revision=None):
462475
revision = revision or "HEAD"
463476
return self.run("log", "-n1", "--format=%B", revision)
464477

478+
def get_tracked_files(self, *paths, rev=None):
479+
rev = rev or "HEAD"
480+
return self.run("ls-tree", "-r", "--name-only", rev, *paths).splitlines()
481+
465482
def get_changed_files(self, diff_filter=None, mode=None, rev=None, base_rev=None):
466483
diff_filter = diff_filter or "ADM"
467484
mode = mode or "unstaged"

test/test_util_vcs.py

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
import os
66
import subprocess
7+
from pathlib import Path
78
from textwrap import dedent
89

910
import pytest
@@ -211,7 +212,31 @@ def test_default_branch_cloned_metadata(tmpdir, default_git_branch, repo):
211212

212213

213214
def assert_files(actual, expected):
214-
assert set(map(os.path.basename, actual)) == set(expected)
215+
assert set(actual) == set(expected)
216+
217+
218+
def test_get_tracked_files(repo):
219+
assert_files(repo.get_tracked_files(), ["first_file"])
220+
221+
second_file = Path(repo.path) / "subdir" / "second_file"
222+
second_file.parent.mkdir()
223+
second_file.write_text("foo")
224+
assert_files(repo.get_tracked_files(), ["first_file"])
225+
226+
repo.run("add", str(second_file))
227+
assert_files(repo.get_tracked_files(), ["first_file"])
228+
229+
repo.run("commit", "-m", "Add second file")
230+
rev = ".~1" if repo.tool == "hg" else "HEAD~1"
231+
assert_files(repo.get_tracked_files(), ["first_file", "subdir/second_file"])
232+
assert_files(repo.get_tracked_files("subdir"), ["subdir/second_file"])
233+
assert_files(repo.get_tracked_files(rev=rev), ["first_file"])
234+
235+
if repo.tool == "git":
236+
assert_files(repo.get_tracked_files("subdir", rev=rev), [])
237+
elif repo.tool == "hg":
238+
with pytest.raises(subprocess.CalledProcessError):
239+
repo.get_tracked_files("subdir", rev=rev)
215240

216241

217242
def test_get_changed_files_no_changes(repo):

0 commit comments

Comments (0)