Skip to content

Commit c4d3700

Browse files
Merge pull request #752 from abravalheri/fallback-files-command
Add fallback for `find_files` on git/hg archives
2 parents 0e20dba + ae13586 commit c4d3700

File tree

6 files changed

+91
-6
lines changed

6 files changed

+91
-6
lines changed

setup.cfg

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,9 @@ setuptools.finalize_distribution_options =
5050
setuptools_scm.files_command =
5151
.hg = setuptools_scm.file_finder_hg:hg_find_files
5252
.git = setuptools_scm.file_finder_git:git_find_files
53+
setuptools_scm.files_command_fallback =
54+
.hg_archival.txt = setuptools_scm.file_finder_hg:hg_archive_find_files
55+
.git_archival.txt = setuptools_scm.file_finder_git:git_archive_find_files
5356
setuptools_scm.local_scheme =
5457
node-and-date = setuptools_scm.version:get_local_node_and_date
5558
node-and-timestamp = setuptools_scm.version:get_local_node_and_timestamp

src/setuptools_scm/file_finder.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,10 @@
1111

1212

1313
def scm_find_files(
14-
path: _t.PathT, scm_files: set[str], scm_dirs: set[str]
14+
path: _t.PathT,
15+
scm_files: set[str],
16+
scm_dirs: set[str],
17+
force_all_files: bool = False,
1518
) -> list[str]:
1619
""" setuptools compatible file finder that follows symlinks
1720
@@ -20,6 +23,7 @@ def scm_find_files(
2023
(including symlinks to directories)
2124
- scm_dirs: set of scm controlled directories
2225
(including directories containing no scm controlled files)
26+
- force_all_files: ignore ``scm_files`` and ``scm_dirs`` and list everything.
2327
2428
scm_files and scm_dirs must be absolute with symlinks resolved (realpath),
2529
with normalized case (normcase)
@@ -38,7 +42,7 @@ def _link_not_in_scm(n: str) -> bool:
3842
fn = os.path.join(realdirpath, os.path.normcase(n))
3943
return os.path.islink(fn) and fn not in scm_files
4044

41-
if realdirpath not in scm_dirs:
45+
if not force_all_files and realdirpath not in scm_dirs:
4246
# directory not in scm, don't walk its contents
4347
dirnames[:] = []
4448
continue
@@ -54,13 +58,16 @@ def _link_not_in_scm(n: str) -> bool:
5458
# symlink loop protection
5559
dirnames[:] = []
5660
continue
57-
dirnames[:] = [dn for dn in dirnames if not _link_not_in_scm(dn)]
61+
dirnames[:] = [
62+
dn for dn in dirnames if force_all_files or not _link_not_in_scm(dn)
63+
]
5864
for filename in filenames:
59-
if _link_not_in_scm(filename):
65+
if not force_all_files and _link_not_in_scm(filename):
6066
continue
6167
# dirpath + filename with symlinks preserved
6268
fullfilename = os.path.join(dirpath, filename)
63-
if os.path.normcase(os.path.realpath(fullfilename)) in scm_files:
69+
is_tracked = os.path.normcase(os.path.realpath(fullfilename)) in scm_files
70+
if force_all_files or is_tracked:
6471
res.append(os.path.join(path, os.path.relpath(fullfilename, realpath)))
6572
seen.add(realdirpath)
6673
return res

src/setuptools_scm/file_finder_git.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
from .file_finder import is_toplevel_acceptable
1111
from .file_finder import scm_find_files
12+
from .utils import data_from_mime
1213
from .utils import do_ex
1314
from .utils import trace
1415

@@ -101,3 +102,20 @@ def git_find_files(path: _t.PathT = "") -> list[str]:
101102
trace("toplevel mismatch", toplevel, fullpath)
102103
git_files, git_dirs = _git_ls_files_and_dirs(toplevel)
103104
return scm_find_files(path, git_files, git_dirs)
105+
106+
107+
def git_archive_find_files(path: _t.PathT = "") -> list[str]:
    """Fallback file finder for trees unpacked from a git archive.

    The function assumes ``path`` comes from a git archive, so every file
    that should have been ignored is already absent and listing everything
    is safe.

    :param path: directory expected to contain ``.git_archival.txt``.
    :return: an empty list when ``.git_archival.txt`` is missing or its
        ``node`` entry still contains ``$Format``; otherwise whatever
        ``scm_find_files`` returns with ``force_all_files=True``.
    """
    marker = os.path.join(path, ".git_archival.txt")
    if not os.path.exists(marker):
        return []

    node = data_from_mime(marker).get("node", "")
    substituted = "$Format" not in node
    if substituted:
        trace("git archive detected - fallback to listing all files")
        return scm_find_files(path, set(), set(), force_all_files=True)

    # Placeholders were never substituted, so this is not a reliable archive.
    return []

src/setuptools_scm/file_finder_hg.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,16 @@
22

33
import os
44
import subprocess
5+
from typing import TYPE_CHECKING
56

67
from .file_finder import is_toplevel_acceptable
78
from .file_finder import scm_find_files
9+
from .utils import data_from_mime
810
from .utils import do_ex
11+
from .utils import trace
12+
13+
if TYPE_CHECKING:
14+
from . import _types as _t
915

1016

1117
def _hg_toplevel(path: str) -> str | None:
@@ -49,3 +55,20 @@ def hg_find_files(path: str = "") -> list[str]:
4955
assert toplevel is not None
5056
hg_files, hg_dirs = _hg_ls_files_and_dirs(toplevel)
5157
return scm_find_files(path, hg_files, hg_dirs)
58+
59+
60+
def hg_archive_find_files(path: _t.PathT = "") -> list[str]:
    """Fallback file finder for trees unpacked from a mercurial archive.

    The function assumes ``path`` comes from a mercurial archive, so every
    file that should have been ignored is already absent and listing
    everything is safe.

    :param path: directory expected to contain ``.hg_archival.txt``.
    :return: an empty list when ``.hg_archival.txt`` is missing or has no
        ``node`` entry; otherwise whatever ``scm_find_files`` returns with
        ``force_all_files=True``.
    """
    marker = os.path.join(path, ".hg_archival.txt")
    if not os.path.exists(marker):
        return []

    metadata = data_from_mime(marker)
    if "node" in metadata:
        trace("hg archive detected - fallback to listing all files")
        return scm_find_files(path, set(), set(), force_all_files=True)

    # Without a ``node`` entry the archival file is not considered valid.
    return []

src/setuptools_scm/integration.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from __future__ import annotations
22

3+
import itertools
34
import os
45
import warnings
56
from typing import Any
@@ -91,7 +92,10 @@ def version_keyword(
9192

9293

9394
def find_files(path: _t.PathT = "") -> list[str]:
94-
for ep in iter_entry_points("setuptools_scm.files_command"):
95+
for ep in itertools.chain(
96+
iter_entry_points("setuptools_scm.files_command"),
97+
iter_entry_points("setuptools_scm.files_command_fallback"),
98+
):
9599
command = ep.load()
96100
if isinstance(command, str):
97101
# this technique is deprecated

testing/test_file_finder.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,3 +201,33 @@ def test_symlink_not_in_scm_while_target_is(inwd: WorkDir) -> None:
201201
@pytest.mark.skip_commit
202202
def test_not_commited(inwd: WorkDir) -> None:
203203
assert find_files() == []
204+
205+
206+
def test_unexpanded_git_archival(wd: WorkDir, monkeypatch: pytest.MonkeyPatch) -> None:
    """Unexpanded ``.git_archival.txt`` placeholders must not list any files."""
    root = wd.cwd
    monkeypatch.chdir(root)

    # An untracked file that would be reported if the fallback kicked in.
    (root / "file1.txt").touch()
    # ``$Format:%H$`` is the raw placeholder, i.e. no substitution happened.
    archival = root / ".git_archival.txt"
    archival.write_text("node: $Format:%H$", encoding="utf-8")

    assert find_files() == []
213+
214+
215+
@pytest.mark.parametrize("archive_file", (".git_archival.txt", ".hg_archival.txt"))
def test_archive(
    wd: WorkDir, monkeypatch: pytest.MonkeyPatch, archive_file: str
) -> None:
    """A populated archival file makes ``find_files`` list every file.

    The archival file carries a concrete ``node`` value, so the archive
    fallback is expected to report the archival file itself, the regular
    data file and the link next to it.
    """
    root = wd.cwd
    monkeypatch.chdir(root)

    data_dir = root / "data"
    data_dir.mkdir()
    (data_dir / "datafile").touch()

    sha = "a1bda3d984d1a40d7b00ae1d0869354d6d503001"
    (root / archive_file).write_text(f"node: {sha}", encoding="utf-8")

    datalink = data_dir / "datalink"
    if sys.platform == "win32":
        # Hard link on Windows instead of a symlink.
        os.link("data/datafile", datalink)
    else:
        # NOTE(review): a relative symlink target is resolved against the
        # link's own directory, so this appears to point at
        # ``data/data/datafile`` (a dangling link) - confirm that is intended.
        datalink.symlink_to("data/datafile")

    expected = _sep({archive_file, "data/datafile", "data/datalink"})
    assert set(find_files()) == expected

0 commit comments

Comments
 (0)