Skip to content

Commit cb136d4

Browse files
committed
Abstract filesystem operations in file_utils.py
- Add an `fs` parameter to the `walk_with_gitignore` and `get_files` functions
- Replace direct filesystem calls with `Fs` interface methods
- Use `fs.stat()`, `fs.readlines()`, and `fs.list()` instead of `os.scandir()` and `open()`
- Exclude `.gitignore` files from walk results
- Add comprehensive tests using `RecordFs` for deterministic testing
- Test gitignore filtering, skip functions, and edge cases
1 parent f3aa1d1 commit cb136d4

File tree

2 files changed

+174
-11
lines changed

2 files changed

+174
-11
lines changed

aws_doc_sdk_examples_tools/file_utils.py

Lines changed: 16 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
from shutil import rmtree
99

1010
from pathspec import GitIgnoreSpec
11+
from .fs import Fs, PathFs
1112

1213

1314
def match_path_to_specs(path: Path, specs: List[GitIgnoreSpec]) -> bool:
@@ -21,7 +22,7 @@ def match_path_to_specs(path: Path, specs: List[GitIgnoreSpec]) -> bool:
2122

2223

2324
def walk_with_gitignore(
    root: Path, specs: List[GitIgnoreSpec] = [], fs: Fs = PathFs()
) -> Generator[Path, None, None]:
    """
    Starting from a root directory, walk the file system yielding a path for
    each file that is not excluded by gitignore rules.

    When ``root`` contains a ``.gitignore`` file, its lines are parsed into a
    GitIgnoreSpec that is added (in a new list) to the inherited ``specs`` and
    applies to ``root`` and every directory below it. Entries for which
    ``match_path_to_specs`` returns true are dropped; directories are walked
    recursively; ``.gitignore`` files themselves are never yielded.

    All filesystem access goes through ``fs`` (``stat``, ``readlines`` and
    ``list``), so an alternative Fs implementation can be supplied.
    """
    ignore_file = root / ".gitignore"
    if fs.stat(ignore_file).exists:
        local_spec = GitIgnoreSpec.from_lines(fs.readlines(ignore_file))
        # Rebind to a fresh list so the caller's list (and the shared default)
        # is never mutated.
        specs = [*specs, local_spec]

    for entry in fs.list(root):
        if match_path_to_specs(entry, specs):
            continue
        if fs.stat(entry).is_dir:
            yield from walk_with_gitignore(entry, specs, fs)
        elif entry.name != ".gitignore":
            # Don't yield .gitignore files themselves
            yield entry
4348

4449

4550
def get_files(
    root: Path, skip: Callable[[Path], bool] = lambda _: False, fs: Fs = PathFs()
) -> Generator[Path, None, None]:
    """
    Yield every file under ``root`` that survives both filters.

    Files come from ``walk_with_gitignore`` (called with the given ``fs``), so
    gitignored paths are already excluded. Each remaining path is passed to
    ``skip`` and is yielded only when ``skip`` returns a falsy value; this is
    meant for files that are in git but are machine generated, so we don't
    want to validate them.
    """
    yield from (
        candidate
        for candidate in walk_with_gitignore(root, fs=fs)
        if not skip(candidate)
    )
5661

Lines changed: 158 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,158 @@
1+
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
"""
5+
Tests for file_utils.py with filesystem abstraction.
6+
"""
7+
8+
from pathlib import Path
9+
10+
from .fs import RecordFs
11+
from .file_utils import walk_with_gitignore, get_files
12+
13+
14+
class TestWalkWithGitignore:
    """Checks for walk_with_gitignore driven by an in-memory RecordFs."""

    def test_basic_directory_traversal(self):
        """All files are yielded when the directory has no .gitignore."""
        tree = {
            Path("root/file1.py"): "print('file1')",
            Path("root/file2.js"): "console.log('file2')",
        }

        found = sorted(walk_with_gitignore(Path("root"), fs=RecordFs(tree)))

        # Sort both sides: the walk order is not part of the contract here.
        assert found == sorted([Path("root/file1.py"), Path("root/file2.js")])

    def test_gitignore_filtering(self):
        """Files matching patterns in .gitignore are left out of the walk."""
        tree = {
            Path("root/.gitignore"): "*.tmp\n*.log\n",
            Path("root/keep.py"): "print('keep')",
            Path("root/ignore.tmp"): "temporary",
            Path("root/keep.js"): "console.log('keep')",
            Path("root/debug.log"): "log content",
        }

        found = sorted(walk_with_gitignore(Path("root"), fs=RecordFs(tree)))

        # The .gitignore file itself must not appear in the results.
        assert found == sorted([Path("root/keep.py"), Path("root/keep.js")])

    def test_no_gitignore_file(self):
        """Without a .gitignore nothing is filtered out."""
        tree = {
            Path("root/file1.py"): "print('file1')",
            Path("root/file2.js"): "console.log('file2')",
            Path("root/file3.txt"): "text content",
        }

        found = sorted(walk_with_gitignore(Path("root"), fs=RecordFs(tree)))

        wanted = [
            Path("root/file1.py"),
            Path("root/file2.js"),
            Path("root/file3.txt"),
        ]
        assert found == sorted(wanted)

    def test_empty_directory(self):
        """Walking a filesystem with no records yields nothing."""
        empty_fs = RecordFs({})

        assert list(walk_with_gitignore(Path("empty"), fs=empty_fs)) == []

    def test_directory_with_only_gitignore(self):
        """A directory holding only a .gitignore yields nothing."""
        only_ignore = RecordFs(
            {
                Path("root/.gitignore"): "*.tmp\n",
            }
        )

        assert list(walk_with_gitignore(Path("root"), fs=only_ignore)) == []
93+
94+
95+
class TestGetFiles:
96+
"""Test get_files with RecordFs."""
97+
98+
def test_get_files_basic(self):
99+
"""Test basic get_files functionality."""
100+
fs = RecordFs(
101+
{
102+
Path("root/file1.py"): "print('file1')",
103+
Path("root/file2.js"): "console.log('file2')",
104+
}
105+
)
106+
107+
files = list(get_files(Path("root"), fs=fs))
108+
109+
expected = [
110+
Path("root/file1.py"),
111+
Path("root/file2.js"),
112+
]
113+
assert sorted(files) == sorted(expected)
114+
115+
def test_get_files_with_skip_function(self):
116+
"""Test get_files with skip function."""
117+
fs = RecordFs(
118+
{
119+
Path("root/keep.py"): "print('keep')",
120+
Path("root/skip.py"): "print('skip')",
121+
Path("root/keep.js"): "console.log('keep')",
122+
Path("root/skip.js"): "console.log('skip')",
123+
}
124+
)
125+
126+
def skip_function(path: Path) -> bool:
127+
return "skip" in path.name
128+
129+
files = list(get_files(Path("root"), skip=skip_function, fs=fs))
130+
131+
expected = [
132+
Path("root/keep.py"),
133+
Path("root/keep.js"),
134+
]
135+
assert sorted(files) == sorted(expected)
136+
137+
def test_get_files_with_gitignore_and_skip(self):
138+
"""Test get_files with both gitignore and skip function."""
139+
fs = RecordFs(
140+
{
141+
Path("root/.gitignore"): "*.tmp\n",
142+
Path("root/keep.py"): "print('keep')",
143+
Path("root/skip.py"): "print('skip')",
144+
Path("root/ignore.tmp"): "temporary",
145+
Path("root/keep.js"): "console.log('keep')",
146+
}
147+
)
148+
149+
def skip_function(path: Path) -> bool:
150+
return "skip" in path.name
151+
152+
files = list(get_files(Path("root"), skip=skip_function, fs=fs))
153+
154+
expected = [
155+
Path("root/keep.py"),
156+
Path("root/keep.js"),
157+
]
158+
assert sorted(files) == sorted(expected)

0 commit comments

Comments
 (0)