Skip to content

Commit 144fa6e

Browse files
authored
Refactor DocGen to use filesystem abstraction (#178)
* Refactor DocGen to use filesystem abstraction * Add readlines method to Fs interface and implementations - Add abstract readlines method to Fs class - Implement readlines in PathFs using file.readlines() - Implement readlines in RecordFs using splitlines(keepends=True) - Add comprehensive tests for both implementations - Test various scenarios: empty files, no final newline, different line endings * Abstract filesystem operations in file_utils.py (#179) * Abstract filesystem operations in file_utils.py - Add fs parameter to walk_with_gitignore and get_files functions - Replace direct filesystem calls with Fs interface methods - Use fs.stat(), fs.readlines(), and fs.list() instead of os.scandir() and open() - Exclude .gitignore files from walk results - Add comprehensive tests using RecordFs for deterministic testing - Test gitignore filtering, skip functions, and edge cases * Address PR feedback: use fully qualified imports, absolute paths, and add nested gitignore test - Use fully qualified imports in file_utils.py and file_utils_test.py - Convert all test paths to absolute paths as requested - Add comprehensive nested gitignore test case - Improve RecordFs.list() method to handle intermediate directories properly * Hide all FS serialization in DocGenEncoder * Handle test cleanup better.
1 parent 8aaa767 commit 144fa6e

File tree

6 files changed

+411
-66
lines changed

6 files changed

+411
-66
lines changed

aws_doc_sdk_examples_tools/doc_gen.py

Lines changed: 64 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
# from os import glob
1717

1818
from .categories import Category, parse as parse_categories
19+
from .fs import Fs, PathFs
1920
from .metadata import (
2021
Example,
2122
DocFilenames,
@@ -55,6 +56,7 @@ class DocGenMergeWarning(MetadataError):
5556
class DocGen:
5657
root: Path
5758
errors: MetadataErrors
59+
fs: Fs = field(default_factory=PathFs)
5860
entities: Dict[str, str] = field(default_factory=dict)
5961
prefix: Optional[str] = None
6062
validation: ValidationConfig = field(default_factory=ValidationConfig)
@@ -171,8 +173,12 @@ def extend_examples(self, examples: Iterable[Example], errors: MetadataErrors):
171173
self.examples[id] = example
172174

173175
@classmethod
174-
def empty(cls, validation: ValidationConfig = ValidationConfig()) -> "DocGen":
175-
return DocGen(root=Path("/"), errors=MetadataErrors(), validation=validation)
176+
def empty(
177+
cls, validation: ValidationConfig = ValidationConfig(), fs: Fs = PathFs()
178+
) -> "DocGen":
179+
return DocGen(
180+
root=Path("/"), errors=MetadataErrors(), validation=validation, fs=fs
181+
)
176182

177183
@classmethod
178184
def default(cls) -> "DocGen":
@@ -190,6 +196,7 @@ def clone(self) -> "DocGen":
190196
snippet_files=set(),
191197
cross_blocks=set(),
192198
examples={},
199+
fs=self.fs,
193200
)
194201

195202
def for_root(
@@ -199,7 +206,7 @@ def for_root(
199206

200207
config = config or Path(__file__).parent / "config"
201208

202-
doc_gen = DocGen.empty()
209+
doc_gen = DocGen.empty(fs=self.fs)
203210
parse_config(doc_gen, root, config, self.validation.strict_titles)
204211
self.merge(doc_gen)
205212

@@ -209,31 +216,31 @@ def for_root(
209216
return self
210217

211218
def find_and_process_metadata(self, metadata_path: Path):
212-
for path in metadata_path.glob("*_metadata.yaml"):
219+
for path in self.fs.glob(metadata_path, "*_metadata.yaml"):
213220
self.process_metadata(path)
214221

215222
def process_metadata(self, path: Path) -> "DocGen":
216223
if path in self._loaded:
217224
return self
218225
try:
219-
with open(path) as file:
220-
examples, errs = parse_examples(
221-
path,
222-
yaml.safe_load(file),
223-
self.sdks,
224-
self.services,
225-
self.standard_categories,
226-
self.cross_blocks,
227-
self.validation,
228-
)
229-
self.extend_examples(examples, self.errors)
230-
self.errors.extend(errs)
231-
for example in examples:
232-
for lang in example.languages:
233-
language = example.languages[lang]
234-
for version in language.versions:
235-
for excerpt in version.excerpts:
236-
self.snippet_files.update(excerpt.snippet_files)
226+
content = self.fs.read(path)
227+
examples, errs = parse_examples(
228+
path,
229+
yaml.safe_load(content),
230+
self.sdks,
231+
self.services,
232+
self.standard_categories,
233+
self.cross_blocks,
234+
self.validation,
235+
)
236+
self.extend_examples(examples, self.errors)
237+
self.errors.extend(errs)
238+
for example in examples:
239+
for lang in example.languages:
240+
language = example.languages[lang]
241+
for version in language.versions:
242+
for excerpt in version.excerpts:
243+
self.snippet_files.update(excerpt.snippet_files)
237244
self._loaded.add(path)
238245
except ParserError as e:
239246
self.errors.append(YamlParseError(file=path, parser_error=str(e)))
@@ -246,8 +253,9 @@ def from_root(
246253
config: Optional[Path] = None,
247254
validation: ValidationConfig = ValidationConfig(),
248255
incremental: bool = False,
256+
fs: Fs = PathFs(),
249257
) -> "DocGen":
250-
return DocGen.empty(validation=validation).for_root(
258+
return DocGen.empty(validation=validation, fs=fs).for_root(
251259
root, config, incremental=incremental
252260
)
253261

@@ -348,6 +356,10 @@ def default(self, obj):
348356
"__entity_errors__": [{error.entity: error.message()} for error in obj]
349357
}
350358

359+
if isinstance(obj, Fs):
360+
# Don't serialize filesystem objects for security
361+
return {}
362+
351363
if isinstance(obj, set):
352364
return {"__set__": list(obj)}
353365

@@ -356,64 +368,63 @@ def default(self, obj):
356368

357369
def parse_config(doc_gen: DocGen, root: Path, config: Path, strict: bool):
358370
try:
359-
with open(root / ".doc_gen" / "validation.yaml", encoding="utf-8") as file:
360-
validation = yaml.safe_load(file)
361-
validation = validation or {}
362-
doc_gen.validation.allow_list.update(validation.get("allow_list", []))
363-
doc_gen.validation.sample_files.update(validation.get("sample_files", []))
371+
content = doc_gen.fs.read(root / ".doc_gen" / "validation.yaml")
372+
validation = yaml.safe_load(content)
373+
validation = validation or {}
374+
doc_gen.validation.allow_list.update(validation.get("allow_list", []))
375+
doc_gen.validation.sample_files.update(validation.get("sample_files", []))
364376
except Exception:
365377
pass
366378

367379
try:
368380
sdk_path = config / "sdks.yaml"
369-
with sdk_path.open(encoding="utf-8") as file:
370-
meta = yaml.safe_load(file)
371-
sdks, errs = parse_sdks(sdk_path, meta, strict)
372-
doc_gen.sdks = sdks
373-
doc_gen.errors.extend(errs)
381+
content = doc_gen.fs.read(sdk_path)
382+
meta = yaml.safe_load(content)
383+
sdks, errs = parse_sdks(sdk_path, meta, strict)
384+
doc_gen.sdks = sdks
385+
doc_gen.errors.extend(errs)
374386
except Exception:
375387
pass
376388

377389
try:
378390
services_path = config / "services.yaml"
379-
with services_path.open(encoding="utf-8") as file:
380-
meta = yaml.safe_load(file)
381-
services, service_errors = parse_services(services_path, meta)
382-
doc_gen.services = services
383-
for service in doc_gen.services.values():
384-
if service.expanded:
385-
doc_gen.entities[service.long] = service.expanded.long
386-
doc_gen.entities[service.short] = service.expanded.short
387-
doc_gen.errors.extend(service_errors)
391+
content = doc_gen.fs.read(services_path)
392+
meta = yaml.safe_load(content)
393+
services, service_errors = parse_services(services_path, meta)
394+
doc_gen.services = services
395+
for service in doc_gen.services.values():
396+
if service.expanded:
397+
doc_gen.entities[service.long] = service.expanded.long
398+
doc_gen.entities[service.short] = service.expanded.short
399+
doc_gen.errors.extend(service_errors)
388400
except Exception:
389401
pass
390402

391403
try:
392404
categories_path = config / "categories.yaml"
393-
with categories_path.open(encoding="utf-8") as file:
394-
meta = yaml.safe_load(file)
395-
standard_categories, categories, errs = parse_categories(
396-
categories_path, meta
397-
)
398-
doc_gen.standard_categories = standard_categories
399-
doc_gen.categories = categories
400-
doc_gen.errors.extend(errs)
405+
content = doc_gen.fs.read(categories_path)
406+
meta = yaml.safe_load(content)
407+
standard_categories, categories, errs = parse_categories(categories_path, meta)
408+
doc_gen.standard_categories = standard_categories
409+
doc_gen.categories = categories
410+
doc_gen.errors.extend(errs)
401411
except Exception:
402412
pass
403413

404414
try:
405415
entities_config_path = config / "entities.yaml"
406-
with entities_config_path.open(encoding="utf-8") as file:
407-
entities_config = yaml.safe_load(file)
416+
content = doc_gen.fs.read(entities_config_path)
417+
entities_config = yaml.safe_load(content)
408418
for entity, expanded in entities_config["expanded_override"].items():
409419
doc_gen.entities[entity] = expanded
410420
except Exception:
411421
pass
412422

413423
metadata = root / ".doc_gen/metadata"
414424
try:
425+
cross_content_path = metadata.parent / "cross-content"
415426
doc_gen.cross_blocks = set(
416-
[path.name for path in (metadata.parent / "cross-content").glob("*.xml")]
427+
[path.name for path in doc_gen.fs.glob(cross_content_path, "*.xml")]
417428
)
418429
except Exception:
419430
pass

aws_doc_sdk_examples_tools/doc_gen_test.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@
1616
from .sdks import Sdk, SdkVersion
1717
from .services import Service, ServiceExpanded
1818
from .snippets import Snippet
19+
from .fs import PathFs
20+
21+
SHARED_FS = PathFs()
1922

2023

2124
@pytest.mark.parametrize(
@@ -24,6 +27,7 @@
2427
(
2528
DocGen(
2629
root=Path("/a"),
30+
fs=SHARED_FS,
2731
errors=MetadataErrors(),
2832
sdks={
2933
"a": Sdk(
@@ -43,6 +47,7 @@
4347
),
4448
DocGen(
4549
root=Path("/b"),
50+
fs=SHARED_FS,
4651
errors=MetadataErrors(),
4752
sdks={
4853
"b": Sdk(
@@ -62,6 +67,7 @@
6267
),
6368
DocGen(
6469
root=Path("/a"),
70+
fs=SHARED_FS,
6571
errors=MetadataErrors(),
6672
sdks={
6773
"a": Sdk(

aws_doc_sdk_examples_tools/file_utils.py

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from shutil import rmtree
99

1010
from pathspec import GitIgnoreSpec
11+
from aws_doc_sdk_examples_tools.fs import Fs, PathFs
1112

1213

1314
def match_path_to_specs(path: Path, specs: List[GitIgnoreSpec]) -> bool:
@@ -21,7 +22,7 @@ def match_path_to_specs(path: Path, specs: List[GitIgnoreSpec]) -> bool:
2122

2223

2324
def walk_with_gitignore(
24-
root: Path, specs: List[GitIgnoreSpec] = []
25+
root: Path, specs: List[GitIgnoreSpec] = [], fs: Fs = PathFs()
2526
) -> Generator[Path, None, None]:
2627
"""
2728
Starting from a root directory, walk the file system yielding a path for each file.
@@ -30,27 +31,31 @@ def walk_with_gitignore(
3031
fiddling with a number of flags.
3132
"""
3233
gitignore = root / ".gitignore"
33-
if gitignore.exists():
34-
with open(root / ".gitignore", "r", encoding="utf-8") as ignore_file:
35-
specs = [*specs, GitIgnoreSpec.from_lines(ignore_file.readlines())]
36-
for entry in os.scandir(root):
37-
path = Path(entry.path)
34+
gitignore_stat = fs.stat(gitignore)
35+
if gitignore_stat.exists:
36+
lines = fs.readlines(gitignore)
37+
specs = [*specs, GitIgnoreSpec.from_lines(lines)]
38+
39+
for path in fs.list(root):
3840
if not match_path_to_specs(path, specs):
39-
if entry.is_dir():
40-
yield from walk_with_gitignore(path, specs)
41+
path_stat = fs.stat(path)
42+
if path_stat.is_dir:
43+
yield from walk_with_gitignore(path, specs, fs)
4144
else:
42-
yield path
45+
# Don't yield .gitignore files themselves
46+
if path.name != ".gitignore":
47+
yield path
4348

4449

4550
def get_files(
46-
root: Path, skip: Callable[[Path], bool] = lambda _: False
51+
root: Path, skip: Callable[[Path], bool] = lambda _: False, fs: Fs = PathFs()
4752
) -> Generator[Path, None, None]:
4853
"""
4954
Yield non-skipped files, that is, anything not matching git ls-files and not
5055
in the "to skip" files that are in git but are machine generated, so we don't
5156
want to validate them.
5257
"""
53-
for path in walk_with_gitignore(root):
58+
for path in walk_with_gitignore(root, fs=fs):
5459
if not skip(path):
5560
yield path
5661

0 commit comments

Comments
 (0)