Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 64 additions & 53 deletions aws_doc_sdk_examples_tools/doc_gen.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
# from os import glob

from .categories import Category, parse as parse_categories
from .fs import Fs, PathFs
from .metadata import (
Example,
DocFilenames,
Expand Down Expand Up @@ -55,6 +56,7 @@ class DocGenMergeWarning(MetadataError):
class DocGen:
root: Path
errors: MetadataErrors
fs: Fs = field(default_factory=PathFs)
entities: Dict[str, str] = field(default_factory=dict)
prefix: Optional[str] = None
validation: ValidationConfig = field(default_factory=ValidationConfig)
Expand Down Expand Up @@ -171,8 +173,12 @@ def extend_examples(self, examples: Iterable[Example], errors: MetadataErrors):
self.examples[id] = example

@classmethod
def empty(
    cls, validation: ValidationConfig = ValidationConfig(), fs: Fs = PathFs()
) -> "DocGen":
    """Create a DocGen rooted at "/" with a fresh, empty MetadataErrors.

    Args:
        validation: Validation settings stored on the new instance.
        fs: Filesystem abstraction stored on the new instance and used for
            subsequent reads and globs.

    Returns:
        A new DocGen built from the given validation config and filesystem.
    """
    # NOTE(review): both defaults are evaluated once, when the method is
    # defined, so every call that omits them receives the same
    # ValidationConfig / PathFs object. parse_config() updates
    # validation.allow_list and validation.sample_files in place, so that
    # state may be shared between instances — confirm this is intended.
    return DocGen(
        root=Path("/"), errors=MetadataErrors(), validation=validation, fs=fs
    )

@classmethod
def default(cls) -> "DocGen":
Expand All @@ -190,6 +196,7 @@ def clone(self) -> "DocGen":
snippet_files=set(),
cross_blocks=set(),
examples={},
fs=self.fs,
)

def for_root(
Expand All @@ -199,7 +206,7 @@ def for_root(

config = config or Path(__file__).parent / "config"

doc_gen = DocGen.empty()
doc_gen = DocGen.empty(fs=self.fs)
parse_config(doc_gen, root, config, self.validation.strict_titles)
self.merge(doc_gen)

Expand All @@ -209,31 +216,31 @@ def for_root(
return self

def find_and_process_metadata(self, metadata_path: Path):
    """Process every `*_metadata.yaml` file found directly under `metadata_path`.

    Files are discovered through `self.fs.glob` and each one is handed to
    `self.process_metadata`, in the order the filesystem returns them.

    Args:
        metadata_path: Directory to search for metadata files.
    """
    for metadata_file in self.fs.glob(metadata_path, "*_metadata.yaml"):
        self.process_metadata(metadata_file)

def process_metadata(self, path: Path) -> "DocGen":
if path in self._loaded:
return self
try:
with open(path) as file:
examples, errs = parse_examples(
path,
yaml.safe_load(file),
self.sdks,
self.services,
self.standard_categories,
self.cross_blocks,
self.validation,
)
self.extend_examples(examples, self.errors)
self.errors.extend(errs)
for example in examples:
for lang in example.languages:
language = example.languages[lang]
for version in language.versions:
for excerpt in version.excerpts:
self.snippet_files.update(excerpt.snippet_files)
content = self.fs.read(path)
examples, errs = parse_examples(
path,
yaml.safe_load(content),
self.sdks,
self.services,
self.standard_categories,
self.cross_blocks,
self.validation,
)
self.extend_examples(examples, self.errors)
self.errors.extend(errs)
for example in examples:
for lang in example.languages:
language = example.languages[lang]
for version in language.versions:
for excerpt in version.excerpts:
self.snippet_files.update(excerpt.snippet_files)
self._loaded.add(path)
except ParserError as e:
self.errors.append(YamlParseError(file=path, parser_error=str(e)))
Expand All @@ -246,8 +253,9 @@ def from_root(
config: Optional[Path] = None,
validation: ValidationConfig = ValidationConfig(),
incremental: bool = False,
fs: Fs = PathFs(),
) -> "DocGen":
return DocGen.empty(validation=validation).for_root(
return DocGen.empty(validation=validation, fs=fs).for_root(
root, config, incremental=incremental
)

Expand Down Expand Up @@ -348,6 +356,10 @@ def default(self, obj):
"__entity_errors__": [{error.entity: error.message()} for error in obj]
}

if isinstance(obj, Fs):
# Don't serialize filesystem objects for security
return {}

if isinstance(obj, set):
return {"__set__": list(obj)}

Expand All @@ -356,64 +368,63 @@ def default(self, obj):

def parse_config(doc_gen: DocGen, root: Path, config: Path, strict: bool):
try:
with open(root / ".doc_gen" / "validation.yaml", encoding="utf-8") as file:
validation = yaml.safe_load(file)
validation = validation or {}
doc_gen.validation.allow_list.update(validation.get("allow_list", []))
doc_gen.validation.sample_files.update(validation.get("sample_files", []))
content = doc_gen.fs.read(root / ".doc_gen" / "validation.yaml")
validation = yaml.safe_load(content)
validation = validation or {}
doc_gen.validation.allow_list.update(validation.get("allow_list", []))
doc_gen.validation.sample_files.update(validation.get("sample_files", []))
except Exception:
pass

try:
sdk_path = config / "sdks.yaml"
with sdk_path.open(encoding="utf-8") as file:
meta = yaml.safe_load(file)
sdks, errs = parse_sdks(sdk_path, meta, strict)
doc_gen.sdks = sdks
doc_gen.errors.extend(errs)
content = doc_gen.fs.read(sdk_path)
meta = yaml.safe_load(content)
sdks, errs = parse_sdks(sdk_path, meta, strict)
doc_gen.sdks = sdks
doc_gen.errors.extend(errs)
except Exception:
pass

try:
services_path = config / "services.yaml"
with services_path.open(encoding="utf-8") as file:
meta = yaml.safe_load(file)
services, service_errors = parse_services(services_path, meta)
doc_gen.services = services
for service in doc_gen.services.values():
if service.expanded:
doc_gen.entities[service.long] = service.expanded.long
doc_gen.entities[service.short] = service.expanded.short
doc_gen.errors.extend(service_errors)
content = doc_gen.fs.read(services_path)
meta = yaml.safe_load(content)
services, service_errors = parse_services(services_path, meta)
doc_gen.services = services
for service in doc_gen.services.values():
if service.expanded:
doc_gen.entities[service.long] = service.expanded.long
doc_gen.entities[service.short] = service.expanded.short
doc_gen.errors.extend(service_errors)
except Exception:
pass

try:
categories_path = config / "categories.yaml"
with categories_path.open(encoding="utf-8") as file:
meta = yaml.safe_load(file)
standard_categories, categories, errs = parse_categories(
categories_path, meta
)
doc_gen.standard_categories = standard_categories
doc_gen.categories = categories
doc_gen.errors.extend(errs)
content = doc_gen.fs.read(categories_path)
meta = yaml.safe_load(content)
standard_categories, categories, errs = parse_categories(categories_path, meta)
doc_gen.standard_categories = standard_categories
doc_gen.categories = categories
doc_gen.errors.extend(errs)
except Exception:
pass

try:
entities_config_path = config / "entities.yaml"
with entities_config_path.open(encoding="utf-8") as file:
entities_config = yaml.safe_load(file)
content = doc_gen.fs.read(entities_config_path)
entities_config = yaml.safe_load(content)
for entity, expanded in entities_config["expanded_override"].items():
doc_gen.entities[entity] = expanded
except Exception:
pass

metadata = root / ".doc_gen/metadata"
try:
cross_content_path = metadata.parent / "cross-content"
doc_gen.cross_blocks = set(
[path.name for path in (metadata.parent / "cross-content").glob("*.xml")]
[path.name for path in doc_gen.fs.glob(cross_content_path, "*.xml")]
)
except Exception:
pass
Expand Down
6 changes: 6 additions & 0 deletions aws_doc_sdk_examples_tools/doc_gen_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@
from .sdks import Sdk, SdkVersion
from .services import Service, ServiceExpanded
from .snippets import Snippet
from .fs import PathFs

SHARED_FS = PathFs()


@pytest.mark.parametrize(
Expand All @@ -24,6 +27,7 @@
(
DocGen(
root=Path("/a"),
fs=SHARED_FS,
errors=MetadataErrors(),
sdks={
"a": Sdk(
Expand All @@ -43,6 +47,7 @@
),
DocGen(
root=Path("/b"),
fs=SHARED_FS,
errors=MetadataErrors(),
sdks={
"b": Sdk(
Expand All @@ -62,6 +67,7 @@
),
DocGen(
root=Path("/a"),
fs=SHARED_FS,
errors=MetadataErrors(),
sdks={
"a": Sdk(
Expand Down
27 changes: 16 additions & 11 deletions aws_doc_sdk_examples_tools/file_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from shutil import rmtree

from pathspec import GitIgnoreSpec
from aws_doc_sdk_examples_tools.fs import Fs, PathFs


def match_path_to_specs(path: Path, specs: List[GitIgnoreSpec]) -> bool:
Expand All @@ -21,7 +22,7 @@ def match_path_to_specs(path: Path, specs: List[GitIgnoreSpec]) -> bool:


def walk_with_gitignore(
    root: Path, specs: List[GitIgnoreSpec] = [], fs: Fs = PathFs()
) -> Generator[Path, None, None]:
    """
    Starting from a root directory, walk the file system yielding a path for each file.

    If `root` contains a `.gitignore`, its lines are read through `fs` and added
    to the specs applied to `root` and everything below it. Entries for which
    `match_path_to_specs` returns true are skipped, directories are walked
    recursively, and `.gitignore` files themselves are never yielded.

    Args:
        root: Directory to walk.
        specs: Ignore specs inherited from parent directories.
        fs: Filesystem abstraction used for stat, readlines and list.

    Yields:
        The path of each file that was not ignored.
    """
    gitignore = root / ".gitignore"
    if fs.stat(gitignore).exists:
        # Build a new list instead of appending, so neither the caller's list
        # nor the shared default `[]` is ever mutated.
        specs = [*specs, GitIgnoreSpec.from_lines(fs.readlines(gitignore))]

    for path in fs.list(root):
        if match_path_to_specs(path, specs):
            continue
        if fs.stat(path).is_dir:
            yield from walk_with_gitignore(path, specs, fs)
        elif path.name != ".gitignore":
            # Don't yield .gitignore files themselves
            yield path


def get_files(
    root: Path, skip: Callable[[Path], bool] = lambda _: False, fs: Fs = PathFs()
) -> Generator[Path, None, None]:
    """
    Yield the non-skipped files under `root`: every path produced by
    `walk_with_gitignore` for which `skip` returns false. `skip` is meant for
    files that are in git but are machine generated, so we don't want to
    validate them.

    Args:
        root: Directory to walk.
        skip: Predicate returning true for paths to leave out; the default
            skips nothing.
        fs: Filesystem abstraction forwarded to `walk_with_gitignore`.

    Yields:
        Each walked path that `skip` did not reject.
    """
    for path in walk_with_gitignore(root, fs=fs):
        if not skip(path):
            yield path

Expand Down
Loading
Loading