Skip to content

Commit 58ecee6

Browse files
authored
Validator: Re-enable yamale validator. (#124)
* Re-enable the yamale validator.
* Update the relaxed schema and use it for tributaries; the strict_titles argument controls whether the strict schema is applied.
* Allow bare AWS within code-style XML tags.
* Write unit tests for yamale validation.
1 parent 7425df6 commit 58ecee6

File tree

8 files changed

+153
-62
lines changed

8 files changed

+153
-62
lines changed
Lines changed: 19 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,60 +1,43 @@
11
# Yamale Schema for example metadata, which is all .yaml files in the metadata folder
22
# with a _metadata.yaml suffix.
33

4-
map(include('example'), key=example_id())
4+
map(include('example'), key=str())
55
---
6-
# An example blocks all the languages together for a single example in a tab list. It is a navigable page on the code examples library. It is the top level "unit" of SoS content. This metadata is merged from tributaries with aws-doc-sdk-examples.
76
example:
8-
# Human readable title. TODO: Defaults to slug-to-title of the ID if not provided. Overwritten by aws-doc-sdk-example when merging.
9-
title: str(upper_start=True, no_end_punc=True, required=False)
10-
# Used in the TOC. TODO: Defaults to slug-to-title of the ID if not provided. Overwritten by aws-doc-sdk-example when merging.
11-
title_abbrev: str(upper_start=True, no_end_punc=True, required=False)
12-
# String label categories. Categories inferred by cross-service with multiple services, and can be whatever else it wants. Controls where in the TOC it appears. Overwritten by aws-doc-sdk-example when merging.
13-
category: str(upper_start=True, no_end_punc=True, required=False)
14-
# Link to additional topic places. Overwritten by aws-doc-sdk-example when merging.
15-
guide_topic: include('guide_topic', required=False) # TODO Make this a list or a single.
16-
# TODO how to add a language here and require it in sdks_schema. TODO: Keys merged by aws-doc-sdk-example when merging.
17-
languages: map(include('language'), key=enum('Bash', 'C++', 'CLI', 'Go', 'Java', 'JavaScript', 'Kotlin', '.NET', 'PHP', 'Python', 'Ruby', 'Rust', 'SAP ABAP', 'Swift'))
18-
# TODO document service_main and services. Not to be used by tributaries. Part of Cross Service.
19-
# List of services used by the examples. Lines up with those in services.yaml. Overwritten by aws-doc-sdk-example when merging.
7+
title: str(required=False, upper_start=True, no_end_punc=True)
8+
title_abbrev: str(required=False, upper_start=True, no_end_punc=True)
9+
synopsis: str(required=False)
10+
synopsis_list: list(str(upper_start=True), required=False)
11+
category: str(required=False, upper_start=True, no_end_punc=True)
12+
guide_topic: include('guide_topic', required=False)
13+
languages: map(include('language'), key=enum('Bash', 'C++', 'CLI', 'Go', 'Java', 'JavaScript', 'Kotlin', '.NET', 'PHP', 'PowerShell', 'Python', 'Ruby', 'Rust', 'SAP ABAP', 'Swift'))
2014
service_main: service_name(required=False)
21-
services: map(map(key=str(), required=False), key=service_name())
22-
synopsis: str(required=False, lower_start=True, end_punc_or_semicolon=True, required=False)
23-
synopsis_list: list(str(upper_start=True, end_punc=True), required=False)
15+
services: map(key=service_name())
2416

25-
# Used for creating links in the block.
2617
guide_topic:
2718
title: str(upper_start=True, no_end_punc=True)
2819
url: include('doc_url', required=False)
2920

30-
# Language Version configuration. Likely just the single list item.
3121
language:
3222
versions: list(include('version'))
3323

34-
# Example for a single Language.
24+
# Per-language excerpts for the example. Languages and SDK versions are defined in .doc_gen/metadata/sdk_metadata.yaml
3525
version:
36-
sdk_version: int(min=1)
37-
# Additional ZonBook XML to include in the tab for this sample.
38-
block_content: block_content(required=False)
39-
# The specific code samples to include in the example.
40-
excerpts: list(include('excerpt'), required=False)
41-
# Link to the source code for this example. TODO rename.
26+
sdk_version: any(int(min=1), str(check_aws=False))
4227
github: str(required=False)
28+
github_name: str(required=False)
4329
github_note_at_bottom: bool(required=False)
44-
add_services: map(key=service_name(), required=False)
45-
# Deprecated. Replace with guide_topic list.
4630
sdkguide: include('doc_url', required=False)
47-
# Link to additional topic places. TODO: Overwritten by aws-doc-sdk-example when merging.
48-
more_info: list(include('guide_topic', required=False))
31+
excerpts: list(include('excerpt'), required=False)
32+
block_content: block_content(required=False)
33+
add_services: map(key=service_name(), required=False)
4934

50-
# One language example can have several excerpts, each having a description block and one or more snippets.
51-
# An excerpt may have either snippet_files OR snippet_tags, but not both.
35+
# The references to code content that will be included in the example's content.
5236
excerpt:
53-
description: str(required=False, upper_start=True, end_punc=True)
54-
# A path within the repo to extract the entire file as a snippet.
55-
snippet_files: list(str(), required=False)
56-
# Tags embedded in source files to extract as snippets.
37+
description: str(required=False)
38+
genai: enum('none', 'some', 'most', 'all', required=False)
5739
snippet_tags: list(str(), required=False)
40+
snippet_files: list(str(), required=False)
5841

5942
service_slug_regex: regex('^[-a-z0-9]+$', name='service slug')
6043
doc_url: regex('^(?!https://docs.aws.amazon.com/).+', name="relative documentation URL")

aws_doc_sdk_examples_tools/config/example_strict_schema.yaml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,13 @@
44
map(include('example'), key=example_id())
55
---
66
example:
7-
title: str(upper_start=True, no_end_punc=True)
8-
title_abbrev: str(upper_start=True, no_end_punc=True)
7+
title: str(required=False, upper_start=True, no_end_punc=True)
8+
title_abbrev: str(required=False, upper_start=True, no_end_punc=True)
99
synopsis: str(required=False, lower_start=True, end_punc_or_semicolon=True)
1010
synopsis_list: list(str(upper_start=True, end_punc=True), required=False)
1111
category: str(required=False, upper_start=True, no_end_punc=True)
1212
guide_topic: include('guide_topic', required=False)
13-
languages: map(include('language'), key=enum('Bash', 'C++', 'CLI', 'Go', 'Java', 'JavaScript', 'Kotlin', '.NET', 'PHP', 'Python', 'Ruby', 'Rust', 'SAP ABAP', 'Swift'))
13+
languages: map(include('language'), key=enum('Bash', 'C++', 'CLI', 'Go', 'Java', 'JavaScript', 'Kotlin', '.NET', 'PHP', 'PowerShell', 'Python', 'Ruby', 'Rust', 'SAP ABAP', 'Swift'))
1414
service_main: service_name(required=False)
1515
services: map(map(key=str(), required=False), key=service_name())
1616

@@ -34,6 +34,7 @@ version:
3434
# The references to code content that will be included in the example's content.
3535
excerpt:
3636
description: str(required=False, upper_start=True, end_punc=True)
37+
genai: enum('none', 'some', 'most', 'all', required=False)
3738
snippet_tags: list(str(), required=False)
3839
snippet_files: list(str(), required=False)
3940

aws_doc_sdk_examples_tools/config/sdks_schema.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Yamale Schema for SDK metadata, which is the sdks.yaml file in the metadata folder.
22

3-
map(include('sdk'), key=enum('Bash', 'C++', 'CLI', 'Go', 'Java', 'JavaScript', 'Kotlin', '.NET', 'PHP', 'Python', 'Ruby', 'Rust', 'SAP ABAP', 'Swift'))
3+
map(include('sdk'), key=enum('Bash', 'C++', 'CLI', 'Go', 'Java', 'JavaScript', 'Kotlin', '.NET', 'PHP', 'PowerShell', 'Python', 'Ruby', 'Rust', 'SAP ABAP', 'Swift'))
44
---
55
sdk:
66
property: include('syntax_enum')
@@ -30,6 +30,6 @@ title_override:
3030
title: str()
3131
title_abbrev: str()
3232

33-
syntax_enum: enum('bash', 'cli', 'none', 'cpp', 'go', 'java', 'javascript', 'kotlin', 'csharp', 'php', 'python', 'ruby', 'rust', 'sap-abap', 'sh', 'swift')
33+
syntax_enum: enum('bash', 'cli', 'none', 'cpp', 'go', 'java', 'javascript', 'kotlin', 'csharp', 'php', 'powershell', 'python', 'ruby', 'rust', 'sap-abap', 'sh', 'swift')
3434
entity_regex: regex('^&[-_a-zA-Z0-9]+;$', name='valid entity')
3535
entity_with_version_regex: regex('^&[-_a-zA-Z0-9]+;', name='valid entity with version')

aws_doc_sdk_examples_tools/config/services_schema.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
map(include('service'), key=regex('^[-a-z0-9]+$', name='service slug'))
44
---
55
service:
6-
long: include('long_entity_regex')
7-
short: include('entity_regex')
6+
long: str()
7+
short: str()
88
sort: regex('^[^&]\\w', name='non-entity')
99
chapter_override: include('chapter_override', required=False)
1010
expanded:
@@ -16,7 +16,7 @@ service:
1616
url: include('doc_url')
1717
api_client: service_name(required=False)
1818
api_ref: include('doc_url')
19-
version: service_version()
19+
version: str()
2020
caveat: str(required=False, upper_start=True, end_punc=True)
2121
bundle: service_name(required=False)
2222
tags: map(key=enum('product_categories'))

aws_doc_sdk_examples_tools/doc_gen.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -270,7 +270,7 @@ def validate(self):
270270
service.validate(self.errors)
271271
for example in self.examples.values():
272272
example.validate(self.errors, self.root)
273-
validate_metadata(self.root, self.errors)
273+
validate_metadata(self.root, self.validation.strict_titles, self.errors)
274274
validate_no_duplicate_api_examples(self.examples.values(), self.errors)
275275
validate_snippets(
276276
[*self.examples.values()],

aws_doc_sdk_examples_tools/metadata_validator.py

Lines changed: 55 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import datetime
1313
import os
1414
import re
15+
import xml.etree.ElementTree as xml_tree
1516
import yaml
1617
from dataclasses import dataclass, field
1718
from pathlib import Path
@@ -122,8 +123,7 @@ def _is_valid(self, value: str):
122123
return True
123124
valid = True
124125
if self.check_aws:
125-
# All occurrences of AWS must be entities or within a word.
126-
valid = len(re.findall("(?<![&0-9a-zA-Z])AWS(?![;0-9a-zA-Z])", value)) == 0
126+
valid = self._validate_aws_entity_usage(value)
127127
if not valid:
128128
self.last_err = 'valid string: it contains a non-entity usage of "AWS"'
129129
if valid and self.upper_start:
@@ -135,27 +135,51 @@ def _is_valid(self, value: str):
135135
if not valid:
136136
self.last_err = "valid string: it must start with a lowercase letter"
137137
if valid and self.end_punc:
138-
valid = value[-1] in "!.?"
138+
valid = value.rstrip()[-1] in "!.?"
139139
if not valid:
140140
self.last_err = "valid sentence or phrase: it must end with punctuation"
141141
if valid and self.no_end_punc:
142-
valid = value[-1] not in "!.?"
142+
valid = value.rstrip()[-1] not in "!.?"
143143
if not valid:
144144
self.last_err = "valid string: it must not end with punctuation"
145145
if valid and self.end_punc_or_colon:
146-
valid = value[-1] in "!.?:"
146+
valid = value.rstrip()[-1] in "!.?:"
147147
if not valid:
148148
self.last_err = (
149149
"valid sentence or phrase: it must end with punctuation or a colon"
150150
)
151151
if valid and self.end_punc_or_semicolon:
152-
valid = value[-1] in "!.?;"
152+
valid = value.rstrip()[-1] in "!.?;"
153153
if not valid:
154154
self.last_err = "valid sentence or phrase: it must end with punctuation or a semicolon"
155155
if valid:
156156
valid = super()._is_valid(value)
157157
return valid
158158

159+
@staticmethod
160+
def _validate_aws_entity_usage(value: str) -> bool:
161+
"""
162+
All occurrences of AWS must be entities or within a word or within a programlisting or code or noloc block.
163+
164+
Count all bare AWS occurrences within accepted XML tags.
165+
Count all bare AWS occurrences overall.
166+
If these counts differ, there's an invalid usage.
167+
"""
168+
xval = value.replace("&", "&amp;")
169+
xtree = xml_tree.fromstring(f"<fake><para>{xval}</para></fake>")
170+
blocks = (
171+
xtree.findall(".//programlisting")
172+
+ xtree.findall(".//code")
173+
+ xtree.findall(".//noloc")
174+
)
175+
aws_in_blocks = 0
176+
for element in blocks:
177+
aws_in_blocks += len(
178+
re.findall("(?<![&0-9a-zA-Z])AWS(?![;0-9a-zA-Z])", str(element.text))
179+
)
180+
aws_everywhere = len(re.findall("(?<![&0-9a-zA-Z])AWS(?![;0-9a-zA-Z])", value))
181+
return aws_everywhere == aws_in_blocks
182+
159183

160184
@dataclass
161185
class ValidateYamaleError(MetadataParseError):
@@ -169,6 +193,7 @@ def validate_files(
169193
schema_name: Path,
170194
meta_names: Iterable[Path],
171195
validators: Dict[str, Validator],
196+
strict: bool,
172197
errors: MetadataErrors,
173198
):
174199
"""Iterate a list of files and validate each one against a schema."""
@@ -177,14 +202,16 @@ def validate_files(
177202
for meta_name in meta_names:
178203
try:
179204
data = yamale.make_data(meta_name)
180-
yamale.validate(schema, data)
205+
yamale.validate(schema, data, strict=strict)
181206
print(f"{meta_name.resolve()} validation success! 👍")
182207
except YamaleError as e:
183208
errors.append(ValidateYamaleError(file=meta_name, yamale_error=e))
184209
return errors
185210

186211

187-
def validate_metadata(doc_gen_root: Path, errors: MetadataErrors) -> MetadataErrors:
212+
def validate_metadata(
213+
doc_gen_root: Path, strict: bool, errors: MetadataErrors
214+
) -> MetadataErrors:
188215
config = Path(__file__).parent / "config"
189216
with open(config / "sdks.yaml") as sdks_file:
190217
sdks_yaml: Dict[str, Any] = yaml.safe_load(sdks_file)
@@ -206,20 +233,28 @@ def validate_metadata(doc_gen_root: Path, errors: MetadataErrors) -> MetadataErr
206233
validators[BlockContent.tag] = BlockContent
207234
validators[String.tag] = StringExtension
208235

209-
schema_root = Path(__file__).parent / "config"
236+
config_root = Path(__file__).parent / "config"
237+
if strict:
238+
example_schema = "example_strict_schema.yaml"
239+
else:
240+
example_schema = "example_schema.yaml"
210241

211242
to_validate = [
212243
# (schema, metadata_glob)
213-
("sdks_schema.yaml", "sdks.yaml"),
214-
("services_schema.yaml", "services.yaml"),
215-
# TODO: Switch between strict schema for aws-doc-sdk-examples and loose schema for tributaries
216-
("example_strict_schema.yaml", "*_metadata.yaml"),
244+
(config_root / "sdks_schema.yaml", config_root, "sdks.yaml"),
245+
(config_root / "services_schema.yaml", config_root, "services.yaml"),
246+
(
247+
config_root / example_schema,
248+
doc_gen_root / ".doc_gen" / "metadata",
249+
"*_metadata.yaml",
250+
),
217251
]
218-
for schema, metadata in to_validate:
252+
for schema, meta_root, metadata in to_validate:
219253
validate_files(
220-
schema_root / schema,
221-
(doc_gen_root / "metadata").glob(metadata),
254+
schema,
255+
meta_root.glob(metadata),
222256
validators,
257+
strict,
223258
errors,
224259
)
225260

@@ -234,9 +269,12 @@ def main():
234269
help="The folder that contains schema and metadata files.",
235270
required=False,
236271
)
272+
parser.add_argument(
    "--strict",
    default=True,
    # argparse passes the raw command-line string, and any non-empty string
    # (including "False") is truthy. Convert it explicitly so that
    # `--strict false` selects the relaxed schema.
    type=lambda raw: raw.strip().lower() not in ("false", "0", "no", "off"),
    help="Use strict schema.",
    required=False,
)
237275
args = parser.parse_args()
238276

239-
errors = validate_metadata(Path(args.doc_gen), MetadataErrors())
277+
errors = validate_metadata(Path(args.doc_gen), args.strict, MetadataErrors())
240278

241279
if len(errors) == 0:
242280
print("Validation succeeded! 👍👍👍")
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
#!/usr/bin/env python3
2+
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
from pathlib import Path
6+
7+
import pytest
8+
9+
from .metadata_errors import MetadataErrors
10+
from .metadata_validator import validate_metadata
11+
12+
13+
@pytest.mark.parametrize("strict", [True, False])
14+
def test_aws_entity_usage(strict):
15+
errors = MetadataErrors()
16+
validate_metadata(
17+
Path(Path(__file__).parent / "test_resources/doc_gen_test"), strict, errors
18+
)
19+
20+
e_str = str(errors)
21+
assert "Title has AWS" in e_str
22+
assert "Title Abbrev has AWS" in e_str
23+
assert "Synopsis has AWS" in e_str
24+
assert "Synopsis list has AWS" in e_str
25+
assert "Description has AWS" in e_str
26+
27+
assert "Title has &AWS;" not in e_str
28+
assert "Title Abbrev has &AWS;" not in e_str
29+
assert "Synopsis programlisting has AWS" not in e_str
30+
assert "Synopsis list code has <code>AWS" not in e_str
31+
assert "Description programlisting has AWS" not in e_str
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
sns_EntityFailures:
2+
title: Title has AWS using an &AWS; SDK
3+
title_abbrev: Title Abbrev has AWS in it
4+
synopsis: "Synopsis has AWS in it."
5+
synopsis_list:
6+
- "Synopsis list has AWS in it."
7+
category: Cat
8+
languages:
9+
Java:
10+
versions:
11+
- sdk_version: 1
12+
github: java/example_code/svc_EntityFailures
13+
sdkguide:
14+
excerpts:
15+
- description: Description has AWS in it.
16+
snippet_tags:
17+
- java.example_code.svc_EntityFailures.Test
18+
services:
19+
sns:
20+
sns_EntitySuccesses:
21+
title: Title has &AWS; using an &AWS; SDK
22+
title_abbrev: Title Abbrev has &AWS; in it
23+
synopsis: "this <programlisting>Synopsis programlisting has AWS in it.</programlisting>."
24+
synopsis_list:
25+
- "Synopsis list code has <code>AWS</code> in it."
26+
category: Cat
27+
languages:
28+
Java:
29+
versions:
30+
- sdk_version: 1
31+
github: java/example_code/svc_EntityFailures
32+
sdkguide:
33+
excerpts:
34+
- description: This <emphasis><programlisting>Description programlisting has AWS in it</programlisting></emphasis> doesn't it.
35+
snippet_tags:
36+
- java.example_code.svc_EntityFailures.Test
37+
services:
38+
sns:

0 commit comments

Comments
 (0)