Skip to content

Commit d4bd910

Browse files
committed
Improve error handling around XML parsing.
1 parent acab725 commit d4bd910

File tree

5 files changed

+108
-10
lines changed

5 files changed

+108
-10
lines changed

aws_doc_sdk_examples_tools/doc_gen_test.py

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,12 @@
1212
from .categories import Category, TitleInfo
1313
from .doc_gen import DocGen, DocGenEncoder
1414
from .metadata import Example
15-
from .metadata_errors import MetadataErrors, MetadataError, UnknownLanguage
15+
from .metadata_errors import (
16+
MetadataErrors,
17+
MetadataError,
18+
UnknownLanguage,
19+
ParseXMLError,
20+
)
1621
from .sdks import Sdk, SdkVersion
1722
from .services import Service, ServiceExpanded
1823
from .snippets import Snippet
@@ -304,3 +309,22 @@ def test_language_not_in_sdks():
304309
)
305310
doc_gen.process_metadata(doc_gen.root / "bad_language_example.yaml")
306311
assert isinstance(doc_gen.errors[0], UnknownLanguage)
312+
313+
314+
def test_invalid_xml():
    """Metadata whose text is not well-formed XML is reported as a ParseXMLError.

    Processes the ``invalid_xml_metadata.yaml`` fixture and checks that the
    first recorded error carries both the XML string that was parsed and the
    parser's description of the failure.
    """
    resources = Path(__file__).parent / "test_resources"
    doc_gen = DocGen(Path(), MetadataErrors()).for_root(resources, incremental=False)

    doc_gen.process_metadata(doc_gen.root / "invalid_xml_metadata.yaml")

    # At least one error must have been collected before inspecting the first.
    assert doc_gen.errors
    first_error = next(iter(doc_gen.errors))
    assert isinstance(first_error, ParseXMLError)

    expected_value = "<fake><para>Certain characters like < are invalid</para></fake>"
    assert first_error.value == expected_value

    expected_message = "ParseError('not well-formed (invalid token): line 1, column 37')"
    assert first_error.message() == expected_message

aws_doc_sdk_examples_tools/metadata_errors.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,16 @@
66
import re
77
from dataclasses import dataclass, field
88
from pathlib import Path
9-
from typing import Optional, Iterator, Iterable, List, TypeVar, Generic, Dict, Set
9+
from typing import (
10+
Optional,
11+
Iterator,
12+
Iterable,
13+
List,
14+
TypeVar,
15+
Generic,
16+
Dict,
17+
Set,
18+
)
1019

1120

1221
ErrorT = TypeVar("ErrorT")
@@ -108,6 +117,15 @@ def message(self) -> str:
108117
)
109118

110119

120+
@dataclass
class ParseXMLError(MetadataError):
    """Metadata error recorded when a field's text cannot be parsed as XML."""

    # Description of the parse failure supplied by whoever records the error.
    xml_err_message: Optional[str] = field(default=None)
    # The XML string that failed to parse.
    value: Optional[str] = field(default=None)

    def message(self) -> str:
        """Return the parse failure description.

        Falls back to a generic description when no parser message was
        supplied, rather than reporting the literal text ``None``.
        """
        if self.xml_err_message is None:
            return "XML could not be parsed"
        return str(self.xml_err_message)
127+
128+
111129
@dataclass
112130
class MetadataParseError(MetadataError):
113131
id: Optional[str] = None

aws_doc_sdk_examples_tools/metadata_validator.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from dataclasses import dataclass, field
1919
from pathlib import Path
2020
from typing import Any, Dict, Iterable, List, Optional, Set
21+
from xml.etree.ElementTree import ParseError
2122

2223
import yamale # type: ignore
2324
from yamale import YamaleError # type: ignore
@@ -29,6 +30,12 @@
2930
)
3031

3132

33+
class ElementTreeParseError(ParseError):
    """A ``ParseError`` that also carries the XML text that failed to parse.

    Args:
        message: Description of the parse failure; exposed as ``msg`` and
            used as the exception's string form.
        raw: The XML string that was being parsed; exposed as ``raw``.
    """

    def __init__(self, message: str, raw: str):
        super().__init__(message)
        self.raw = raw

    def __reduce__(self):
        # Only ``message`` is stored in ``args``, so the default exception
        # reconstruction (used by copy and pickle) would call the constructor
        # without ``raw`` and fail with TypeError. Supply both explicitly.
        return (type(self), (self.msg, self.raw))
37+
38+
3239
class SdkVersion(Validator):
3340
"""Validate that sdk version appears in sdks.yaml."""
3441

@@ -163,11 +170,11 @@ def _validate_aws_entity_usage(value: str) -> bool:
163170
If these counts differ, there's an invalid usage.
164171
"""
165172
xval = value.replace("&", "&amp;")
173+
xml_str = f"<fake><para>{xval}</para></fake>"
166174
try:
167-
xtree = xml_tree.fromstring(f"<fake><para>{xval}</para></fake>")
168-
except Exception as e:
169-
print(xval)
170-
raise e
175+
xtree = xml_tree.fromstring(xml_str)
176+
except ParseError as e:
177+
raise ElementTreeParseError(message=repr(e), raw=xml_str)
171178
blocks = (
172179
xtree.findall(".//programlisting")
173180
+ xtree.findall(".//code")
aws_doc_sdk_examples_tools/test_resources/invalid_xml_metadata.yaml

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
medical-imaging_TestExample:
2+
title: Certain characters like < are invalid
3+
title_abbrev: Check whether a phone number is opted out
4+
synopsis: check whether a phone number is opted out using some of the &AWS; SDKs that are available.
5+
synopsis_list:
6+
- Check the one thing.
7+
- Do some other thing.
8+
category: Usage
9+
guide_topic:
10+
title: Test guide topic title
11+
url: test-guide/url
12+
languages:
13+
Java:
14+
versions:
15+
- sdk_version: 2
16+
github: test_path
17+
block_content: test block
18+
JavaScript:
19+
versions:
20+
- sdk_version: 3
21+
github_note_at_bottom: true
22+
excerpts:
23+
- description: Descriptive
24+
genai: some
25+
snippet_tags:
26+
- medical-imaging.JavaScript.datastore.createDatastoreV3
27+
PHP:
28+
versions:
29+
- sdk_version: 3
30+
sdkguide: php/sdkguide/link
31+
excerpts:
32+
- description: Optional description.
33+
snippet_tags:
34+
- php.snippet.tag.1
35+
- php.snippet.tag.2
36+
snippet_files:
37+
- snippet_file.txt
38+
services:
39+
medical-imaging:

aws_doc_sdk_examples_tools/yaml_mapper.py

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
from . import metadata_errors
1717
from .metadata_errors import MetadataErrors, DuplicateItemException, MetadataParseError
1818
from .project_validator import ValidationConfig
19-
from .metadata_validator import StringExtension
19+
from .metadata_validator import StringExtension, ElementTreeParseError
2020

2121

2222
CATEGORY_REQUIRED_FIELDS = {"IAMPolicy": {"version": {"authors", "owner", "source"}}}
@@ -156,13 +156,23 @@ def get_field(
156156
return ""
157157

158158
checker = StringExtension(check_aws=check_aws)
159-
if not checker.is_valid(field):
159+
try:
160+
if not checker.is_valid(field):
161+
errors.append(
162+
metadata_errors.AwsNotEntity(
163+
field=name, value=field, check_err=checker.get_name()
164+
)
165+
)
166+
return ""
167+
except ElementTreeParseError as e:
160168
errors.append(
161-
metadata_errors.AwsNotEntity(
162-
field=name, value=field, check_err=checker.get_name()
169+
metadata_errors.ParseXMLError(
170+
xml_err_message=e.msg,
171+
value=e.raw,
163172
)
164173
)
165174
return ""
175+
166176
return field
167177

168178

0 commit comments

Comments
 (0)