diff --git a/aws_doc_sdk_examples_tools/doc_gen_test.py b/aws_doc_sdk_examples_tools/doc_gen_test.py
index 2ff205e..6e6aa72 100644
--- a/aws_doc_sdk_examples_tools/doc_gen_test.py
+++ b/aws_doc_sdk_examples_tools/doc_gen_test.py
@@ -12,7 +12,12 @@
from .categories import Category, TitleInfo
from .doc_gen import DocGen, DocGenEncoder
from .metadata import Example
-from .metadata_errors import MetadataErrors, MetadataError, UnknownLanguage
+from .metadata_errors import (
+ MetadataErrors,
+ MetadataError,
+ UnknownLanguage,
+ ParseXMLError,
+)
from .sdks import Sdk, SdkVersion
from .services import Service, ServiceExpanded
from .snippets import Snippet
@@ -304,3 +309,19 @@ def test_language_not_in_sdks():
)
doc_gen.process_metadata(doc_gen.root / "bad_language_example.yaml")
assert isinstance(doc_gen.errors[0], UnknownLanguage)
+
+
+def test_invalid_xml():
+ """Check that a metadata file with invalid XML yields a ParseXMLError.
+
+ Processes the invalid_xml_metadata.yaml fixture from test_resources and
+ inspects the first error that DocGen collected.
+ """
+ errors = MetadataErrors()
+ doc_gen = DocGen(Path(), errors).for_root(
+ Path(__file__).parent / "test_resources", incremental=False
+ )
+ doc_gen.process_metadata(doc_gen.root / "invalid_xml_metadata.yaml")
+ # Guard first so the unpacking below cannot fail on an empty collection.
+ assert doc_gen.errors
+ first_error, *_ = doc_gen.errors
+ assert isinstance(first_error, ParseXMLError)
+ # value is expected to hold the text that failed to parse.
+ assert (
+ first_error.value
+ == "Certain characters like < are invalid"
+ )
+ # message() is expected to be the XML parser's own error text.
+ assert first_error.message() == "not well-formed (invalid token): line 1, column 37"
diff --git a/aws_doc_sdk_examples_tools/metadata_errors.py b/aws_doc_sdk_examples_tools/metadata_errors.py
index 7f1e924..59534d5 100644
--- a/aws_doc_sdk_examples_tools/metadata_errors.py
+++ b/aws_doc_sdk_examples_tools/metadata_errors.py
@@ -6,7 +6,16 @@
import re
from dataclasses import dataclass, field
from pathlib import Path
-from typing import Optional, Iterator, Iterable, List, TypeVar, Generic, Dict, Set
+from typing import (
+ Optional,
+ Iterator,
+ Iterable,
+ List,
+ TypeVar,
+ Generic,
+ Dict,
+ Set,
+)
ErrorT = TypeVar("ErrorT")
@@ -108,6 +117,15 @@ def message(self) -> str:
)
+@dataclass
+class ParseXMLError(MetadataError):
+ """Metadata error recorded when a field's text fails XML parsing."""
+
+ # Error text reported by the XML parser; None when not supplied.
+ xml_err_message: Optional[str] = field(default=None)
+ # The text that failed to parse; None when not supplied.
+ value: Optional[str] = field(default=None)
+
+ def message(self) -> str:
+ """Return the XML parser's error text as a string.
+
+ NOTE: because the value is formatted through an f-string, an unset
+ xml_err_message is rendered as the literal string "None".
+ """
+ return f"{self.xml_err_message}"
+
+
@dataclass
class MetadataParseError(MetadataError):
id: Optional[str] = None
diff --git a/aws_doc_sdk_examples_tools/metadata_validator.py b/aws_doc_sdk_examples_tools/metadata_validator.py
index 5c91198..81f00e5 100755
--- a/aws_doc_sdk_examples_tools/metadata_validator.py
+++ b/aws_doc_sdk_examples_tools/metadata_validator.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python3
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
@@ -18,6 +17,7 @@
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Set
+from xml.etree.ElementTree import ParseError
import yamale # type: ignore
from yamale import YamaleError # type: ignore
@@ -29,6 +29,12 @@
)
+class ElementTreeParseError(ParseError):
+ """ParseError that also carries the raw text that failed to parse."""
+
+ def __init__(self, message: str, raw: str):
+ # message is forwarded to ParseError unchanged; raw is stored on the
+ # instance so a handler can report the offending text.
+ super().__init__(message)
+ self.raw = raw
+
+
class SdkVersion(Validator):
"""Validate that sdk version appears in sdks.yaml."""
@@ -163,11 +169,11 @@ def _validate_aws_entity_usage(value: str) -> bool:
If these counts differ, there's an invalid usage.
"""
xval = value.replace("&", "&")
+ xml_str = f"{xval}"
try:
- xtree = xml_tree.fromstring(f"{xval}")
- except Exception as e:
- print(xval)
- raise e
+ xtree = xml_tree.fromstring(xml_str)
+ except ParseError as e:
+ raise ElementTreeParseError(message=f"{e}", raw=xml_str) from e
blocks = (
xtree.findall(".//programlisting")
+ xtree.findall(".//code")
diff --git a/aws_doc_sdk_examples_tools/test_resources/invalid_xml_metadata.yaml b/aws_doc_sdk_examples_tools/test_resources/invalid_xml_metadata.yaml
new file mode 100644
index 0000000..451b1bd
--- /dev/null
+++ b/aws_doc_sdk_examples_tools/test_resources/invalid_xml_metadata.yaml
@@ -0,0 +1,39 @@
+medical-imaging_TestExample:
+ title: Certain characters like < are invalid
+ title_abbrev: Check whether a phone number is opted out
+ synopsis: check whether a phone number is opted out using some of the &AWS; SDKs that are available.
+ synopsis_list:
+ - Check the one thing.
+ - Do some other thing.
+ category: Usage
+ guide_topic:
+ title: Test guide topic title
+ url: test-guide/url
+ languages:
+ Java:
+ versions:
+ - sdk_version: 2
+ github: test_path
+ block_content: test block
+ JavaScript:
+ versions:
+ - sdk_version: 3
+ github_note_at_bottom: true
+ excerpts:
+ - description: Descriptive
+ genai: some
+ snippet_tags:
+ - medical-imaging.JavaScript.datastore.createDatastoreV3
+ PHP:
+ versions:
+ - sdk_version: 3
+ sdkguide: php/sdkguide/link
+ excerpts:
+ - description: Optional description.
+ snippet_tags:
+ - php.snippet.tag.1
+ - php.snippet.tag.2
+ snippet_files:
+ - snippet_file.txt
+ services:
+ medical-imaging:
diff --git a/aws_doc_sdk_examples_tools/yaml_mapper.py b/aws_doc_sdk_examples_tools/yaml_mapper.py
index d2bbef3..9db1166 100644
--- a/aws_doc_sdk_examples_tools/yaml_mapper.py
+++ b/aws_doc_sdk_examples_tools/yaml_mapper.py
@@ -16,7 +16,7 @@
from . import metadata_errors
from .metadata_errors import MetadataErrors, DuplicateItemException, MetadataParseError
from .project_validator import ValidationConfig
-from .metadata_validator import StringExtension
+from .metadata_validator import StringExtension, ElementTreeParseError
CATEGORY_REQUIRED_FIELDS = {"IAMPolicy": {"version": {"authors", "owner", "source"}}}
@@ -156,13 +156,23 @@ def get_field(
return ""
checker = StringExtension(check_aws=check_aws)
- if not checker.is_valid(field):
+ try:
+ if not checker.is_valid(field):
+ errors.append(
+ metadata_errors.AwsNotEntity(
+ field=name, value=field, check_err=checker.get_name()
+ )
+ )
+ return ""
+ except ElementTreeParseError as e:
errors.append(
- metadata_errors.AwsNotEntity(
- field=name, value=field, check_err=checker.get_name()
+ metadata_errors.ParseXMLError(
+ xml_err_message=e.msg,
+ value=e.raw,
)
)
return ""
+
return field