diff --git a/aws_doc_sdk_examples_tools/doc_gen_test.py b/aws_doc_sdk_examples_tools/doc_gen_test.py
index 2ff205e..6e6aa72 100644
--- a/aws_doc_sdk_examples_tools/doc_gen_test.py
+++ b/aws_doc_sdk_examples_tools/doc_gen_test.py
@@ -12,7 +12,12 @@
 from .categories import Category, TitleInfo
 from .doc_gen import DocGen, DocGenEncoder
 from .metadata import Example
-from .metadata_errors import MetadataErrors, MetadataError, UnknownLanguage
+from .metadata_errors import (
+    MetadataErrors,
+    MetadataError,
+    UnknownLanguage,
+    ParseXMLError,
+)
 from .sdks import Sdk, SdkVersion
 from .services import Service, ServiceExpanded
 from .snippets import Snippet
@@ -304,3 +309,19 @@ def test_language_not_in_sdks():
     )
     doc_gen.process_metadata(doc_gen.root / "bad_language_example.yaml")
     assert isinstance(doc_gen.errors[0], UnknownLanguage)
+
+
+def test_invalid_xml():
+    errors = MetadataErrors()
+    doc_gen = DocGen(Path(), errors).for_root(
+        Path(__file__).parent / "test_resources", incremental=False
+    )
+    doc_gen.process_metadata(doc_gen.root / "invalid_xml_metadata.yaml")
+    assert doc_gen.errors
+    first_error, *_ = doc_gen.errors
+    assert isinstance(first_error, ParseXMLError)
+    assert (
+        first_error.value
+        == "<fake><para>Certain characters like < are invalid</para></fake>"
+    )
+    assert first_error.message() == "not well-formed (invalid token): line 1, column 37"
diff --git a/aws_doc_sdk_examples_tools/metadata_errors.py b/aws_doc_sdk_examples_tools/metadata_errors.py
index 7f1e924..59534d5 100644
--- a/aws_doc_sdk_examples_tools/metadata_errors.py
+++ b/aws_doc_sdk_examples_tools/metadata_errors.py
@@ -6,7 +6,16 @@
 import re
 from dataclasses import dataclass, field
 from pathlib import Path
-from typing import Optional, Iterator, Iterable, List, TypeVar, Generic, Dict, Set
+from typing import (
+    Optional,
+    Iterator,
+    Iterable,
+    List,
+    TypeVar,
+    Generic,
+    Dict,
+    Set,
+)
 
 ErrorT = TypeVar("ErrorT")
 
@@ -108,6 +117,15 @@ def message(self) -> str:
         )
 
 
+@dataclass
+class ParseXMLError(MetadataError):
+    xml_err_message: Optional[str] = field(default=None)
+    value: Optional[str] = field(default=None)
+
+    def message(self) -> str:
+        return f"{self.xml_err_message}"
+
+
 @dataclass
 class MetadataParseError(MetadataError):
     id: Optional[str] = None
diff --git a/aws_doc_sdk_examples_tools/metadata_validator.py b/aws_doc_sdk_examples_tools/metadata_validator.py
index 5c91198..81f00e5 100755
--- a/aws_doc_sdk_examples_tools/metadata_validator.py
+++ b/aws_doc_sdk_examples_tools/metadata_validator.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python3
 # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 # SPDX-License-Identifier: Apache-2.0
 
@@ -18,6 +17,7 @@
 from dataclasses import dataclass, field
 from pathlib import Path
 from typing import Any, Dict, Iterable, List, Optional, Set
+from xml.etree.ElementTree import ParseError
 
 import yamale  # type: ignore
 from yamale import YamaleError  # type: ignore
@@ -29,6 +29,12 @@
 )
 
 
+class ElementTreeParseError(ParseError):
+    def __init__(self, message: str, raw: str):
+        super().__init__(message)
+        self.raw = raw
+
+
 class SdkVersion(Validator):
     """Validate that sdk version appears in sdks.yaml."""
 
@@ -163,11 +169,11 @@ def _validate_aws_entity_usage(value: str) -> bool:
         If these counts differ, there's an invalid usage.
         """
         xval = value.replace("&", "&amp;")
+        xml_str = f"<fake><para>{xval}</para></fake>"
         try:
-            xtree = xml_tree.fromstring(f"<fake><para>{xval}</para></fake>")
-        except Exception as e:
-            print(xval)
-            raise e
+            xtree = xml_tree.fromstring(xml_str)
+        except ParseError as e:
+            raise ElementTreeParseError(message=f"{e}", raw=xml_str) from e
         blocks = (
             xtree.findall(".//programlisting")
             + xtree.findall(".//code")
diff --git a/aws_doc_sdk_examples_tools/test_resources/invalid_xml_metadata.yaml b/aws_doc_sdk_examples_tools/test_resources/invalid_xml_metadata.yaml
new file mode 100644
index 0000000..451b1bd
--- /dev/null
+++ b/aws_doc_sdk_examples_tools/test_resources/invalid_xml_metadata.yaml
@@ -0,0 +1,39 @@
+medical-imaging_TestExample:
+  title: Certain characters like < are invalid
+  title_abbrev: Check whether a phone number is opted out
+  synopsis: check whether a phone number is opted out using some of the &AWS; SDKs that are available.
+  synopsis_list:
+    - Check the one thing.
+    - Do some other thing.
+  category: Usage
+  guide_topic:
+    title: Test guide topic title
+    url: test-guide/url
+  languages:
+    Java:
+      versions:
+        - sdk_version: 2
+          github: test_path
+          block_content: test block
+    JavaScript:
+      versions:
+        - sdk_version: 3
+          github_note_at_bottom: true
+          excerpts:
+            - description: Descriptive
+              genai: some
+              snippet_tags:
+                - medical-imaging.JavaScript.datastore.createDatastoreV3
+    PHP:
+      versions:
+        - sdk_version: 3
+          sdkguide: php/sdkguide/link
+          excerpts:
+            - description: Optional description.
+              snippet_tags:
+                - php.snippet.tag.1
+                - php.snippet.tag.2
+              snippet_files:
+                - snippet_file.txt
+  services:
+    medical-imaging:
diff --git a/aws_doc_sdk_examples_tools/yaml_mapper.py b/aws_doc_sdk_examples_tools/yaml_mapper.py
index d2bbef3..9db1166 100644
--- a/aws_doc_sdk_examples_tools/yaml_mapper.py
+++ b/aws_doc_sdk_examples_tools/yaml_mapper.py
@@ -16,7 +16,7 @@
 from . import metadata_errors
 from .metadata_errors import MetadataErrors, DuplicateItemException, MetadataParseError
 from .project_validator import ValidationConfig
-from .metadata_validator import StringExtension
+from .metadata_validator import StringExtension, ElementTreeParseError
 
 CATEGORY_REQUIRED_FIELDS = {"IAMPolicy": {"version": {"authors", "owner", "source"}}}
 
@@ -156,13 +156,23 @@ def get_field(
         return ""
 
     checker = StringExtension(check_aws=check_aws)
-    if not checker.is_valid(field):
+    try:
+        if not checker.is_valid(field):
+            errors.append(
+                metadata_errors.AwsNotEntity(
+                    field=name, value=field, check_err=checker.get_name()
+                )
+            )
+            return ""
+    except ElementTreeParseError as e:
         errors.append(
-            metadata_errors.AwsNotEntity(
-                field=name, value=field, check_err=checker.get_name()
+            metadata_errors.ParseXMLError(
+                xml_err_message=e.msg,
+                value=e.raw,
             )
         )
         return ""
+
     return field
 
 