|
12 | 12 | import datetime |
13 | 13 | import os |
14 | 14 | import re |
| 15 | +import xml.etree.ElementTree as xml_tree |
15 | 16 | import yaml |
16 | 17 | from dataclasses import dataclass, field |
17 | 18 | from pathlib import Path |
@@ -122,8 +123,7 @@ def _is_valid(self, value: str): |
122 | 123 | return True |
123 | 124 | valid = True |
124 | 125 | if self.check_aws: |
125 | | - # All occurrences of AWS must be entities or within a word. |
126 | | - valid = len(re.findall("(?<![&0-9a-zA-Z])AWS(?![;0-9a-zA-Z])", value)) == 0 |
| 126 | + valid = self._validate_aws_entity_usage(value) |
127 | 127 | if not valid: |
128 | 128 | self.last_err = 'valid string: it contains a non-entity usage of "AWS"' |
129 | 129 | if valid and self.upper_start: |
@@ -156,6 +156,23 @@ def _is_valid(self, value: str): |
156 | 156 | valid = super()._is_valid(value) |
157 | 157 | return valid |
158 | 158 |
|
@staticmethod
def _validate_aws_entity_usage(value: str) -> bool:
    """
    Check that every bare "AWS" in value sits inside an accepted XML block.

    An occurrence of "AWS" is bare when it is neither written as an entity
    (preceded by "&" or followed by ";") nor embedded in a longer
    alphanumeric word. Bare occurrences are accepted only inside
    programlisting, code, or noloc elements, at any nesting depth.

    The check counts bare occurrences in the text of accepted elements and
    bare occurrences in the raw string. If the counts differ, at least one
    occurrence lies outside an accepted element.

    Args:
        value: The string to check. It may contain XML markup and entity
            references such as "&AWS;".

    Returns:
        True if no bare "AWS" appears outside an accepted element, otherwise
        False. When value contains a bare "AWS" but is not well-formed XML,
        no accepted element can be identified, so the result is False.
    """
    bare_aws = re.compile(r"(?<![&0-9a-zA-Z])AWS(?![;0-9a-zA-Z])")
    aws_everywhere = len(bare_aws.findall(value))
    if aws_everywhere == 0:
        # Nothing to justify, so there is no need to parse (or to fail on) the markup.
        return True

    try:
        # Escape ampersands so entity references such as &AWS; are parsed as
        # literal text instead of being rejected as undefined XML entities.
        root = xml_tree.fromstring(f"<fake>{value.replace('&', '&amp;')}</fake>")
    except xml_tree.ParseError:
        # Not well-formed XML: no occurrence can be shown to be inside an accepted block.
        return False

    accepted_tags = {"programlisting", "code", "noloc"}
    aws_in_blocks = 0
    pending = list(root)
    while pending:
        element = pending.pop()
        if element.tag in accepted_tags:
            # Count each text fragment separately, mirroring how tags delimit
            # matches in the raw string. Do not descend further, so an accepted
            # block nested in another accepted block is counted only once.
            aws_in_blocks += sum(len(bare_aws.findall(text)) for text in element.itertext())
        else:
            pending.extend(element)
    return aws_everywhere == aws_in_blocks
159 | 176 |
|
160 | 177 | @dataclass |
161 | 178 | class ValidateYamaleError(MetadataParseError): |
|
0 commit comments