1212import datetime
1313import os
1414import re
15+ import xml .etree .ElementTree as xml_tree
1516import yaml
1617from dataclasses import dataclass , field
1718from pathlib import Path
@@ -122,8 +123,7 @@ def _is_valid(self, value: str):
122123 return True
123124 valid = True
124125 if self .check_aws :
125- # All occurrences of AWS must be entities or within a word.
126- valid = len (re .findall ("(?<![&0-9a-zA-Z])AWS(?![;0-9a-zA-Z])" , value )) == 0
126+ valid = self ._validate_aws_entity_usage (value )
127127 if not valid :
128128 self .last_err = 'valid string: it contains a non-entity usage of "AWS"'
129129 if valid and self .upper_start :
@@ -135,27 +135,51 @@ def _is_valid(self, value: str):
135135 if not valid :
136136 self .last_err = "valid string: it must start with a lowercase letter"
137137 if valid and self .end_punc :
138- valid = value [- 1 ] in "!.?"
138+ valid = value . rstrip () [- 1 ] in "!.?"
139139 if not valid :
140140 self .last_err = "valid sentence or phrase: it must end with punctuation"
141141 if valid and self .no_end_punc :
142- valid = value [- 1 ] not in "!.?"
142+ valid = value . rstrip () [- 1 ] not in "!.?"
143143 if not valid :
144144 self .last_err = "valid string: it must not end with punctuation"
145145 if valid and self .end_punc_or_colon :
146- valid = value [- 1 ] in "!.?:"
146+ valid = value . rstrip () [- 1 ] in "!.?:"
147147 if not valid :
148148 self .last_err = (
149149 "valid sentence or phrase: it must end with punctuation or a colon"
150150 )
151151 if valid and self .end_punc_or_semicolon :
152- valid = value [- 1 ] in "!.?;"
152+ valid = value . rstrip () [- 1 ] in "!.?;"
153153 if not valid :
154154 self .last_err = "valid sentence or phrase: it must end with punctuation or a semicolon"
155155 if valid :
156156 valid = super ()._is_valid (value )
157157 return valid
158158
159+ @staticmethod
160+ def _validate_aws_entity_usage (value : str ) -> bool :
161+ """
162+ All occurrences of AWS must be entities or within a word or within a programlisting or code or noloc block.
163+
164+ Count all bare AWS occurrences within accepted XML tags.
165+ Count all bare AWS occurrences overall.
166+ If these counts differ, there's an invalid usage.
167+ """
168+ xval = value .replace ("&" , "&" )
169+ xtree = xml_tree .fromstring (f"<fake><para>{ xval } </para></fake>" )
170+ blocks = (
171+ xtree .findall (".//programlisting" )
172+ + xtree .findall (".//code" )
173+ + xtree .findall (".//noloc" )
174+ )
175+ aws_in_blocks = 0
176+ for element in blocks :
177+ aws_in_blocks += len (
178+ re .findall ("(?<![&0-9a-zA-Z])AWS(?![;0-9a-zA-Z])" , str (element .text ))
179+ )
180+ aws_everywhere = len (re .findall ("(?<![&0-9a-zA-Z])AWS(?![;0-9a-zA-Z])" , value ))
181+ return aws_everywhere == aws_in_blocks
182+
159183
160184@dataclass
161185class ValidateYamaleError (MetadataParseError ):
@@ -169,6 +193,7 @@ def validate_files(
169193 schema_name : Path ,
170194 meta_names : Iterable [Path ],
171195 validators : Dict [str , Validator ],
196+ strict : bool ,
172197 errors : MetadataErrors ,
173198):
174199 """Iterate a list of files and validate each one against a schema."""
@@ -177,14 +202,16 @@ def validate_files(
177202 for meta_name in meta_names :
178203 try :
179204 data = yamale .make_data (meta_name )
180- yamale .validate (schema , data )
205+ yamale .validate (schema , data , strict = strict )
181206 print (f"{ meta_name .resolve ()} validation success! 👍" )
182207 except YamaleError as e :
183208 errors .append (ValidateYamaleError (file = meta_name , yamale_error = e ))
184209 return errors
185210
186211
187- def validate_metadata (doc_gen_root : Path , errors : MetadataErrors ) -> MetadataErrors :
212+ def validate_metadata (
213+ doc_gen_root : Path , strict : bool , errors : MetadataErrors
214+ ) -> MetadataErrors :
188215 config = Path (__file__ ).parent / "config"
189216 with open (config / "sdks.yaml" ) as sdks_file :
190217 sdks_yaml : Dict [str , Any ] = yaml .safe_load (sdks_file )
@@ -206,20 +233,28 @@ def validate_metadata(doc_gen_root: Path, errors: MetadataErrors) -> MetadataErr
206233 validators [BlockContent .tag ] = BlockContent
207234 validators [String .tag ] = StringExtension
208235
209- schema_root = Path (__file__ ).parent / "config"
236+ config_root = Path (__file__ ).parent / "config"
237+ if strict :
238+ example_schema = "example_strict_schema.yaml"
239+ else :
240+ example_schema = "example_schema.yaml"
210241
211242 to_validate = [
212243 # (schema, metadata_glob)
213- ("sdks_schema.yaml" , "sdks.yaml" ),
214- ("services_schema.yaml" , "services.yaml" ),
215- # TODO: Switch between strict schema for aws-doc-sdk-examples and loose schema for tributaries
216- ("example_strict_schema.yaml" , "*_metadata.yaml" ),
244+ (config_root / "sdks_schema.yaml" , config_root , "sdks.yaml" ),
245+ (config_root / "services_schema.yaml" , config_root , "services.yaml" ),
246+ (
247+ config_root / example_schema ,
248+ doc_gen_root / ".doc_gen" / "metadata" ,
249+ "*_metadata.yaml" ,
250+ ),
217251 ]
218- for schema , metadata in to_validate :
252+ for schema , meta_root , metadata in to_validate :
219253 validate_files (
220- schema_root / schema ,
221- ( doc_gen_root / "metadata" ) .glob (metadata ),
254+ schema ,
255+ meta_root .glob (metadata ),
222256 validators ,
257+ strict ,
223258 errors ,
224259 )
225260
@@ -234,9 +269,12 @@ def main():
234269 help = "The folder that contains schema and metadata files." ,
235270 required = False ,
236271 )
272+ parser .add_argument (
273+ "--strict" , default = True , help = "Use strict schema." , required = False
274+ )
237275 args = parser .parse_args ()
238276
239- errors = validate_metadata (Path (args .doc_gen ), MetadataErrors ())
277+ errors = validate_metadata (Path (args .doc_gen ), args . strict , MetadataErrors ())
240278
241279 if len (errors ) == 0 :
242280 print ("Validation succeeded! 👍👍👍" )
0 commit comments