@@ -36,14 +36,13 @@ def fetch_sitemap(url):
3636 return response .text
3737
def prettify_xml(element):
    """Return an indented string form of *element* with an XML declaration.

    The element is serialized with ElementTree, re-parsed with minidom, and
    pretty-printed with a ``<?xml version="1.0" encoding="UTF-8"?>`` header.

    Args:
        element: An ``xml.etree.ElementTree.Element`` to serialize.

    Returns:
        The pretty-printed document as ``str``, terminated by one newline.
        Whitespace-only lines are removed.
    """
    rough_string = ET.tostring(element, encoding='utf-8')
    reparsed = minidom.parseString(rough_string)
    # Passing an encoding makes minidom put it in the XML declaration; the
    # result is bytes, so decode it back to text for the caller to write.
    pretty = reparsed.toprettyxml(indent=" ", encoding="UTF-8").decode('UTF-8')
    # minidom writes every whitespace-only text node (left over from an
    # already-indented source document) on its own line. Drop those lines so
    # the output stays compact regardless of how the tree was built.
    lines = [line for line in pretty.split('\n') if line.strip()]
    return '\n'.join(lines) + '\n'
4746
4847def encode_url (url ):
4948 """Encode the URL to make it XML-safe and RFC-compliant."""
@@ -144,7 +143,7 @@ def main():
144143
145144 new_root .append (url_entry )
146145
147- # Save prettified XML to file without XML declaration
146+ # Save prettified XML to file with XML declaration including encoding
148147 beautified_xml = prettify_xml (new_root )
149148 with open ("sitemap.xml" , "w" , encoding = "utf-8" ) as f :
150149 f .write (beautified_xml )
0 commit comments