Skip to content

Commit 4df7786

Browse files
authored
Update generate_sitemap.py
1 parent 28bfaca commit 4df7786

File tree

1 file changed

+6
-7
lines changed

1 file changed

+6
-7
lines changed

generate_sitemap.py

Lines changed: 6 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -36,14 +36,13 @@ def fetch_sitemap(url):
36 36
return response.text
37 37

38 38
def prettify_xml(element):
39-
"""Prettify and return a string representation of the XML without XML declaration."""
39+
"""Prettify and return a string representation of the XML with XML declaration including encoding."""
40 40
rough_string = ET.tostring(element, encoding='utf-8')
41 41
reparsed = minidom.parseString(rough_string)
42-
pretty = reparsed.toprettyxml(indent=" ")
43-
# Remove the XML declaration
44-
lines = pretty.split('\n')
45-
lines = [line for line in lines if line.strip() and not line.strip().startswith('<?xml')]
46-
return '\n'.join(lines)
42+
# Specify encoding to include it in the XML declaration
43+
pretty = reparsed.toprettyxml(indent=" ", encoding="UTF-8")
44+
# Decode bytes to string for writing to file
45+
return pretty.decode('UTF-8')
47 46

48 47
def encode_url(url):
49 48
"""Encode the URL to make it XML-safe and RFC-compliant."""
@@ -144,7 +143,7 @@ def main():
144 143

145 144
new_root.append(url_entry)
146 145

147-
# Save prettified XML to file without XML declaration
146+
# Save prettified XML to file with XML declaration including encoding
148 147
beautified_xml = prettify_xml(new_root)
149 148
with open("sitemap.xml", "w", encoding="utf-8") as f:
150 149
f.write(beautified_xml)

0 commit comments

Comments
 (0)