Skip to content

Commit 7fb5d07

Browse files
authored
Update generate_sitemap.py
1 parent 64f0729 commit 7fb5d07

File tree

1 file changed

+36
-12
lines changed

1 file changed

+36
-12
lines changed

generate_sitemap.py

Lines changed: 36 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -53,6 +53,24 @@ def encode_url(url):
5353
"""Encode the URL to make it XML-safe and RFC-compliant."""
5454
return quote(url, safe=":/?&=") # Leave common URL-safe characters untouched
5555

56+
def add_static_urls(root, urls):
    """Append one sitemap ``<url>`` entry to ``root`` for every static URL.

    Each entry contains a ``<loc>`` with the encoded URL, followed by one
    ``xhtml:link rel="alternate"`` child per item of the ``languages``
    mapping (language code -> hreflang value, defined elsewhere in this
    file). The alternate ``href`` is the URL with ``/<lang_code>`` appended.

    Args:
        root: Element that receives the newly built ``<url>`` elements.
        urls: Iterable of absolute URL strings.
    """
    sitemap_ns = '{http://www.sitemaps.org/schemas/sitemap/0.9}'
    xhtml_ns = '{http://www.w3.org/1999/xhtml}'

    for url in urls:
        url_element = ET.Element(sitemap_ns + 'url')

        loc = ET.SubElement(url_element, sitemap_ns + 'loc')
        loc.text = encode_url(url)

        # Drop any trailing slash so that a root URL such as
        # "https://example.com/" does not produce "https://example.com//en".
        base_url = url.rstrip('/')

        # Add translations for each language
        for lang_code, hreflang in languages.items():
            translated_url = encode_url(f"{base_url}/{lang_code}")
            alt_link = ET.SubElement(url_element, xhtml_ns + 'link')
            alt_link.set('rel', 'alternate')
            alt_link.set('hreflang', hreflang)
            alt_link.set('href', translated_url)

        root.append(url_element)
73+
5674
def main():
5775
# URLs of the sitemaps
5876
book_sitemap_url = "https://book.hacktricks.xyz/sitemap.xml"
@@ -83,6 +101,23 @@ def main():
83101
loc.text = encode_url("https://www.hacktricks.xyz/")
84102
new_root.append(static_url)
85103

104+
# Add static URLs for training.hacktricks.xyz
105+
static_training_urls = [
106+
"https://training.hacktricks.xyz/",
107+
"https://training.hacktricks.xyz/courses/arte",
108+
"https://training.hacktricks.xyz/courses/arta",
109+
"https://training.hacktricks.xyz/courses/grte",
110+
"https://training.hacktricks.xyz/courses/grta",
111+
"https://training.hacktricks.xyz/bundles",
112+
"https://training.hacktricks.xyz/signin",
113+
"https://training.hacktricks.xyz/signup",
114+
"https://training.hacktricks.xyz/contact",
115+
"https://training.hacktricks.xyz/faqs",
116+
"https://training.hacktricks.xyz/terms",
117+
"https://training.hacktricks.xyz/privacy",
118+
]
119+
add_static_urls(new_root, static_training_urls)
120+
86121
# Process main URLs
87122
for url_element in tqdm(all_urls, desc="Processing URLs"):
88123
loc = url_element.find('ns:loc', ns)
@@ -128,17 +163,6 @@ def main():
128163
translation_urls
129164
))
130165

131-
# Commented-out URL checks, assuming all translations exist for now
132-
# all_translation_checks = {}
133-
# with ThreadPoolExecutor(max_workers=10) as executor:
134-
# future_to_url = {executor.submit(check_url_exists, t_url): (hreflang, t_url)
135-
# for _, _, _, t_urls in url_entries for hreflang, t_url in t_urls.items()}
136-
#
137-
# for future in tqdm(as_completed(future_to_url), total=len(future_to_url), desc="Checking Translation URLs"):
138-
# hreflang, t_url = future_to_url[future]
139-
# result = future.result()
140-
# all_translation_checks[t_url] = result
141-
142166
# Build the final sitemap
143167
for (loc_text, priority_val, lastmod_val, translation_urls) in url_entries:
144168
new_url = ET.Element('{http://www.sitemaps.org/schemas/sitemap/0.9}url')
@@ -156,7 +180,7 @@ def main():
156180

157181
# Add all translations (assume all exist for now)
158182
for hreflang, t_url in translation_urls.items():
159-
alt_link = ET.SubElement(new_url, '{http://www.w3.org/1999/xhtml}link')
183+
alt_link = ET.SubElement(new_url, '{http://www.sitemaps.org/schemas/sitemap/0.9}link')
160184
alt_link.set('rel', 'alternate')
161185
alt_link.set('hreflang', hreflang)
162186
alt_link.set('href', t_url)

0 commit comments

Comments
 (0)