@@ -53,6 +53,24 @@ def encode_url(url):
5353 """Encode the URL to make it XML-safe and RFC-compliant."""
5454 return quote (url , safe = ":/?&=" ) # Leave common URL-safe characters untouched
5555
def add_static_urls(root, urls):
    """Append one sitemap ``url`` entry per static URL to ``root``.

    Every entry contains a ``loc`` element holding the encoded URL and one
    ``xhtml:link rel="alternate"`` element per configured language whose
    ``href`` is ``<url>/<lang_code>``.

    Args:
        root: Element the new entries are appended to (the caller passes the
            root of the sitemap being built).
        urls: Iterable of absolute URL strings.

    Relies on two names defined elsewhere in this module: ``encode_url`` and
    the ``languages`` mapping (iterated here as language code -> hreflang).
    """
    sitemap_ns = '{http://www.sitemaps.org/schemas/sitemap/0.9}'
    xhtml_ns = '{http://www.w3.org/1999/xhtml}'

    for url in urls:
        url_element = ET.Element(f'{sitemap_ns}url')

        loc = ET.SubElement(url_element, f'{sitemap_ns}loc')
        loc.text = encode_url(url)

        # Drop trailing slashes so that a base URL such as
        # "https://host/" yields "https://host/en" rather than "https://host//en".
        base_url = url.rstrip('/')

        # Add translations for each language
        for lang_code, hreflang in languages.items():
            alt_link = ET.SubElement(url_element, f'{xhtml_ns}link')
            alt_link.set('rel', 'alternate')
            alt_link.set('hreflang', hreflang)
            alt_link.set('href', encode_url(f"{base_url}/{lang_code}"))

        root.append(url_element)
73+
5674def main ():
5775 # URLs of the sitemaps
5876 book_sitemap_url = "https://book.hacktricks.xyz/sitemap.xml"
@@ -83,6 +101,23 @@ def main():
83101 loc .text = encode_url ("https://www.hacktricks.xyz/" )
84102 new_root .append (static_url )
85103
104+ # Add static URLs for training.hacktricks.xyz
105+ static_training_urls = [
106+ "https://training.hacktricks.xyz/" ,
107+ "https://training.hacktricks.xyz/courses/arte" ,
108+ "https://training.hacktricks.xyz/courses/arta" ,
109+ "https://training.hacktricks.xyz/courses/grte" ,
110+ "https://training.hacktricks.xyz/courses/grta" ,
111+ "https://training.hacktricks.xyz/bundles" ,
112+ "https://training.hacktricks.xyz/signin" ,
113+ "https://training.hacktricks.xyz/signup" ,
114+ "https://training.hacktricks.xyz/contact" ,
115+ "https://training.hacktricks.xyz/faqs" ,
116+ "https://training.hacktricks.xyz/terms" ,
117+ "https://training.hacktricks.xyz/privacy" ,
118+ ]
119+ add_static_urls (new_root , static_training_urls )
120+
86121 # Process main URLs
87122 for url_element in tqdm (all_urls , desc = "Processing URLs" ):
88123 loc = url_element .find ('ns:loc' , ns )
@@ -128,17 +163,6 @@ def main():
128163 translation_urls
129164 ))
130165
131- # Commented-out URL checks, assuming all translations exist for now
132- # all_translation_checks = {}
133- # with ThreadPoolExecutor(max_workers=10) as executor:
134- # future_to_url = {executor.submit(check_url_exists, t_url): (hreflang, t_url)
135- # for _, _, _, t_urls in url_entries for hreflang, t_url in t_urls.items()}
136- #
137- # for future in tqdm(as_completed(future_to_url), total=len(future_to_url), desc="Checking Translation URLs"):
138- # hreflang, t_url = future_to_url[future]
139- # result = future.result()
140- # all_translation_checks[t_url] = result
141-
142166 # Build the final sitemap
143167 for (loc_text , priority_val , lastmod_val , translation_urls ) in url_entries :
144168 new_url = ET .Element ('{http://www.sitemaps.org/schemas/sitemap/0.9}url' )
@@ -156,7 +180,7 @@ def main():
156180
157181 # Add all translations (assume all exist for now)
158182 for hreflang , t_url in translation_urls .items ():
159- alt_link = ET .SubElement (new_url , '{http://www.w3 .org/1999/xhtml }link' )
183+ alt_link = ET .SubElement (new_url , '{http://www.sitemaps .org/schemas/sitemap/0.9 }link' )
160184 alt_link .set ('rel' , 'alternate' )
161185 alt_link .set ('hreflang' , hreflang )
162186 alt_link .set ('href' , t_url )
0 commit comments