@@ -258,10 +258,12 @@ def get_last_mod_date() -> dict[str, str]:
258258 )
259259 except subprocess .CalledProcessError :
260260 print ("Not a git repository. Using current date as last modification date." )
261- return {page .stem : now for page in template_dir .glob ("*.html " )}
261+ return {page .stem : now for page in content_dir .glob ("*.yaml " )}
262262
263263 last_mod_dates = {}
264- for page in template_dir .glob ("*.html" ):
264+
265+ # Content YAML files
266+ for page in content_dir .glob ("*.yaml" ):
265267 page_name = page .stem
266268 try :
267269 result = subprocess .run (
@@ -278,32 +280,206 @@ def get_last_mod_date() -> dict[str, str]:
278280 .replace (" +0100" , " +01:00" )
279281 .replace (" +0200" , " +02:00" )
280282 )
281- formatted_date = date_obj .strftime ("%Y-%m-%d" )
282- last_mod_dates [page_name ] = formatted_date
283+ last_mod_dates [page_name ] = date_obj .strftime ("%Y-%m-%d" )
283284 except subprocess .CalledProcessError as e :
284285 print (f"Error getting git log for { page } : { e } " )
285286 last_mod_dates [page_name ] = now
287+
288+ # Posts (markdown files)
289+ posts_dir = content_dir / "posts"
290+ if posts_dir .exists ():
291+ latest_post_date = None
292+ for post in posts_dir .glob ("*.md" ):
293+ post_key = f"post:{ post .stem } " # e.g., "post:my-first-post"
294+ try :
295+ result = subprocess .run (
296+ ["git" , "log" , "-n" , "1" , "--format=%ci" , "--" , post ],
297+ check = True ,
298+ capture_output = True ,
299+ text = True ,
300+ )
301+ date_str = result .stdout .strip ()
302+ if not date_str :
303+ continue
304+ date_obj = datetime .fromisoformat (
305+ date_str .replace ("Z" , "+00:00" )
306+ .replace (" +0100" , " +01:00" )
307+ .replace (" +0200" , " +02:00" )
308+ )
309+ formatted_date = date_obj .strftime ("%Y-%m-%d" )
310+ last_mod_dates [post_key ] = formatted_date
311+
312+ # Track latest post date for posts index
313+ if latest_post_date is None or date_obj > latest_post_date :
314+ latest_post_date = date_obj
315+ except subprocess .CalledProcessError as e :
316+ print (f"Error getting git log for { post } : { e } " )
317+ last_mod_dates [post_key ] = now
318+
319+ # Posts index uses the date of the most recently modified post
320+ if latest_post_date :
321+ last_mod_dates ["posts" ] = latest_post_date .strftime ("%Y-%m-%d" )
322+
286323 return last_mod_dates
287324
288325
326+ # def get_manual_mod_dates() -> dict[str, str]:
327+ # """Get last modification dates for lab-manual pages via GitHub API."""
328+ # now = datetime.now().strftime("%Y-%m-%d")
329+ # mod_dates = {}
330+
331+ # repo = config["manual_repo"]
332+ # api_base = f"https://api.github.com/repos/{repo}/commits"
333+
334+ # req = requests.get(
335+ # f"https://api.github.com/repos/{repo}/contents/source",
336+ # headers={"Accept": "application/vnd.github.v3+json"},
337+ # )
338+ # if req.status_code != 200:
339+ # return {}
340+
341+ # for file_info in req.json():
342+ # if not file_info["name"].endswith(".md"):
343+ # continue
344+ # page_name = file_info["name"].replace(".md", "")
345+
346+ # # Get last commit for this file
347+ # commits_req = requests.get(
348+ # api_base,
349+ # params={"path": f"source/{file_info['name']}", "per_page": 1},
350+ # headers={"Accept": "application/vnd.github.v3+json"},
351+ # )
352+ # if commits_req.status_code == 200 and commits_req.json():
353+ # commit_date = commits_req.json()[0]["commit"]["committer"]["date"]
354+ # date_obj = datetime.fromisoformat(commit_date.replace("Z", "+00:00"))
355+ # mod_dates[f"lab-manual:{page_name}"] = date_obj.strftime("%Y-%m-%d")
356+ # else:
357+ # mod_dates[f"lab-manual:{page_name}"] = now
358+
359+ # return mod_dates
360+
361+
def get_manual_mod_dates() -> dict[str, str]:
    """Return last-modification dates for the lab-manual pages.

    Shallow-clones ``config["manual_repo"]`` from GitHub into a temporary
    directory, reads the page list from the repository's ``Makefile`` and
    asks ``git log`` for the most recent commit touching each page.

    Returns:
        A mapping of ``"lab-manual:<slug>"`` to a ``YYYY-MM-DD`` date, plus a
        ``"lab-manual"`` entry holding the most recent of those dates when at
        least one page date could be read. Pages whose date cannot be
        determined get today's date. An empty dict is returned when the
        repository cannot be cloned or its Makefile cannot be read, so the
        caller can fall back to its own default instead of crashing.
    """
    import tempfile

    now = datetime.now().strftime("%Y-%m-%d")
    mod_dates: dict[str, str] = {}
    repo = config["manual_repo"]

    with tempfile.TemporaryDirectory() as tmpdir:
        try:
            # Shallow clone with enough history to get meaningful dates.
            # NOTE(review): pages last changed before the fetched history may
            # get an inaccurate date -- confirm that depth 50 is sufficient.
            subprocess.run(
                [
                    "git",
                    "clone",
                    "--depth",
                    "50",
                    f"https://github.com/{repo}.git",
                    tmpdir,
                ],
                check=True,
                capture_output=True,
            )
            makefile_lines = (Path(tmpdir) / "Makefile").read_text().split("\n")
        except (subprocess.CalledProcessError, OSError) as e:
            # Offline build, missing git binary, or unexpected repo layout:
            # skip the manual dates rather than abort the whole build.
            print(f"Could not read manual repository {repo}: {e}")
            return mod_dates

        # Get page order from Makefile (same logic as build_manual)
        page_order = [
            line.split(".md")[0]
            for line in makefile_lines
            if line.startswith("source/") or line.endswith(".md \\")
        ]

        latest_manual_date = None

        for page in page_order:
            if page == "README":
                page_slug = "index"
                file_path = "README.md"
            else:
                page_slug = page.replace("source/", "").lower()
                file_path = f"{page}.md"
            page_key = f"lab-manual:{page_slug}"

            try:
                result = subprocess.run(
                    [
                        "git",
                        "-C",
                        tmpdir,
                        "log",
                        "-n",
                        "1",
                        # %cI is the committer date in strict ISO 8601, which
                        # fromisoformat() parses for any UTC offset.
                        "--format=%cI",
                        "--",
                        file_path,
                    ],
                    check=True,
                    capture_output=True,
                    text=True,
                )
                date_str = result.stdout.strip()
                if not date_str:
                    # No commit in the cloned history touches this file.
                    mod_dates[page_key] = now
                    continue

                # Some git versions print UTC as "Z", which older Python
                # versions of fromisoformat() do not accept.
                date_obj = datetime.fromisoformat(date_str.replace("Z", "+00:00"))
            except (subprocess.CalledProcessError, ValueError) as e:
                print(f"Error getting git log for {file_path}: {e}")
                mod_dates[page_key] = now
                continue

            mod_dates[page_key] = date_obj.strftime("%Y-%m-%d")

            # Track latest for manual index
            if latest_manual_date is None or date_obj > latest_manual_date:
                latest_manual_date = date_obj

    # Manual index uses the most recently modified page
    if latest_manual_date:
        mod_dates["lab-manual"] = latest_manual_date.strftime("%Y-%m-%d")

    return mod_dates
439+
440+
289441def make_sitemap ():
290442 now = datetime .now ().strftime ("%Y-%m-%d" )
443+ mod_dates = get_last_mod_date () | get_manual_mod_dates ()
291444
292- mod_dates = get_last_mod_date ()
445+ # Build reverse mapping: build file path -> page key
446+ file_to_page = {config ["pages" ][page ]["file" ]: page for page in config ["pages" ]}
293447
294448 sitemap = ET .Element ("urlset" , xmlns = "http://www.sitemaps.org/schemas/sitemap/0.9" )
295449
296450 for page in build_dir .glob ("**/*.html" ):
297- page_name = page .parent .name if page .parent != build_dir else page .stem
298- url = config ["deploy_url" ] + str (page .relative_to (build_dir )).replace (
299- "index.html" , ""
300- )
451+ relative_path = str (page .relative_to (build_dir ))
452+ page_key = file_to_page .get (relative_path )
453+
454+ # Handle posts
455+ if (
456+ page_key is None
457+ and relative_path .startswith ("p/" )
458+ and relative_path .count ("/" ) == 2
459+ ):
460+ post_slug = relative_path .split ("/" )[1 ]
461+ page_key = f"post:{ post_slug } "
462+ # Handle manual pages
463+ elif page_key is None and relative_path .startswith ("lab-manual/" ):
464+ manual_page_slug = relative_path .replace ("manual/" , "" ).replace (
465+ "/index.html" , ""
466+ )
467+ if manual_page_slug == "index" :
468+ manual_page_slug = "index"
469+ else :
470+ manual_page_slug = manual_page_slug .replace ("lab-" , "" )
471+ page_key = f"lab-manual:{ manual_page_slug } "
472+
473+ url = config ["deploy_url" ] + relative_path .replace ("index.html" , "" )
301474 url_element = ET .SubElement (sitemap , "url" )
302475 ET .SubElement (url_element , "loc" ).text = url
303- try :
304- ET .SubElement (url_element , "lastmod" ).text = mod_dates [page_name ]
305- except KeyError :
306- # new page
476+
477+ if page_key and page_key in mod_dates :
478+ ET .SubElement (url_element , "lastmod" ).text = mod_dates [page_key ]
479+ else :
480+ print (
481+ f"No last modification date found for { relative_path } . Using current date."
482+ )
307483 ET .SubElement (url_element , "lastmod" ).text = now
308484
309485 tree = ET .ElementTree (sitemap )
0 commit comments