#!/usr/bin/env python3
"""Generate the sitemap for the Python posts from their front-matter permalinks."""
import os
import sys
from pathlib import Path
from xml.sax.saxutils import escape

import frontmatter

# Glob patterns (searched recursively) for the post files that may carry a permalink.
_POST_PATTERNS = ("*.html", "*.md")


def _permalink_to_url(base_url, permalink):
    """Return the absolute URL for *permalink*, or None when it is unusable.

    Permalinks appear both with and without a leading slash, so the slash is
    normalised here to avoid emitting ``https://host//path``.
    """
    # An empty ``permalink:`` key parses to None; other non-string values are
    # equally meaningless as a URL path.
    if not isinstance(permalink, str):
        return None
    permalink = permalink.strip()
    if not permalink:
        return None
    return f"{base_url.rstrip('/')}/{permalink.lstrip('/')}"


def _collect_urls(posts_dir, base_url):
    """Return the sorted, de-duplicated URLs of every post under *posts_dir*."""
    urls = set()
    for pattern in _POST_PATTERNS:
        for file_path in Path(posts_dir).rglob(pattern):
            # Keep the try body minimal: only the parse can legitimately fail.
            try:
                post = frontmatter.load(file_path)
            except Exception as exc:
                # Best effort: one unreadable post must not abort the whole
                # sitemap, but the failure should be visible to the operator.
                print(f"Skipping {file_path}: {exc}", file=sys.stderr)
                continue
            if 'permalink' not in post:
                continue
            url = _permalink_to_url(base_url, post['permalink'])
            if url is not None:
                urls.add(url)
    return sorted(urls)


def _render_sitemap(urls):
    """Return the sitemap XML document listing *urls* in the given order."""
    xml = '<?xml version="1.0" encoding="UTF-8"?>\n<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n'
    # The sitemap protocol requires entity-escaped URLs (e.g. "&" -> "&amp;").
    xml += ''.join(f' <url>\n <loc>{escape(url)}</loc>\n </url>\n' for url in urls)
    xml += '</urlset>'
    return xml


def generate_sitemap(posts_dir="_posts/python", base_url="https://plotly.com",
                     output_path="python/sitemap.xml"):
    """Write a sitemap of all posts that declare a permalink.

    Args:
        posts_dir: Directory searched recursively for ``*.html`` and ``*.md`` posts.
        base_url: Site origin that every permalink is appended to.
        output_path: File the sitemap XML is written to; its parent directory
            is created when missing.

    Posts whose front matter cannot be parsed are reported on stderr and
    skipped. The printed count is the number of unique URLs actually written.
    """
    urls = _collect_urls(posts_dir, base_url)
    xml = _render_sitemap(urls)

    os.makedirs(os.path.dirname(output_path) or '.', exist_ok=True)
    # The XML declaration promises UTF-8, so do not rely on the locale default.
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(xml)

    print(f"Generated sitemap with {len(urls)} URLs")


if __name__ == '__main__':
    generate_sitemap()