Skip to content

Commit 6623004

Browse files
committed
Update python-sitemap.py
1 parent 9ea88ea commit 6623004

File tree

1 file changed

+29
-27
lines changed

1 file changed

+29
-27
lines changed

python-sitemap.py

Lines changed: 29 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,38 +1,40 @@
1+
#!/usr/bin/env python3
12
import os
2-
import glob
3-
import re
3+
import frontmatter
4+
from pathlib import Path
45

def generate_sitemap(
    posts_dir="_posts/python",
    output_path="python/sitemap.xml",
    base_url="https://plotly.com",
):
    """Write an XML sitemap containing the permalink of every post in *posts_dir*.

    Every ``*.html`` and ``*.md`` file below *posts_dir* is parsed with
    ``frontmatter.load``; files whose front matter has a non-empty
    ``permalink`` contribute one ``<url>`` entry.  Entries are de-duplicated
    and written in sorted order.

    Args:
        posts_dir: Directory that is searched recursively for posts.
        output_path: Sitemap file to write; missing parent directories are
            created.
        base_url: Site root that each permalink is appended to.

    Returns:
        None.  The sitemap is written to *output_path* and a one-line summary
        is printed to stdout.
    """
    # Function-scope imports keep this block self-contained without touching
    # the file's top-level import list.
    import sys
    from xml.sax.saxutils import escape

    root = base_url.rstrip("/")
    urls = set()

    for pattern in ("*.html", "*.md"):
        for file_path in Path(posts_dir).rglob(pattern):
            try:
                post = frontmatter.load(file_path)
            except Exception as exc:
                # Best effort: one unreadable or malformed post must not
                # abort the whole sitemap, but it should not vanish silently.
                print(f"Skipping {file_path}: {exc}", file=sys.stderr)
                continue

            permalink = post.get("permalink")
            if not permalink:
                # Missing key, or `permalink:` present but empty.
                continue

            # Permalinks appear both with and without a leading slash;
            # normalise so the joined URL never contains "//" after the host.
            urls.add(f"{root}/{str(permalink).strip().lstrip('/')}")

    # Generate sitemap
    entries = "".join(
        # The sitemap protocol requires entity-escaped URLs (e.g. "&" -> "&amp;").
        f" <url>\n <loc>{escape(url)}</loc>\n </url>\n"
        for url in sorted(urls)
    )
    document = (
        '<?xml version="1.0" encoding="UTF-8"?>\n'
        '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n'
        f"{entries}</urlset>"
    )

    destination = Path(output_path)
    destination.parent.mkdir(parents=True, exist_ok=True)
    # Write with the same encoding the XML declaration advertises.
    destination.write_text(document, encoding="utf-8")

    # Report the number of entries actually written (duplicates removed).
    print(f"Generated sitemap with {len(urls)} URLs")


if __name__ == '__main__':
    generate_sitemap()

0 commit comments

Comments
 (0)