Skip to content

Commit 44d578e

Browse files
authored
Merge pull request #372 from plotly/add-sitemap
Add sitemap for pages at /python
2 parents 26a5f6b + 294a0b5 commit 44d578e

File tree

2 files changed

+47
-0
lines changed

2 files changed

+47
-0
lines changed

.circleci/config.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,10 +71,12 @@ jobs:
7171
python check-or-enforce-order.py _posts/r/
7272
python check-or-enforce-order.py _posts/matlab
7373
python check-or-enforce-order.py _posts/plotly_js
74+
python python-sitemap.py
7475
echo `md5 -q all_static/css/main.css` > _data/cache_bust_css.yml
7576
deactivate
7677
rm -rf venv
7778
bundle exec jekyll build
79+
cp python/sitemap.xml _site/python/sitemap.xml
7880
rm _data/mapbox_token.yml
7981
mkdir snapshots
8082
cd _site

python-sitemap.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
#!/usr/bin/env python3
2+
import frontmatter
3+
from pathlib import Path
4+
5+
def generate_sitemap():
    """Write ``python/sitemap.xml`` listing the pages published under /python.

    Walks ``_posts/python`` (relative to the current working directory),
    reads the front matter of every matching post and collects the
    ``permalink`` of each one that starts with ``python/``. The resulting
    URLs are de-duplicated, sorted and written as a sitemaps.org ``urlset``.

    Posts whose front matter cannot be loaded are reported on stderr and
    skipped, so one broken file does not abort the whole build.

    Returns:
        None. The sitemap is written to ``python/sitemap.xml`` and a one-line
        summary is printed to stdout.
    """
    # Local imports keep this block self-contained; the file-level imports
    # only provide ``frontmatter`` and ``Path``.
    import sys
    from xml.sax.saxutils import escape

    base_url = "https://plotly.com"
    urls = set()

    # The pattern matches names whose extension starts with h/m followed by
    # t/d, which covers .html, .htm and .md posts.
    for file_path in Path("_posts/python").rglob("*.[hm][td]*"):
        # Skip files in redir directory
        if "redir" in file_path.parts:
            continue
        # A directory can match the glob too; only regular files have
        # front matter to read.
        if not file_path.is_file():
            continue

        # Keep the try body down to the one call that does I/O and parsing.
        try:
            post = frontmatter.load(file_path)
        except Exception as exc:
            # Best effort: report the unreadable post instead of hiding it.
            print(f"Skipping {file_path}: {exc!r}", file=sys.stderr)
            continue

        if 'permalink' not in post:
            continue
        permalink = post['permalink']

        # Only include URLs that are under /python/. This also rules out
        # absolute http(s) permalinks and non-string values.
        if not isinstance(permalink, str) or not permalink.startswith('python/'):
            continue

        url = f"{base_url}/{permalink}"
        # Skip dash.plotly.com URLs
        if 'dash.plotly.com' in url:
            continue
        urls.add(url)

    # Remove duplicates (the set) and sort for a stable, diff-friendly file.
    urls = sorted(urls)

    # Generate sitemap. URLs are entity-escaped because the sitemap protocol
    # requires characters such as & and < to be escaped inside <loc>.
    xml = '<?xml version="1.0" encoding="UTF-8"?>\n<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n'
    xml += ''.join(f' <url>\n <loc>{escape(url)}</loc>\n <priority>0.5</priority>\n </url>\n' for url in urls)
    xml += '</urlset>'

    # Write to file
    Path('python').mkdir(exist_ok=True)
    Path('python/sitemap.xml').write_text(xml, encoding='utf-8')

    print(f"Generated sitemap with {len(urls)} URLs")
42+
43+
# Script entry point: generate python/sitemap.xml only when this file is run
# directly, so importing the module has no side effects.
if __name__ == "__main__":
    generate_sitemap()
45+

0 commit comments

Comments
 (0)