Skip to content

Commit a1a5602

Browse files
authored
Merge pull request #376 from plotly/generate-pages
Add sitemap for javascript pages
2 parents 2d900ca + 48773d6 commit a1a5602

File tree

3 files changed

+59
-46
lines changed

3 files changed

+59
-46
lines changed

.circleci/config.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ jobs:
 python check-or-enforce-order.py _posts/r/
 python check-or-enforce-order.py _posts/matlab
 python check-or-enforce-order.py _posts/plotly_js
-python python-sitemap.py
+python generate-sitemaps.py
 echo `md5 -q all_static/css/main.css` > _data/cache_bust_css.yml
 deactivate
 rm -rf venv

generate-sitemaps.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
#!/usr/bin/env python3
2+
import frontmatter
3+
from pathlib import Path
4+
5+
def generate_sitemap_for_language(posts_dir, url_prefix, output_dir, reference_dir=None):
    """Write ``<output_dir>/sitemap.xml`` for the pages of one language.

    Every file matching ``*.[hm][td]*`` below ``posts_dir`` (and below
    ``reference_dir`` when one is given) is parsed for front matter. A page
    is listed when its ``permalink`` starts with ``url_prefix`` (with or
    without a leading slash) and its resulting URL mentions neither
    ``dash.plotly.com`` nor ``chart-studio``.

    Args:
        posts_dir: Directory that is searched recursively for pages.
        url_prefix: Required permalink prefix, e.g. ``"python/"``.
        output_dir: Directory that receives ``sitemap.xml``; it is created,
            including missing parents, when it does not exist yet.
        reference_dir: Optional second directory searched like ``posts_dir``.
    """
    # Local import so the module-level import block does not have to change.
    from xml.sax.saxutils import escape

    base_url = "https://plotly.com"
    # NOTE: the prefix test is applied to the raw permalink, so a permalink
    # that already carries a domain is only kept when url_prefix itself is
    # an absolute URL.
    accepted_prefixes = (url_prefix, f"/{url_prefix}")

    # Define directories to scan
    directories_to_scan = [posts_dir]
    if reference_dir:
        directories_to_scan.append(reference_dir)

    # A set removes duplicates while collecting.
    urls = set()

    # Find all HTML and MD files in all specified directories
    for directory in directories_to_scan:
        for file_path in Path(directory).rglob("*.[hm][td]*"):
            # Skip files in redir directory
            if "redir" in file_path.parts:
                continue
            # The glob can also match directory names; only files have front matter.
            if not file_path.is_file():
                continue
            post = frontmatter.load(file_path)
            permalink = post['permalink'] if 'permalink' in post else None
            # An empty ``permalink:`` entry is not a string; there is nothing to list.
            if not isinstance(permalink, str):
                continue
            # Use permalink as-is if it has a domain, otherwise prepend base_url
            if permalink.startswith(('http://', 'https://')):
                url = permalink
            else:
                url = f"{base_url}/{permalink.lstrip('/')}"
            # Skip dash.plotly.com and chart-studio URLs
            if 'dash.plotly.com' in url or 'chart-studio' in url:
                continue
            # Only include permalinks that start with the specified prefix
            if permalink.startswith(accepted_prefixes):
                urls.add(url)

    urls = sorted(urls)

    # Generate sitemap; <loc> values must be entity-escaped to stay valid XML.
    xml_entities = {"'": "&apos;", '"': "&quot;"}
    xml = '<?xml version="1.0" encoding="UTF-8"?>\n<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n'
    xml += ''.join(
        f' <url>\n <loc>{escape(url, xml_entities)}</loc>\n <priority>0.5</priority>\n </url>\n'
        for url in urls
    )
    xml += '</urlset>'

    # Write to file
    Path(output_dir).mkdir(parents=True, exist_ok=True)
    Path(f'{output_dir}/sitemap.xml').write_text(xml, encoding='utf-8')

    print(f"Generated {output_dir} sitemap with {len(urls)} URLs")
48+
49+
def generate_sitemap():
    """Build the Python sitemap first, then the JavaScript sitemap."""
    # One row per language:
    # (posts directory, permalink prefix, output directory, reference directory)
    language_jobs = (
        ("_posts/python", "python/", "python", "_posts/reference_pages/python"),
        ("_posts/plotly_js", "javascript/", "javascript", "_posts/reference_pages/javascript"),
    )
    for job in language_jobs:
        generate_sitemap_for_language(*job)


if __name__ == "__main__":
    generate_sitemap()
58+

python-sitemap.py

Lines changed: 0 additions & 45 deletions
This file was deleted.

0 commit comments

Comments
 (0)