Skip to content

Commit ff00b79

Browse files
committed
add js sitemap
1 parent 2d900ca commit ff00b79

File tree

3 files changed

+62
-46
lines changed

3 files changed

+62
-46
lines changed

.circleci/config.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ jobs:
7171
python check-or-enforce-order.py _posts/r/
7272
python check-or-enforce-order.py _posts/matlab
7373
python check-or-enforce-order.py _posts/plotly_js
74-
python python-sitemap.py
74+
python generate-sitemaps.py
7575
echo `md5 -q all_static/css/main.css` > _data/cache_bust_css.yml
7676
deactivate
7777
rm -rf venv

generate-sitemaps.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
#!/usr/bin/env python3
2+
import frontmatter
3+
from pathlib import Path
4+
5+
def generate_sitemap_for_language(posts_dir, url_prefix, output_dir, reference_dir=None):
    """Write ``<output_dir>/sitemap.xml`` for the pages of one language.

    Every file under ``posts_dir`` (and ``reference_dir``, when given) whose
    name matches ``*.[hm][td]*`` is parsed for front matter. A page is listed
    when its ``permalink`` starts with ``url_prefix`` (with or without a
    leading slash) and the resulting URL contains neither ``dash.plotly.com``
    nor ``chart-studio``.

    Args:
        posts_dir: Directory that is scanned recursively for posts.
        url_prefix: Permalink prefix a page must have to be included,
            e.g. ``"python/"``.
        output_dir: Directory that receives ``sitemap.xml``; it is created
            (including missing parents) when it does not exist.
        reference_dir: Optional second directory scanned the same way.

    Files that cannot be parsed are skipped with a warning on stderr, so a
    single broken post does not abort sitemap generation.
    """
    # Function-scope imports keep this block self-contained.
    import sys
    from xml.sax.saxutils import escape

    base_url = "https://plotly.com"
    urls = set()  # a set de-duplicates permalinks shared by several files

    directories_to_scan = [posts_dir]
    if reference_dir:
        directories_to_scan.append(reference_dir)

    # Permalinks are accepted with or without a leading slash.
    accepted_prefixes = (url_prefix, f"/{url_prefix}")

    for directory in directories_to_scan:
        for file_path in Path(directory).rglob("*.[hm][td]*"):
            # Skip files in redir directory
            if "redir" in file_path.parts:
                continue
            # The glob can also match directories; only files carry front matter.
            if not file_path.is_file():
                continue

            # Keep the try body minimal: only the parse step is best-effort.
            try:
                post = frontmatter.load(file_path)
            except Exception as exc:
                print(f"Warning: skipping {file_path}: {exc}", file=sys.stderr)
                continue

            if 'permalink' not in post:
                continue
            permalink = post['permalink']
            # An empty or non-string permalink (e.g. YAML null) cannot form a URL.
            if not isinstance(permalink, str):
                continue

            # Use permalink as-is if it has a domain, otherwise prepend base_url
            if permalink.startswith(('http://', 'https://')):
                url = permalink
            else:
                url = f"{base_url}/{permalink.lstrip('/')}"

            # Skip dash.plotly.com and chart-studio URLs
            if 'dash.plotly.com' in url or 'chart-studio' in url:
                continue

            # NOTE(review): the prefix test runs on the raw permalink, so an
            # absolute permalink is only listed when url_prefix itself matches
            # its beginning -- confirm this is intended.
            if permalink.startswith(accepted_prefixes):
                urls.add(url)

    sorted_urls = sorted(urls)

    # Generate sitemap; <loc> values must be entity-escaped to stay valid XML.
    extra_entities = {"'": "&apos;", '"': "&quot;"}
    xml = '<?xml version="1.0" encoding="UTF-8"?>\n<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n'
    xml += ''.join(
        f' <url>\n <loc>{escape(url, extra_entities)}</loc>\n <priority>0.5</priority>\n </url>\n'
        for url in sorted_urls
    )
    xml += '</urlset>'

    # Write to file; parents=True allows a nested output directory.
    out_path = Path(output_dir)
    out_path.mkdir(parents=True, exist_ok=True)
    (out_path / 'sitemap.xml').write_text(xml, encoding='utf-8')

    print(f"Generated {output_dir} sitemap with {len(sorted_urls)} URLs")
51+
52+
def generate_sitemap():
    """Generate the Python sitemap, then the JavaScript sitemap."""
    # Each entry: (posts_dir, url_prefix, output_dir, reference_dir)
    targets = (
        ("_posts/python", "python/", "python", "_posts/reference_pages/python"),
        ("_posts/plotly_js", "javascript/", "javascript", "_posts/reference_pages/javascript"),
    )
    for posts_dir, url_prefix, output_dir, reference_dir in targets:
        generate_sitemap_for_language(posts_dir, url_prefix, output_dir, reference_dir)


# Build the sitemaps only when run as a script, not when imported.
if __name__ == "__main__":
    generate_sitemap()
61+

python-sitemap.py

Lines changed: 0 additions & 45 deletions
This file was deleted.

0 commit comments

Comments
 (0)