Skip to content

Commit 9ea88ea

Browse files
committed
generate sitemap
1 parent 26a5f6b commit 9ea88ea

File tree

2 files changed

+44
-0
lines changed

2 files changed

+44
-0
lines changed

.circleci/config.yml

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -71,10 +71,12 @@ jobs:
7171
python check-or-enforce-order.py _posts/r/
7272
python check-or-enforce-order.py _posts/matlab
7373
python check-or-enforce-order.py _posts/plotly_js
74+
python python-sitemap.py
7475
echo `md5 -q all_static/css/main.css` > _data/cache_bust_css.yml
7576
deactivate
7677
rm -rf venv
7778
bundle exec jekyll build
79+
cp python/sitemap.xml _site/python/sitemap.xml
7880
rm _data/mapbox_token.yml
7981
mkdir snapshots
8082
cd _site
@@ -111,6 +113,10 @@ jobs:
111113
rm -f snapshots/*/*/*.bkp
112114
bundle exec percy snapshot snapshots --enable_javascript
113115
rm -rf 'snapshots/'
116+
# Save _site as artifacts for testing
117+
- store_artifacts:
118+
path: _site
119+
destination: built-site
114120
if [ "${CIRCLE_BRANCH}" == "master" ]; then
115121
git clone --depth=1 --branch=gh-pages https://github.com/plotly/documentation.git
116122
git config user.name plotlydocbot

python-sitemap.py

Lines changed: 38 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,38 @@
1+
import os
2+
import glob
3+
import re
4+
5+
def get_permalinks():
    """Collect the permalinks declared by the Python documentation posts.

    Scans every ``.html`` and ``.md`` file under ``_posts/python/`` (relative
    to the current working directory), skipping files located inside an
    ``html`` or ``chart-studio`` directory, and extracts the first
    ``permalink:`` value found in each file.

    Files that cannot be read or decoded as UTF-8 are reported on stdout and
    skipped, so one bad file does not abort sitemap generation.

    Returns:
        list[str]: Unique permalinks, each starting with ``/``. HTML files
        are processed before Markdown files, each group in sorted path order,
        so the result is deterministic across filesystems.
    """
    # ``[ \t]*`` rather than ``\s*``: an empty ``permalink:`` line must not
    # swallow the newline and capture the next line as the URL.
    permalink_re = re.compile(r'permalink:[ \t]*(\S[^\r\n]*)')

    # glob returns entries in arbitrary (filesystem) order; sort for
    # reproducible output.
    html_files = sorted(glob.glob('_posts/python/**/*.html', recursive=True))
    md_files = sorted(glob.glob('_posts/python/**/*.md', recursive=True))

    permalinks = []
    seen = set()
    for path in html_files + md_files:
        # Normalise separators so the directory filter also works where
        # os.sep is not '/'.
        normalized = path.replace(os.sep, '/')
        if '/html/' in normalized or '/chart-studio/' in normalized:
            continue

        try:
            with open(path, encoding='utf-8') as file:
                text = file.read()
        except (OSError, UnicodeDecodeError) as exc:
            print(f"Skipping {path}: {exc}")
            continue

        match = permalink_re.search(text)
        if not match:
            continue

        link = match.group(1).strip()
        # YAML allows quoted scalars; the quotes are not part of the URL.
        if len(link) >= 2 and link[0] == link[-1] and link[0] in '"\'':
            link = link[1:-1].strip()
        if not link:
            continue
        if not link.startswith('/'):
            link = '/' + link

        # A sitemap should list each URL once.
        if link not in seen:
            seen.add(link)
            permalinks.append(link)
    return permalinks
24+
25+
def main():
    """Generate ``python/sitemap.xml`` from the Python doc permalinks.

    Builds a sitemap in the sitemaps.org 0.9 format with one ``<url>`` entry
    per permalink returned by ``get_permalinks()``, prefixed with
    ``https://plotly.com``. The ``python`` output directory is created if it
    is missing, and a one-line summary is printed when done.
    """
    # Local import keeps this block self-contained without touching the
    # file's import header.
    from xml.sax.saxutils import escape

    base_url = 'https://plotly.com'
    permalinks = get_permalinks()

    # URLs must be entity-escaped (e.g. '&' -> '&amp;') to be valid XML.
    entries = [
        f' <url>\n <loc>{escape(base_url + p)}</loc>\n </url>\n'
        for p in permalinks
    ]
    xml = (
        '<?xml version="1.0" encoding="UTF-8"?>\n'
        '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n'
        + ''.join(entries)
        + '</urlset>'
    )

    os.makedirs('python', exist_ok=True)
    # Write as UTF-8 explicitly so the bytes match the encoding declared in
    # the XML prolog regardless of the platform default.
    with open('python/sitemap.xml', 'w', encoding='utf-8') as f:
        f.write(xml)
    print(f"Generated Python sitemap with {len(permalinks)} URLs")


# Run the generator only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
38+

0 commit comments

Comments
 (0)