Skip to content

Commit 807ef4b

Browse files
authored
feat(indexing): add site indexing to release script (#4723)
* add site indexing to release script
* move pipenv setup before server test and clone scraper into __release
* release script simplifications
1 parent 8d131b2 commit 807ef4b

File tree

3 files changed

+117
-1
lines changed

3 files changed

+117
-1
lines changed

docsearchenv.example

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
APPLICATION_ID=123
2+
API_KEY=abc

release.sh

Lines changed: 55 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,8 +106,62 @@ if [ "$build_site_only" = false ]; then
106106
# Publish the tarball to the Heroku app
107107
heroku builds:create --source-tar site-release.tar.gz -a ${HEROKU_APP_NAME}
108108

109+
# Clone Algolia Docsearch Scraper
110+
echo "» Installing docsearch scraper..."
111+
git clone https://github.com/algolia/docsearch-scraper.git
112+
113+
cd ./.www/
114+
115+
# start python server on port 80
116+
python3 -m http.server 80 &
117+
118+
# install python3
119+
brew install python
120+
121+
cd ../docsearch-scraper/
122+
123+
# crudely install pipenv
124+
curl https://raw.githubusercontent.com/pypa/pipenv/master/get-pipenv.py | python
125+
126+
# copy Algolia environment variables into .env
127+
cp ../../.docsearchenv .env
128+
129+
# install pipenv dependencies
130+
pipenv install
131+
132+
# check for active python server on port 80
133+
max_iterations=10
134+
wait_seconds=1
135+
http_endpoint="http://127.0.0.1:80/"
136+
iterations=0
137+
138+
while true
139+
do
140+
((iterations++))
141+
echo "» Attempt $iterations"
142+
sleep $wait_seconds
143+
http_code=$(curl --verbose -s -o /tmp/result.txt -w '%{http_code}' "$http_endpoint";)
144+
145+
# python server is running, begin crawling
146+
if [ "$http_code" -eq 200 ]; then
147+
echo "» Python server active. Crawling site..."
148+
pipenv run ./docsearch run ../../searchconfig.json
149+
break
150+
fi
151+
152+
# no active server, end loop
153+
if [ "$iterations" -ge "$max_iterations" ]; then
154+
echo "» No active python server. Skipping indexing operation..."
155+
exit 1
156+
fi
157+
done
158+
159+
# kill python server
160+
lsof -ti tcp:80 | xargs kill
161+
109162
# Exit back to parent directory and clean-up after ourselves
110-
cd ..
163+
cd ../../
164+
111165
echo "» Removing '__release' folder..."
112166
rm -rf __release/
113167
cp postcss.config.js.bak postcss.config.js

searchconfig.json

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
{
2+
"index_name": "winter-22",
3+
"start_urls": [
4+
{
5+
"url": "http://localhost/components/overview",
6+
"page_rank": 2
7+
},
8+
{
9+
"url": "http://localhost/design-tokens/",
10+
"page_rank": 1
11+
},
12+
{
13+
"url": "http://localhost/icons/",
14+
"page_rank": 1
15+
},
16+
{
17+
"url": "http://localhost/guidelines/",
18+
"page_rank": -1
19+
},
20+
{
21+
"url": "http://localhost/faq/",
22+
"page_rank": -2
23+
},
24+
{
25+
"url": "http://localhost/platforms/",
26+
"page_rank": -2
27+
},
28+
{
29+
"url": "http://localhost/getting-started/",
30+
"page_rank": -2
31+
},
32+
{
33+
"url": "http://localhost/release-notes/",
34+
"page_rank": -3
35+
},
36+
{
37+
"url": "http://localhost/articles/",
38+
"page_rank": -3
39+
},
40+
{
41+
"url": "http://localhost/downloads/",
42+
"page_rank": -3
43+
},
44+
{
45+
"url": "http://localhost/",
46+
"page_rank": 1
47+
}
48+
],
49+
"separatorsToIndex": ".-_",
50+
"stop_urls": [],
51+
"selectors_exclude": ["span.slds-badge", ".docsearch-ignore"],
52+
"selectors": {
53+
"lvl0": ".docsearch-category",
54+
"lvl1": "h1",
55+
"lvl2": ".docsearch-level-2",
56+
"text": ".site-main-stage p, .docsearch-text, .slds-text-longform p, .site-content th"
57+
},
58+
"min_indexed_level": 1,
59+
"nb_hits": 19300
60+
}

0 commit comments

Comments (0)