Skip to content

Commit 43c4ff5

Browse files
committed
Setup search scraper action
1 parent cd6f4b7 commit 43c4ff5

File tree

5 files changed

+344
-0
lines changed

5 files changed

+344
-0
lines changed

.github/workflows/build.yml

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
# Builds the documentation HTML on every push to the tracked branches and
# uploads the generated `build` directory as a workflow artifact.
name: Build Docs

on:
  push:
    branches:
      - feature/docsearch
      - 7.x
      - 7.dev

jobs:
  build:
    name: Build Documentation HTML
    runs-on: ubuntu-latest
    # Only the index name is exported. The setup step never writes a docs URL,
    # so no DOCS_URL output is declared here.
    outputs:
      DOCSEARCH_INDEX: ${{ steps.setup_vars.outputs.DOCSEARCH_INDEX }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      # Chooses the search index: the production index for the 7.x branch,
      # the staging index for every other branch.
      - name: Setup Variables
        id: setup_vars
        env:
          # Contexts are passed through the environment rather than being
          # interpolated into the script text.
          BASE_REF: ${{ github.base_ref }}
          REF: ${{ github.ref }}
        run: |
          if [[ "$BASE_REF" == "7.x" || "$REF" == "refs/heads/7.x" ]]; then
            echo "DOCSEARCH_INDEX=expressionengine7" >> "$GITHUB_OUTPUT"
          else
            echo "DOCSEARCH_INDEX=expressionengine7_staging" >> "$GITHUB_OUTPUT"
          fi

      # Rewrites the placeholder index name in config.yml before the build so
      # the generated pages query the index selected above.
      - name: Update Docsearch Index
        uses: richardrigutins/replace-in-files@v2
        with:
          search-text: "docsearch_index: 'expressionengine'"
          replacement-text: "docsearch_index: '${{ steps.setup_vars.outputs.DOCSEARCH_INDEX }}'"
          files: ./config.yml

      - name: Install NPM and build
        run: |
          npm install
          npm run build

      - name: Archive Build files
        uses: actions/upload-artifact@v4
        with:
          name: EEDocs7.latest
          path: build

      # NOTE(review): the zip produced here is not consumed by any later step
      # in this job - confirm whether it is still needed.
      - name: Zip the docs
        uses: montudor/action-zip@v1
        with:
          args: zip -qq -r EEDocs7.latest.zip build
52+
53+
# search:
54+
# name: Build Search Index
55+
# needs: build
56+
# runs-on: ubuntu-latest
57+
# steps:
58+
# - name: Checkout code
59+
# uses: actions/checkout@v2
60+
61+
# - name: Setup Variables
62+
# run: |
63+
# if [[ "${{github.base_ref}}" == "7.x" || "${{github.ref}}" == "refs/heads/7.x" ]]; then
64+
# echo "DOCS_URL=https://docs.expressionengine.com/latest" >> "$GITHUB_ENV"
65+
# else
66+
# echo "::add-mask::${{secrets.STAGING_DOCS_DOMAIN}}"
67+
# echo "DOCS_URL=${{secrets.STAGING_DOCS_URL}}" >> "$GITHUB_ENV"
68+
# fi
69+
70+
# - name: Configure Docsearch Index
71+
# uses: richardrigutins/replace-in-files@v2
72+
# with:
73+
# search-text: "\"index_uid\": \"expressionengine\""
74+
# replacement-text: "\"index_uid\": \"${{ needs.build.outputs.DOCSEARCH_INDEX }}\""
75+
# files: ./search.config.json
76+
77+
# - name: Configure Docs Url
78+
# uses: richardrigutins/replace-in-files@v2
79+
# with:
80+
# search-text: "https://docs.expressionengine.com/latest"
81+
# replacement-text: ${{ env.DOCS_URL }}
82+
# files: ./search.config.json
83+
84+
# - name: Scrape Docs
85+
# env:
86+
# HOST_URL: ${{ secrets.MEILISEARCH_HOST_URL }}
87+
# API_KEY: ${{ secrets.MEILISEARCH_API_KEY }}
88+
# run: |
89+
# docker run -t --rm \
90+
# -e MEILISEARCH_HOST_URL=$HOST_URL \
91+
# -e MEILISEARCH_API_KEY=$API_KEY \
92+
# -v ./search.config.json:/docs-scraper/search.config.json \
93+
# getmeili/docs-scraper:latest pipenv run ./docs_scraper search.config.json

.github/workflows/search.yml

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
# Manually triggered workflow that points search.config.json at the right
# index and docs URL, then runs the Meilisearch docs-scraper container.
name: search

on:
  workflow_dispatch:

jobs:
  search:
    name: Build Search Index
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      # Chooses the index name and docs URL: production values for the 7.x
      # branch, staging values (taken from secrets) for every other ref.
      - name: Setup Variables
        env:
          # Contexts and secrets are passed through the environment rather
          # than being interpolated into the script text.
          BASE_REF: ${{ github.base_ref }}
          REF: ${{ github.ref }}
          STAGING_DOCS_DOMAIN: ${{ secrets.STAGING_DOCS_DOMAIN }}
          STAGING_DOCS_URL: ${{ secrets.STAGING_DOCS_URL }}
        run: |
          if [[ "$BASE_REF" == "7.x" || "$REF" == "refs/heads/7.x" ]]; then
            echo "DOCSEARCH_INDEX=expressionengine7" >> "$GITHUB_ENV"
            echo "DOCS_URL=https://docs.expressionengine.com/latest" >> "$GITHUB_ENV"
          else
            # Fail early: an empty URL would blank out every start URL in
            # search.config.json and the scraper would index nothing useful.
            if [[ -z "$STAGING_DOCS_URL" ]]; then
              echo "::error::STAGING_DOCS_URL secret is not set"
              exit 1
            fi
            echo "DOCSEARCH_INDEX=expressionengine7_staging" >> "$GITHUB_ENV"
            if [[ -n "$STAGING_DOCS_DOMAIN" ]]; then
              echo "::add-mask::$STAGING_DOCS_DOMAIN"
            fi
            echo "DOCS_URL=$STAGING_DOCS_URL" >> "$GITHUB_ENV"
          fi

      - name: Configure Docsearch Index
        uses: richardrigutins/replace-in-files@v2
        with:
          search-text: "\"index_uid\": \"expressionengine\""
          replacement-text: "\"index_uid\": \"${{ env.DOCSEARCH_INDEX }}\""
          files: ./search.config.json

      - name: Configure Docs Url
        uses: richardrigutins/replace-in-files@v2
        with:
          search-text: "https://docs.expressionengine.com/latest"
          replacement-text: ${{ env.DOCS_URL }}
          files: ./search.config.json

      - name: Scrape Docs
        env:
          MEILISEARCH_HOST_URL: ${{ secrets.MEILISEARCH_HOST_URL }}
          MEILISEARCH_API_KEY: ${{ secrets.MEILISEARCH_API_KEY }}
        # `-e NAME` without a value forwards the variable from the step's
        # environment, so the API key is never expanded onto the command line.
        # The bind mount uses an absolute path built from $PWD.
        run: |
          docker run -t --rm \
            -e MEILISEARCH_HOST_URL \
            -e MEILISEARCH_API_KEY \
            -v "$PWD/search.config.json:/docs-scraper/search.config.json" \
            getmeili/docs-scraper:latest pipenv run ./docs_scraper search.config.json

README.md

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,24 @@ To build the theme assets, run `npm run buildAssets`. You can also dynamically r
3434

3535
Manually load `/build/index.html` in your browser to view your local build. For example, `file:///Users/<username>/Documents/ExpressionEngine-User-Guide/build/index.html`. You can use the side navigation to navigate to different local files, but the search functionality always takes you to the live version at docs.expressionengine.com.
3636

37+
## Using DocSearch Locally
38+
39+
First, choose a DocSearch index name to use for your local testing and set it as `docsearch_index` in `config.yml`.
40+
41+
Then you will need to build the docs and serve a local copy. For simplicity's sake we recommend using the node http-server like this `npx http-server -o ./build`
42+
43+
Next, update all the URLs in `search.config.json` to point at your local copy of the documentation. Do a find/replace on `https://docs.expressionengine.com/latest` => `http://localhost:8080`. You will also need to update the `allowed_domains` array to include this new URL. (If you are on a Mac, you may need to use `http://host.docker.internal:8080` instead so that the scraper container can connect to the docs on your local http-server.)
44+
45+
Finally, you can scrape your local docs with the following Docker command:
46+
47+
```
48+
docker run -t --rm --network=host \
49+
-e MEILISEARCH_HOST_URL=https://docsearch.expressionengine.com \
50+
-e MEILISEARCH_API_KEY={{ SECRET_KEY }} \
51+
-v ./search.config.json:/docs-scraper/search.config.json \
52+
getmeili/docs-scraper:latest pipenv run ./docs_scraper search.config.json
53+
```
54+
3755
## Contributing
3856

3957
See something that needs fixing? Want to improve the user guide or make it more helpful? Great! Check out [CONTRIBUTING.md](CONTRIBUTING.md) for details.

config.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,5 @@ themeDir: theme
88
customVariables:
99
current_version: '7'
1010
current_year: 2024
11+
docsearch_index: 'expressionengine'
12+
docsearch_public_key: '7d283b55c1d7c0e5f340c71b5dfc751d8dc625708f29e582134f7643bc95dbd7'

search.config.json

Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,183 @@
1+
{
2+
"index_uid": "expressionengine",
3+
"allowed_domains": [],
4+
"start_urls": [
5+
{
6+
"url": "https://docs.expressionengine.com/latest/general",
7+
"page_rank": 50
8+
},
9+
{
10+
"url": "https://docs.expressionengine.com/latest/getting-started",
11+
"page_rank": 45
12+
},
13+
{
14+
"url": "https://docs.expressionengine.com/latest/installation",
15+
"page_rank": 40
16+
},
17+
{
18+
"url": "https://docs.expressionengine.com/latest/control-panel",
19+
"page_rank": 5
20+
},
21+
{
22+
"url": "https://docs.expressionengine.com/latest/channels",
23+
"page_rank": 65
24+
},
25+
{
26+
"url": "https://docs.expressionengine.com/latest/config",
27+
"page_rank": 55
28+
},
29+
{
30+
"url": "https://docs.expressionengine.com/latest/templates",
31+
"page_rank": 60
32+
},
33+
{
34+
"url": "https://docs.expressionengine.com/latest/troubleshooting",
35+
"page_rank": 50
36+
},
37+
{
38+
"url": "https://docs.expressionengine.com/latest/fieldtypes",
39+
"page_rank": 55
40+
},
41+
{
42+
"url": "https://docs.expressionengine.com/latest/comment",
43+
"page_rank": 30
44+
},
45+
{
46+
"url": "https://docs.expressionengine.com/latest/cli",
47+
"page_rank": 35
48+
},
49+
{
50+
"url": "https://docs.expressionengine.com/latest/member",
51+
"page_rank": 30
52+
},
53+
{
54+
"url": "https://docs.expressionengine.com/latest/optimization",
55+
"page_rank": 35
56+
},
57+
{
58+
"url": "https://docs.expressionengine.com/latest/security",
59+
"page_rank": 35
60+
},
61+
{
62+
"url": "https://docs.expressionengine.com/latest/localization",
63+
"page_rank": 35
64+
},
65+
{
66+
"url": "https://docs.expressionengine.com/latest/msm",
67+
"page_rank": 30
68+
},
69+
{
70+
"url": "https://docs.expressionengine.com/latest/advanced-usage",
71+
"page_rank": 25
72+
},
73+
{
74+
"url": "https://docs.expressionengine.com/latest/add-ons",
75+
"page_rank": 25
76+
},
77+
{
78+
"url": "https://docs.expressionengine.com/latest/development",
79+
"page_rank": 30
80+
}
81+
],
82+
"sitemap_urls": [],
83+
"stop_urls": [],
84+
"min_indexed_level": 2,
85+
"selectors_exclude": [
86+
".table-of-contents",
87+
".docs-footer"
88+
],
89+
"selectors": {
90+
"lvl0": {
91+
"selector": "#docsearch-content h1",
92+
"global": false
93+
},
94+
"lvl1": {
95+
"selector": "#docsearch-content h2"
96+
},
97+
"lvl2": "#docsearch-content h3",
98+
"lvl3": "#docsearch-content h4",
99+
"lvl4": "#docsearch-content h5",
100+
"lvl5": "#docsearch-content h6",
101+
"text": "#docsearch-content > article > *:not(section), #docsearch-content > article > section > *:not(pre)"
102+
},
103+
"custom_settings": {
104+
"displayedAttributes": [
105+
"*"
106+
],
107+
"searchableAttributes": [
108+
"hierarchy_radio_lvl0",
109+
"hierarchy_radio_lvl1",
110+
"hierarchy_radio_lvl2",
111+
"hierarchy_radio_lvl3",
112+
"hierarchy_radio_lvl4",
113+
"hierarchy_radio_lvl5",
114+
"hierarchy_lvl0",
115+
"hierarchy_lvl1",
116+
"hierarchy_lvl2",
117+
"hierarchy_lvl3",
118+
"hierarchy_lvl4",
119+
"hierarchy_lvl5",
120+
"hierarchy_lvl6",
121+
"content",
122+
"anchor",
123+
"url",
124+
"objectID"
125+
],
126+
"filterableAttributes": [
127+
"hierarchy_lvl0",
128+
"hierarchy_lvl1",
129+
"hierarchy_lvl2",
130+
"hierarchy_lvl3"
131+
],
132+
"sortableAttributes": [],
133+
"rankingRules":
134+
[
135+
"words",
136+
"typo",
137+
"proximity",
138+
"attribute",
139+
"page_rank:desc",
140+
"exactness",
141+
"sort"
142+
],
143+
"stopWords": [
144+
"a", "and", "as", "at", "be", "but", "by",
145+
"do", "does", "doesn't", "for", "from",
146+
"in", "is", "it", "no", "nor", "not",
147+
"of", "off", "on", "or",
148+
"so", "should", "than", "that", "that's", "the",
149+
"then", "there", "there's", "these",
150+
"this", "those", "to", "too",
151+
"up", "was", "wasn't", "what", "what's", "when", "when's",
152+
"where", "where's", "which", "while", "who", "who's",
153+
"with", "won't", "would", "wouldn't", "example", "option"
154+
],
155+
"nonSeparatorTokens": [],
156+
"separatorTokens": [],
157+
"dictionary": [],
158+
"synonyms": {
159+
"add-on": ["addon"],
160+
"addon": ["add-on"],
161+
"fieldtype": ["field type"],
162+
"field type": ["fieldtype"]
163+
},
164+
"distinctAttribute": null,
165+
"typoTolerance": {
166+
"enabled": true,
167+
"minWordSizeForTypos": {
168+
"oneTypo": 5,
169+
"twoTypos": 9
170+
},
171+
"disableOnWords": [],
172+
"disableOnAttributes": []
173+
},
174+
"faceting": {
175+
"maxValuesPerFacet": 100
176+
},
177+
"pagination": {
178+
"maxTotalHits": 50
179+
},
180+
"proximityPrecision": "byWord",
181+
"searchCutoffMs": null
182+
}
183+
}

0 commit comments

Comments
 (0)