Skip to content

Commit 89d3bf8

Browse files
committed
Pre-build and commit search indexes
1 parent a6309c4 commit 89d3bf8

File tree

11 files changed

+350
-45
lines changed

11 files changed

+350
-45
lines changed
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
name: "Rebuild Search Index"
2+
3+
# Runs when data files change on staging
4+
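# Rebuilds indexes and commits them; pushes made with the default GITHUB_TOKEN do not start push-triggered workflows, so the build workflow must be chained explicitly (e.g. via workflow_run)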
5+
on:
6+
push:
7+
branches:
8+
- staging
9+
paths:
10+
- 'data/**'
11+
12+
jobs:
13+
rebuild-index:
14+
runs-on: ubuntu-latest
15+
steps:
16+
- name: Check out repository
17+
uses: actions/checkout@v4
18+
with:
19+
token: ${{ secrets.GITHUB_TOKEN }}
20+
21+
- name: Set up Python
22+
uses: actions/setup-python@v5
23+
with:
24+
python-version: '3.11'
25+
26+
- name: Install pipenv
27+
run: pip install pipenv
28+
29+
- name: Cache pipenv virtualenv
30+
uses: actions/cache@v4
31+
with:
32+
path: ~/.local/share/virtualenvs
33+
key: ${{ runner.os }}-pipenv-${{ hashFiles('Pipfile.lock') }}
34+
35+
- name: Install dependencies
36+
run: pipenv install --deploy
37+
38+
- name: Download spaCy model
39+
run: pipenv run python -m spacy download en_core_web_md
40+
41+
- name: Check if indexes are stale
42+
id: check
43+
run: |
44+
if pipenv run python manage.py indexctl check; then
45+
echo "stale=false" >> $GITHUB_OUTPUT
46+
else
47+
echo "stale=true" >> $GITHUB_OUTPUT
48+
fi
49+
env:
50+
SECRET_KEY: "github-actions-build-key"
51+
52+
- name: Rebuild and pack indexes
53+
if: steps.check.outputs.stale == 'true'
54+
run: |
55+
echo "Indexes are stale, rebuilding..."
56+
pipenv run python manage.py quicksetup --rebuild-index
57+
pipenv run python manage.py indexctl pack
58+
env:
59+
SECRET_KEY: "github-actions-build-key"
60+
61+
- name: Commit updated indexes
62+
if: steps.check.outputs.stale == 'true'
63+
run: |
64+
git config user.name "github-actions[bot]"
65+
git config user.email "github-actions[bot]@users.noreply.github.com"
66+
git add search/indexes/ search/index_manifest.json
67+
if git diff --staged --quiet; then
68+
echo "No changes to commit"
69+
else
70+
git commit -m "Rebuild search indexes
71+
72+
Triggered by data changes in ${{ github.sha }}"
73+
git push
74+
fi
75+

.gitignore

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,10 @@ db.sqlite3-journal
1919
# Django static files
2020
staticfiles/
2121

22-
# Search indexes
22+
# Search indexes (unpacked - these are generated from search/indexes/)
2323
server/whoosh_index/
2424
whoosh_index/
25+
server/vector_index.pkl
2526

2627
# IDE and editor configurations
2728
.vscode/
@@ -33,7 +34,5 @@ api/tests/approved_files/*.recieved.*
3334
# Generated files
3435
openapi-schema.yml
3536

36-
server/vector_index.pkl
37-
3837
# Temporary files for magic items update
3938
temp_magic_items/

README.md

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -74,24 +74,31 @@ pipenv install --dev
7474

7575
## Build
7676

77-
Crate a local database and import game content.
77+
Create a local database, import game content, and set up search indexes:
78+
7879
```bash
79-
pipenv run python manage.py quicksetup --noindex
80+
pipenv run python manage.py quicksetup
8081
```
8182

82-
To make sure the API is always using your updated code, this command must be run again if:
83-
- You add/remove/edit Game Content
84-
- You edit Python code
85-
- You switch git branches
83+
Run this again if you switch git branches or pull new changes.
8684

8785

8886
### Search Indexing
8987

90-
To use the search function, you must build the search index by running the above command without the `--noindex` flag.
88+
Search indexes are pre-built and included in the repo. Running `quicksetup` unpacks them automatically:
89+
9190
```bash
9291
pipenv run python manage.py quicksetup
9392
```
9493

94+
If you've changed data in `data/`, rebuild the indexes before committing:
95+
96+
```bash
97+
pipenv run python manage.py quickindex
98+
```
99+
100+
This takes 2-3 minutes and updates `search/indexes/`, which should be committed with your data changes.
101+
95102
## Run
96103

97104
Run the server locally. This server is only for development and shall __not__ be used in production. The server will be available at `http://localhost:8000`.

api/management/commands/quicksetup.py

Lines changed: 29 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,12 @@ def add_arguments(self, parser: argparse.ArgumentParser):
1717
parser.add_argument(
1818
"--noindex",
1919
action="store_true",
20-
help="Skip building search indexes.",
20+
help="Skip unpacking/building search indexes.",
21+
)
22+
parser.add_argument(
23+
"--rebuild-index",
24+
action="store_true",
25+
help="Force rebuild indexes from data (slow, ~2-3 min).",
2126
)
2227
parser.add_argument(
2328
"--clean",
@@ -67,12 +72,14 @@ def handle(self, *args, **options):
6772
))
6873
return
6974

70-
if not options['noindex']:
71-
if settings.BUILD_V2_INDEX:
72-
self.stdout.write('Building the search index...')
73-
build_search_index()
74-
else:
75-
self.stdout.write('Skipping index build because of --noindex.')
75+
if options['noindex']:
76+
self.stdout.write('Skipping indexes because of --noindex.')
77+
elif options['rebuild_index']:
78+
self.stdout.write('Rebuilding search indexes from data (this takes 2-3 min)...')
79+
build_search_index()
80+
elif settings.BUILD_V2_INDEX:
81+
self.stdout.write('Setting up search indexes...')
82+
setup_search_index()
7683

7784
self.stdout.write(self.style.SUCCESS('API setup complete.'))
7885

@@ -121,3 +128,18 @@ def collect_static():
121128

122129
def build_search_index():
123130
call_command('buildindex', '--v1', '--v2')
131+
132+
133+
def setup_search_index():
134+
"""Unpack pre-built indexes if available, otherwise build from scratch."""
135+
archive_dir = Path("search/indexes")
136+
whoosh_archive = archive_dir / "whoosh_index.tar.gz"
137+
vector_archive = archive_dir / "vector_index.pkl.gz"
138+
139+
if whoosh_archive.exists() and vector_archive.exists():
140+
print("Unpacking pre-built indexes...")
141+
call_command('indexctl', 'unpack')
142+
else:
143+
print("No pre-built indexes found, building from scratch...")
144+
print("(This takes 2-3 min. Run 'manage.py indexctl pack' to avoid this next time.)")
145+
build_search_index()

search/index_manifest.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
{
2+
"data_hash": "1d4de910e94fdbc038b2168426aea7ad",
3+
"version": 1
4+
}

search/indexes/vector_index.pkl.gz

19.3 MB
Binary file not shown.

search/indexes/whoosh_index.tar.gz

17.1 MB
Binary file not shown.

search/management/commands/buildindex.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,16 @@
88
from django.core.management import call_command
99
from django.core.management.base import BaseCommand
1010

11-
from django.db import connection, transaction
11+
from django.db import connection
1212

1313
from api import models as v1
1414
from api_v2 import models as v2
1515
from search import models as search
1616

1717
class Command(BaseCommand):
18-
"""Implementation for the `manage.py `index_v1` subcommand."""
18+
"""Build search indexes for text, fuzzy, and semantic search."""
1919

20-
help = 'Build the v1 search index.'
20+
help = 'Build the search indexes (FTS, Whoosh, and vector).'
2121

2222
def add_arguments(self, parser: argparse.ArgumentParser):
2323
"""Define arguments for the `manage.py buildindex` subcommand."""
@@ -194,7 +194,7 @@ def build_vector_index(self):
194194
print("Saving vector index to disk...")
195195

196196
index_data = {
197-
"names": all_names,
197+
"names": all_names,
198198
"metadata": all_metadata,
199199
"embeddings": embeddings,
200200
"vector_size": nlp.vocab.vectors_length

0 commit comments

Comments
 (0)