Skip to content

Commit 4a0450d

Browse files
authored
fix(embeddings): modified embeddings utils to only index English docs (#2078)
1 parent 00ae718 commit 4a0450d

File tree

3 files changed

+36
-10
lines changed

3 files changed

+36
-10
lines changed

.github/workflows/ci.yml

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -198,10 +198,30 @@ jobs:
198198
"${IMAGE_BASE}:${{ github.sha }}-arm64"
199199
docker manifest push "${IMAGE_BASE}:${{ github.sha }}"
200200
201-
# Process docs embeddings (after ECR images are pushed)
201+
# Check if docs changed
202+
check-docs-changes:
203+
name: Check Docs Changes
204+
runs-on: blacksmith-4vcpu-ubuntu-2404
205+
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
206+
outputs:
207+
docs_changed: ${{ steps.filter.outputs.docs }}
208+
steps:
209+
- uses: actions/checkout@v4
210+
with:
211+
fetch-depth: 2 # Need at least 2 commits to detect changes
212+
- uses: dorny/paths-filter@v3
213+
id: filter
214+
with:
215+
filters: |
216+
docs:
217+
- 'apps/docs/content/docs/en/**'
218+
- 'apps/sim/scripts/process-docs.ts'
219+
- 'apps/sim/lib/chunkers/**'
220+
221+
# Process docs embeddings (only when docs change, after ECR images are pushed)
202222
process-docs:
203223
name: Process Docs
204-
needs: build-amd64
205-
if: github.event_name == 'push' && (github.ref == 'refs/heads/main' || github.ref == 'refs/heads/staging')
224+
needs: [build-amd64, check-docs-changes]
225+
if: needs.check-docs-changes.outputs.docs_changed == 'true'
206226
uses: ./.github/workflows/docs-embeddings.yml
207227
secrets: inherit

.github/workflows/docs-embeddings.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ jobs:
88
process-docs-embeddings:
99
name: Process Documentation Embeddings
1010
runs-on: blacksmith-8vcpu-ubuntu-2404
11-
if: github.ref == 'refs/heads/main' || github.ref == 'refs/heads/staging'
11+
if: github.ref == 'refs/heads/main'
1212

1313
steps:
1414
- name: Checkout code
@@ -41,6 +41,6 @@ jobs:
4141
- name: Process docs embeddings
4242
working-directory: ./apps/sim
4343
env:
44-
DATABASE_URL: ${{ github.ref == 'refs/heads/main' && secrets.DATABASE_URL || secrets.STAGING_DATABASE_URL }}
44+
DATABASE_URL: ${{ secrets.DATABASE_URL }}
4545
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
4646
run: bun run scripts/process-docs.ts --clear

apps/sim/scripts/process-docs.ts

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ interface ProcessingOptions {
3434
*/
3535
async function processDocs(options: ProcessingOptions = {}) {
3636
const config = {
37-
docsPath: options.docsPath || path.join(process.cwd(), '../../apps/docs/content/docs'),
37+
docsPath: options.docsPath || path.join(process.cwd(), '../../apps/docs/content/docs/en'),
3838
baseUrl: options.baseUrl || (isDev ? 'http://localhost:4000' : 'https://docs.sim.ai'),
3939
chunkSize: options.chunkSize || 1024,
4040
minChunkSize: options.minChunkSize || 100,
@@ -216,25 +216,31 @@ async function main() {
216216
217217
Usage: bun run process-docs.ts [options]
218218
219+
By default, processes English (en) documentation only.
220+
Note: When changing the language scope, use the --clear flag to remove old embeddings.
221+
219222
Options:
220223
--clear Clear existing embeddings before processing
221224
--dry-run Process and display results without saving to DB
222225
--verbose Show detailed output including text previews
223-
--path <path> Custom path to docs directory
226+
--path <path> Custom path to docs directory (default: ../../apps/docs/content/docs/en)
224227
--url <url> Custom base URL for links
225228
--chunk-size <n> Custom chunk size in tokens (default: 1024)
226229
--help, -h Show this help message
227230
228231
Examples:
229-
# Dry run to test chunking
232+
# Dry run to test chunking (English docs)
230233
bun run process-docs.ts --dry-run
231234
232-
# Process and save to database
235+
# Process and save to database (English docs)
233236
bun run process-docs.ts
234237
235-
# Clear existing and reprocess
238+
# Clear existing and reprocess (English docs)
236239
bun run process-docs.ts --clear
237240
241+
# Process a different language
242+
bun run process-docs.ts --path ../../apps/docs/content/docs/es
243+
238244
# Custom path with verbose output
239245
bun run process-docs.ts --path ./my-docs --verbose
240246
`)

0 commit comments

Comments
 (0)