Skip to content

Commit db21b66

Browse files
fern-support and Kapil Gowru
authored
Scribe issue turbopuffer (#222)
Co-authored-by: Kapil Gowru <[email protected]> Co-authored-by: Kapil Gowru <[email protected]>
1 parent 0bd750a commit db21b66

File tree

2 files changed

+98
-38
lines changed

2 files changed

+98
-38
lines changed

.github/scripts/fern-scribe.js

Lines changed: 96 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,15 @@
11
const { Octokit } = require('@octokit/rest');
2+
const Turbopuffer = require('@turbopuffer/turbopuffer').default;
23
const fs = require('fs').promises;
34
const path = require('path');
45

56
class FernScribe {
67
constructor() {
78
this.octokit = new Octokit({ auth: process.env.GITHUB_TOKEN });
8-
this.turbopufferEndpoint = process.env.TURBOPUFFER_ENDPOINT;
9-
this.turbopufferApiKey = process.env.TURBOPUFFER_API_KEY;
9+
this.turbopuffer = new Turbopuffer({
10+
apiKey: process.env.TURBOPUFFER_API_KEY,
11+
region: "gcp-us-east4",
12+
});
1013
this.anthropicApiKey = process.env.ANTHROPIC_API_KEY;
1114
this.slackToken = process.env.SLACK_USER_TOKEN;
1215

@@ -303,6 +306,39 @@ class FernScribe {
303306
}
304307
}
305308

309+
reciprocalRankFusion(semanticResults, bm25Results) {
310+
const k = 60; // RRF constant
311+
const combinedScores = new Map();
312+
313+
// Add semantic results with RRF scoring
314+
semanticResults.forEach((result, index) => {
315+
const score = 1 / (k + index + 1);
316+
const id = result.id;
317+
if (id) {
318+
combinedScores.set(id, { result, score });
319+
}
320+
});
321+
322+
// Add BM25 results with RRF scoring
323+
bm25Results.forEach((result, index) => {
324+
const score = 1 / (k + index + 1);
325+
const id = result.id;
326+
if (id) {
327+
const existing = combinedScores.get(id);
328+
if (existing) {
329+
existing.score += score;
330+
} else {
331+
combinedScores.set(id, { result, score });
332+
}
333+
}
334+
});
335+
336+
// Sort by combined score and return results
337+
return Array.from(combinedScores.values())
338+
.sort((a, b) => b.score - a.score)
339+
.map(item => item.result);
340+
}
341+
306342
async queryTurbopuffer(query, opts = {}) {
307343
if (!query || query.trimStart().length === 0) {
308344
console.log('🔧 Empty query provided to Turbopuffer');
@@ -312,46 +348,69 @@ class FernScribe {
312348
try {
313349
console.log('🔧 Querying Turbopuffer with options:', JSON.stringify(opts, null, 2));
314350

351+
const {
352+
namespace,
353+
topK = 10,
354+
mode = "hybrid",
355+
documentIdsToIgnore = [],
356+
urlsToIgnore = []
357+
} = opts;
358+
359+
const ns = this.turbopuffer.namespace(namespace);
360+
315361
// Create embedding for the query
316-
const embeddingResponse = await this.createEmbedding(query);
317-
if (!embeddingResponse) {
362+
const vector = await this.createEmbedding(query);
363+
if (!vector) {
318364
console.error('🔧 Failed to create embedding for query');
319365
return [];
320366
}
321367

322-
const requestBody = {
323-
query_embedding: embeddingResponse,
324-
top_k: opts.topK || 10,
325-
namespace: opts.namespace,
326-
...(opts.documentIdsToIgnore && { document_ids_to_ignore: opts.documentIdsToIgnore }),
327-
...(opts.urlsToIgnore && { urls_to_ignore: opts.urlsToIgnore })
328-
};
329-
330-
console.log('🔧 Turbopuffer request body (without embedding):', {
331-
...requestBody,
332-
query_embedding: `[${embeddingResponse.length} dimensions]`
333-
});
334-
335-
const response = await fetch(this.turbopufferEndpoint, {
336-
method: 'POST',
337-
headers: {
338-
'Authorization': `Bearer ${this.turbopufferApiKey}`,
339-
'Content-Type': 'application/json'
340-
},
341-
body: JSON.stringify(requestBody)
342-
});
343-
344-
if (!response.ok) {
345-
const errorText = await response.text();
346-
console.error('🔧 Turbopuffer API error details:', errorText);
347-
throw new Error(`Turbopuffer API error: ${response.status}`);
348-
}
349-
350-
const data = await response.json();
351-
console.log('🔧 Turbopuffer response structure:', Object.keys(data));
352-
console.log('🔧 Turbopuffer results count:', data.results?.length || 0);
368+
// Build filters
369+
const documentIdFilters = documentIdsToIgnore.map((id) => ["id", "NotEq", id]);
370+
const urlFilters = urlsToIgnore.map((url) => ["url", "NotEq", url]);
371+
372+
const allFilters = [...documentIdFilters, ...urlFilters];
373+
const queryFilters = allFilters.length > 0
374+
? (allFilters.length === 1 ? allFilters[0] : ["And", allFilters])
375+
: undefined;
376+
377+
console.log('🔧 Turbopuffer query filters:', queryFilters);
378+
379+
// Semantic search (vector similarity)
380+
const semanticResponse = mode !== "bm25" ? await ns.query({
381+
rank_by: ["vector", "ANN", vector],
382+
top_k: topK,
383+
include_attributes: true,
384+
filters: queryFilters,
385+
}) : { rows: [] };
386+
387+
// BM25 search (keyword matching) - search across multiple text fields
388+
const bm25Response = mode !== "semantic" && query.length < 1024 ? await ns.query({
389+
rank_by: [
390+
"Sum",
391+
[
392+
["chunk", "BM25", query],
393+
["title", "BM25", query],
394+
["keywords", "BM25", query],
395+
],
396+
],
397+
top_k: topK,
398+
include_attributes: true,
399+
filters: queryFilters,
400+
}) : { rows: [] };
401+
402+
const semanticResults = semanticResponse.rows || [];
403+
const bm25Results = bm25Response.rows || [];
404+
405+
console.log('🔧 Semantic results count:', semanticResults.length);
406+
console.log('🔧 BM25 results count:', bm25Results.length);
407+
408+
// Combine results using reciprocal rank fusion
409+
const fusedResults = this.reciprocalRankFusion(semanticResults, bm25Results);
410+
411+
console.log('🔧 Fused results count:', fusedResults.length);
353412

354-
return data.results || [];
413+
return fusedResults;
355414
} catch (error) {
356415
console.error('🔧 Turbopuffer query failed:', error);
357416
return [];
@@ -370,7 +429,7 @@ class FernScribe {
370429
'Content-Type': 'application/json'
371430
},
372431
body: JSON.stringify({
373-
model: 'text-embedding-3-small',
432+
model: 'text-embedding-3-large',
374433
input: text
375434
})
376435
});

.github/scripts/package.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,10 @@
55
"main": "fern-scribe.js",
66
"dependencies": {
77
"@octokit/rest": "^20.0.2",
8+
"@turbopuffer/turbopuffer": "^0.10.14",
89
"node-fetch": "^3.3.2"
910
},
1011
"engines": {
1112
"node": ">=18"
1213
}
13-
}
14+
}

0 commit comments

Comments
 (0)