11const { Octokit } = require ( '@octokit/rest' ) ;
2+ const Turbopuffer = require ( '@turbopuffer/turbopuffer' ) . default ;
23const fs = require ( 'fs' ) . promises ;
34const path = require ( 'path' ) ;
45
56class FernScribe {
67 constructor ( ) {
78 this . octokit = new Octokit ( { auth : process . env . GITHUB_TOKEN } ) ;
8- this . turbopufferEndpoint = process . env . TURBOPUFFER_ENDPOINT ;
9- this . turbopufferApiKey = process . env . TURBOPUFFER_API_KEY ;
9+ this . turbopuffer = new Turbopuffer ( {
10+ apiKey : process . env . TURBOPUFFER_API_KEY ,
11+ region : "gcp-us-east4" ,
12+ } ) ;
1013 this . anthropicApiKey = process . env . ANTHROPIC_API_KEY ;
1114 this . slackToken = process . env . SLACK_USER_TOKEN ;
1215
@@ -303,6 +306,39 @@ class FernScribe {
303306 }
304307 }
305308
309+ reciprocalRankFusion ( semanticResults , bm25Results ) {
310+ const k = 60 ; // RRF constant
311+ const combinedScores = new Map ( ) ;
312+
313+ // Add semantic results with RRF scoring
314+ semanticResults . forEach ( ( result , index ) => {
315+ const score = 1 / ( k + index + 1 ) ;
316+ const id = result . id ;
317+ if ( id ) {
318+ combinedScores . set ( id , { result, score } ) ;
319+ }
320+ } ) ;
321+
322+ // Add BM25 results with RRF scoring
323+ bm25Results . forEach ( ( result , index ) => {
324+ const score = 1 / ( k + index + 1 ) ;
325+ const id = result . id ;
326+ if ( id ) {
327+ const existing = combinedScores . get ( id ) ;
328+ if ( existing ) {
329+ existing . score += score ;
330+ } else {
331+ combinedScores . set ( id , { result, score } ) ;
332+ }
333+ }
334+ } ) ;
335+
336+ // Sort by combined score and return results
337+ return Array . from ( combinedScores . values ( ) )
338+ . sort ( ( a , b ) => b . score - a . score )
339+ . map ( item => item . result ) ;
340+ }
341+
306342 async queryTurbopuffer ( query , opts = { } ) {
307343 if ( ! query || query . trimStart ( ) . length === 0 ) {
308344 console . log ( '🔧 Empty query provided to Turbopuffer' ) ;
@@ -312,46 +348,69 @@ class FernScribe {
312348 try {
313349 console . log ( '🔧 Querying Turbopuffer with options:' , JSON . stringify ( opts , null , 2 ) ) ;
314350
351+ const {
352+ namespace,
353+ topK = 10 ,
354+ mode = "hybrid" ,
355+ documentIdsToIgnore = [ ] ,
356+ urlsToIgnore = [ ]
357+ } = opts ;
358+
359+ const ns = this . turbopuffer . namespace ( namespace ) ;
360+
315361 // Create embedding for the query
316- const embeddingResponse = await this . createEmbedding ( query ) ;
317- if ( ! embeddingResponse ) {
362+ const vector = await this . createEmbedding ( query ) ;
363+ if ( ! vector ) {
318364 console . error ( '🔧 Failed to create embedding for query' ) ;
319365 return [ ] ;
320366 }
321367
322- const requestBody = {
323- query_embedding : embeddingResponse ,
324- top_k : opts . topK || 10 ,
325- namespace : opts . namespace ,
326- ...( opts . documentIdsToIgnore && { document_ids_to_ignore : opts . documentIdsToIgnore } ) ,
327- ...( opts . urlsToIgnore && { urls_to_ignore : opts . urlsToIgnore } )
328- } ;
329-
330- console . log ( '🔧 Turbopuffer request body (without embedding):' , {
331- ...requestBody ,
332- query_embedding : `[${ embeddingResponse . length } dimensions]`
333- } ) ;
334-
335- const response = await fetch ( this . turbopufferEndpoint , {
336- method : 'POST' ,
337- headers : {
338- 'Authorization' : `Bearer ${ this . turbopufferApiKey } ` ,
339- 'Content-Type' : 'application/json'
340- } ,
341- body : JSON . stringify ( requestBody )
342- } ) ;
343-
344- if ( ! response . ok ) {
345- const errorText = await response . text ( ) ;
346- console . error ( '🔧 Turbopuffer API error details:' , errorText ) ;
347- throw new Error ( `Turbopuffer API error: ${ response . status } ` ) ;
348- }
349-
350- const data = await response . json ( ) ;
351- console . log ( '🔧 Turbopuffer response structure:' , Object . keys ( data ) ) ;
352- console . log ( '🔧 Turbopuffer results count:' , data . results ?. length || 0 ) ;
368+ // Build filters
369+ const documentIdFilters = documentIdsToIgnore . map ( ( id ) => [ "id" , "NotEq" , id ] ) ;
370+ const urlFilters = urlsToIgnore . map ( ( url ) => [ "url" , "NotEq" , url ] ) ;
371+
372+ const allFilters = [ ...documentIdFilters , ...urlFilters ] ;
373+ const queryFilters = allFilters . length > 0
374+ ? ( allFilters . length === 1 ? allFilters [ 0 ] : [ "And" , allFilters ] )
375+ : undefined ;
376+
377+ console . log ( '🔧 Turbopuffer query filters:' , queryFilters ) ;
378+
379+ // Semantic search (vector similarity)
380+ const semanticResponse = mode !== "bm25" ? await ns . query ( {
381+ rank_by : [ "vector" , "ANN" , vector ] ,
382+ top_k : topK ,
383+ include_attributes : true ,
384+ filters : queryFilters ,
385+ } ) : { rows : [ ] } ;
386+
387+ // BM25 search (keyword matching) - search across multiple text fields
388+ const bm25Response = mode !== "semantic" && query . length < 1024 ? await ns . query ( {
389+ rank_by : [
390+ "Sum" ,
391+ [
392+ [ "chunk" , "BM25" , query ] ,
393+ [ "title" , "BM25" , query ] ,
394+ [ "keywords" , "BM25" , query ] ,
395+ ] ,
396+ ] ,
397+ top_k : topK ,
398+ include_attributes : true ,
399+ filters : queryFilters ,
400+ } ) : { rows : [ ] } ;
401+
402+ const semanticResults = semanticResponse . rows || [ ] ;
403+ const bm25Results = bm25Response . rows || [ ] ;
404+
405+ console . log ( '🔧 Semantic results count:' , semanticResults . length ) ;
406+ console . log ( '🔧 BM25 results count:' , bm25Results . length ) ;
407+
408+ // Combine results using reciprocal rank fusion
409+ const fusedResults = this . reciprocalRankFusion ( semanticResults , bm25Results ) ;
410+
411+ console . log ( '🔧 Fused results count:' , fusedResults . length ) ;
353412
354- return data . results || [ ] ;
413+ return fusedResults ;
355414 } catch ( error ) {
356415 console . error ( '🔧 Turbopuffer query failed:' , error ) ;
357416 return [ ] ;
@@ -370,7 +429,7 @@ class FernScribe {
370429 'Content-Type' : 'application/json'
371430 } ,
372431 body : JSON . stringify ( {
373- model : 'text-embedding-3-small ' ,
432+ model : 'text-embedding-3-large ' ,
374433 input : text
375434 } )
376435 } ) ;
0 commit comments