@@ -16,10 +16,10 @@ const sqliteAIBaseUrl = "https://aiserver.vital-rhino.eks.euc1.ryujaz.sqlite.clo
1616const sqliteAIAPI = "/v1/ai/embeddings"
1717//-----------------------
1818
19- const requestid = request . params . requestid ;
2019const query = request . params . query ;
20+ const limit = parseInt ( request . params . limit ) || 10 ; // Number of top results to return
2121
22- // get embedding from sqlite-ai-server
22+ // Get embedding from sqlite-ai-server
2323const data = { "text" : query } ;
2424const response = await fetch ( sqliteAIBaseUrl + sqliteAIAPI , {
2525 method : "POST" ,
@@ -36,26 +36,21 @@ if (!response.ok) {
3636const result = await response . json ( ) ;
3737const query_embedding = result . data . embedding ;
3838
39- // clean query for full-text search
39+ // Clean query for full-text search
4040const query_fts = ( query . toLowerCase ( ) . match ( / \b \w + \b / g) || [ ] ) . join ( " " ) + "*" ;
4141
42- // --- TEST ---
43- //const test_embedding = await connection.sql('SELECT embedding FROM chunks LIMIT 1;');
44- //const query_embedding = test_embedding[0].embedding;
45- // ------------
46-
4742// Vector configuration must match the embedding parameters used during database generation
4843await connection . sql ( "SELECT vector_init('chunks', 'embedding', 'type=INT8,dimension=768,distance=cosine')" ) ;
4944
5045const res = await connection . sql (
5146 `
52- -- sqlite-vector KNN vector search results
47+ -- sqlite-vector KNN vector search results
5348 WITH vec_matches AS (
5449 SELECT
5550 v.rowid AS chunk_id,
5651 row_number() OVER (ORDER BY v.distance) AS rank_number,
5752 v.distance
58- FROM vector_quantize_scan('chunks', 'embedding', ?, 10 ) AS v
53+ FROM vector_quantize_scan('chunks', 'embedding', ?, ? ) AS v
5954 ),
6055 -- Full-text search results
6156 fts_matches AS (
@@ -65,7 +60,7 @@ const res = await connection.sql(
6560 rank AS score
6661 FROM chunks_fts
6762 WHERE chunks_fts MATCH ?
68- LIMIT 10
63+ LIMIT ?
6964 ),
7065 -- combine FTS5 + vector search results with RRF
7166 matches AS (
@@ -84,28 +79,70 @@ const res = await connection.sql(
8479 FULL OUTER JOIN fts_matches
8580 ON vec_matches.chunk_id = fts_matches.chunk_id
8681 )
87- SELECT
88- documents.id,
89- documents.uri,
90- documents.content as document_content,
91- documents.metadata,
92- chunks.content AS snippet,
93- vec_rank,
94- fts_rank,
95- combined_rank,
96- vec_distance,
97- fts_score
98- FROM matches
99- JOIN chunks ON chunks.id = matches.chunk_id
100- JOIN documents ON documents.id = chunks.document_id
82+ SELECT
83+ documents.id,
84+ documents.uri,
85+ documents.content as document_content,
86+ documents.metadata,
87+ chunks.content AS snippet,
88+ vec_rank,
89+ fts_rank,
90+ combined_rank,
91+ vec_distance,
92+ fts_score
93+ FROM matches
94+ JOIN chunks ON chunks.id = matches.chunk_id
95+ JOIN documents ON documents.id = chunks.document_id
10196 ORDER BY combined_rank DESC
10297 ;
103- ` , query_embedding , query_fts )
98+ ` , query_embedding , limit , query_fts , limit )
99+
100+ // The query results may contain multiple matching chunks per document.
101+ // We want to return one result per document, so we will group by document id and take
102+ // the top-ranked chunk as a snippet.
103+ const documentsChunk = new Map ( ) ;
104+ res . forEach ( item => {
105+ if ( ! documentsChunk . has ( item . id ) || item . combined_rank > documentsChunk . get ( item . id ) . combined_rank ) {
106+ documentsChunk . set ( item . id , item ) ;
107+ }
108+ } ) ;
109+ const topResults = Array . from ( documentsChunk . values ( ) ) . slice ( 0 , limit ) ;
104110
111+ // ----- URLs for results -----
112+ // Customize this section based on how URLs should be constructed for your documents.
113+ // This example uses 'base_url' from metadata and 'slug' if available, otherwise derives from URI.
114+ // ----------------------------
115+ const resultsWithUrls = topResults
116+ . map ( item => {
117+ const metadata = JSON . parse ( item . metadata ) ;
118+ const baseUrl = metadata . base_url ;
119+ const slug = metadata . extracted ?. slug ;
120+ const uri = item . uri ;
121+
122+ let fullUrl ;
123+ if ( slug ) {
124+ fullUrl = `${ baseUrl } ${ slug } ` ;
125+ } else {
126+ const uriWithoutExtension = uri
127+ . toLowerCase ( )
128+ . replace ( / \. ( m d x ? | m d ) $ / i, '' ) ;
129+ fullUrl = `${ baseUrl } ${ uriWithoutExtension } ` ;
130+ }
131+
132+ return {
133+ id : item . id ,
134+ url : fullUrl ,
135+ title : metadata . extracted ?. title || metadata . generated ?. title ,
136+ snippet : item . snippet ,
137+ } ;
138+ } ) ;
105139
106140return {
107141 data : {
108- search : res ,
109- requestid : requestid
142+ /**
143+ * @type {Array<{id: number, url: string, title: string, snippet: string}> }
144+ * The search results with constructed URLs, titles, and snippets.
145+ */
146+ search : resultsWithUrls
110147 }
111148}
0 commit comments