diff --git a/plugins/tantivy/js/bindings.gen.ts b/plugins/tantivy/js/bindings.gen.ts index 5c8ae58f3a..1db2690f03 100644 --- a/plugins/tantivy/js/bindings.gen.ts +++ b/plugins/tantivy/js/bindings.gen.ts @@ -60,11 +60,11 @@ async removeDocument(id: string, collection: string | null) : Promise> Tantivy<'a, R, M> { let use_fuzzy = request.options.fuzzy.unwrap_or(false); + // Title boost factor (3x) to match Orama's title:3, content:1 behavior + const TITLE_BOOST: f32 = 3.0; + let mut combined_query: Box = if use_fuzzy { let distance = request.options.distance.unwrap_or(1); let terms: Vec<&str> = request.query.split_whitespace().collect(); - let mut subqueries: Vec<(Occur, Box)> = Vec::new(); + let mut term_queries: Vec<(Occur, Box)> = Vec::new(); + // For each term, create a Must clause that requires the term to match + // in either title OR content (with title boosted) for term in terms { let title_fuzzy = FuzzyTermQuery::new(Term::from_field_text(fields.title, term), distance, true); @@ -107,16 +115,28 @@ impl<'a, R: tauri::Runtime, M: tauri::Manager> Tantivy<'a, R, M> { true, ); - subqueries.push((Occur::Should, Box::new(title_fuzzy))); - subqueries.push((Occur::Should, Box::new(content_fuzzy))); + // Boost title matches by 3x + let boosted_title: Box = + Box::new(BoostQuery::new(Box::new(title_fuzzy), TITLE_BOOST)); + let content_query: Box = Box::new(content_fuzzy); + + // Each term must match in at least one field (title OR content) + let term_field_query = BooleanQuery::new(vec![ + (Occur::Should, boosted_title), + (Occur::Should, content_query), + ]); + + // All terms must be present (Must for each term) + term_queries.push((Occur::Must, Box::new(term_field_query))); } - Box::new(BooleanQuery::new(subqueries)) + Box::new(BooleanQuery::new(term_queries)) } else { let query_parser = QueryParser::for_index(index, vec![fields.title, fields.content]); query_parser.parse_query(&request.query)? }; + // Apply created_at filter if let Some(ref created_at_filter) = request.filters.created_at { let range_query = build_created_at_range_query(fields.created_at, created_at_filter); if let Some(rq) = range_query { @@ -127,7 +147,21 @@ impl<'a, R: tauri::Runtime, M: tauri::Manager> Tantivy<'a, R, M> { } } - let top_docs = searcher.search(&combined_query, &TopDocs::with_limit(request.limit))?; + // Apply doc_type filter + if let Some(ref doc_type) = request.filters.doc_type { + let doc_type_term = Term::from_field_text(fields.doc_type, doc_type); + let doc_type_query = TermQuery::new(doc_type_term, IndexRecordOption::Basic); + combined_query = Box::new(BooleanQuery::new(vec![ + (Occur::Must, combined_query), + (Occur::Must, Box::new(doc_type_query)), + ])); + } + + // Use tuple collector to get both top docs and total count + let (top_docs, count) = searcher.search( + &combined_query, + &(TopDocs::with_limit(request.limit), Count), + )?; let mut hits = Vec::new(); for (score, doc_address) in top_docs { @@ -141,7 +175,7 @@ impl<'a, R: tauri::Runtime, M: tauri::Manager> Tantivy<'a, R, M> { } } - Ok(SearchResult { hits }) + Ok(SearchResult { hits, count }) } pub async fn reindex(&self, collection: Option) -> Result<(), crate::Error> { diff --git a/plugins/tantivy/src/lib.rs b/plugins/tantivy/src/lib.rs index aa408db75d..a41e0a2c43 100644 --- a/plugins/tantivy/src/lib.rs +++ b/plugins/tantivy/src/lib.rs @@ -38,6 +38,7 @@ pub struct SearchHit { #[derive(Debug, Clone, Serialize, Deserialize, specta::Type)] pub struct SearchResult { pub hits: Vec, + pub count: usize, } #[derive(Debug, Clone, Default, Serialize, Deserialize, specta::Type)] @@ -52,6 +53,7 @@ pub struct CreatedAtFilter { #[derive(Debug, Clone, Default, Serialize, Deserialize, specta::Type)] pub struct SearchFilters { pub created_at: Option, + pub doc_type: Option, } #[derive(Debug, Clone, Default, Serialize, Deserialize, specta::Type)]