Skip to content

Commit d5fdd78

Browse files
feat(tantivy): Phase 2 feature parity with Orama (#2801)
Co-authored-by: yujonglee <yujonglee.dev@gmail.com>
Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
1 parent d261e13 commit d5fdd78

File tree

3 files changed

+46
-10
lines changed

3 files changed

+46
-10
lines changed

plugins/tantivy/js/bindings.gen.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,11 +60,11 @@ async removeDocument(id: string, collection: string | null) : Promise<Result<nul
6060

6161
export type CreatedAtFilter = { gte: number | null; lte: number | null; gt: number | null; lt: number | null; eq: number | null }
6262
export type SearchDocument = { id: string; doc_type: string; language: string | null; title: string; content: string; created_at: number }
63-
export type SearchFilters = { created_at: CreatedAtFilter | null }
63+
export type SearchFilters = { created_at: CreatedAtFilter | null; doc_type: string | null }
6464
export type SearchHit = { score: number; document: SearchDocument }
6565
export type SearchOptions = { fuzzy: boolean | null; distance: number | null }
6666
export type SearchRequest = { query: string; collection?: string | null; filters?: SearchFilters; limit?: number; options?: SearchOptions }
67-
export type SearchResult = { hits: SearchHit[] }
67+
export type SearchResult = { hits: SearchHit[]; count: number }
6868

6969
/** tauri-specta globals **/
7070

plugins/tantivy/src/ext.rs

Lines changed: 42 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
1-
use tantivy::collector::TopDocs;
2-
use tantivy::query::{BooleanQuery, FuzzyTermQuery, Occur, Query, QueryParser};
1+
use tantivy::collector::{Count, TopDocs};
2+
use tantivy::query::{
3+
BooleanQuery, BoostQuery, FuzzyTermQuery, Occur, Query, QueryParser, TermQuery,
4+
};
5+
use tantivy::schema::IndexRecordOption;
36
use tantivy::{Index, ReloadPolicy, TantivyDocument, Term};
47
use tauri_plugin_path2::Path2PluginExt;
58

@@ -93,11 +96,16 @@ impl<'a, R: tauri::Runtime, M: tauri::Manager<R>> Tantivy<'a, R, M> {
9396

9497
let use_fuzzy = request.options.fuzzy.unwrap_or(false);
9598

99+
// Title boost factor (3x) to match Orama's title:3, content:1 behavior
100+
const TITLE_BOOST: f32 = 3.0;
101+
96102
let mut combined_query: Box<dyn Query> = if use_fuzzy {
97103
let distance = request.options.distance.unwrap_or(1);
98104
let terms: Vec<&str> = request.query.split_whitespace().collect();
99-
let mut subqueries: Vec<(Occur, Box<dyn Query>)> = Vec::new();
105+
let mut term_queries: Vec<(Occur, Box<dyn Query>)> = Vec::new();
100106

107+
// For each term, create a Must clause that requires the term to match
108+
// in either title OR content (with title boosted)
101109
for term in terms {
102110
let title_fuzzy =
103111
FuzzyTermQuery::new(Term::from_field_text(fields.title, term), distance, true);
@@ -107,16 +115,28 @@ impl<'a, R: tauri::Runtime, M: tauri::Manager<R>> Tantivy<'a, R, M> {
107115
true,
108116
);
109117

110-
subqueries.push((Occur::Should, Box::new(title_fuzzy)));
111-
subqueries.push((Occur::Should, Box::new(content_fuzzy)));
118+
// Boost title matches by TITLE_BOOST so title hits outrank content-only hits
119+
let boosted_title: Box<dyn Query> =
120+
Box::new(BoostQuery::new(Box::new(title_fuzzy), TITLE_BOOST));
121+
let content_query: Box<dyn Query> = Box::new(content_fuzzy);
122+
123+
// Each term must match in at least one field (title OR content)
124+
let term_field_query = BooleanQuery::new(vec![
125+
(Occur::Should, boosted_title),
126+
(Occur::Should, content_query),
127+
]);
128+
129+
// All terms must be present (Must for each term)
130+
term_queries.push((Occur::Must, Box::new(term_field_query)));
112131
}
113132

114-
Box::new(BooleanQuery::new(subqueries))
133+
Box::new(BooleanQuery::new(term_queries))
115134
} else {
116135
let query_parser = QueryParser::for_index(index, vec![fields.title, fields.content]);
117136
query_parser.parse_query(&request.query)?
118137
};
119138

139+
// Apply created_at filter
120140
if let Some(ref created_at_filter) = request.filters.created_at {
121141
let range_query = build_created_at_range_query(fields.created_at, created_at_filter);
122142
if let Some(rq) = range_query {
@@ -127,7 +147,21 @@ impl<'a, R: tauri::Runtime, M: tauri::Manager<R>> Tantivy<'a, R, M> {
127147
}
128148
}
129149

130-
let top_docs = searcher.search(&combined_query, &TopDocs::with_limit(request.limit))?;
150+
// Apply doc_type filter
151+
if let Some(ref doc_type) = request.filters.doc_type {
152+
let doc_type_term = Term::from_field_text(fields.doc_type, doc_type);
153+
let doc_type_query = TermQuery::new(doc_type_term, IndexRecordOption::Basic);
154+
combined_query = Box::new(BooleanQuery::new(vec![
155+
(Occur::Must, combined_query),
156+
(Occur::Must, Box::new(doc_type_query)),
157+
]));
158+
}
159+
160+
// Use a tuple collector so a single search pass yields both the top docs
// (capped at `request.limit`) and the total number of matching documents
161+
let (top_docs, count) = searcher.search(
162+
&combined_query,
163+
&(TopDocs::with_limit(request.limit), Count),
164+
)?;
131165

132166
let mut hits = Vec::new();
133167
for (score, doc_address) in top_docs {
@@ -141,7 +175,7 @@ impl<'a, R: tauri::Runtime, M: tauri::Manager<R>> Tantivy<'a, R, M> {
141175
}
142176
}
143177

144-
Ok(SearchResult { hits })
178+
Ok(SearchResult { hits, count })
145179
}
146180

147181
pub async fn reindex(&self, collection: Option<String>) -> Result<(), crate::Error> {

plugins/tantivy/src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ pub struct SearchHit {
3838
#[derive(Debug, Clone, Serialize, Deserialize, specta::Type)]
3939
pub struct SearchResult {
4040
pub hits: Vec<SearchHit>,
41+
pub count: usize,
4142
}
4243

4344
#[derive(Debug, Clone, Default, Serialize, Deserialize, specta::Type)]
@@ -52,6 +53,7 @@ pub struct CreatedAtFilter {
5253
#[derive(Debug, Clone, Default, Serialize, Deserialize, specta::Type)]
5354
pub struct SearchFilters {
5455
pub created_at: Option<CreatedAtFilter>,
56+
pub doc_type: Option<String>,
5557
}
5658

5759
#[derive(Debug, Clone, Default, Serialize, Deserialize, specta::Type)]

0 commit comments

Comments
 (0)