Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions plugins/tantivy/js/bindings.gen.ts
Original file line number Diff line number Diff line change
Expand Up @@ -59,12 +59,14 @@ async removeDocument(id: string, collection: string | null) : Promise<Result<nul
/** user-defined types **/

export type CreatedAtFilter = { gte: number | null; lte: number | null; gt: number | null; lt: number | null; eq: number | null }
export type SearchDocument = { id: string; doc_type: string; language: string | null; title: string; content: string; created_at: number }
export type SearchFilters = { created_at: CreatedAtFilter | null; doc_type: string | null }
export type SearchHit = { score: number; document: SearchDocument }
export type SearchOptions = { fuzzy: boolean | null; distance: number | null }
export type HighlightRange = { start: number; end: number }
export type SearchDocument = { id: string; doc_type: string; language: string | null; title: string; content: string; created_at: number; facets?: string[] }
export type SearchFilters = { created_at: CreatedAtFilter | null; doc_type: string | null; facet: string | null }
export type SearchHit = { score: number; document: SearchDocument; title_snippet: Snippet | null; content_snippet: Snippet | null }
export type SearchOptions = { fuzzy: boolean | null; distance: number | null; snippets: boolean | null; snippet_max_chars: number | null; phrase_slop: number | null }
export type SearchRequest = { query: string; collection?: string | null; filters?: SearchFilters; limit?: number; options?: SearchOptions }
export type SearchResult = { hits: SearchHit[]; count: number }
export type Snippet = { fragment: string; highlights: HighlightRange[] }

/** tauri-specta globals **/

Expand Down
202 changes: 194 additions & 8 deletions plugins/tantivy/src/ext.rs
Original file line number Diff line number Diff line change
@@ -1,23 +1,74 @@
use tantivy::collector::{Count, TopDocs};
use tantivy::query::{
BooleanQuery, BoostQuery, FuzzyTermQuery, Occur, Query, QueryParser, TermQuery,
BooleanQuery, BoostQuery, FuzzyTermQuery, Occur, PhraseQuery, Query, QueryParser, TermQuery,
};
use tantivy::schema::IndexRecordOption;
use tantivy::schema::{Facet, IndexRecordOption};
use tantivy::snippet::SnippetGenerator;
use tantivy::{Index, ReloadPolicy, TantivyDocument, Term};
use tauri_plugin_path2::Path2PluginExt;

use crate::query::build_created_at_range_query;
use crate::schema::{extract_search_document, get_fields};
use crate::tokenizer::register_tokenizers;
use crate::{
CollectionConfig, CollectionIndex, IndexState, SearchDocument, SearchHit, SearchRequest,
SearchResult,
CollectionConfig, CollectionIndex, HighlightRange, IndexState, SearchDocument, SearchHit,
SearchRequest, SearchResult, Snippet,
};

pub fn detect_language(text: &str) -> hypr_language::Language {
hypr_language::detect(text)
}

/// Splits a raw search query into quoted phrases and bare terms.
///
/// Text between double quotes is collected (trimmed) into `phrases`; everything
/// outside quotes is whitespace-split into `regular_terms`. An unterminated
/// trailing quote is treated as if it were closed at end of input.
///
/// Returns `(phrases, regular_terms)`, both borrowing from `query`.
///
/// NOTE: the previous implementation indexed the string with *char* positions
/// (`query.chars()` + counter) but sliced with them as *byte* offsets, which
/// produced wrong slices or panicked on non-ASCII queries. `char_indices()`
/// yields byte offsets, so slicing is always on a valid char boundary.
fn parse_query_parts(query: &str) -> (Vec<&str>, Vec<&str>) {
    let mut phrases = Vec::new();
    let mut regular_terms: Vec<&str> = Vec::new();
    let mut in_quote = false;
    // Byte offset just past the opening quote of the phrase being read.
    let mut quote_start = 0;
    // Byte offset where the current run of unquoted text begins.
    let mut current_start = 0;

    for (i, ch) in query.char_indices() {
        if ch != '"' {
            continue;
        }
        if in_quote {
            // Closing quote: capture the phrase body (ignore whitespace-only).
            let phrase = query[quote_start..i].trim();
            if !phrase.is_empty() {
                phrases.push(phrase);
            }
            in_quote = false;
            current_start = i + 1; // '"' is ASCII, so 1 byte
        } else {
            // Opening quote: flush the unquoted text that preceded it.
            // `split_whitespace` never yields empty strings.
            regular_terms.extend(query[current_start..i].split_whitespace());
            in_quote = true;
            quote_start = i + 1;
        }
    }

    if in_quote {
        // Unterminated quote: treat the remainder as a phrase.
        let phrase = query[quote_start..].trim();
        if !phrase.is_empty() {
            phrases.push(phrase);
        }
    } else {
        regular_terms.extend(query[current_start..].split_whitespace());
    }

    (phrases, regular_terms)
}

pub struct Tantivy<'a, R: tauri::Runtime, M: tauri::Manager<R>> {
manager: &'a M,
_runtime: std::marker::PhantomData<fn() -> R>,
Expand Down Expand Up @@ -95,18 +146,80 @@ impl<'a, R: tauri::Runtime, M: tauri::Manager<R>> Tantivy<'a, R, M> {
let searcher = reader.searcher();

let use_fuzzy = request.options.fuzzy.unwrap_or(false);
let phrase_slop = request.options.phrase_slop.unwrap_or(0);

// Title boost factor (3x) to match Orama's title:3, content:1 behavior
const TITLE_BOOST: f32 = 3.0;

let mut combined_query: Box<dyn Query> = if use_fuzzy {
let distance = request.options.distance.unwrap_or(1);
let terms: Vec<&str> = request.query.split_whitespace().collect();

// Parse query to extract phrases (quoted) and regular terms
let (phrases, regular_terms) = parse_query_parts(&request.query);

let mut term_queries: Vec<(Occur, Box<dyn Query>)> = Vec::new();

// For each term, create a Must clause that requires the term to match
// in either title OR content (with title boosted)
for term in terms {
// Handle quoted phrases with PhraseQuery
for phrase in phrases {
let words: Vec<&str> = phrase.split_whitespace().collect();
if words.len() > 1 {
// Create phrase query for title field
let title_terms: Vec<Term> = words
.iter()
.map(|w| Term::from_field_text(fields.title, w))
.collect();
let mut title_phrase = PhraseQuery::new(title_terms);
title_phrase.set_slop(phrase_slop);

// Create phrase query for content field
let content_terms: Vec<Term> = words
.iter()
.map(|w| Term::from_field_text(fields.content, w))
.collect();
let mut content_phrase = PhraseQuery::new(content_terms);
content_phrase.set_slop(phrase_slop);

// Boost title matches by 3x
let boosted_title: Box<dyn Query> =
Box::new(BoostQuery::new(Box::new(title_phrase), TITLE_BOOST));
let content_query: Box<dyn Query> = Box::new(content_phrase);

// Phrase must match in at least one field (title OR content)
let phrase_field_query = BooleanQuery::new(vec![
(Occur::Should, boosted_title),
(Occur::Should, content_query),
]);

term_queries.push((Occur::Must, Box::new(phrase_field_query)));
} else if !words.is_empty() {
// Single word "phrase" - treat as regular term
let word = words[0];
let title_fuzzy = FuzzyTermQuery::new(
Term::from_field_text(fields.title, word),
distance,
true,
);
let content_fuzzy = FuzzyTermQuery::new(
Term::from_field_text(fields.content, word),
distance,
true,
);

let boosted_title: Box<dyn Query> =
Box::new(BoostQuery::new(Box::new(title_fuzzy), TITLE_BOOST));
let content_query: Box<dyn Query> = Box::new(content_fuzzy);

let term_field_query = BooleanQuery::new(vec![
(Occur::Should, boosted_title),
(Occur::Should, content_query),
]);

term_queries.push((Occur::Must, Box::new(term_field_query)));
}
}

// Handle regular (unquoted) terms with fuzzy matching
for term in regular_terms {
let title_fuzzy =
FuzzyTermQuery::new(Term::from_field_text(fields.title, term), distance, true);
let content_fuzzy = FuzzyTermQuery::new(
Expand Down Expand Up @@ -157,20 +270,81 @@ impl<'a, R: tauri::Runtime, M: tauri::Manager<R>> Tantivy<'a, R, M> {
]));
}

// Apply facet filter
if let Some(ref facet_path) = request.filters.facet {
if let Ok(facet) = Facet::from_text(facet_path) {
let facet_term = Term::from_facet(fields.facets, &facet);
let facet_query = TermQuery::new(facet_term, IndexRecordOption::Basic);
combined_query = Box::new(BooleanQuery::new(vec![
(Occur::Must, combined_query),
(Occur::Must, Box::new(facet_query)),
]));
}
}

// Use tuple collector to get both top docs and total count
let (top_docs, count) = searcher.search(
&combined_query,
&(TopDocs::with_limit(request.limit), Count),
)?;

let generate_snippets = request.options.snippets.unwrap_or(false);
let snippet_max_chars = request.options.snippet_max_chars.unwrap_or(150);

let (title_snippet_gen, content_snippet_gen) = if generate_snippets {
let mut title_gen =
SnippetGenerator::create(&searcher, &*combined_query, fields.title)?;
title_gen.set_max_num_chars(snippet_max_chars);

let mut content_gen =
SnippetGenerator::create(&searcher, &*combined_query, fields.content)?;
content_gen.set_max_num_chars(snippet_max_chars);

(Some(title_gen), Some(content_gen))
} else {
(None, None)
};

let mut hits = Vec::new();
for (score, doc_address) in top_docs {
let retrieved_doc: TantivyDocument = searcher.doc(doc_address)?;

if let Some(search_doc) = extract_search_document(schema, &fields, &retrieved_doc) {
let title_snippet = title_snippet_gen.as_ref().map(|generator| {
let snippet = generator.snippet_from_doc(&retrieved_doc);
Snippet {
fragment: snippet.fragment().to_string(),
highlights: snippet
.highlighted()
.iter()
.map(|range| HighlightRange {
start: range.start,
end: range.end,
})
.collect(),
}
});

let content_snippet = content_snippet_gen.as_ref().map(|generator| {
let snippet = generator.snippet_from_doc(&retrieved_doc);
Snippet {
fragment: snippet.fragment().to_string(),
highlights: snippet
.highlighted()
.iter()
.map(|range| HighlightRange {
start: range.start,
end: range.end,
})
.collect(),
}
});

hits.push(SearchHit {
score,
document: search_doc,
title_snippet,
content_snippet,
});
}
}
Expand Down Expand Up @@ -232,6 +406,12 @@ impl<'a, R: tauri::Runtime, M: tauri::Manager<R>> Tantivy<'a, R, M> {
doc.add_text(fields.content, &document.content);
doc.add_i64(fields.created_at, document.created_at);

for facet_path in &document.facets {
if let Ok(facet) = Facet::from_text(facet_path) {
doc.add_facet(fields.facets, facet);
}
}

writer.add_document(doc)?;
writer.commit()?;

Expand Down Expand Up @@ -273,6 +453,12 @@ impl<'a, R: tauri::Runtime, M: tauri::Manager<R>> Tantivy<'a, R, M> {
doc.add_text(fields.content, &document.content);
doc.add_i64(fields.created_at, document.created_at);

for facet_path in &document.facets {
if let Ok(facet) = Facet::from_text(facet_path) {
doc.add_facet(fields.facets, facet);
}
}

writer.add_document(doc)?;
writer.commit()?;

Expand Down
20 changes: 20 additions & 0 deletions plugins/tantivy/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,28 @@ pub struct SearchDocument {
pub title: String,
pub content: String,
pub created_at: i64,
#[serde(default)]
pub facets: Vec<String>,
}

/// A highlighted excerpt of a matched field, returned alongside a search hit.
#[derive(Debug, Clone, Serialize, Deserialize, specta::Type)]
pub struct Snippet {
    /// The excerpt text extracted from the field.
    pub fragment: String,
    /// Ranges within `fragment` marking the query matches to highlight.
    /// NOTE(review): offsets come from tantivy's snippet API — presumably byte
    /// offsets into `fragment`; confirm before using as char indices in JS.
    pub highlights: Vec<HighlightRange>,
}

/// A span to highlight inside a snippet fragment.
/// Built from a `Range` (`range.start`/`range.end`), so it is presumably
/// half-open: `[start, end)` — TODO confirm against tantivy's snippet docs.
#[derive(Debug, Clone, Serialize, Deserialize, specta::Type)]
pub struct HighlightRange {
    /// Start offset of the highlighted span within the fragment.
    pub start: usize,
    /// End offset (exclusive) of the highlighted span within the fragment.
    pub end: usize,
}

/// A single search result: the matched document plus relevance metadata.
#[derive(Debug, Clone, Serialize, Deserialize, specta::Type)]
pub struct SearchHit {
    /// Relevance score from tantivy (title matches are boosted 3x at query time).
    pub score: f32,
    /// The document reconstructed from the index's stored fields.
    pub document: SearchDocument,
    /// Highlighted title excerpt; `None` unless `SearchOptions.snippets` was enabled.
    pub title_snippet: Option<Snippet>,
    /// Highlighted content excerpt; `None` unless `SearchOptions.snippets` was enabled.
    pub content_snippet: Option<Snippet>,
}

#[derive(Debug, Clone, Serialize, Deserialize, specta::Type)]
Expand All @@ -54,12 +70,16 @@ pub struct CreatedAtFilter {
/// Optional constraints applied on top of the text query; `None` fields are ignored.
pub struct SearchFilters {
    /// Restrict results to documents whose `created_at` satisfies this range filter.
    pub created_at: Option<CreatedAtFilter>,
    /// Restrict results to documents of exactly this type.
    pub doc_type: Option<String>,
    /// Restrict results to documents tagged with this facet path
    /// (parsed via `Facet::from_text`; an unparseable path is silently ignored).
    pub facet: Option<String>,
}

/// Tuning knobs for query execution; every field falls back to a default when unset.
#[derive(Debug, Clone, Default, Serialize, Deserialize, specta::Type)]
pub struct SearchOptions {
    /// Enable fuzzy term matching (default: false).
    pub fuzzy: Option<bool>,
    /// Maximum edit distance for fuzzy matching (default: 1).
    pub distance: Option<u8>,
    /// Generate highlighted snippets for each hit (default: false).
    pub snippets: Option<bool>,
    /// Maximum characters per generated snippet (default: 150).
    pub snippet_max_chars: Option<usize>,
    /// Allowed word-position slop for quoted phrase queries (default: 0 = exact).
    pub phrase_slop: Option<u32>,
}

fn default_limit() -> usize {
Expand Down
13 changes: 12 additions & 1 deletion plugins/tantivy/src/schema.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
use tantivy::TantivyDocument;
use tantivy::schema::{FAST, Field, STORED, STRING, Schema, TextFieldIndexing, TextOptions, Value};
use tantivy::schema::{
FAST, FacetOptions, Field, STORED, STRING, Schema, TextFieldIndexing, TextOptions, Value,
};

use crate::SearchDocument;

Expand All @@ -10,6 +12,7 @@ pub struct SchemaFields {
pub title: Field,
pub content: Field,
pub created_at: Field,
pub facets: Field,
}

pub fn build_schema() -> Schema {
Expand All @@ -28,6 +31,7 @@ pub fn build_schema() -> Schema {
schema_builder.add_text_field("title", text_options.clone());
schema_builder.add_text_field("content", text_options);
schema_builder.add_i64_field("created_at", FAST | STORED);
schema_builder.add_facet_field("facets", FacetOptions::default());
schema_builder.build()
}

Expand All @@ -39,6 +43,7 @@ pub fn get_fields(schema: &Schema) -> SchemaFields {
title: schema.get_field("title").unwrap(),
content: schema.get_field("content").unwrap(),
created_at: schema.get_field("created_at").unwrap(),
facets: schema.get_field("facets").unwrap(),
}
}

Expand All @@ -57,13 +62,19 @@ pub fn extract_search_document(
let content = doc.get_first(fields.content)?.as_str()?.to_string();
let created_at = doc.get_first(fields.created_at)?.as_i64()?;

let facets: Vec<String> = doc
.get_all(fields.facets)
.filter_map(|v| v.as_facet().map(|f| f.to_string()))
.collect();

Some(SearchDocument {
id,
doc_type,
language,
title,
content,
created_at,
facets,
})
}

Expand Down
Loading