Skip to content

Commit 9935ff3

Browse files
Knowledge beta improvements phase 2: Refactor async_client and add support for BM25 (#2608)
* [feat] Adds support for embedding-type
  - Remove unused BM25TextEmbedder from embedder factory
  - Replace with MockTextEmbedder for Fast embedding type
  - Remove bm25.rs file and related imports/exports
  - Fix BM25Context and SemanticContext to save data after adding points
  - Fix BM25 data filename from bm25_data.json to data.bm25.json
  - Add base_dir storage to ContextManager for proper path resolution
  - Major refactoring to async context management with background operations
  - Adds separate optimized index for bm25
  - Fix all clippy warnings and remove dead code
  BM25 search now works correctly with persistent contexts.
* fix: Update cancel_most_recent_operation to use OperationManager
  - Fix cancel_most_recent_operation to delegate to OperationManager instead of accessing active_operations directly
  - Add missing cancel_most_recent_operation method to OperationManager
  - Ensures proper separation of concerns in the refactored architecture
* fix: Remove BM25 from benchmark tests
  - Remove BM25TextEmbedder references from benchmark_test.rs
  - Remove benchmark_bm25_model function
  - Keep only Candle model benchmarks
  - Fixes compilation error after BM25TextEmbedder removal
* docs: Update semantic-search-client README for index types
  - Update description to mention BM25 and vector embeddings
  - Add Multiple Index Types feature
  - Update Embeddings section to Index Types section
  - Remove ONNX references (no longer supported)
  - Reflect Fast (BM25) vs Best (Semantic) terminology
  - Update section headers for consistency
* fix: remove auto-save from context add_data_points methods
  - Remove automatic save() calls from add_data_points in both semantic_context.rs and bm25_context.rs
  - Add explicit save() calls in context_creator.rs after data addition is complete
  - Improves performance by avoiding multiple disk writes during batch operations
  - Addresses PR #2608 feedback about inefficient disk I/O on each context addition
* fix: resolve compilation error and operation cancel warnings
  - Fix return type mismatch in knowledge_store.rs cancel_operation method
  - Change cancel_most_recent_operation to return Ok instead of Err when no operations exist
  - Eliminates 'Failed to cancel operations' warnings when no operations are active
* fix: improve error handling and code cleanup
  - Update error handling in knowledge_store.rs
  - Clean up context_creator.rs formatting and comments
---------
Co-authored-by: Kenneth S. <[email protected]>
1 parent 9316c6b commit 9935ff3

31 files changed

+2782
-1772
lines changed

Cargo.lock

Lines changed: 35 additions & 35 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/chat-cli/src/cli/chat/cli/knowledge.rs

Lines changed: 53 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,9 @@ pub enum KnowledgeSubcommand {
3737
/// Exclude patterns (e.g., `node_modules/**`, `target/**`)
3838
#[arg(long, action = clap::ArgAction::Append)]
3939
exclude: Vec<String>,
40+
/// Index type to use (Fast, Best)
41+
#[arg(long)]
42+
index_type: Option<String>,
4043
},
4144
/// Remove specified knowledge base entry by path
4245
#[command(alias = "rm")]
@@ -108,7 +111,12 @@ impl KnowledgeSubcommand {
108111
Err(e) => OperationResult::Error(format!("Failed to show knowledge base entries: {}", e)),
109112
}
110113
},
111-
KnowledgeSubcommand::Add { path, include, exclude } => Self::handle_add(os, path, include, exclude).await,
114+
KnowledgeSubcommand::Add {
115+
path,
116+
include,
117+
exclude,
118+
index_type,
119+
} => Self::handle_add(os, path, include, exclude, index_type).await,
112120
KnowledgeSubcommand::Remove { path } => Self::handle_remove(os, path).await,
113121
KnowledgeSubcommand::Update { path } => Self::handle_update(os, path).await,
114122
KnowledgeSubcommand::Clear => Self::handle_clear(os, session).await,
@@ -205,7 +213,11 @@ impl KnowledgeSubcommand {
205213
session.stderr,
206214
style::Print(" Items: "),
207215
style::SetForegroundColor(Color::Yellow),
208-
style::Print(format!("{}", entry.item_count)),
216+
style::Print(entry.item_count.to_string()),
217+
style::SetForegroundColor(Color::Reset),
218+
style::Print(" | Index Type: "),
219+
style::SetForegroundColor(Color::Magenta),
220+
style::Print(entry.embedding_type.description().to_string()),
209221
style::SetForegroundColor(Color::Reset),
210222
style::Print(" | Persistent: ")
211223
)?;
@@ -231,11 +243,21 @@ impl KnowledgeSubcommand {
231243
}
232244

233245
/// Read a list of string patterns stored under `setting` in the settings database.
///
/// Looks the setting up via `os.database.settings.get(setting)`; when the stored
/// value is an array, every element that is a string is copied into the result.
/// Returns an empty vector when the lookup yields nothing or the value is not an
/// array. `handle_add` calls this to fill in include/exclude patterns that were
/// not given on the command line.
///
/// NOTE(review): the stored value looks like a JSON value (`as_array` / `as_str`)
/// — confirm against the settings API.
246+
fn get_db_patterns(os: &crate::os::Os, setting: crate::database::settings::Setting) -> Vec<String> {
247+
os.database
248+
.settings
249+
.get(setting)
250+
.and_then(|v| v.as_array()) // a non-array value falls through to the empty default
251+
.map(|arr| arr.iter().filter_map(|v| v.as_str().map(|s| s.to_string())).collect()) // non-string elements are skipped
252+
.unwrap_or_default()
253+
}
254+
234255
async fn handle_add(
235256
os: &Os,
236257
path: &str,
237258
include_patterns: &[String],
238259
exclude_patterns: &[String],
260+
index_type: &Option<String>,
239261
) -> OperationResult {
240262
match Self::validate_and_sanitize_path(os, path) {
241263
Ok(sanitized_path) => {
@@ -245,14 +267,30 @@ impl KnowledgeSubcommand {
245267
};
246268
let mut store = async_knowledge_store.lock().await;
247269

248-
let options = if include_patterns.is_empty() && exclude_patterns.is_empty() {
249-
crate::util::knowledge_store::AddOptions::with_db_defaults(os)
270+
let include = if include_patterns.is_empty() {
271+
Self::get_db_patterns(os, crate::database::settings::Setting::KnowledgeDefaultIncludePatterns)
250272
} else {
251-
crate::util::knowledge_store::AddOptions::new()
252-
.with_include_patterns(include_patterns.to_vec())
253-
.with_exclude_patterns(exclude_patterns.to_vec())
273+
include_patterns.to_vec()
254274
};
255275

276+
let exclude = if exclude_patterns.is_empty() {
277+
Self::get_db_patterns(os, crate::database::settings::Setting::KnowledgeDefaultExcludePatterns)
278+
} else {
279+
exclude_patterns.to_vec()
280+
};
281+
282+
let embedding_type_resolved = index_type.clone().or_else(|| {
283+
os.database
284+
.settings
285+
.get(crate::database::settings::Setting::KnowledgeIndexType)
286+
.and_then(|v| v.as_str().map(|s| s.to_string()))
287+
});
288+
289+
let options = crate::util::knowledge_store::AddOptions::new()
290+
.with_include_patterns(include)
291+
.with_exclude_patterns(exclude)
292+
.with_embedding_type(embedding_type_resolved);
293+
256294
match store.add(path, &sanitized_path.clone(), options).await {
257295
Ok(message) => OperationResult::Info(message),
258296
Err(e) => {
@@ -586,7 +624,10 @@ mod tests {
586624
assert!(result.is_ok());
587625
let cli = result.unwrap();
588626

589-
if let KnowledgeSubcommand::Add { path, include, exclude } = cli.knowledge {
627+
if let KnowledgeSubcommand::Add {
628+
path, include, exclude, ..
629+
} = cli.knowledge
630+
{
590631
assert_eq!(path, "/some/path");
591632
assert_eq!(include, vec!["*.rs", "**/*.md"]);
592633
assert_eq!(exclude, vec!["node_modules/**", "target/**"]);
@@ -615,7 +656,10 @@ mod tests {
615656
assert!(result.is_ok());
616657

617658
let cli = result.unwrap();
618-
if let KnowledgeSubcommand::Add { path, include, exclude } = cli.knowledge {
659+
if let KnowledgeSubcommand::Add {
660+
path, include, exclude, ..
661+
} = cli.knowledge
662+
{
619663
assert_eq!(path, "/some/path");
620664
assert!(include.is_empty());
621665
assert!(exclude.is_empty());

0 commit comments

Comments
 (0)