Skip to content

Implementation of knowledge base directory setting #2484

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 54 additions & 17 deletions crates/chat-cli/src/cli/chat/cli/knowledge.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use std::io::Write;
use std::path::Path;

use clap::Subcommand;
use crossterm::queue;
Expand All @@ -12,6 +13,8 @@ use semantic_search_client::{
OperationStatus,
SystemStatus,
};
use serde::Deserialize;
use tracing::error;

use crate::cli::chat::tools::sanitize_path_tool_arg;
use crate::cli::chat::{
Expand Down Expand Up @@ -54,6 +57,13 @@ enum OperationResult {
Error(String),
}

/// Settings for the knowledge tool, deserialized from the `knowledge` entry of an agent's
/// `tools_settings`.
///
/// Field names are mapped to camelCase on deserialization, so `base_dir` is read from the
/// `baseDir` key.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Settings {
/// Directory handed to the knowledge store as its base path. Because of `#[serde(default)]`,
/// an absent key deserializes to an empty string rather than an error.
#[serde(default)]
pub(crate) base_dir: String,
}

impl KnowledgeSubcommand {
pub async fn execute(self, os: &Os, session: &mut ChatSession) -> Result<ChatState, ChatError> {
if !Self::is_feature_enabled(os) {
Expand Down Expand Up @@ -90,25 +100,48 @@ impl KnowledgeSubcommand {
}
}

fn get_knowledge_base_dir(session: &mut ChatSession) -> Option<impl AsRef<Path> + use<>> {
let agent = session.conversation.agents.get_active();
if agent.is_none() {
return None;
}

agent.unwrap().tools_settings.get("knowledge")?.get("base_dir");
match agent.unwrap().tools_settings.get("knowledge") {
Some(settings) => match serde_json::from_value::<Settings>(settings.clone()) {
Ok(settings) => Some(settings.base_dir),
Err(e) => {
error!("Failed to deserialize tool settings for execute_bash: {:?}", e);
None
},
},
None => None,
}
}

async fn execute_operation(&self, os: &Os, session: &mut ChatSession) -> OperationResult {
let knowledge_base_dir = Self::get_knowledge_base_dir(session);
match self {
KnowledgeSubcommand::Show => {
match Self::handle_show(session).await {
Ok(_) => OperationResult::Info("".to_string()), // Empty Info, formatting already done
Err(e) => OperationResult::Error(format!("Failed to show contexts: {}", e)),
}
},
KnowledgeSubcommand::Add { path } => Self::handle_add(os, path).await,
KnowledgeSubcommand::Remove { path } => Self::handle_remove(os, path).await,
KnowledgeSubcommand::Update { path } => Self::handle_update(os, path).await,
KnowledgeSubcommand::Add { path } => Self::handle_add(os, path, knowledge_base_dir).await,
KnowledgeSubcommand::Remove { path } => Self::handle_remove(os, path, knowledge_base_dir).await,
KnowledgeSubcommand::Update { path } => Self::handle_update(os, path, knowledge_base_dir).await,
KnowledgeSubcommand::Clear => Self::handle_clear(session).await,
KnowledgeSubcommand::Status => Self::handle_status().await,
KnowledgeSubcommand::Cancel { operation_id } => Self::handle_cancel(operation_id.as_deref()).await,
KnowledgeSubcommand::Status => Self::handle_status(knowledge_base_dir).await,
KnowledgeSubcommand::Cancel { operation_id } => {
Self::handle_cancel(operation_id.as_deref(), knowledge_base_dir).await
},
Comment on lines +131 to +138
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it be better to pass the session in here?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed, the fewer parameters the better, especially if more than one parameter has the same type.

}
}

async fn handle_show(session: &mut ChatSession) -> Result<(), std::io::Error> {
let async_knowledge_store = KnowledgeStore::get_async_instance().await;
let knowledge_base_dir = Self::get_knowledge_base_dir(session);
let async_knowledge_store = KnowledgeStore::get_async_instance(knowledge_base_dir).await;
let store = async_knowledge_store.lock().await;

// Use the async get_all method which is concurrent with indexing
Expand Down Expand Up @@ -210,10 +243,10 @@ impl KnowledgeSubcommand {
}

/// Handle add operation
async fn handle_add(os: &Os, path: &str) -> OperationResult {
async fn handle_add(os: &Os, path: &str, knowledge_base_dir: Option<impl AsRef<Path>>) -> OperationResult {
match Self::validate_and_sanitize_path(os, path) {
Ok(sanitized_path) => {
let async_knowledge_store = KnowledgeStore::get_async_instance().await;
let async_knowledge_store = KnowledgeStore::get_async_instance(knowledge_base_dir).await;
let mut store = async_knowledge_store.lock().await;

// Use the async add method which is fire-and-forget
Expand All @@ -227,9 +260,9 @@ impl KnowledgeSubcommand {
}

/// Handle remove operation
async fn handle_remove(os: &Os, path: &str) -> OperationResult {
async fn handle_remove(os: &Os, path: &str, knowledge_base_dir: Option<impl AsRef<Path>>) -> OperationResult {
let sanitized_path = sanitize_path_tool_arg(os, path);
let async_knowledge_store = KnowledgeStore::get_async_instance().await;
let async_knowledge_store = KnowledgeStore::get_async_instance(knowledge_base_dir).await;
let mut store = async_knowledge_store.lock().await;

// Try path first, then name
Expand All @@ -243,10 +276,10 @@ impl KnowledgeSubcommand {
}

/// Handle update operation
async fn handle_update(os: &Os, path: &str) -> OperationResult {
async fn handle_update(os: &Os, path: &str, knowledge_base_dir: Option<impl AsRef<Path>>) -> OperationResult {
match Self::validate_and_sanitize_path(os, path) {
Ok(sanitized_path) => {
let async_knowledge_store = KnowledgeStore::get_async_instance().await;
let async_knowledge_store = KnowledgeStore::get_async_instance(knowledge_base_dir).await;
let mut store = async_knowledge_store.lock().await;

match store.update_by_path(&sanitized_path).await {
Expand Down Expand Up @@ -278,7 +311,8 @@ impl KnowledgeSubcommand {
return OperationResult::Info("Clear operation cancelled".to_string());
}

let async_knowledge_store = KnowledgeStore::get_async_instance().await;
let knowledge_base_dir = Self::get_knowledge_base_dir(session);
let async_knowledge_store = KnowledgeStore::get_async_instance(knowledge_base_dir).await;
let mut store = async_knowledge_store.lock().await;

// First, cancel any pending operations
Expand Down Expand Up @@ -308,8 +342,8 @@ impl KnowledgeSubcommand {
}

/// Handle status operation
async fn handle_status() -> OperationResult {
let async_knowledge_store = KnowledgeStore::get_async_instance().await;
async fn handle_status(knowledge_base_dir: Option<impl AsRef<Path>>) -> OperationResult {
let async_knowledge_store = KnowledgeStore::get_async_instance(knowledge_base_dir).await;
let store = async_knowledge_store.lock().await;

match store.get_status_data().await {
Expand Down Expand Up @@ -416,8 +450,11 @@ impl KnowledgeSubcommand {
}

/// Handle cancel operation
async fn handle_cancel(operation_id: Option<&str>) -> OperationResult {
let async_knowledge_store = KnowledgeStore::get_async_instance().await;
async fn handle_cancel(
operation_id: Option<&str>,
knowledge_base_dir: Option<impl AsRef<Path>>,
) -> OperationResult {
let async_knowledge_store = KnowledgeStore::get_async_instance(knowledge_base_dir).await;
let mut store = async_knowledge_store.lock().await;

match store.cancel_operation(operation_id).await {
Expand Down
5 changes: 4 additions & 1 deletion crates/chat-cli/src/cli/chat/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1881,7 +1881,10 @@ impl ChatSession {
ev.is_accepted = true;
});

let invoke_result = tool.tool.invoke(os, &mut self.stdout).await;
let invoke_result = tool
.tool
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice

.invoke(os, &mut self.stdout, self.conversation.agents.get_active())
.await;

if self.spinner.is_some() {
queue!(
Expand Down
30 changes: 27 additions & 3 deletions crates/chat-cli/src/cli/chat/tools/knowledge.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use std::io::Write;
use std::path::Path;

use crossterm::queue;
use crossterm::style::{
Expand All @@ -7,12 +8,17 @@ use crossterm::style::{
};
use eyre::Result;
use serde::Deserialize;
use tracing::warn;
use tracing::{
error,
warn,
};

use super::{
InvokeOutput,
OutputKind,
};
use crate::cli::agent::Agent;
use crate::cli::chat::cli::knowledge::Settings;
use crate::database::settings::Setting;
use crate::os::Os;
use crate::util::knowledge_store::KnowledgeStore;
Expand Down Expand Up @@ -305,9 +311,9 @@ impl Knowledge {
Ok(())
}

pub async fn invoke(&self, os: &Os, _updates: &mut impl Write) -> Result<InvokeOutput> {
pub async fn invoke(&self, os: &Os, _updates: &mut impl Write, agent: Option<&Agent>) -> Result<InvokeOutput> {
// Get the async knowledge store singleton
let async_knowledge_store = KnowledgeStore::get_async_instance().await;
let async_knowledge_store = KnowledgeStore::get_async_instance(Self::get_knowledge_base_dir(agent)).await;
let mut store = async_knowledge_store.lock().await;

let result = match self {
Expand Down Expand Up @@ -542,4 +548,22 @@ impl Knowledge {
)
}
}

fn get_knowledge_base_dir(agent: Option<&Agent>) -> Option<impl AsRef<Path> + use<>> {
if agent.is_none() {
return None;
}

agent.unwrap().tools_settings.get("knowledge")?.get("base_dir");
match agent.unwrap().tools_settings.get("knowledge") {
Some(settings) => match serde_json::from_value::<Settings>(settings.clone()) {
Ok(settings) => Some(settings.base_dir),
Err(e) => {
error!("Failed to deserialize tool settings for execute_bash: {:?}", e);
None
},
},
None => None,
}
}
Comment on lines +552 to +568
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a better way to share this function between the knowledge base tool and setting?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IMO, it's fine if the tool has the responsibility to know where this needs to be extracted. Be careful with the usage of unwrap here, so that we can gracefully handle failure scenarios.

}
4 changes: 2 additions & 2 deletions crates/chat-cli/src/cli/chat/tools/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -107,15 +107,15 @@ impl Tool {
}

/// Invokes the tool asynchronously
pub async fn invoke(&self, os: &Os, stdout: &mut impl Write) -> Result<InvokeOutput> {
pub async fn invoke(&self, os: &Os, stdout: &mut impl Write, agent: Option<&Agent>) -> Result<InvokeOutput> {
match self {
Tool::FsRead(fs_read) => fs_read.invoke(os, stdout).await,
Tool::FsWrite(fs_write) => fs_write.invoke(os, stdout).await,
Tool::ExecuteCommand(execute_command) => execute_command.invoke(stdout).await,
Tool::UseAws(use_aws) => use_aws.invoke(os, stdout).await,
Tool::Custom(custom_tool) => custom_tool.invoke(os, stdout).await,
Tool::GhIssue(gh_issue) => gh_issue.invoke(os, stdout).await,
Tool::Knowledge(knowledge) => knowledge.invoke(os, stdout).await,
Tool::Knowledge(knowledge) => knowledge.invoke(os, stdout, agent).await,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Very unsure of this - is there another way for us to pass tool settings? Looks like we've started to pass the agent for requires_acceptance, so figured this would be an acceptable pattern. Again, open to feedback though.

Tool::Thinking(think) => think.invoke(stdout).await,
}
}
Expand Down
28 changes: 19 additions & 9 deletions crates/chat-cli/src/util/knowledge_store.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use std::path::Path;
use std::sync::{
Arc,
LazyLock as Lazy,
Expand Down Expand Up @@ -32,21 +33,21 @@ pub struct KnowledgeStore {

impl KnowledgeStore {
/// Get singleton instance
pub async fn get_async_instance() -> Arc<Mutex<Self>> {
pub async fn get_async_instance(path: Option<impl AsRef<Path>>) -> Arc<Mutex<Self>> {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because any invocation of this could technically create the instance, we need to always pass the path if possible.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, that's the downside of a Lazy singleton

static ASYNC_INSTANCE: Lazy<tokio::sync::OnceCell<Arc<Mutex<KnowledgeStore>>>> =
Lazy::new(tokio::sync::OnceCell::new);

if cfg!(test) {
Arc::new(Mutex::new(
KnowledgeStore::new()
KnowledgeStore::new(path)
.await
.expect("Failed to create test async knowledge store"),
))
} else {
ASYNC_INSTANCE
.get_or_init(|| async {
Arc::new(Mutex::new(
KnowledgeStore::new()
KnowledgeStore::new(path)
.await
.expect("Failed to create async knowledge store"),
))
Expand All @@ -56,12 +57,21 @@ impl KnowledgeStore {
}
}

pub async fn new() -> Result<Self> {
let client = AsyncSemanticSearchClient::new_with_default_dir()
.await
.map_err(|e| eyre::eyre!("Failed to create client: {}", e))?;

Ok(Self { client })
/// Creates a knowledge store backed by an async semantic search client.
///
/// When `path` is `Some`, the client is created with that path; when it is `None`, the
/// client is created with its default directory.
///
/// # Errors
///
/// Returns an error if the underlying client cannot be created.
pub async fn new(path: Option<impl AsRef<Path>>) -> Result<Self> {
    // Pick the constructor first, then share the error mapping between both cases.
    let created = match path {
        Some(base_dir) => AsyncSemanticSearchClient::new(base_dir).await,
        None => AsyncSemanticSearchClient::new_with_default_dir().await,
    };
    let client = created.map_err(|e| eyre::eyre!("Failed to create client: {}", e))?;

    Ok(Self { client })
}

/// Add context - delegates to async client
Expand Down