diff --git a/docs/.astro/types.d.ts b/docs/.astro/types.d.ts
index f4a7f4ee5..6db6e4f04 100644
--- a/docs/.astro/types.d.ts
+++ b/docs/.astro/types.d.ts
@@ -409,13 +409,6 @@ declare module 'astro:content' {
collection: "docs";
data: InferEntrySchema<"docs">
} & { render(): Render[".md"] };
-"guides/usage-based-pricing.md": {
- id: "guides/usage-based-pricing.md";
- slug: "guides/usage-based-pricing";
- body: string;
- collection: "docs";
- data: InferEntrySchema<"docs">
-} & { render(): Render[".md"] };
"guides/version-specific/enterprise/getting-started.md": {
id: "guides/version-specific/enterprise/getting-started.md";
slug: "guides/version-specific/enterprise/getting-started";
@@ -500,6 +493,13 @@ declare module 'astro:content' {
collection: "docs";
data: InferEntrySchema<"docs">
} & { render(): Render[".md"] };
+"introduction/usage-based-pricing.md": {
+ id: "introduction/usage-based-pricing.md";
+ slug: "introduction/usage-based-pricing";
+ body: string;
+ collection: "docs";
+ data: InferEntrySchema<"docs">
+} & { render(): Render[".md"] };
"privacy.md": {
id: "privacy.md";
slug: "privacy";
diff --git a/docs/astro.config.mjs b/docs/astro.config.mjs
index 9597d5aff..4016c39fc 100644
--- a/docs/astro.config.mjs
+++ b/docs/astro.config.mjs
@@ -351,10 +351,10 @@ export default defineConfig({
}
},
{
- label: 'BYOK',
+ label: 'Configure Providers (BYOK)',
link: '/byok/',
attrs: {
- 'aria-label': 'Learn about Bring Your Own Key (BYOK)'
+ 'aria-label': 'Configure Providers (BYOK) documentation'
}
},
{
diff --git a/docs/src/assets/byok.png b/docs/src/assets/byok.png
deleted file mode 100644
index cdc35950e..000000000
Binary files a/docs/src/assets/byok.png and /dev/null differ
diff --git a/docs/src/assets/byok_login_start.png b/docs/src/assets/byok_login_start.png
deleted file mode 100644
index 6212a08f8..000000000
Binary files a/docs/src/assets/byok_login_start.png and /dev/null differ
diff --git a/docs/src/assets/configure_providers/chat_model_configuration_dialog.png b/docs/src/assets/configure_providers/chat_model_configuration_dialog.png
new file mode 100644
index 000000000..feabf84de
Binary files /dev/null and b/docs/src/assets/configure_providers/chat_model_configuration_dialog.png differ
diff --git a/docs/src/assets/byok_2.png b/docs/src/assets/configure_providers/choose_provider.png
similarity index 100%
rename from docs/src/assets/byok_2.png
rename to docs/src/assets/configure_providers/choose_provider.png
diff --git a/docs/src/assets/configure_providers/completion_model_configuration_dialog.png b/docs/src/assets/configure_providers/completion_model_configuration_dialog.png
new file mode 100644
index 000000000..a30d7b8a0
Binary files /dev/null and b/docs/src/assets/configure_providers/completion_model_configuration_dialog.png differ
diff --git a/docs/src/assets/byok_1.png b/docs/src/assets/configure_providers/configure_providers_menu.png
similarity index 100%
rename from docs/src/assets/byok_1.png
rename to docs/src/assets/configure_providers/configure_providers_menu.png
diff --git a/docs/src/assets/configure_providers/provider_configuration.png b/docs/src/assets/configure_providers/provider_configuration.png
new file mode 100644
index 000000000..04abe638f
Binary files /dev/null and b/docs/src/assets/configure_providers/provider_configuration.png differ
diff --git a/docs/src/content/docs/byok.md b/docs/src/content/docs/byok.md
index 7ccb15f5f..c1244a1c7 100644
--- a/docs/src/content/docs/byok.md
+++ b/docs/src/content/docs/byok.md
@@ -1,22 +1,115 @@
---
-title: "Bring Your Own Key (BYOK)"
+title: "Configure Providers (BYOK)"
+description: "How to use Bring Your Own Key (BYOK) to connect your own API keys and models in Refact."
---
-## Introduction
+## Introduction
-Bring Your Own Key (BYOK) allows users to specify their API keys and select models for chat, completion, and embedding tasks across different AI platforms. This feature enables seamless integration with various services while maintaining control over API keys.
+The **Configure Providers** feature (also known as BYOK – Bring Your Own Key) allows you to connect your own API keys for supported AI providers, giving you full control over which models you use and how you are billed.
-## How to Switch Providers in the Plugin
+---
+
+## What is "Configure Providers" (BYOK)?
+
+- **Bring Your Own Key (BYOK)** lets you use your own API keys for services like OpenAI, Anthropic, DeepSeek, and others, instead of (or in addition to) Refact’s built-in cloud models.
+- This is ideal if you have your own API access, want to use specific models, or need to keep billing and data usage under your own account.
+
+---
+
+## Supported Providers
+
+You can connect API keys for:
+- **OpenAI** (e.g., GPT-3.5, GPT-4, GPT-4o)
+- **Anthropic** (e.g., Claude models)
+- **DeepSeek** (e.g., deepseek-chat, deepseek-reasoner)
+- **Local models** (if supported by your Refact instance)
+- Other providers as they become available
+
+---
+
+## How to Configure Providers (Step-by-Step)
+
+### 1. Open the Providers Menu
+
+- In the Refact plugin, click the menu button (three horizontal lines or "burger" icon) in the top right corner.
+- Select **Configure Providers** from the dropdown menu.
+
+ 
+
+---
+
+### 2. Add a New Provider
+
+- In the **Configure Providers** window, click **Add Provider** or the "+" button.
+- Choose your provider from the list (e.g., OpenAI, Anthropic, DeepSeek).
+
+ 
+
+---
+
+### 3. Enter Your API Key and Configure Provider Settings
+
+- Paste your API key into the field provided.
+- (Optional) Give the provider a custom name for easy identification.
+- Enable or disable the provider as needed.
+- Click **Save**.
+
+ 
-By default, your provider is Refact.ai Cloud. If you want to switch from it, follow these steps:
+---
+
+### 4. Configure Models for Each Provider
+
+- For each provider, you can add and configure models for the tasks that provider supports (such as **Chat**, **Completion**, or **Embeddings**).
+- The available model types and settings will depend on the provider you select.
+- Click **Add model** to open the model configuration dialog.
+
+ 
+
+ 
+
+#### Model Configuration Fields
+- **Name**: The model’s name/ID (e.g., `gpt-4o`, `deepseek-chat`).
+- **Context Window (n_ctx)**: Maximum context length (tokens) the model can handle.
+- **Tokenizer**: The tokenizer to use (e.g., `hf://` for HuggingFace models).
+- **Default Temperature**: Controls randomness/creativity of model outputs.
+- **Reasoning Style**: (Dropdown) Choose a reasoning style, if supported.
+- **Capabilities**: Select which features the model supports (Tools, Multimodality, Clicks, Agent, etc.).
+
+---
+
+### 5. Switch Between Providers and Models
+
+- You can add multiple providers and models, and switch between them at any time.
+- The currently active provider/model will be used for new requests.
+
+---
-1. Navigate to the "Burger" button in the right upper corner of the plugin interface and click it.
-2. Go to the "Configure providers" tab and click it.
-
-3. Choose the provider you want to add from the list.
-
-4. You can enable or disable providers and delete them if needed.
+## Billing and Usage
-## Additional Resources
+- **When using BYOK, your requests are billed directly by the provider (e.g., OpenAI, Anthropic, DeepSeek).**
+- **Refact coins are NOT consumed** for BYOK requests.
+- You are responsible for monitoring your API usage and costs with your provider.
+
+---
+
+## Best Practices & Troubleshooting
+
+- **Keep your API keys secure.** Never share them publicly.
+- If a provider or model is not working, double-check your API key, model name, and account status.
+- Some providers may have usage limits or require specific permissions.
+- For help, visit our [Discord Community](https://smallcloud.ai/discord) or check the FAQ.
+
+---
+
+## FAQ
+
+**Q: Can I use multiple providers at once?**
+A: Yes! You can add and switch between multiple providers as needed.
+
+**Q: What happens if my API key runs out of credit?**
+A: Requests will fail until you add more credit or switch to another provider.
+
+---
-For more examples and configurations, please visit the [Refact GitHub repository](https://github.com/smallcloudai/refact-lsp/tree/main/bring_your_own_key).
+For more help, see our [FAQ](/faq/) or contact support.
diff --git a/docs/src/content/docs/introduction/usage-based-pricing.md b/docs/src/content/docs/introduction/usage-based-pricing.md
index 723b5692b..78c2c0e23 100644
--- a/docs/src/content/docs/introduction/usage-based-pricing.md
+++ b/docs/src/content/docs/introduction/usage-based-pricing.md
@@ -120,10 +120,11 @@ showDollarsBtn.onclick = () => setTable('dollars');
| Self-hosting option available | |
| Discord support | |
-## Bring Your Own Key (BYOK)
+## Configure Providers (BYOK)
-If you prefer to use your own API key (for OpenAI, Anthropic, or local models), you can connect it to Refact.ai. When using BYOK, requests are billed by your provider and do not consume Refact.ai coins.
+Refact.ai allows you to connect your own API keys for OpenAI, Anthropic, DeepSeek, and other providers using the **Configure Providers** feature (also known as BYOK – Bring Your Own Key). This gives you full control over which models you use and how you are billed.
**No commission:** For now, Refact.ai does not take any commission or markup on API usage. You pay only for the actual API cost of the model you use.
-For more information on how to use Bring Your Own Key (BYOK), see the [BYOK documentation](https://github.com/smallcloudai/refact/blob/main/docs/byok.md) in the repository.
+For a step-by-step guide on setting up and using this feature, see the [Configure Providers (BYOK) documentation](/byok/).
+
diff --git a/docs/src/content/docs/supported-models.md b/docs/src/content/docs/supported-models.md
index e67c98c5b..5160b0593 100644
--- a/docs/src/content/docs/supported-models.md
+++ b/docs/src/content/docs/supported-models.md
@@ -29,9 +29,9 @@ For select models, click the `💡Think` button to enable advanced reasoning, he
- Qwen2.5-Coder-1.5B
-## BYOK (Bring your own key)
+## Configure Providers (BYOK)
-Refact.ai gives flexibility to connect your API key and use any external LLM like Gemini, Grok, OpenAI, Deepseek, and others. Read the guide in our [BYOK Documentation](https://docs.refact.ai/byok/).
+Refact.ai gives you the flexibility to connect your own API key and use external LLMs like Gemini, Grok, OpenAI, DeepSeek, and others. For a step-by-step guide, see the [Configure Providers (BYOK) documentation](/byok/).
## Self-Hosted Version
diff --git a/refact-agent/engine/Cargo.toml b/refact-agent/engine/Cargo.toml
index 416dd3b09..e2a811f17 100644
--- a/refact-agent/engine/Cargo.toml
+++ b/refact-agent/engine/Cargo.toml
@@ -6,7 +6,7 @@ lto = true
[package]
name = "refact-lsp"
-version = "0.10.20"
+version = "0.10.21"
edition = "2021"
build = "build.rs"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
@@ -29,11 +29,13 @@ chrono = { version = "0.4.31", features = ["serde"] }
diff = "0.1.13"
dunce = "1.0.5"
dyn_partial_eq = "=0.1.2"
+filetime = "0.2.25"
futures = "0.3"
git2 = "0.20.2"
glob = "0.3.1"
hashbrown = "0.15.2"
headless_chrome = "1.0.16"
+heed = "0.22.0"
home = "0.5"
html2text = "0.12.5"
hyper = { version = "0.14", features = ["server", "stream"] }
@@ -69,7 +71,6 @@ shell-words = "1.1.0"
shell-escape = "0.1.5"
select = "0.6.0"
similar = "2.3.0"
-sled = { version = "0.34", default-features = false, features = [] }
sqlite-vec = { version = "0.1.6" }
strip-ansi-escapes = "0.2.1"
strsim = "0.11.1"
@@ -86,13 +87,13 @@ tower-lsp = "0.20"
tracing = "0.1"
tracing-appender = "0.2.3"
tracing-subscriber = { version = "0.3", features = ["env-filter", "json"] }
-tree-sitter = "0.22"
-tree-sitter-cpp = "0.22"
-tree-sitter-java = "0.21"
-tree-sitter-javascript = "0.21"
-tree-sitter-python = "0.21"
-tree-sitter-rust = "0.21"
-tree-sitter-typescript = "0.21"
+tree-sitter = "0.25"
+tree-sitter-cpp = "0.23"
+tree-sitter-java = "0.23"
+tree-sitter-javascript = "0.23"
+tree-sitter-python = "0.23"
+tree-sitter-rust = "0.23"
+tree-sitter-typescript = "0.23"
typetag = "0.2"
url = "2.4.1"
uuid = { version = "1", features = ["v4", "serde"] }
diff --git a/refact-agent/engine/src/ast/ast_db.rs b/refact-agent/engine/src/ast/ast_db.rs
index 74164ae47..e69b150cb 100644
--- a/refact-agent/engine/src/ast/ast_db.rs
+++ b/refact-agent/engine/src/ast/ast_db.rs
@@ -1,16 +1,17 @@
+use std::path::PathBuf;
use std::time::Instant;
use std::collections::{HashMap, HashSet};
use std::sync::Arc;
+use heed::{RoTxn, RwTxn};
use indexmap::IndexMap;
-use tokio::sync::Mutex as AMutex;
use tokio::task;
use serde_cbor;
-use sled::Db;
use lazy_static::lazy_static;
use regex::Regex;
use crate::ast::ast_structs::{AstDB, AstDefinition, AstCounters, AstErrorStats};
use crate::ast::ast_parse_anything::{parse_anything_and_add_file_path, filesystem_path_to_double_colon_path};
+use crate::custom_error::MapErrToString;
use crate::fuzzy_search::fuzzy_search;
// ## How the database works ##
@@ -59,6 +60,7 @@ use crate::fuzzy_search::fuzzy_search;
// Read tests below, the show what this index can do!
+const MAX_DB_SIZE: usize = 10 * 1024 * 1024 * 1024; // 10GB
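+// Maximum size of the LMDB memory map; this is an upper bound reserved up front, not 10GB allocated immediately.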
const A_LOT_OF_PRINTS: bool = false;
macro_rules! debug_print {
@@ -69,105 +71,81 @@ macro_rules! debug_print {
};
}
-const CACHE_CAPACITY_BYTES: u64 = 256 * 1024 * 1024; // 256M cache
-
-pub async fn ast_index_init(ast_permanent: String, ast_max_files: usize, want_perf_report: bool) -> Arc<AMutex<AstDB>>
+pub async fn ast_index_init(ast_permanent: String, ast_max_files: usize) -> Arc<AstDB>
{
- let mut config = sled::Config::default()
- .cache_capacity(CACHE_CAPACITY_BYTES)
- .use_compression(false)
- .print_profile_on_drop(want_perf_report)
- .mode(sled::Mode::HighThroughput)
- .flush_every_ms(Some(5000));
-
- if ast_permanent.is_empty() {
- config = config.temporary(true).create_new(true);
+ let db_temp_dir = if ast_permanent.is_empty() {
+ Some(tempfile::TempDir::new().expect("Failed to create tempdir"))
} else {
- config = config.path(ast_permanent.clone());
- }
+ None
+ };
+ let db_path = if let Some(tempdir) = &db_temp_dir {
+ tempdir.path().to_path_buf()
+ } else {
+ PathBuf::from(&ast_permanent)
+ };
tracing::info!("starting AST db, ast_permanent={:?}", ast_permanent);
- let db: Arc<Db> = Arc::new(task::spawn_blocking(
- move || config.open().unwrap()
- ).await.unwrap());
- db.clear().unwrap();
+ let db_env: Arc<heed::Env> = Arc::new(task::spawn_blocking(move || {
+ let mut options = heed::EnvOpenOptions::new();
+ options.map_size(MAX_DB_SIZE);
+ options.max_dbs(10);
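+ // Opening the environment is unsafe in heed because it memory-maps the database file.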
+ unsafe { options.open(db_path).unwrap() }
+ }).await.unwrap());
+
+ let db: Arc<heed::Database<heed::types::Str, heed::types::Bytes>> = Arc::new(db_env.write_txn().map(|mut txn| {
+ let db = db_env.create_database(&mut txn, Some("ast")).expect("Failed to create ast db");
+ let _ = db.clear(&mut txn);
+ txn.commit().expect("Failed to commit to lmdb env");
+ db
+ }).expect("Failed to start transaction to create ast db"));
+
tracing::info!("/starting AST");
let ast_index = AstDB {
- sleddb: db,
- sledbatch: Arc::new(AMutex::new(sled::Batch::default())),
- batch_counter: 0,
- counters_increase: HashMap::new(),
+ db_env,
+ db,
+ _db_temp_dir: db_temp_dir,
ast_max_files,
};
- Arc::new(AMutex::new(ast_index))
+ Arc::new(ast_index)
}
-pub async fn fetch_counters(ast_index: Arc<AMutex<AstDB>>) -> AstCounters
+pub fn fetch_counters(ast_index: Arc<AstDB>) -> Result<AstCounters, String>
{
- let db = ast_index.lock().await.sleddb.clone();
- let counter_defs = db.get(b"counters|defs").unwrap().map(|v| serde_cbor::from_slice::(&v).unwrap()).unwrap_or(0);
- let counter_usages = db.get(b"counters|usages").unwrap().map(|v| serde_cbor::from_slice::(&v).unwrap()).unwrap_or(0);
- let counter_docs = db.get(b"counters|docs").unwrap().map(|v| serde_cbor::from_slice::(&v).unwrap()).unwrap_or(0);
- AstCounters {
+ let txn = ast_index.db_env.read_txn().unwrap();
+ let counter_defs = ast_index.db.get(&txn, "counters|defs")
+ .map_err_with_prefix("Failed to get counters|defs")?
+ .map(|v| serde_cbor::from_slice::<i32>(&v).unwrap())
+ .unwrap_or(0);
+ let counter_usages = ast_index.db.get(&txn, "counters|usages")
+ .map_err_with_prefix("Failed to get counters|usages")?
+ .map(|v| serde_cbor::from_slice::<i32>(&v).unwrap())
+ .unwrap_or(0);
+ let counter_docs = ast_index.db.get(&txn, "counters|docs")
+ .map_err_with_prefix("Failed to get counters|docs")?
+ .map(|v| serde_cbor::from_slice::<i32>(&v).unwrap())
+ .unwrap_or(0);
+ Ok(AstCounters {
counter_defs,
counter_usages,
counter_docs,
- }
+ })
}
-fn _increase_counter_commit(db: &sled::Db, counter_key: &[u8], adjustment: i32) {
+fn increase_counter<'a>(ast_index: Arc<AstDB>, txn: &mut heed::RwTxn<'a>, counter_key: &str, adjustment: i32) {
if adjustment == 0 {
return;
}
- match db.update_and_fetch(counter_key, |counter| {
- let counter = counter.map(|v| serde_cbor::from_slice::<i32>(&v).unwrap()).unwrap_or(0) + adjustment;
- Some(serde_cbor::to_vec(&counter).unwrap())
- }) {
- Ok(_) => {},
- Err(e) => tracing::error!("failed to update and fetch counter: {:?}", e),
+ let new_value = ast_index.db.get(txn, counter_key)
+ .unwrap_or(None)
+ .map(|v| serde_cbor::from_slice::<i32>(v).unwrap())
+ .unwrap_or(0) + adjustment;
+ if let Err(e) = ast_index.db.put(txn, counter_key, &serde_cbor::to_vec(&new_value).unwrap()) {
+ tracing::error!("failed to update counter: {:?}", e);
}
}
-async fn _increase_counter(ast_index: Arc<AMutex<AstDB>>, counter_key: &str, adjustment: i32) {
- if adjustment == 0 {
- return;
- }
- let mut ast_index_locked = ast_index.lock().await;
- let counter = ast_index_locked.counters_increase.entry(counter_key.to_string()).or_insert(0);
- *counter += adjustment;
-}
-
-pub async fn flush_sled_batch(
- ast_db: Arc<AMutex<AstDB>>,
- threshold: usize, // if zero, flush everything including counters
-) -> Arc<AMutex<sled::Batch>> {
- let mut ast_index_locked = ast_db.lock().await;
- if ast_index_locked.batch_counter >= threshold {
- let sleddb = ast_index_locked.sleddb.clone();
- let batch_arc = std::mem::replace(&mut ast_index_locked.sledbatch, Arc::new(AMutex::new(sled::Batch::default())));
- let was_counter = std::mem::replace(&mut ast_index_locked.batch_counter, 0);
- let counters_increase = std::mem::replace(&mut ast_index_locked.counters_increase, HashMap::new());
- drop(ast_index_locked);
- if was_counter > 0 {
- // tracing::info!("flushing {} sled batches", was_counter);
- let mut batch = batch_arc.lock().await;
- let batch_to_apply = std::mem::replace(&mut *batch, sled::Batch::default());
- if let Err(e) = sleddb.apply_batch(batch_to_apply) {
- tracing::error!("failed to apply batch: {:?}", e);
- }
- }
- for (counter_key, adjustment) in counters_increase {
- _increase_counter_commit(&sleddb, counter_key.as_bytes(), adjustment);
- }
- let ast_index_locked2 = ast_db.lock().await;
- return ast_index_locked2.sledbatch.clone();
- }
- ast_index_locked.batch_counter += 1;
- ast_index_locked.sledbatch.clone()
-}
-
pub async fn doc_add(
- ast_index: Arc<AMutex<AstDB>>,
+ ast_index: Arc<AstDB>,
cpath: &String,
text: &String,
errors: &mut AstErrorStats,
@@ -175,137 +153,166 @@ pub async fn doc_add(
{
let file_global_path = filesystem_path_to_double_colon_path(cpath);
let (defs, language) = parse_anything_and_add_file_path(&cpath, text, errors)?; // errors mostly "no such parser" here
- let db = ast_index.lock().await.sleddb.clone();
- let batch_arc = flush_sled_batch(ast_index.clone(), 1000).await;
- let mut batch = batch_arc.lock().await;
- let mut added_defs: i32 = 0;
- let mut added_usages: i32 = 0;
- let mut unresolved_usages: i32 = 0;
- for definition in defs.iter() {
- assert!(definition.cpath == *cpath);
- let serialized = serde_cbor::to_vec(&definition).unwrap();
- let official_path = definition.official_path.join("::");
- let d_key = format!("d|{}", official_path);
- debug_print!("writing {}", d_key);
- batch.insert(d_key.as_bytes(), serialized);
- let mut path_parts: Vec<&str> = definition.official_path.iter().map(|s| s.as_str()).collect();
- while !path_parts.is_empty() {
- let c_key = format!("c|{} ⚡ {}", path_parts.join("::"), official_path);
- batch.insert(c_key.as_bytes(), b"");
- path_parts.remove(0);
- }
- for usage in &definition.usages {
- if !usage.resolved_as.is_empty() {
- let u_key = format!("u|{} ⚡ {}", usage.resolved_as, official_path);
- batch.insert(u_key.as_bytes(), serde_cbor::to_vec(&usage.uline).unwrap());
- } else if usage.targets_for_guesswork.len() == 1 && !usage.targets_for_guesswork[0].starts_with("?::") {
- let homeless_key = format!("homeless|{} ⚡ {}", usage.targets_for_guesswork[0], official_path);
- batch.insert(homeless_key.as_bytes(), serde_cbor::to_vec(&usage.uline).unwrap());
- debug_print!(" homeless {}", homeless_key);
- continue;
- } else {
- unresolved_usages += 1;
- }
- added_usages += 1;
- }
- // this_is_a_class: cpp🔎CosmicGoat, derived_from: "cpp🔎Goat" "cpp🔎CosmicJustice"
- for from in &definition.this_class_derived_from {
- let t_key = format!("classes|{} ⚡ {}", from, official_path);
- batch.insert(t_key.as_bytes(), definition.this_is_a_class.as_bytes());
- }
- added_defs += 1;
- }
- if unresolved_usages > 0 {
- let resolve_todo_key = format!("resolve-todo|{}", file_global_path.join("::"));
- batch.insert(resolve_todo_key.as_bytes(), cpath.as_bytes());
- }
- let doc_key = format!("doc-cpath|{}", file_global_path.join("::"));
- if db.get(doc_key.as_bytes()).unwrap().is_none() {
- _increase_counter(ast_index.clone(), "counters|docs", 1).await;
- db.insert(doc_key.as_bytes(), cpath.as_bytes()).unwrap();
- }
- _increase_counter(ast_index.clone(), "counters|defs", added_defs).await;
- _increase_counter(ast_index.clone(), "counters|usages", added_usages).await;
-
- Ok((defs.into_iter().map(Arc::new).collect(), language))
-}
-pub async fn doc_remove(ast_index: Arc<AMutex<AstDB>>, cpath: &String)
-{
- let file_global_path = filesystem_path_to_double_colon_path(cpath);
- let d_prefix = format!("d|{}::", file_global_path.join("::"));
- let db = ast_index.lock().await.sleddb.clone();
- let batch_arc = flush_sled_batch(ast_index.clone(), 1000).await;
- let mut batch = batch_arc.lock().await;
- let mut iter = db.scan_prefix(d_prefix);
- let mut deleted_defs: i32 = 0;
- let mut deleted_usages: i32 = 0;
- while let Some(Ok((key, value))) = iter.next() {
- let d_key = key.clone();
- if let Ok(definition) = serde_cbor::from_slice::<AstDefinition>(&value) {
- let mut path_parts: Vec<&str> = definition.official_path.iter().map(|s| s.as_str()).collect();
+ let result = ast_index.db_env.write_txn().and_then(|mut txn| {
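+ // All writes for this file go into one LMDB write transaction and only become visible when txn.commit() succeeds below.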
+ let mut added_defs: i32 = 0;
+ let mut added_usages: i32 = 0;
+ let mut unresolved_usages: i32 = 0;
+ for definition in defs.iter() {
+ assert!(definition.cpath == *cpath);
+ let serialized = serde_cbor::to_vec(&definition).unwrap();
let official_path = definition.official_path.join("::");
+ let d_key = format!("d|{}", official_path);
+ debug_print!("writing {}", d_key);
+ ast_index.db.put(&mut txn, &d_key, &serialized)?;
+ let mut path_parts: Vec<&str> = definition.official_path.iter().map(|s| s.as_str()).collect();
while !path_parts.is_empty() {
let c_key = format!("c|{} ⚡ {}", path_parts.join("::"), official_path);
- batch.remove(c_key.as_bytes());
+ ast_index.db.put(&mut txn, &c_key, b"")?;
path_parts.remove(0);
}
for usage in &definition.usages {
if !usage.resolved_as.is_empty() {
let u_key = format!("u|{} ⚡ {}", usage.resolved_as, official_path);
- batch.remove(u_key.as_bytes());
+ ast_index.db.put(&mut txn, &u_key, &serde_cbor::to_vec(&usage.uline).unwrap())?;
} else if usage.targets_for_guesswork.len() == 1 && !usage.targets_for_guesswork[0].starts_with("?::") {
let homeless_key = format!("homeless|{} ⚡ {}", usage.targets_for_guesswork[0], official_path);
- batch.remove(homeless_key.as_bytes());
+ ast_index.db.put(&mut txn, &homeless_key, &serde_cbor::to_vec(&usage.uline).unwrap())?;
debug_print!(" homeless {}", homeless_key);
continue;
+ } else {
+ unresolved_usages += 1;
}
- deleted_usages += 1;
+ added_usages += 1;
}
+ // this_is_a_class: cpp🔎CosmicGoat, derived_from: "cpp🔎Goat" "cpp🔎CosmicJustice"
for from in &definition.this_class_derived_from {
let t_key = format!("classes|{} ⚡ {}", from, official_path);
- batch.remove(t_key.as_bytes());
+ ast_index.db.put(&mut txn, &t_key, &definition.this_is_a_class.as_bytes())?;
}
- let cleanup_key = format!("resolve-cleanup|{}", definition.official_path.join("::"));
- if let Ok(Some(cleanup_value)) = db.get(cleanup_key.as_bytes()) {
- if let Ok(all_saved_ulinks) = serde_cbor::from_slice::<Vec<String>>(&cleanup_value) {
- for ulink in all_saved_ulinks {
- batch.remove(ulink.as_bytes());
+ added_defs += 1;
+ }
+ if unresolved_usages > 0 {
+ let resolve_todo_key = format!("resolve-todo|{}", file_global_path.join("::"));
+ ast_index.db.put(&mut txn, &resolve_todo_key, &cpath.as_bytes())?;
+ }
+ let doc_key = format!("doc-cpath|{}", file_global_path.join("::"));
+ if ast_index.db.get(&txn, &doc_key)?.is_none() {
+ increase_counter(ast_index.clone(), &mut txn, "counters|docs", 1);
+ ast_index.db.put(&mut txn, &doc_key, &cpath.as_bytes())?;
+ }
+ increase_counter(ast_index.clone(), &mut txn, "counters|defs", added_defs);
+ increase_counter(ast_index.clone(), &mut txn, "counters|usages", added_usages);
+
+ txn.commit()
+ });
+
+ if let Err(e) = result {
+ tracing::error!("Failed to add document: {:?}", e);
+ }
+
+ Ok((defs.into_iter().map(Arc::new).collect(), language))
+}
+
+pub fn doc_remove(ast_index: Arc<AstDB>, cpath: &String) -> ()
+{
+ let file_global_path = filesystem_path_to_double_colon_path(cpath);
+ let d_prefix = format!("d|{}::", file_global_path.join("::"));
+
+ let result = ast_index.db_env.write_txn().and_then(|mut txn| {
+ let mut keys_to_remove = Vec::new();
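+ // Keys are collected first because the prefix cursor below borrows the transaction; deletions are applied after the scan finishes.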
+ let mut deleted_defs = 0;
+ let mut deleted_usages = 0;
+
+ {
+ let mut cursor = ast_index.db.prefix_iter(&txn, &d_prefix)?;
+ while let Some(Ok((d_key, value))) = cursor.next() {
+ if let Ok(definition) = serde_cbor::from_slice::<AstDefinition>(&value) {
+ let mut path_parts: Vec<&str> = definition.official_path.iter().map(|s| s.as_str()).collect();
+ let official_path = definition.official_path.join("::");
+ while !path_parts.is_empty() {
+ let c_key = format!("c|{} ⚡ {}", path_parts.join("::"), official_path);
+ keys_to_remove.push(c_key);
+ path_parts.remove(0);
}
- } else {
- tracing::error!("failed to deserialize cleanup_value for key: {}", cleanup_key);
+ for usage in &definition.usages {
+ if !usage.resolved_as.is_empty() {
+ let u_key = format!("u|{} ⚡ {}", usage.resolved_as, official_path);
+ keys_to_remove.push(u_key);
+ } else if usage.targets_for_guesswork.len() == 1 && !usage.targets_for_guesswork[0].starts_with("?::") {
+ let homeless_key = format!("homeless|{} ⚡ {}", usage.targets_for_guesswork[0], official_path);
+ debug_print!(" homeless {}", homeless_key);
+ keys_to_remove.push(homeless_key);
+ continue;
+ }
+ deleted_usages += 1;
+ }
+ for from in &definition.this_class_derived_from {
+ let t_key = format!("classes|{} ⚡ {}", from, official_path);
+ keys_to_remove.push(t_key);
+ }
+ let cleanup_key = format!("resolve-cleanup|{}", definition.official_path.join("::"));
+ if let Ok(Some(cleanup_value)) = ast_index.db.get(&txn, &cleanup_key) {
+ if let Ok(all_saved_ulinks) = serde_cbor::from_slice::<Vec<String>>(&cleanup_value) {
+ for ulink in all_saved_ulinks {
+ keys_to_remove.push(ulink);
+ }
+ } else {
+ tracing::error!("failed to deserialize cleanup_value for key: {}", cleanup_key);
+ }
+ keys_to_remove.push(cleanup_key);
+ }
+ deleted_defs += 1;
}
- batch.remove(cleanup_key.as_bytes());
+ debug_print!("removing {d_key}");
+ keys_to_remove.push(d_key.to_string());
}
- deleted_defs += 1;
}
- debug_print!("removing {}", String::from_utf8_lossy(&d_key));
- batch.remove(&d_key);
- }
- let doc_resolved_key = format!("doc-resolved|{}", file_global_path.join("::"));
- batch.remove(doc_resolved_key.as_bytes());
- let doc_key = format!("doc-cpath|{}", file_global_path.join("::"));
- if db.get(doc_key.as_bytes()).unwrap().is_some() {
- _increase_counter(ast_index.clone(), "counters|docs", -1).await;
- db.remove(doc_key.as_bytes()).unwrap();
+ let doc_resolved_key = format!("doc-resolved|{}", file_global_path.join("::"));
+ keys_to_remove.push(doc_resolved_key);
+
+ for key in keys_to_remove {
+ ast_index.db.delete(&mut txn, &key)?;
+ }
+
+ let doc_key = format!("doc-cpath|{}", file_global_path.join("::"));
+ if ast_index.db.get(&txn, &doc_key)?.is_some() {
+ increase_counter(ast_index.clone(), &mut txn, "counters|docs", -1);
+ ast_index.db.delete(&mut txn, &doc_key)?;
+ }
+ increase_counter(ast_index.clone(), &mut txn, "counters|defs", -deleted_defs);
+ increase_counter(ast_index.clone(), &mut txn, "counters|usages", -deleted_usages);
+
+ txn.commit()
+ });
+
+ if let Err(e) = result {
+ tracing::error!("Failed to remove document: {:?}", e);
}
- _increase_counter(ast_index.clone(), "counters|defs", -deleted_defs).await;
- _increase_counter(ast_index.clone(), "counters|usages", -deleted_usages).await;
}
-pub async fn doc_defs(ast_index: Arc<AMutex<AstDB>>, cpath: &String) -> Vec<Arc<AstDefinition>>
+pub fn doc_defs(ast_index: Arc<AstDB>, cpath: &String) -> Vec<Arc<AstDefinition>>
{
- let db = ast_index.lock().await.sleddb.clone();
- doc_def_internal(db, cpath)
+ match ast_index.db_env.read_txn() {
+ Ok(txn) => doc_defs_internal(ast_index.clone(), &txn, cpath),
+ Err(e) => {
+ tracing::error!("Failed to open transaction: {:?}", e);
+ Vec::new()
+ }
+ }
}
-pub fn doc_def_internal(db: Arc<Db>, cpath: &String) -> Vec<Arc<AstDefinition>>
-{
- let to_search_prefix = filesystem_path_to_double_colon_path(cpath);
- let d_prefix = format!("d|{}::", to_search_prefix.join("::"));
+pub fn doc_defs_internal<'a>(ast_index: Arc<AstDB>, txn: &RoTxn<'a>, cpath: &String) -> Vec<Arc<AstDefinition>> {
+ let d_prefix = format!("d|{}::", filesystem_path_to_double_colon_path(cpath).join("::"));
let mut defs = Vec::new();
- let mut iter = db.scan_prefix(d_prefix);
- while let Some(Ok((_, value))) = iter.next() {
+ let mut cursor = match ast_index.db.prefix_iter(txn, &d_prefix) {
+ Ok(cursor) => cursor,
+ Err(e) => {
+ tracing::error!("Failed to open prefix iterator: {:?}", e);
+ return Vec::new();
+ },
+ };
+ while let Some(Ok((_, value))) = cursor.next() {
if let Ok(definition) = serde_cbor::from_slice::<AstDefinition>(&value) {
defs.push(Arc::new(definition));
}
@@ -313,13 +320,10 @@ pub fn doc_def_internal(db: Arc<Db>, cpath: &String) -> Vec<Arc<AstDefinition>>
defs
}
-pub async fn doc_usages(ast_index: Arc<AMutex<AstDB>>, cpath: &String) -> Vec<(usize, String)>
-{
- let definitions = doc_defs(ast_index.clone(), cpath).await;
- let db = ast_index.lock().await.sleddb.clone();
+pub async fn doc_usages(ast_index: Arc<AstDB>, cpath: &String) -> Vec<(usize, String)> {
+ let definitions = doc_defs(ast_index.clone(), cpath);
let mut usages = Vec::new();
- // Simple usages
for def in definitions {
for usage in &def.usages {
if !usage.resolved_as.is_empty() {
@@ -328,12 +332,14 @@ pub async fn doc_usages(ast_index: Arc<AMutex<AstDB>>, cpath: &String) -> Vec<(usize, String)>
}
}
- // Scan for usages that needed resolving
let file_global_path = filesystem_path_to_double_colon_path(cpath);
let doc_resolved_key = format!("doc-resolved|{}", file_global_path.join("::"));
- if let Ok(Some(resolved_usages)) = db.get(doc_resolved_key.as_bytes()) {
- let resolved_usages_vec = serde_cbor::from_slice::<Vec<(usize, String)>>(&resolved_usages).unwrap();
- usages.extend(resolved_usages_vec);
+ if let Ok(txn) = ast_index.db_env.read_txn() {
+ if let Ok(Some(resolved_usages)) = ast_index.db.get(&txn, &doc_resolved_key) {
+ if let Ok(resolved_usages_vec) = serde_cbor::from_slice::<Vec<(usize, String)>>(&resolved_usages) {
+ usages.extend(resolved_usages_vec);
+ }
+ }
}
usages
@@ -349,84 +355,115 @@ pub struct ConnectUsageContext {
pub t0: Instant,
}
-pub async fn connect_usages(ast_index: Arc<AMutex<AstDB>>, ucx: &mut ConnectUsageContext) -> bool
+impl Default for ConnectUsageContext {
+ fn default() -> Self {
+ ConnectUsageContext {
+ derived_from_map: IndexMap::default(),
+ errstats: AstErrorStats::default(),
+ usages_homeless: 0,
+ usages_connected: 0,
+ usages_not_found: 0,
+ usages_ambiguous: 0,
+ t0: Instant::now(),
+ }
+ }
+}
+
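+/// Picks one pending "resolve-todo|" entry, resolves usages for that file, and returns Ok(false) once the queue is empty.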
+pub fn connect_usages(ast_index: Arc<AstDB>, ucx: &mut ConnectUsageContext) -> Result<bool, String>
{
- let db = ast_index.lock().await.sleddb.clone();
- let mut iter = db.scan_prefix("resolve-todo|").take(1);
+ let mut txn = ast_index.db_env.write_txn()
+ .map_err_with_prefix("Failed to open transaction:")?;
+
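+ // Copy the key and value out as owned data so the cursor's borrow of the transaction ends before the writes below.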
+ let (todo_key, todo_value) = {
+ let mut cursor = ast_index.db.prefix_iter(&txn, "resolve-todo|")
+ .map_err_with_prefix("Failed to open db prefix iterator:")?;
+ if let Some(Ok((todo_key, todo_value))) = cursor.next() {
+ (todo_key.to_string(), todo_value.to_vec())
+ } else {
+ return Ok(false);
+ }
+ };
- if let Some(Ok((todo_key, todo_value))) = iter.next() {
- let todo_key_string = String::from_utf8(todo_key.to_vec()).unwrap();
- let global_file_path = todo_key_string.strip_prefix("resolve-todo|").unwrap();
- let cpath = String::from_utf8(todo_value.to_vec()).unwrap();
- debug_print!("resolving {}", cpath);
+ let global_file_path = todo_key.strip_prefix("resolve-todo|").unwrap();
+ let cpath = String::from_utf8(todo_value.to_vec()).unwrap();
+ debug_print!("resolving {}", cpath);
- // delete immediately, to make sure connect_usages() does not continue forever, even if there are errors and stuff
- if let Err(e) = db.remove(&todo_key) {
- tracing::error!("connect_usages() failed to remove resolve-todo key: {:?}", e);
- }
+ ast_index.db.delete(&mut txn, &todo_key).map_err_with_prefix("Failed to delete resolve-todo| key")?;
- let definitions = doc_defs(ast_index.clone(), &cpath.to_string()).await;
- let batch_arc = flush_sled_batch(ast_index.clone(), 1000).await;
- let mut batch = batch_arc.lock().await;
+ let definitions = doc_defs_internal(ast_index.clone(), &txn, &cpath);
- let mut resolved_usages: Vec<(usize, String)> = vec![];
- for def in definitions {
- let tmp = _connect_usages_helper(&db, ucx, &def, &mut batch).await;
- resolved_usages.extend(tmp);
- }
- batch.insert(
- format!("doc-resolved|{}", global_file_path).as_bytes(),
- serde_cbor::to_vec(&resolved_usages).unwrap().as_slice(),
- );
- return true;
+ let mut resolved_usages: Vec<(usize, String)> = vec![];
+ for def in definitions {
+ let tmp = _connect_usages_helper(ast_index.clone(), ucx, def, &mut txn)?;
+ resolved_usages.extend(tmp);
}
- false
+ ast_index.db.put(
+ &mut txn,
+ &format!("doc-resolved|{}", global_file_path),
+ &serde_cbor::to_vec(&resolved_usages).unwrap(),
+ ).map_err_with_prefix("Failed to insert doc-resolved:")?;
+
+ txn.commit().map_err_with_prefix("Failed to commit transaction:")?;
+
+ Ok(true)
}
-pub async fn connect_usages_look_if_full_reset_needed(ast_index: Arc<AMutex<AstDB>>) -> ConnectUsageContext
+pub fn connect_usages_look_if_full_reset_needed(ast_index: Arc<AstDB>) -> Result<ConnectUsageContext, String>
{
- flush_sled_batch(ast_index.clone(), 0).await;
- let db = ast_index.lock().await.sleddb.clone();
- let class_hierarchy_key = b"class-hierarchy|";
- let existing_hierarchy: IndexMap<String, Vec<String>> = match db.get(class_hierarchy_key) {
- Ok(Some(value)) => serde_cbor::from_slice(&value).unwrap_or_default(),
- _ => IndexMap::new(),
- };
+ let class_hierarchy_key = "class-hierarchy|";
+
+ let new_derived_from_map = _derived_from(ast_index.clone()).unwrap_or_default();
- let new_derived_from_map = _derived_from(&db).await;
- let mut batch = sled::Batch::default();
+ let mut txn = ast_index.db_env.write_txn()
+ .map_err(|e| format!("Failed to create write transaction: {:?}", e))?;
+
+ let existing_hierarchy: IndexMap<String, Vec<String>> = match ast_index.db.get(&txn, class_hierarchy_key) {
+ Ok(Some(value)) => serde_cbor::from_slice(value).unwrap_or_default(),
+ Ok(None) => IndexMap::new(),
+ Err(e) => return Err(format!("Failed to get class hierarchy: {:?}", e))
+ };
if existing_hierarchy.is_empty() {
let serialized_hierarchy = serde_cbor::to_vec(&new_derived_from_map).unwrap();
- batch.insert(class_hierarchy_key, serialized_hierarchy.as_slice());
- // first run, do nothing because all the definitions are already in the todo list
-
+ ast_index.db.put(&mut txn, class_hierarchy_key, &serialized_hierarchy)
+ .map_err_with_prefix("Failed to put class_hierarchy in db:")?;
+ // First run, serialize and store the new hierarchy
} else if new_derived_from_map != existing_hierarchy {
- // XXX first branch is not covered by tests (simple enough to work and not break?)
- tracing::info!(" * * * class hierarchy changed {} classes => {} classes, all usages need to be reconnected * * *", existing_hierarchy.len(), new_derived_from_map.len());
+ tracing::info!(" * * * class hierarchy changed {} classes => {} classes, all usages need to be reconnected * * *",
+ existing_hierarchy.len(), new_derived_from_map.len());
+
let serialized_hierarchy = serde_cbor::to_vec(&new_derived_from_map).unwrap();
- batch.insert(class_hierarchy_key, serialized_hierarchy.as_slice());
-
- let mut iter = db.scan_prefix("doc-cpath|");
- let mut cnt = 0;
- while let Some(Ok((key, value))) = iter.next() {
- let key_string = String::from_utf8(key.to_vec()).unwrap();
- if let Some(file_global_path) = key_string.strip_prefix("doc-cpath|") {
- let cpath = String::from_utf8(value.to_vec()).unwrap();
- let resolve_todo_key = format!("resolve-todo|{}", file_global_path);
- batch.insert(resolve_todo_key.as_bytes(), cpath.as_bytes());
- cnt += 1;
+ ast_index.db.put(&mut txn, class_hierarchy_key, &serialized_hierarchy)
+ .map_err(|e| format!("Failed to put class hierarchy: {:?}", e))?;
+
+ let mut keys_to_update = Vec::new();
+
+ {
+ let mut cursor = ast_index.db.prefix_iter(&txn, "doc-cpath|")
+ .map_err(|e| format!("Failed to create prefix iterator: {:?}", e))?;
+
+ while let Some(Ok((key, value))) = cursor.next() {
+ if let Some(file_global_path) = key.strip_prefix("doc-cpath|") {
+ let cpath = String::from_utf8(value.to_vec())
+ .map_err(|e| format!("Failed to parse value as UTF-8: {:?}", e))?;
+
+ let resolve_todo_key = format!("resolve-todo|{}", file_global_path);
+ keys_to_update.push((resolve_todo_key, cpath));
+ }
}
}
- tracing::info!("added {} items to resolve-todo", cnt);
- }
- if let Err(e) = db.apply_batch(batch) {
- tracing::error!("connect_usages_look_if_full_reset_needed() failed to apply batch: {:?}", e);
+ tracing::info!("adding {} items to resolve-todo", keys_to_update.len());
+ for (key, cpath) in keys_to_update {
+ ast_index.db.put(&mut txn, &key, cpath.as_bytes())
+ .map_err_with_prefix("Failed to put db key to resolve-todo:")?;
+ }
}
- ConnectUsageContext {
+ txn.commit().map_err(|e| format!("Failed to commit transaction: {:?}", e))?;
+
+ Ok(ConnectUsageContext {
derived_from_map: new_derived_from_map,
errstats: AstErrorStats::default(),
usages_homeless: 0,
@@ -434,19 +471,19 @@ pub async fn connect_usages_look_if_full_reset_needed(ast_index: Arc<AMutex<AstDB>>) -> ConnectUsageContext
-async fn _connect_usages_helper(
- db: &sled::Db,
+fn _connect_usages_helper<'a>(
+ ast_index: Arc<AstDB>,
ucx: &mut ConnectUsageContext,
- definition: &AstDefinition,
- batch: &mut sled::Batch
-) -> Vec<(usize, String)> {
+ definition: Arc<AstDefinition>,
+ txn: &mut RwTxn<'a>,
+) -> Result<Vec<(usize, String)>, String> {
// Data example:
// (1) c/Animal::self_review ⚡ alt_testsuite::cpp_goat_library::Animal::self_review
// (2) c/cpp_goat_library::Animal::self_review ⚡ alt_testsuite::cpp_goat_library::Animal::self_review
@@ -546,10 +583,10 @@ async fn _connect_usages_helper(
let c_prefix = format!("c|{}", v);
debug_print!(" scanning {}", c_prefix);
// println!(" c_prefix {:?} because v={:?}", c_prefix, v);
- let mut c_iter = db.scan_prefix(&c_prefix);
+ let mut c_iter = ast_index.db.prefix_iter(txn, &c_prefix)
+ .map_err_with_prefix("Failed to open db range iter:")?;
while let Some(Ok((c_key, _))) = c_iter.next() {
- let c_key_string = String::from_utf8(c_key.to_vec()).unwrap();
- let parts: Vec<&str> = c_key_string.split(" ⚡ ").collect();
+ let parts: Vec<&str> = c_key.split(" ⚡ ").collect();
if parts.len() == 2 {
if parts[0] == c_prefix {
let resolved_target = parts[1].trim();
@@ -574,7 +611,8 @@ async fn _connect_usages_helper(
}
let single_thing_found = found.into_iter().next().unwrap();
let u_key = format!("u|{} ⚡ {}", single_thing_found, official_path);
- batch.insert(u_key.as_bytes(), serde_cbor::to_vec(&usage.uline).unwrap());
+ ast_index.db.put(txn, &u_key, &serde_cbor::to_vec(&usage.uline).unwrap())
+ .map_err_with_prefix("Failed to insert key in db:")?;
debug_print!(" add {:?} <= {}", u_key, usage.uline);
all_saved_ulinks.push(u_key);
result.push((usage.uline, single_thing_found));
@@ -584,30 +622,37 @@ async fn _connect_usages_helper(
} // for usages
let cleanup_key = format!("resolve-cleanup|{}", definition.official_path.join("::"));
let cleanup_value = serde_cbor::to_vec(&all_saved_ulinks).unwrap();
- batch.insert(cleanup_key.as_bytes(), cleanup_value.as_slice());
- result
+ ast_index.db.put(txn, &cleanup_key, cleanup_value.as_slice())
+ .map_err_with_prefix("Failed to insert key in db:")?;
+ Ok(result)
}
-async fn _derived_from(db: &sled::Db) -> IndexMap<String, Vec<String>>
+fn _derived_from(ast_index: Arc<AstDB>) -> Result<IndexMap<String, Vec<String>>, String>
{
// Data example:
// classes/cpp🔎Animal ⚡ alt_testsuite::cpp_goat_library::Goat 👉 "cpp🔎Goat"
let mut derived_map: IndexMap<String, Vec<String>> = IndexMap::new();
let t_prefix = "classes|";
- let mut iter = db.scan_prefix(t_prefix);
- while let Some(Ok((key, value))) = iter.next() {
- let key_string = String::from_utf8(key.to_vec()).unwrap();
- let value_string = String::from_utf8(value.to_vec()).unwrap();
- let parts: Vec<&str> = key_string.split(" ⚡ ").collect();
- if parts.len() == 2 {
- let parent = parts[0].trim().strip_prefix(t_prefix).unwrap_or(parts[0].trim()).to_string();
- let child = value_string.trim().to_string();
- let entry = derived_map.entry(child).or_insert_with(Vec::new);
- if !entry.contains(&parent) {
- entry.push(parent);
+ {
+ let txn = ast_index.db_env.read_txn()
+ .map_err(|e| format!("Failed to create read transaction: {:?}", e))?;
+ let mut cursor = ast_index.db.prefix_iter(&txn, t_prefix)
+ .map_err(|e| format!("Failed to create prefix iterator: {:?}", e))?;
+
+ while let Some(Ok((key, value))) = cursor.next() {
+ let value_string = String::from_utf8(value.to_vec()).unwrap();
+
+ let parts: Vec<&str> = key.split(" ⚡ ").collect();
+ if parts.len() == 2 {
+ let parent = parts[0].trim().strip_prefix(t_prefix).unwrap_or(parts[0].trim()).to_string();
+ let child = value_string.trim().to_string();
+ let entry = derived_map.entry(child).or_insert_with(Vec::new);
+ if !entry.contains(&parent) {
+ entry.push(parent);
+ }
+ } else {
+ tracing::warn!("bad key {key}");
}
- } else {
- tracing::warn!("bad key {}", key_string);
}
}
// Have perfectly good [child, [parent1, parent2, ..]]
@@ -644,63 +689,71 @@ async fn _derived_from(db: &sled::Db) -> IndexMap<String, Vec<String>>
build_all_derived_from(klass, &derived_map, &mut all_derived_from, &mut visited);
}
// now have all_derived_from {"cpp🔎CosmicGoat": ["cpp🔎CosmicJustice", "cpp🔎Goat", "cpp🔎Animal"], "cpp🔎CosmicJustice": [], "cpp🔎Goat": ["cpp🔎Animal"], "cpp🔎Animal": []}
- all_derived_from
+ Ok(all_derived_from)
}
-pub async fn usages(ast_index: Arc<AMutex<AstDB>>, full_official_path: String, limit_n: usize) -> Vec<(Arc<AstDefinition>, usize)>
+/// The best way to get full_official_path is to call definitions() first
+pub fn usages(ast_index: Arc<AstDB>, full_official_path: String, limit_n: usize) -> Result<Vec<(Arc<AstDefinition>, usize)>, String>
{
- // The best way to get full_official_path is to call definitions() first
- let db = ast_index.lock().await.sleddb.clone();
let mut usages = Vec::new();
let u_prefix1 = format!("u|{} ", full_official_path); // this one has space
let u_prefix2 = format!("u|{}", full_official_path);
- let mut iter = db.scan_prefix(&u_prefix1);
- while let Some(Ok((u_key, u_value))) = iter.next() {
+
+ let txn = ast_index.db_env.read_txn()
+ .map_err(|e| format!("Failed to create read transaction: {:?}", e))?;
+
+ let mut cursor = ast_index.db.prefix_iter(&txn, &u_prefix1)
+ .map_err(|e| format!("Failed to create prefix iterator: {:?}", e))?;
+
+ while let Some(Ok((u_key, u_value))) = cursor.next() {
if usages.len() >= limit_n {
break;
}
- let key_string = String::from_utf8(u_key.to_vec()).unwrap();
- let uline: usize = serde_cbor::from_slice(&u_value).unwrap_or(0); // Assuming `uline` is stored in the value
- let parts: Vec<&str> = key_string.split(" ⚡ ").collect();
+
+ let parts: Vec<&str> = u_key.split(" ⚡ ").collect();
if parts.len() == 2 && parts[0] == u_prefix2 {
let full_path = parts[1].trim();
let d_key = format!("d|{}", full_path);
- if let Ok(Some(d_value)) = db.get(d_key.as_bytes()) {
+
+ if let Ok(Some(d_value)) = ast_index.db.get(&txn, &d_key) {
+ let uline = serde_cbor::from_slice::<usize>(&u_value).unwrap_or(0);
+
match serde_cbor::from_slice::<AstDefinition>(&d_value) {
- Ok(definition) => {
- usages.push((Arc::new(definition), uline));
- },
- Err(e) => println!("Failed to deserialize value for {}: {:?}", d_key, e),
+ Ok(definition) => usages.push((Arc::new(definition), uline)),
+ Err(e) => tracing::error!("Failed to deserialize value for {}: {:?}", d_key, e),
}
}
- } else if parts.len() != 2 {
- tracing::error!("usage record has more than two ⚡ key was: {}", key_string);
+ } else if parts.len() != 2 {
+ tracing::error!("usage record has more than two ⚡ key was: {}", u_key);
}
}
- usages
+
+ Ok(usages)
}
-pub async fn definitions(ast_index: Arc<AMutex<AstDB>>, double_colon_path: &str) -> Vec<Arc<AstDefinition>>
+pub fn definitions(ast_index: Arc<AstDB>, double_colon_path: &str) -> Result<Vec<Arc<AstDefinition>>, String>
{
- let db = ast_index.lock().await.sleddb.clone();
let c_prefix1 = format!("c|{} ", double_colon_path); // has space
let c_prefix2 = format!("c|{}", double_colon_path);
+
+ let txn = ast_index.db_env.read_txn()
+ .map_err_with_prefix("Failed to create read transaction:")?;
+
let mut path_groups: HashMap<usize, Vec<String>> = HashMap::new();
- // println!("definitions(c_prefix={:?})", c_prefix);
- let mut iter = db.scan_prefix(&c_prefix1);
- while let Some(Ok((key, _))) = iter.next() {
- let key_string = String::from_utf8(key.to_vec()).unwrap();
- if key_string.contains(" ⚡ ") {
- let parts: Vec<&str> = key_string.split(" ⚡ ").collect();
+ let mut cursor = ast_index.db.prefix_iter(&txn, &c_prefix1)
+ .map_err_with_prefix("Failed to create db prefix iterator:")?;
+ while let Some(Ok((key, _))) = cursor.next() {
+ if key.contains(" ⚡ ") {
+ let parts: Vec<&str> = key.split(" ⚡ ").collect();
if parts.len() == 2 && parts[0] == c_prefix2 {
let full_path = parts[1].trim().to_string();
let colon_count = full_path.matches("::").count();
path_groups.entry(colon_count).or_insert_with(Vec::new).push(full_path);
} else if parts.len() != 2 {
- tracing::error!("c-record has more than two ⚡ key was: {}", key_string);
+ tracing::error!("c-record has more than two ⚡ key was: {}", key);
}
} else {
- tracing::error!("c-record doesn't have ⚡ key: {}", key_string);
+ tracing::error!("c-record doesn't have ⚡ key: {}", key);
}
}
let min_colon_count = path_groups.keys().min().cloned().unwrap_or(usize::MAX);
@@ -708,19 +761,19 @@ pub async fn definitions(ast_index: Arc<AMutex<AstDB>>, double_colon_path: &str) -> Vec<Arc<AstDefinition>>
if let Some(paths) = path_groups.get(&min_colon_count) {
for full_path in paths {
let d_key = format!("d|{}", full_path);
- if let Ok(Some(d_value)) = db.get(d_key.as_bytes()) {
+ if let Ok(Some(d_value)) = ast_index.db.get(&txn, &d_key) {
match serde_cbor::from_slice::<AstDefinition>(&d_value) {
Ok(definition) => defs.push(Arc::new(definition)),
- Err(e) => println!("Failed to deserialize value for {}: {:?}", d_key, e),
+ Err(e) => return Err(format!("Failed to deserialize value for {}: {:?}", d_key, e)),
}
}
}
}
- defs
+ Ok(defs)
}
#[allow(dead_code)]
-pub async fn type_hierarchy(ast_index: Arc<AMutex<AstDB>>, language: String, subtree_of: String) -> String
+pub fn type_hierarchy(ast_index: Arc<AstDB>, language: String, subtree_of: String) -> Result<String, String>
{
// Data example:
// classes/cpp🔎Animal ⚡ alt_testsuite::cpp_goat_library::Goat 👉 "cpp🔎Goat"
@@ -740,20 +793,24 @@ pub async fn type_hierarchy(ast_index: Arc<AMutex<AstDB>>, language: String, subtree_of: String) -> String
// CosmicJustice
// CosmicGoat
//
- let db = ast_index.lock().await.sleddb.clone();
let t_prefix = format!("classes|{}", language);
- let mut iter = db.scan_prefix(&t_prefix);
let mut hierarchy_map: IndexMap<String, Vec<String>> = IndexMap::new();
- while let Some(Ok((key, value))) = iter.next() {
- let key_string = String::from_utf8(key.to_vec()).unwrap();
- let value_string = String::from_utf8(value.to_vec()).unwrap();
- if key_string.contains(" ⚡ ") {
- let parts: Vec<&str> = key_string.split(" ⚡ ").collect();
- if parts.len() == 2 {
- let parent = parts[0].trim().strip_prefix("classes|").unwrap_or(parts[0].trim()).to_string();
- let child = value_string.trim().to_string();
- hierarchy_map.entry(parent).or_insert_with(Vec::new).push(child);
+ {
+ let txn = ast_index.db_env.read_txn()
+ .map_err_with_prefix("Failed to create read transaction:")?;
+ let mut cursor = ast_index.db.prefix_iter(&txn, &t_prefix)
+ .map_err_with_prefix("Failed to create prefix iterator:")?;
+
+ while let Some(Ok((key, value))) = cursor.next() {
+ let value_string = String::from_utf8(value.to_vec()).unwrap();
+ if key.contains(" ⚡ ") {
+ let parts: Vec<&str> = key.split(" ⚡ ").collect();
+ if parts.len() == 2 {
+ let parent = parts[0].trim().strip_prefix("classes|").unwrap_or(parts[0].trim()).to_string();
+ let child = value_string.trim().to_string();
+ hierarchy_map.entry(parent).or_insert_with(Vec::new).push(child);
+ }
}
}
}
@@ -781,11 +838,10 @@ pub async fn type_hierarchy(ast_index: Arc<AMutex<AstDB>>, language: String, subtree_of: String) -> String
result.push_str(&build_hierarchy(&hierarchy_map, &subtree_of, 0, &language));
}
- result
+ Ok(result)
}
-pub async fn definition_paths_fuzzy(ast_index: Arc<AMutex<AstDB>>, pattern: &str, top_n: usize, max_candidates_to_consider: usize) -> Vec<String> {
- let db = ast_index.lock().await.sleddb.clone();
+pub async fn definition_paths_fuzzy(ast_index: Arc<AstDB>, pattern: &str, top_n: usize, max_candidates_to_consider: usize) -> Result<Vec<String>, String> {
let mut candidates = HashSet::new();
let mut patterns_to_try = Vec::new();
@@ -802,26 +858,30 @@ pub async fn definition_paths_fuzzy(ast_index: Arc<AMutex<AstDB>>, pattern: &str, top_n: usize, max_candidates_to_consider: usize) -> Vec<String> {
}
}
- for pat in patterns_to_try {
- let c_prefix = format!("c|{}", pat);
- let mut iter = db.scan_prefix(&c_prefix);
- while let Some(Ok((key, _))) = iter.next() {
- let key_string = String::from_utf8(key.to_vec()).unwrap();
- if let Some((_, dest)) = key_string.split_once(" ⚡ ") {
- candidates.insert(dest.to_string());
+ {
+ let txn = ast_index.db_env.read_txn()
+ .map_err_with_prefix("Failed to create read transaction:")?;
+
+ for pat in patterns_to_try {
+ let mut cursor = ast_index.db.prefix_iter(&txn, &format!("c|{}", pat))
+ .map_err_with_prefix("Failed to create prefix iterator:")?;
+ while let Some(Ok((key, _))) = cursor.next() {
+ if let Some((_, dest)) = key.split_once(" ⚡ ") {
+ candidates.insert(dest.to_string());
+ }
+ if candidates.len() >= max_candidates_to_consider {
+ break;
+ }
}
if candidates.len() >= max_candidates_to_consider {
break;
}
}
- if candidates.len() >= max_candidates_to_consider {
- break;
- }
}
let results = fuzzy_search(&pattern.to_string(), candidates, top_n, &[':']);
- results.into_iter()
+ Ok(results.into_iter()
.map(|result| {
if let Some(pos) = result.find("::") {
result[pos + 2..].to_string()
@@ -829,37 +889,39 @@ pub async fn definition_paths_fuzzy(ast_index: Arc>, pattern: &str
result
}
})
- .collect()
+ .collect())
}
#[allow(dead_code)]
-pub async fn dump_database(ast_index: Arc<AMutex<AstDB>>) -> usize
+pub fn dump_database(ast_index: Arc<AstDB>) -> Result<usize, String>
{
- let db = ast_index.lock().await.sleddb.clone();
- println!("\nsled has {} records", db.len());
- let iter = db.iter();
+ let txn = ast_index.db_env.read_txn()
+ .map_err_with_prefix("Failed to create read transaction:")?;
+ let db_len = ast_index.db.len(&txn).map_err_with_prefix("Failed to count records:")?;
+ println!("\ndb has {db_len} records");
+ let iter = ast_index.db.iter(&txn)
+ .map_err_with_prefix("Failed to create iterator:")?;
for item in iter {
- let (key, value) = item.unwrap();
- let key_string = String::from_utf8(key.to_vec()).unwrap();
- if key_string.starts_with("d|") {
+ let (key, value) = item.map_err_with_prefix("Failed to get item:")?;
+ if key.starts_with("d|") {
match serde_cbor::from_slice::<AstDefinition>(&value) {
- Ok(definition) => println!("{} 👉 {:.50}", key_string, format!("{:?}", definition)),
- Err(e) => println!("Failed to deserialize value at {}: {:?}", key_string, e),
+ Ok(definition) => println!("{} 👉 {:.50}", key, format!("{:?}", definition)),
+ Err(e) => println!("Failed to deserialize value at {}: {:?}", key, e),
}
- } else if key_string.starts_with("classes|") {
+ } else if key.starts_with("classes|") {
let value_string = String::from_utf8(value.to_vec()).unwrap();
- println!("{} 👉 {:?}", key_string, value_string);
- } else if key_string.starts_with("counters|") {
+ println!("{} 👉 {:?}", key, value_string);
+ } else if key.starts_with("counters|") {
let counter_value: i32 = serde_cbor::from_slice(&value).unwrap();
- println!("{}: {}", key_string, counter_value);
+ println!("{}: {}", key, counter_value);
} else if value.len() > 0 {
- println!("{} ({} bytes)", key_string, value.len());
+ println!("{} ({} bytes)", key, value.len());
} else {
- println!("{}", key_string);
+ println!("{}", key);
}
}
println!("dump_database over");
- db.len()
+ Ok(db_len)
}
@@ -884,7 +946,7 @@ mod tests {
}
async fn run_ast_db_test(
- ast_index: Arc<AMutex<AstDB>>,
+ ast_index: Arc<AstDB>,
library_file_path: &str,
main_file_path: &str,
goat_location: &str,
@@ -902,18 +964,17 @@ mod tests {
println!("(E) {}:{} {}", error.err_cpath, error.err_line, error.err_message);
}
- let mut ucx: ConnectUsageContext = connect_usages_look_if_full_reset_needed(ast_index.clone()).await;
+ let mut ucx: ConnectUsageContext = connect_usages_look_if_full_reset_needed(ast_index.clone()).unwrap();
loop {
- let did_anything = connect_usages(ast_index.clone(), &mut ucx).await;
+ let did_anything = connect_usages(ast_index.clone(), &mut ucx).unwrap();
if !did_anything {
break;
}
}
- flush_sled_batch(ast_index.clone(), 0).await;
- dump_database(ast_index.clone()).await;
+ let _ = dump_database(ast_index.clone()).unwrap();
- let hierarchy = type_hierarchy(ast_index.clone(), language.to_string(), "".to_string()).await;
+ let hierarchy = type_hierarchy(ast_index.clone(), language.to_string(), "".to_string()).unwrap();
println!("Type hierarchy:\n{}", hierarchy);
let expected_hierarchy = "Animal\n Goat\n CosmicGoat\nCosmicJustice\n CosmicGoat\n";
assert_eq!(
@@ -922,11 +983,11 @@ mod tests {
);
println!(
"Type hierachy subtree_of=Animal:\n{}",
- type_hierarchy(ast_index.clone(), language.to_string(), format!("{}🔎Animal", language)).await
+ type_hierarchy(ast_index.clone(), language.to_string(), format!("{}🔎Animal", language)).unwrap()
);
// Goat::Goat() is a C++ constructor
- let goat_def = definitions(ast_index.clone(), goat_location).await;
+ let goat_def = definitions(ast_index.clone(), goat_location).unwrap();
let mut goat_def_str = String::new();
for def in goat_def.iter() {
goat_def_str.push_str(&format!("{:?}\n", def));
@@ -934,9 +995,9 @@ mod tests {
println!("goat_def_str:\n{}", goat_def_str);
assert!(goat_def.len() == 1);
- let animalage_defs = definitions(ast_index.clone(), animal_age_location).await;
+ let animalage_defs = definitions(ast_index.clone(), animal_age_location).unwrap();
let animalage_def0 = animalage_defs.first().unwrap();
- let animalage_usage = usages(ast_index.clone(), animalage_def0.path(), 100).await;
+ let animalage_usage = usages(ast_index.clone(), animalage_def0.path(), 100).unwrap();
let mut animalage_usage_str = String::new();
for (used_at_def, used_at_uline) in animalage_usage.iter() {
animalage_usage_str.push_str(&format!("{:}:{}\n", used_at_def.cpath, used_at_uline));
@@ -944,9 +1005,9 @@ mod tests {
println!("animalage_usage_str:\n{}", animalage_usage_str);
assert!(animalage_usage.len() == 5);
- let goat_defs = definitions(ast_index.clone(), format!("{}_goat_library::Goat", language).as_str()).await;
+ let goat_defs = definitions(ast_index.clone(), format!("{}_goat_library::Goat", language).as_str()).unwrap();
let goat_def0 = goat_defs.first().unwrap();
- let goat_usage = usages(ast_index.clone(), goat_def0.path(), 100).await;
+ let goat_usage = usages(ast_index.clone(), goat_def0.path(), 100).unwrap();
let mut goat_usage_str = String::new();
for (used_at_def, used_at_uline) in goat_usage.iter() {
goat_usage_str.push_str(&format!("{:}:{}\n", used_at_def.cpath, used_at_uline));
@@ -954,31 +1015,30 @@ mod tests {
println!("goat_usage:\n{}", goat_usage_str);
assert!(goat_usage.len() == 1 || goat_usage.len() == 2); // derived from generates usages (new style: py) or not (old style)
- doc_remove(ast_index.clone(), &library_file_path.to_string()).await;
- doc_remove(ast_index.clone(), &main_file_path.to_string()).await;
- flush_sled_batch(ast_index.clone(), 0).await;
+ doc_remove(ast_index.clone(), &library_file_path.to_string());
+ doc_remove(ast_index.clone(), &main_file_path.to_string());
- let dblen = dump_database(ast_index.clone()).await;
- let counters = fetch_counters(ast_index.clone()).await;
+ let dblen = dump_database(ast_index.clone()).unwrap();
+ let counters = fetch_counters(ast_index.clone()).unwrap();
assert_eq!(counters.counter_defs, 0);
assert_eq!(counters.counter_usages, 0);
assert_eq!(counters.counter_docs, 0);
assert_eq!(dblen, 3 + 1); // 3 counters and 1 class hierarchy
- let db = ast_index.lock().await.sleddb.clone();
- drop(ast_index);
- assert!(Arc::strong_count(&db) == 1);
- println!("db.flush");
- let x = db.flush().unwrap();
- println!("db.flush returned {}, drop", x);
- drop(db);
+ // assert!(Arc::strong_count(&db) == 1);
+ println!("db.clear");
+ {
+ let mut txn = ast_index.db_env.write_txn().unwrap();
+ ast_index.db.clear(&mut txn).unwrap();
+ }
+ assert!(Arc::try_unwrap(ast_index).is_ok());
tokio::time::sleep(tokio::time::Duration::from_secs(1)).await;
}
#[tokio::test]
async fn test_ast_db_cpp() {
init_tracing();
- let ast_index = ast_index_init("".to_string(), 10, false).await;
+ let ast_index = ast_index_init("".to_string(), 10).await;
run_ast_db_test(
ast_index,
"src/ast/alt_testsuite/cpp_goat_library.h",
@@ -992,7 +1052,7 @@ mod tests {
#[tokio::test]
async fn test_ast_db_py() {
init_tracing();
- let ast_index = ast_index_init("".to_string(), 10, false).await;
+ let ast_index = ast_index_init("".to_string(), 10).await;
run_ast_db_test(
ast_index,
"src/ast/alt_testsuite/py_goat_library.py",
diff --git a/refact-agent/engine/src/ast/ast_indexer_thread.rs b/refact-agent/engine/src/ast/ast_indexer_thread.rs
index 1553072ad..29723c5b3 100644
--- a/refact-agent/engine/src/ast/ast_indexer_thread.rs
+++ b/refact-agent/engine/src/ast/ast_indexer_thread.rs
@@ -5,15 +5,16 @@ use tokio::sync::{Mutex as AMutex, Notify as ANotify};
use tokio::sync::RwLock as ARwLock;
use tokio::task::JoinHandle;
use tracing::info;
+use crate::custom_error::trace_and_default;
use crate::files_in_workspace::Document;
use crate::global_context::GlobalContext;
use crate::ast::ast_structs::{AstDB, AstStatus, AstCounters, AstErrorStats};
-use crate::ast::ast_db::{ast_index_init, fetch_counters, doc_add, doc_remove, flush_sled_batch, ConnectUsageContext, connect_usages, connect_usages_look_if_full_reset_needed};
+use crate::ast::ast_db::{ast_index_init, fetch_counters, doc_add, doc_remove, connect_usages, connect_usages_look_if_full_reset_needed};
pub struct AstIndexService {
- pub ast_index: Arc<AMutex<AstDB>>,
+ pub ast_index: Arc<AstDB>,
pub ast_status: Arc<AMutex<AstStatus>>,
pub ast_sleeping_point: Arc<ANotify>,
pub ast_todo: IndexSet<String>,
@@ -23,6 +24,7 @@ async fn ast_indexer_thread(
gcx_weak: Weak<ARwLock<GlobalContext>>,
ast_service: Arc<AMutex<AstIndexService>>,
) {
+ let t0 = tokio::time::Instant::now();
let mut reported_parse_stats = true;
let mut reported_connect_stats = true;
let mut stats_parsed_cnt = 0;
@@ -41,7 +43,7 @@ async fn ast_indexer_thread(
ast_service_locked.ast_sleeping_point.clone(),
)
};
- let ast_max_files = ast_index.lock().await.ast_max_files; // cannot change
+ let ast_max_files = ast_index.ast_max_files; // cannot change
loop {
let (cpath, left_todo_count) = {
@@ -74,7 +76,7 @@ async fn ast_indexer_thread(
};
let mut doc = Document { doc_path: cpath.clone().into(), doc_text: None };
- doc_remove(ast_index.clone(), &cpath).await;
+ doc_remove(ast_index.clone(), &cpath);
match crate::files_in_workspace::get_file_text_from_memory_or_disk(gcx.clone(), &doc.doc_path).await {
Ok(file_text) => {
@@ -113,7 +115,7 @@ async fn ast_indexer_thread(
}
if stats_update_ts.elapsed() >= std::time::Duration::from_millis(1000) { // can't be lower, because flush_sled_batch() happens not very often at all
- let counters: AstCounters = fetch_counters(ast_index.clone()).await;
+ let counters = fetch_counters(ast_index.clone()).unwrap_or_else(trace_and_default);
{
let mut status_locked = ast_status.lock().await;
status_locked.files_unparsed = left_todo_count;
@@ -135,8 +137,6 @@ async fn ast_indexer_thread(
continue;
}
- flush_sled_batch(ast_index.clone(), 0).await; // otherwise bad stats
-
if !reported_parse_stats {
if !stats_parsing_errors.errors.is_empty() {
let error_count = stats_parsing_errors.errors_counter;
@@ -187,7 +187,7 @@ async fn ast_indexer_thread(
stats_parsed_cnt = 0;
stats_symbols_cnt = 0;
reported_parse_stats = true;
- let counters: AstCounters = fetch_counters(ast_index.clone()).await;
+ let counters: AstCounters = fetch_counters(ast_index.clone()).unwrap_or_else(trace_and_default);
{
let mut status_locked = ast_status.lock().await;
status_locked.files_unparsed = 0;
@@ -200,20 +200,18 @@ async fn ast_indexer_thread(
}
// Connect usages, unless we have files in the todo
- let mut usagecx: ConnectUsageContext = connect_usages_look_if_full_reset_needed(ast_index.clone()).await;
+ let mut usagecx = connect_usages_look_if_full_reset_needed(ast_index.clone()).unwrap_or_else(trace_and_default);
loop {
todo_count = ast_service.lock().await.ast_todo.len();
if todo_count > 0 {
break;
}
- let did_anything = connect_usages(ast_index.clone(), &mut usagecx).await;
+ let did_anything = connect_usages(ast_index.clone(), &mut usagecx).unwrap_or_else(trace_and_default);
if !did_anything {
break;
}
}
- flush_sled_batch(ast_index.clone(), 0).await;
-
if !usagecx.errstats.errors.is_empty() {
let error_count = usagecx.errstats.errors_counter;
let display_count = std::cmp::min(5, error_count);
@@ -242,7 +240,7 @@ async fn ast_indexer_thread(
}
if !reported_connect_stats {
- let counters: AstCounters = fetch_counters(ast_index.clone()).await;
+ let counters: AstCounters = fetch_counters(ast_index.clone()).unwrap_or_else(trace_and_default);
{
let mut status_locked = ast_status.lock().await;
status_locked.files_unparsed = 0;
@@ -254,8 +252,9 @@ async fn ast_indexer_thread(
status_locked.astate = "done".to_string();
}
ast_sleeping_point.notify_waiters();
- let _ = write!(std::io::stderr(), "AST COMPLETE\n");
- info!("AST COMPLETE"); // you can see stderr sometimes faster vs logs
+ let msg = format!("AST COMPLETE in {:.2}s", t0.elapsed().as_secs_f32());
+ let _ = write!(std::io::stderr(), "{msg}\n");
+ info!("{msg}"); // you can see stderr sometimes faster vs logs
reported_connect_stats = true;
}
@@ -302,7 +301,7 @@ pub async fn ast_indexer_block_until_finished(ast_service: Arc<AMutex<AstIndexService>>, ...)
pub async fn ast_service_init(ast_permanent: String, ast_max_files: usize) -> Arc<AMutex<AstIndexService>>
{
- let ast_index = ast_index_init(ast_permanent, ast_max_files, false).await;
+ let ast_index = ast_index_init(ast_permanent, ast_max_files).await;
let ast_status = Arc::new(AMutex::new(AstStatus {
astate_notify: Arc::new(ANotify::new()),
astate: String::from("starting"),
diff --git a/refact-agent/engine/src/ast/ast_structs.rs b/refact-agent/engine/src/ast/ast_structs.rs
index 471438c65..c68a218ca 100644
--- a/refact-agent/engine/src/ast/ast_structs.rs
+++ b/refact-agent/engine/src/ast/ast_structs.rs
@@ -1,8 +1,8 @@
-use std::collections::HashMap;
use std::sync::Arc;
use std::fmt;
use serde::{Deserialize, Serialize};
-use tokio::sync::{Mutex as AMutex, Notify as ANotify};
+use tempfile::TempDir;
+use tokio::sync::{Notify as ANotify};
pub use crate::ast::treesitter::structs::SymbolType;
@@ -58,10 +58,9 @@ impl AstDefinition {
}
pub struct AstDB {
- pub sleddb: Arc<sled::Db>,
- pub sledbatch: Arc<AMutex<sled::Batch>>,
- pub batch_counter: usize,
- pub counters_increase: HashMap<String, i32>,
+ pub db_env: Arc<heed::Env>,
+ pub db: Arc>,
+ pub _db_temp_dir: Option<TempDir>, // Kept for cleanup
pub ast_max_files: usize,
}
@@ -79,6 +78,7 @@ pub struct AstStatus {
pub ast_max_files_hit: bool,
}
+#[derive(Default, Debug)]
pub struct AstCounters {
pub counter_defs: i32,
pub counter_usages: i32,
diff --git a/refact-agent/engine/src/ast/parse_python.rs b/refact-agent/engine/src/ast/parse_python.rs
index 723da8c69..e0486e249 100644
--- a/refact-agent/engine/src/ast/parse_python.rs
+++ b/refact-agent/engine/src/ast/parse_python.rs
@@ -1,6 +1,5 @@
use indexmap::IndexMap;
use tree_sitter::{Node, Parser};
-use tree_sitter_python::language;
use crate::ast::ast_structs::{AstDefinition, AstUsage, AstErrorStats};
use crate::ast::treesitter::structs::SymbolType;
@@ -822,7 +821,7 @@ fn py_body<'a>(cx: &mut ContextPy, node: &Node<'a>, path: &Vec) -> Strin
fn py_make_cx(code: &str) -> ContextPy
{
let mut sitter = Parser::new();
- sitter.set_language(&language()).unwrap();
+ sitter.set_language(&tree_sitter_python::LANGUAGE.into()).unwrap();
let cx = ContextPy {
ap: ContextAnyParser {
sitter,
diff --git a/refact-agent/engine/src/ast/treesitter/language_id.rs b/refact-agent/engine/src/ast/treesitter/language_id.rs
index 40e3b56bb..716a8fda4 100644
--- a/refact-agent/engine/src/ast/treesitter/language_id.rs
+++ b/refact-agent/engine/src/ast/treesitter/language_id.rs
@@ -131,22 +131,15 @@ impl From for LanguageId {
impl From<Language> for LanguageId {
fn from(value: Language) -> Self {
- if value == tree_sitter_cpp::language() {
- Self::Cpp
- } else if value == tree_sitter_python::language() {
- Self::Python
- } else if value == tree_sitter_java::language() {
- Self::Java
- } else if value == tree_sitter_javascript::language() {
- Self::JavaScript
- } else if value == tree_sitter_rust::language() {
- Self::Rust
- } else if value == tree_sitter_typescript::language_typescript() {
- Self::TypeScript
- } else if value == tree_sitter_typescript::language_tsx() {
- Self::TypeScriptReact
- } else {
- Self::Unknown
+ match value {
+ lang if lang == tree_sitter_cpp::LANGUAGE.into() => Self::Cpp,
+ lang if lang == tree_sitter_python::LANGUAGE.into() => Self::Python,
+ lang if lang == tree_sitter_java::LANGUAGE.into() => Self::Java,
+ lang if lang == tree_sitter_javascript::LANGUAGE.into() => Self::JavaScript,
+ lang if lang == tree_sitter_rust::LANGUAGE.into() => Self::Rust,
+ lang if lang == tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into() => Self::TypeScript,
+ lang if lang == tree_sitter_typescript::LANGUAGE_TSX.into() => Self::TypeScriptReact,
+ _ => Self::Unknown,
}
}
}
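A minimal sketch of the constant-based tree-sitter API this hunk migrates to, assuming the 0.23-style grammar crates where LANGUAGE is a LanguageFn that converts into tree_sitter::Language (LanguageId is the enum defined in this module):

use tree_sitter::{Language, Parser};

fn detect_python() {
    // LanguageFn -> Language, the same conversion used in the match arms above
    let lang: Language = tree_sitter_python::LANGUAGE.into();
    let mut parser = Parser::new();
    parser.set_language(&lang).unwrap();
    assert_eq!(LanguageId::from(lang), LanguageId::Python);
}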
diff --git a/refact-agent/engine/src/ast/treesitter/parsers/cpp.rs b/refact-agent/engine/src/ast/treesitter/parsers/cpp.rs
index 2bfe0c865..848bc1458 100644
--- a/refact-agent/engine/src/ast/treesitter/parsers/cpp.rs
+++ b/refact-agent/engine/src/ast/treesitter/parsers/cpp.rs
@@ -7,7 +7,6 @@ use parking_lot::RwLock;
use similar::DiffableStr;
use tree_sitter::{Node, Parser, Range};
-use tree_sitter_cpp::language;
use uuid::Uuid;
use crate::ast::treesitter::ast_instance_structs::{AstSymbolFields, AstSymbolInstanceArc, ClassFieldDeclaration, CommentDefinition, FunctionArg, FunctionCall, FunctionDeclaration, ImportDeclaration, ImportType, StructDeclaration, TypeDef, VariableDefinition, VariableUsage};
@@ -100,7 +99,7 @@ impl CppParser {
pub fn new() -> Result {
let mut parser = Parser::new();
parser
- .set_language(&language())
+ .set_language(&tree_sitter_cpp::LANGUAGE.into())
.map_err(internal_error)?;
Ok(CppParser { parser })
}
diff --git a/refact-agent/engine/src/ast/treesitter/parsers/java.rs b/refact-agent/engine/src/ast/treesitter/parsers/java.rs
index 0a3f38921..42637fa7e 100644
--- a/refact-agent/engine/src/ast/treesitter/parsers/java.rs
+++ b/refact-agent/engine/src/ast/treesitter/parsers/java.rs
@@ -9,7 +9,6 @@ use itertools::Itertools;
use parking_lot::RwLock;
use similar::DiffableStr;
use tree_sitter::{Node, Parser, Range};
-use tree_sitter_java::language;
use uuid::Uuid;
use crate::ast::treesitter::ast_instance_structs::{AstSymbolFields, AstSymbolInstanceArc, ClassFieldDeclaration, CommentDefinition, FunctionArg, FunctionCall, FunctionDeclaration, ImportDeclaration, ImportType, StructDeclaration, TypeDef, VariableDefinition, VariableUsage};
@@ -220,7 +219,7 @@ impl JavaParser {
pub fn new() -> Result {
let mut parser = Parser::new();
parser
- .set_language(&language())
+ .set_language(&tree_sitter_java::LANGUAGE.into())
.map_err(internal_error)?;
Ok(JavaParser { parser })
}
diff --git a/refact-agent/engine/src/ast/treesitter/parsers/js.rs b/refact-agent/engine/src/ast/treesitter/parsers/js.rs
index 2ebc1edea..b3482f677 100644
--- a/refact-agent/engine/src/ast/treesitter/parsers/js.rs
+++ b/refact-agent/engine/src/ast/treesitter/parsers/js.rs
@@ -6,7 +6,6 @@ use parking_lot::RwLock;
use similar::DiffableStr;
use tree_sitter::{Node, Parser, Range};
-use tree_sitter_javascript::language;
use uuid::Uuid;
use crate::ast::treesitter::ast_instance_structs::{AstSymbolFields, AstSymbolInstanceArc, ClassFieldDeclaration, CommentDefinition, FunctionArg, FunctionCall, FunctionDeclaration, ImportDeclaration, ImportType, StructDeclaration, TypeDef, VariableDefinition, VariableUsage};
@@ -142,7 +141,7 @@ impl JSParser {
pub fn new() -> Result {
let mut parser = Parser::new();
parser
- .set_language(&language())
+ .set_language(&tree_sitter_javascript::LANGUAGE.into())
.map_err(internal_error)?;
Ok(Self { parser })
}
@@ -226,7 +225,7 @@ impl JSParser {
end_point: child.end_position(),
};
}
-
+
symbols.push(Arc::new(RwLock::new(Box::new(decl))));
symbols
}
@@ -478,7 +477,7 @@ impl JSParser {
"identifier" /*| "field_identifier"*/ => {
let mut usage = VariableUsage::default();
usage.ast_fields.file_path = path.clone();
- usage.ast_fields.language = LanguageId::TypeScript;
+ usage.ast_fields.language = LanguageId::JavaScript;
usage.ast_fields.is_error = true;
usage.ast_fields.name = code.slice(parent.byte_range()).to_string();
usage.ast_fields.full_range = parent.range();
@@ -492,7 +491,7 @@ impl JSParser {
"member_expression" => {
let mut usage = VariableUsage::default();
usage.ast_fields.file_path = path.clone();
- usage.ast_fields.language = LanguageId::TypeScript;
+ usage.ast_fields.language = LanguageId::JavaScript;
usage.ast_fields.is_error = true;
if let Some(property) = parent.child_by_field_name("property") {
usage.ast_fields.name = code.slice(property.byte_range()).to_string();
@@ -651,7 +650,7 @@ impl JSParser {
if let Some(first) = def.path_components.first() {
if vec!["@", ".", ".."].contains(&first.as_str()) {
def.import_type = ImportType::UserModule;
- }
+ }
}
let mut imports: Vec = vec![];
for i in 0..info.node.child_count() {
@@ -748,7 +747,7 @@ impl JSParser {
let mut ast_fields = AstSymbolFields::default();
ast_fields.file_path = path.clone();
ast_fields.is_error = false;
- ast_fields.language = LanguageId::from(language());
+ ast_fields.language = LanguageId::JavaScript;
let mut candidates = VecDeque::from(vec![CandidateInfo {
ast_fields,
diff --git a/refact-agent/engine/src/ast/treesitter/parsers/python.rs b/refact-agent/engine/src/ast/treesitter/parsers/python.rs
index a596da72a..75d4f364e 100644
--- a/refact-agent/engine/src/ast/treesitter/parsers/python.rs
+++ b/refact-agent/engine/src/ast/treesitter/parsers/python.rs
@@ -8,7 +8,6 @@ use itertools::Itertools;
use parking_lot::RwLock;
use similar::DiffableStr;
use tree_sitter::{Node, Parser, Point, Range};
-use tree_sitter_python::language;
use uuid::Uuid;
use crate::ast::treesitter::ast_instance_structs::{AstSymbolFields, AstSymbolInstanceArc, ClassFieldDeclaration, CommentDefinition, FunctionArg, FunctionCall, FunctionDeclaration, ImportDeclaration, ImportType, StructDeclaration, SymbolInformation, TypeDef, VariableDefinition, VariableUsage};
@@ -211,7 +210,7 @@ impl PythonParser {
pub fn new() -> Result {
let mut parser = Parser::new();
parser
- .set_language(&language())
+ .set_language(&tree_sitter_python::LANGUAGE.into())
.map_err(internal_error)?;
Ok(PythonParser { parser })
}
diff --git a/refact-agent/engine/src/ast/treesitter/parsers/rust.rs b/refact-agent/engine/src/ast/treesitter/parsers/rust.rs
index dfbdfaec2..41dc0bfb0 100644
--- a/refact-agent/engine/src/ast/treesitter/parsers/rust.rs
+++ b/refact-agent/engine/src/ast/treesitter/parsers/rust.rs
@@ -5,7 +5,6 @@ use parking_lot::RwLock;
use similar::DiffableStr;
use tree_sitter::{Node, Parser, Point, Range};
-use tree_sitter_rust::language;
use uuid::Uuid;
use crate::ast::treesitter::ast_instance_structs::{AstSymbolInstance, AstSymbolInstanceArc, ClassFieldDeclaration, CommentDefinition, FunctionArg, FunctionCall, FunctionDeclaration, ImportDeclaration, ImportType, StructDeclaration, TypeAlias, TypeDef, VariableDefinition, VariableUsage};
@@ -29,7 +28,7 @@ impl RustParser {
pub fn new() -> Result {
let mut parser = Parser::new();
parser
- .set_language(&language())
+ .set_language(&tree_sitter_rust::LANGUAGE.into())
.map_err(internal_error)?;
Ok(RustParser { parser })
}
@@ -717,7 +716,7 @@ impl RustParser {
symbols
}
-
+
fn parse_use_declaration(&mut self, parent: &Node, code: &str, path: &PathBuf, parent_guid: &Uuid, is_error: bool) -> Vec {
let mut symbols: Vec = vec![];
let argument_node = parent.child_by_field_name("argument").unwrap();
@@ -926,7 +925,7 @@ impl RustParser {
}
symbols
}
-
+
pub fn parse_block(&mut self, parent: &Node, code: &str, path: &PathBuf, parent_guid: &Uuid, is_error: bool) -> Vec {
let mut symbols: Vec = vec![];
for i in 0..parent.child_count() {
diff --git a/refact-agent/engine/src/ast/treesitter/parsers/tests/js.rs b/refact-agent/engine/src/ast/treesitter/parsers/tests/js.rs
index a48b73028..a8d829388 100644
--- a/refact-agent/engine/src/ast/treesitter/parsers/tests/js.rs
+++ b/refact-agent/engine/src/ast/treesitter/parsers/tests/js.rs
@@ -28,7 +28,7 @@ mod tests {
let file = canonicalize(PathBuf::from(file!())).unwrap().parent().unwrap().join("cases/js/car.js");
assert!(file.exists());
- base_skeletonizer_test(&LanguageId::Java, &mut parser, &file, CAR_JS_CODE, CAR_JS_SKELETON);
+ base_skeletonizer_test(&LanguageId::JavaScript, &mut parser, &file, CAR_JS_CODE, CAR_JS_SKELETON);
}
#[test]
@@ -36,6 +36,6 @@ mod tests {
let mut parser: Box = Box::new(JSParser::new().expect("JSParser::new"));
let file = canonicalize(PathBuf::from(file!())).unwrap().parent().unwrap().join("cases/js/car.js");
assert!(file.exists());
- base_declaration_formatter_test(&LanguageId::Java, &mut parser, &file, CAR_JS_CODE, CAR_JS_DECLS);
+ base_declaration_formatter_test(&LanguageId::JavaScript, &mut parser, &file, CAR_JS_CODE, CAR_JS_DECLS);
}
}
diff --git a/refact-agent/engine/src/ast/treesitter/parsers/tests/ts.rs b/refact-agent/engine/src/ast/treesitter/parsers/tests/ts.rs
index 9109e6f24..b19421ebf 100644
--- a/refact-agent/engine/src/ast/treesitter/parsers/tests/ts.rs
+++ b/refact-agent/engine/src/ast/treesitter/parsers/tests/ts.rs
@@ -28,7 +28,7 @@ mod tests {
let file = canonicalize(PathBuf::from(file!())).unwrap().parent().unwrap().join("cases/ts/person.ts");
assert!(file.exists());
- base_skeletonizer_test(&LanguageId::Java, &mut parser, &file, PERSON_TS_CODE, PERSON_TS_SKELETON);
+ base_skeletonizer_test(&LanguageId::TypeScript, &mut parser, &file, PERSON_TS_CODE, PERSON_TS_SKELETON);
}
#[test]
@@ -36,6 +36,6 @@ mod tests {
let mut parser: Box = Box::new(TSParser::new().expect("TSParser::new"));
let file = canonicalize(PathBuf::from(file!())).unwrap().parent().unwrap().join("cases/ts/person.ts");
assert!(file.exists());
- base_declaration_formatter_test(&LanguageId::Java, &mut parser, &file, PERSON_TS_CODE, PERSON_TS_DECLS);
+ base_declaration_formatter_test(&LanguageId::TypeScript, &mut parser, &file, PERSON_TS_CODE, PERSON_TS_DECLS);
}
}
diff --git a/refact-agent/engine/src/ast/treesitter/parsers/ts.rs b/refact-agent/engine/src/ast/treesitter/parsers/ts.rs
index c5afa368f..6f29abec0 100644
--- a/refact-agent/engine/src/ast/treesitter/parsers/ts.rs
+++ b/refact-agent/engine/src/ast/treesitter/parsers/ts.rs
@@ -8,7 +8,6 @@ use parking_lot::RwLock;
use similar::DiffableStr;
use tree_sitter::{Node, Parser, Range};
-use tree_sitter_typescript::language_typescript as language;
use uuid::Uuid;
use crate::ast::treesitter::ast_instance_structs::{AstSymbolFields, AstSymbolInstanceArc, ClassFieldDeclaration, CommentDefinition, FunctionArg, FunctionCall, FunctionDeclaration, ImportDeclaration, ImportType, StructDeclaration, TypeDef, VariableDefinition, VariableUsage};
@@ -134,7 +133,7 @@ impl TSParser {
pub fn new() -> Result {
let mut parser = Parser::new();
parser
- .set_language(&language())
+ .set_language(&tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into())
.map_err(internal_error)?;
Ok(Self { parser })
}
@@ -244,7 +243,7 @@ impl TSParser {
end_point: child.end_position(),
};
}
-
+
symbols.push(Arc::new(RwLock::new(Box::new(decl))));
symbols
}
diff --git a/refact-agent/engine/src/at_commands/at_ast_definition.rs b/refact-agent/engine/src/at_commands/at_ast_definition.rs
index a00071e89..7a7413dd7 100644
--- a/refact-agent/engine/src/at_commands/at_ast_definition.rs
+++ b/refact-agent/engine/src/at_commands/at_ast_definition.rs
@@ -2,9 +2,11 @@ use std::sync::Arc;
use async_trait::async_trait;
use tokio::sync::Mutex as AMutex;
+use crate::ast::ast_db::definition_paths_fuzzy;
use crate::at_commands::at_commands::{AtCommand, AtCommandsContext, AtParam};
use crate::call_validation::{ContextFile, ContextEnum};
use crate::at_commands::execute_at::{AtCommandMember, correct_at_arg};
+use crate::custom_error::trace_and_default;
// use strsim::jaro_winkler;
@@ -36,14 +38,14 @@ impl AtParamSymbolPathQuery {
// }
pub struct AtAstDefinition {
- pub params: Vec<Arc<AMutex<dyn AtParam>>>,
+ pub params: Vec<Box<dyn AtParam>>,
}
impl AtAstDefinition {
pub fn new() -> Self {
AtAstDefinition {
params: vec![
- Arc::new(AMutex::new(AtParamSymbolPathQuery::new()))
+ Box::new(AtParamSymbolPathQuery::new())
],
}
}
@@ -78,7 +80,7 @@ impl AtParam for AtParamSymbolPathQuery {
}
let ast_index = ast_service_opt.unwrap().lock().await.ast_index.clone();
- crate::ast::ast_db::definition_paths_fuzzy(ast_index, value, top_n, 1000).await
+ definition_paths_fuzzy(ast_index, value, top_n, 1000).await.unwrap_or_else(trace_and_default)
}
fn param_completion_valid(&self) -> bool {
@@ -88,7 +90,7 @@ impl AtParam for AtParamSymbolPathQuery {
#[async_trait]
impl AtCommand for AtAstDefinition {
- fn params(&self) -> &Vec<Arc<AMutex<dyn AtParam>>> {
+ fn params(&self) -> &Vec<Box<dyn AtParam>> {
&self.params
}
@@ -108,7 +110,7 @@ impl AtCommand for AtAstDefinition {
},
};
- correct_at_arg(ccx.clone(), self.params[0].clone(), &mut arg_symbol).await;
+ correct_at_arg(ccx.clone(), &self.params[0], &mut arg_symbol).await;
args.clear();
args.push(arg_symbol.clone());
@@ -116,7 +118,7 @@ impl AtCommand for AtAstDefinition {
let ast_service_opt = gcx.read().await.ast_service.clone();
if let Some(ast_service) = ast_service_opt {
let ast_index = ast_service.lock().await.ast_index.clone();
- let defs: Vec<Arc<AstDefinition>> = crate::ast::ast_db::definitions(ast_index, arg_symbol.text.as_str()).await;
+ let defs: Vec<Arc<AstDefinition>> = crate::ast::ast_db::definitions(ast_index, arg_symbol.text.as_str())?;
let file_paths = defs.iter().map(|x| x.cpath.clone()).collect::>();
let short_file_paths = crate::files_correction::shortify_paths(gcx.clone(), &file_paths).await;
diff --git a/refact-agent/engine/src/at_commands/at_ast_reference.rs b/refact-agent/engine/src/at_commands/at_ast_reference.rs
index f251819a8..87e4af2fb 100644
--- a/refact-agent/engine/src/at_commands/at_ast_reference.rs
+++ b/refact-agent/engine/src/at_commands/at_ast_reference.rs
@@ -7,17 +7,18 @@ use crate::at_commands::at_commands::{AtCommand, AtCommandsContext, AtParam};
use crate::call_validation::{ContextFile, ContextEnum};
use crate::at_commands::execute_at::{AtCommandMember, correct_at_arg};
use crate::at_commands::at_ast_definition::AtParamSymbolPathQuery;
+use crate::custom_error::trace_and_default;
pub struct AtAstReference {
- pub params: Vec<Arc<AMutex<dyn AtParam>>>,
+ pub params: Vec<Box<dyn AtParam>>,
}
impl AtAstReference {
pub fn new() -> Self {
AtAstReference {
params: vec![
- Arc::new(AMutex::new(AtParamSymbolPathQuery::new()))
+ Box::new(AtParamSymbolPathQuery::new())
],
}
}
@@ -26,7 +27,7 @@ impl AtAstReference {
#[async_trait]
impl AtCommand for AtAstReference {
- fn params(&self) -> &Vec<Arc<AMutex<dyn AtParam>>> {
+ fn params(&self) -> &Vec<Box<dyn AtParam>> {
&self.params
}
@@ -46,7 +47,7 @@ impl AtCommand for AtAstReference {
},
};
- correct_at_arg(ccx.clone(), self.params[0].clone(), &mut arg_symbol).await;
+ correct_at_arg(ccx.clone(), &self.params[0], &mut arg_symbol).await;
args.clear();
args.push(arg_symbol.clone());
@@ -55,14 +56,19 @@ impl AtCommand for AtAstReference {
if let Some(ast_service) = ast_service_opt {
let ast_index = ast_service.lock().await.ast_index.clone();
- let defs = crate::ast::ast_db::definitions(ast_index.clone(), arg_symbol.text.as_str()).await;
+ let defs = crate::ast::ast_db::definitions(ast_index.clone(), arg_symbol.text.as_str())
+ .unwrap_or_else(trace_and_default);
let mut all_results = vec![];
let mut messages = vec![];
const USAGES_LIMIT: usize = 20;
if let Some(def) = defs.get(0) {
- let usages: Vec<(Arc<AstDefinition>, usize)> = crate::ast::ast_db::usages(ast_index.clone(), def.path(), 100).await;
+ let usages: Vec<(Arc<AstDefinition>, usize)> = crate::ast::ast_db::usages(
+ ast_index.clone(),
+ def.path(),
+ 100,
+ ).unwrap_or_else(trace_and_default);
let usage_count = usages.len();
let text = format!(
diff --git a/refact-agent/engine/src/at_commands/at_commands.rs b/refact-agent/engine/src/at_commands/at_commands.rs
index ad9d81c54..be18e864b 100644
--- a/refact-agent/engine/src/at_commands/at_commands.rs
+++ b/refact-agent/engine/src/at_commands/at_commands.rs
@@ -32,7 +32,7 @@ pub struct AtCommandsContext {
pub current_model: String,
pub should_execute_remotely: bool,
- pub at_commands: HashMap<String, Arc<AMutex<Box<dyn AtCommand>>>>, // a copy from static constant
+ pub at_commands: HashMap<String, Arc<dyn AtCommand>>, // a copy from static constant
pub subchat_tool_parameters: IndexMap,
pub postprocess_parameters: PostprocessSettings,
@@ -77,7 +77,7 @@ impl AtCommandsContext {
#[async_trait]
pub trait AtCommand: Send + Sync {
- fn params(&self) -> &Vec<Arc<AMutex<dyn AtParam>>>;
+ fn params(&self) -> &Vec<Box<dyn AtParam>>;
// returns (messages_for_postprocessing, text_on_clip)
async fn at_execute(&self, ccx: Arc<AMutex<AtCommandsContext>>, cmd: &mut AtCommandMember, args: &mut Vec<AtCommandMember>) -> Result<(Vec<ContextEnum>, String), String>;
fn depends_on(&self) -> Vec<String> { vec![] } // "ast", "vecdb"
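With the Arc<AMutex<...>> wrappers gone, a command is now a plain shared trait object, so dispatch no longer needs to take a lock first. A hedged sketch of the new call shape; the helper name is illustrative, and AtCommand, AtCommandsContext and AtCommandMember are assumed to be the crate's own types in scope:

use std::collections::HashMap;
use std::sync::Arc;
use tokio::sync::Mutex as AMutex;

async fn dispatch_definition(
    at_commands: &HashMap<String, Arc<dyn AtCommand>>,
    ccx: Arc<AMutex<AtCommandsContext>>,
    cmd: &mut AtCommandMember,
    args: &mut Vec<AtCommandMember>,
) -> Result<(), String> {
    if let Some(command) = at_commands.get("@definition") {
        // no .lock().await any more; the trait object is shared read-only
        let (_messages, _clip) = command.at_execute(ccx.clone(), cmd, args).await?;
    }
    Ok(())
}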
@@ -90,19 +90,19 @@ pub trait AtParam: Send + Sync {
fn param_completion_valid(&self) -> bool {false}
}
-pub async fn at_commands_dict(gcx: Arc<ARwLock<GlobalContext>>) -> HashMap<String, Arc<AMutex<Box<dyn AtCommand>>>> {
+pub async fn at_commands_dict(gcx: Arc<ARwLock<GlobalContext>>) -> HashMap<String, Arc<dyn AtCommand>> {
let at_commands_dict = HashMap::from([
- ("@file".to_string(), Arc::new(AMutex::new(Box::new(AtFile::new()) as Box<dyn AtCommand>))),
- // ("@file-search".to_string(), Arc::new(AMutex::new(Box::new(AtFileSearch::new()) as Box<dyn AtCommand>))),
- ("@definition".to_string(), Arc::new(AMutex::new(Box::new(AtAstDefinition::new()) as Box<dyn AtCommand>))),
- ("@references".to_string(), Arc::new(AMutex::new(Box::new(AtAstReference::new()) as Box<dyn AtCommand>))),
- // ("@local-notes-to-self".to_string(), Arc::new(AMutex::new(Box::new(AtLocalNotesToSelf::new()) as Box<dyn AtCommand>))),
- ("@tree".to_string(), Arc::new(AMutex::new(Box::new(AtTree::new()) as Box<dyn AtCommand>))),
- // ("@diff".to_string(), Arc::new(AMutex::new(Box::new(AtDiff::new()) as Box<dyn AtCommand>))),
- // ("@diff-rev".to_string(), Arc::new(AMutex::new(Box::new(AtDiffRev::new()) as Box<dyn AtCommand>))),
- ("@web".to_string(), Arc::new(AMutex::new(Box::new(AtWeb::new()) as Box<dyn AtCommand>))),
- ("@search".to_string(), Arc::new(AMutex::new(Box::new(crate::at_commands::at_search::AtSearch::new()) as Box<dyn AtCommand>))),
- ("@knowledge-load".to_string(), Arc::new(AMutex::new(Box::new(crate::at_commands::at_knowledge::AtLoadKnowledge::new()) as Box<dyn AtCommand>))),
+ ("@file".to_string(), Arc::new(AtFile::new()) as Arc<dyn AtCommand>),
+ // ("@file-search".to_string(), Arc::new(AtFileSearch::new()) as Arc<dyn AtCommand>),
+ ("@definition".to_string(), Arc::new(AtAstDefinition::new()) as Arc<dyn AtCommand>),
+ ("@references".to_string(), Arc::new(AtAstReference::new()) as Arc<dyn AtCommand>),
+ // ("@local-notes-to-self".to_string(), Arc::new(AtLocalNotesToSelf::new()) as Arc<dyn AtCommand>),
+ ("@tree".to_string(), Arc::new(AtTree::new()) as Arc<dyn AtCommand>),
+ // ("@diff".to_string(), Arc::new(AtDiff::new()) as Arc<dyn AtCommand>),
+ // ("@diff-rev".to_string(), Arc::new(AtDiffRev::new()) as Arc<dyn AtCommand>),
+ ("@web".to_string(), Arc::new(AtWeb::new()) as Arc<dyn AtCommand>),
+ ("@search".to_string(), Arc::new(crate::at_commands::at_search::AtSearch::new()) as Arc<dyn AtCommand>),
+ ("@knowledge-load".to_string(), Arc::new(crate::at_commands::at_knowledge::AtLoadKnowledge::new()) as Arc<dyn AtCommand>),
]);
let (ast_on, vecdb_on) = {
@@ -116,8 +116,7 @@ pub async fn at_commands_dict(gcx: Arc<ARwLock<GlobalContext>>) -> HashMap<String, Arc<dyn AtCommand>> {
diff --git a/refact-agent/engine/src/at_commands/at_file.rs b/refact-agent/engine/src/at_commands/at_file.rs
pub struct AtFile {
- pub params: Vec<Arc<AMutex<dyn AtParam>>>,
+ pub params: Vec<Box<dyn AtParam>>,
}
impl AtFile {
pub fn new() -> Self {
AtFile {
params: vec![
- Arc::new(AMutex::new(AtParamFilePath::new()))
+ Box::new(AtParamFilePath::new())
],
}
}
@@ -271,7 +271,7 @@ pub async fn context_file_from_file_path(
#[async_trait]
impl AtCommand for AtFile {
- fn params(&self) -> &Vec<Arc<AMutex<dyn AtParam>>> {
+ fn params(&self) -> &Vec<Box<dyn AtParam>> {
&self.params
}
@@ -292,7 +292,7 @@ impl AtCommand for AtFile {
return Err("Cannot execute @file: no file provided".to_string());
}
};
- correct_at_arg(ccx.clone(), self.params[0].clone(), &mut arg0).await;
+ correct_at_arg(ccx.clone(), &self.params[0], &mut arg0).await;
args.clear();
args.push(arg0.clone());
diff --git a/refact-agent/engine/src/at_commands/at_knowledge.rs b/refact-agent/engine/src/at_commands/at_knowledge.rs
index fcbc0d82d..551872c09 100644
--- a/refact-agent/engine/src/at_commands/at_knowledge.rs
+++ b/refact-agent/engine/src/at_commands/at_knowledge.rs
@@ -11,7 +11,7 @@ use crate::memories::memories_search;
/// @knowledge-load command - loads knowledge entries by search key or memory ID
pub struct AtLoadKnowledge {
- params: Vec<Arc<AMutex<dyn AtParam>>>,
+ params: Vec<Box<dyn AtParam>>,
}
impl AtLoadKnowledge {
@@ -24,7 +24,7 @@ impl AtLoadKnowledge {
#[async_trait]
impl AtCommand for AtLoadKnowledge {
- fn params(&self) -> &Vec<Arc<AMutex<dyn AtParam>>> {
+ fn params(&self) -> &Vec<Box<dyn AtParam>> {
&self.params
}
diff --git a/refact-agent/engine/src/at_commands/at_search.rs b/refact-agent/engine/src/at_commands/at_search.rs
index 6f576b69e..431aba048 100644
--- a/refact-agent/engine/src/at_commands/at_search.rs
+++ b/refact-agent/engine/src/at_commands/at_search.rs
@@ -20,7 +20,7 @@ pub fn text_on_clip(query: &String, from_tool_call: bool) -> String {
pub struct AtSearch {
- pub params: Vec<Arc<AMutex<dyn AtParam>>>,
+ pub params: Vec<Box<dyn AtParam>>,
}
impl AtSearch {
@@ -84,7 +84,7 @@ pub async fn execute_at_search(
#[async_trait]
impl AtCommand for AtSearch {
- fn params(&self) -> &Vec<Arc<AMutex<dyn AtParam>>> {
+ fn params(&self) -> &Vec<Box<dyn AtParam>> {
&self.params
}
diff --git a/refact-agent/engine/src/at_commands/at_tree.rs b/refact-agent/engine/src/at_commands/at_tree.rs
index 4fc187db4..5dc03c725 100644
--- a/refact-agent/engine/src/at_commands/at_tree.rs
+++ b/refact-agent/engine/src/at_commands/at_tree.rs
@@ -3,7 +3,6 @@ use std::path::PathBuf;
use std::sync::{Arc, RwLock};
use async_trait::async_trait;
-use sled::Db;
use tokio::sync::Mutex as AMutex;
use tracing::warn;
@@ -16,7 +15,7 @@ use crate::files_correction::{correct_to_nearest_dir_path, get_project_dirs, pat
pub struct AtTree {
- pub params: Vec<Arc<AMutex<dyn AtParam>>>,
+ pub params: Vec<Box<dyn AtParam>>,
}
impl AtTree {
@@ -140,9 +139,9 @@ impl TreeNode {
}
}
-fn _print_symbols(db: Arc<Db>, path: &PathBuf) -> String {
+fn _print_symbols(db: Arc<AstDB>, path: &PathBuf) -> String {
let cpath = path.to_string_lossy().to_string();
- let defs = crate::ast::ast_db::doc_def_internal(db.clone(), &cpath);
+ let defs = crate::ast::ast_db::doc_defs(db.clone(), &cpath);
let symbols_list = defs
.iter()
.filter(|x| match x.symbol_type {
@@ -157,7 +156,7 @@ fn _print_symbols(db: Arc, path: &PathBuf) -> String {
async fn _print_files_tree(
tree: &TreeNode,
- ast_db: Option<Arc<AMutex<AstDB>>>,
+ ast_db: Option<Arc<AstDB>>,
maxdepth: usize,
) -> String {
fn traverse(
@@ -165,7 +164,7 @@ async fn _print_files_tree(
path: PathBuf,
depth: usize,
maxdepth: usize,
- db_mb: Option<Arc<Db>>,
+ ast_db: Option<Arc<AstDB>>,
) -> Option {
if depth > maxdepth {
return None;
@@ -174,8 +173,8 @@ async fn _print_files_tree(
let indent = " ".repeat(depth);
let name = path.file_name().unwrap_or_default().to_string_lossy().to_string();
if !node.is_dir() {
- if let Some(db) = &db_mb {
- output.push_str(&format!("{}{}{}\n", indent, name, _print_symbols(db.clone(), &path)));
+ if let Some(db) = ast_db.clone() {
+ output.push_str(&format!("{}{}{}\n", indent, name, _print_symbols(db, &path)));
} else {
output.push_str(&format!("{}{}\n", indent, name));
}
@@ -189,7 +188,7 @@ async fn _print_files_tree(
for (name, child) in &node.children {
let mut child_path = path.clone();
child_path.push(name);
- if let Some(child_str) = traverse(child, child_path, depth + 1, maxdepth, db_mb.clone()) {
+ if let Some(child_str) = traverse(child, child_path, depth + 1, maxdepth, ast_db.clone()) {
child_output.push_str(&child_str);
} else {
dirs += child.is_dir() as usize;
@@ -207,12 +206,7 @@ async fn _print_files_tree(
let mut result = String::new();
for (name, node) in &tree.children {
- let db_mb = if let Some(ast) = ast_db.clone() {
- Some(ast.lock().await.sleddb.clone())
- } else {
- None
- };
- if let Some(output) = traverse(node, PathBuf::from(name), 0, maxdepth, db_mb.clone()) {
+ if let Some(output) = traverse(node, PathBuf::from(name), 0, maxdepth, ast_db.clone()) {
result.push_str(&output);
} else {
break;
@@ -224,7 +218,7 @@ async fn _print_files_tree(
async fn _print_files_tree_with_budget(
tree: &TreeNode,
char_limit: usize,
- ast_db: Option<Arc<AMutex<AstDB>>>,
+ ast_db: Option<Arc<AstDB>>,
) -> String {
let mut good_enough = String::new();
for maxdepth in 1..20 {
@@ -256,7 +250,7 @@ pub async fn print_files_tree_with_budget(
match ast_module_option {
Some(ast_module) => {
crate::ast::ast_indexer_thread::ast_indexer_block_until_finished(ast_module.clone(), 20_000, true).await;
- let ast_db: Option<Arc<AMutex<AstDB>>> = Some(ast_module.lock().await.ast_index.clone());
+ let ast_db: Option<Arc<AstDB>> = Some(ast_module.lock().await.ast_index.clone());
Ok(_print_files_tree_with_budget(tree, char_limit, ast_db.clone()).await)
}
None => Ok(_print_files_tree_with_budget(tree, char_limit, None).await),
@@ -266,7 +260,7 @@ pub async fn print_files_tree_with_budget(
#[async_trait]
impl AtCommand for AtTree {
- fn params(&self) -> &Vec<Arc<AMutex<dyn AtParam>>> { &self.params }
+ fn params(&self) -> &Vec<Box<dyn AtParam>> { &self.params }
async fn at_execute(
&self,
diff --git a/refact-agent/engine/src/at_commands/at_web.rs b/refact-agent/engine/src/at_commands/at_web.rs
index 9f1bd46ca..768ea1ab9 100644
--- a/refact-agent/engine/src/at_commands/at_web.rs
+++ b/refact-agent/engine/src/at_commands/at_web.rs
@@ -14,7 +14,7 @@ use crate::call_validation::{ChatMessage, ContextEnum};
pub struct AtWeb {
- pub params: Vec<Arc<AMutex<dyn AtParam>>>,
+ pub params: Vec<Box<dyn AtParam>>,
}
impl AtWeb {
@@ -27,7 +27,7 @@ impl AtWeb {
#[async_trait]
impl AtCommand for AtWeb {
- fn params(&self) -> &Vec<Arc<AMutex<dyn AtParam>>> {
+ fn params(&self) -> &Vec<Box<dyn AtParam>> {
&self.params
}
diff --git a/refact-agent/engine/src/at_commands/execute_at.rs b/refact-agent/engine/src/at_commands/execute_at.rs
index f91b9387d..79430f3f1 100644
--- a/refact-agent/engine/src/at_commands/execute_at.rs
+++ b/refact-agent/engine/src/at_commands/execute_at.rs
@@ -22,7 +22,7 @@ pub async fn run_at_commands_locally(
ccx: Arc>,
tokenizer: Option>,
maxgen: usize,
- original_messages: &Vec<ChatMessage>,
+ mut original_messages: Vec<ChatMessage>,
stream_back_to_user: &mut HasRagResults,
) -> (Vec<ChatMessage>, bool) {
let (n_ctx, top_n, is_preview, gcx) = {
@@ -56,13 +56,13 @@ pub async fn run_at_commands_locally(
// - if there's only 1 user message at the bottom, it receives reserve_for_context tokens for context
// - if there are N user messages, they receive reserve_for_context/N tokens each (and there's no taking from one to give to the other)
// This is useful to give prefix and suffix of the same file precisely the position necessary for FIM-like operation of a chat model
- let mut rebuilt_messages: Vec<ChatMessage> = original_messages.iter().take(user_msg_starts).map(|m| m.clone()).collect();
- for msg_idx in user_msg_starts..original_messages.len() {
- let mut msg = original_messages[msg_idx].clone();
+ let messages_after_user_msg = original_messages.split_off(user_msg_starts);
+ let mut new_messages = original_messages;
+ for (idx, mut msg) in messages_after_user_msg.into_iter().enumerate() {
// todo: make multimodal messages support @commands
if let ChatContent::Multimodal(_) = &msg.content {
- rebuilt_messages.push(msg.clone());
stream_back_to_user.push_in_json(json!(msg));
+ new_messages.push(msg);
continue;
}
let mut content = msg.content.content_text_only();
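The rewrite above swaps the index-based copy loop for Vec::split_off, which moves the tail of the message list out so each element can be consumed by value. A minimal standalone sketch of that pattern:

fn main() {
    let mut messages = vec!["system", "user 1", "user 2"];
    let tail = messages.split_off(1);            // messages == ["system"]
    assert_eq!(tail, ["user 1", "user 2"]);      // tail is owned, can be moved out of
    for (idx, msg) in tail.into_iter().enumerate() {
        println!("{}: {}", idx + 1, msg);        // idx + offset, as in the loop above
    }
}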
@@ -71,7 +71,7 @@ pub async fn run_at_commands_locally(
let mut context_limit = reserve_for_context / messages_with_at.max(1);
context_limit = context_limit.saturating_sub(content_n_tokens);
- info!("msg {} user_posted {:?} which is {} tokens, that leaves {} tokens for context of this message", msg_idx, crate::nicer_logs::first_n_chars(&content, 50), content_n_tokens, context_limit);
+ info!("msg {} user_posted {:?} which is {} tokens, that leaves {} tokens for context of this message", idx + user_msg_starts, crate::nicer_logs::first_n_chars(&content, 50), content_n_tokens, context_limit);
let mut messages_exec_output = vec![];
if content.contains("@") {
@@ -79,13 +79,19 @@ pub async fn run_at_commands_locally(
messages_exec_output.extend(res);
}
+ let mut context_file_pp = if context_limit > MIN_RAG_CONTEXT_LIMIT {
+ filter_only_context_file_from_context_tool(&messages_exec_output)
+ } else {
+ Vec::new()
+ };
+
let mut plain_text_messages = vec![];
- for exec_result in messages_exec_output.iter() {
+ for exec_result in messages_exec_output.into_iter() {
// at commands exec() can produce role "user" "assistant" "diff" "plain_text"
if let ContextEnum::ChatMessage(raw_msg) = exec_result { // means not context_file
if raw_msg.role != "plain_text" {
- rebuilt_messages.push(raw_msg.clone());
stream_back_to_user.push_in_json(json!(raw_msg));
+ new_messages.push(raw_msg);
} else {
plain_text_messages.push(raw_msg);
}
@@ -95,7 +101,6 @@ pub async fn run_at_commands_locally(
// TODO: reduce context_limit by tokens(messages_exec_output)
if context_limit > MIN_RAG_CONTEXT_LIMIT {
- let mut context_file_pp = filter_only_context_file_from_context_tool(&messages_exec_output);
let (tokens_limit_plain, mut tokens_limit_files) = {
if context_file_pp.is_empty() {
(context_limit, 0)
@@ -115,8 +120,8 @@ pub async fn run_at_commands_locally(
).await;
for m in pp_plain_text {
// OUTPUT: plain text after all custom messages
- rebuilt_messages.push(m.clone());
stream_back_to_user.push_in_json(json!(m));
+ new_messages.push(m);
}
tokens_limit_files += non_used_plain;
info!("tokens_limit_files {}", tokens_limit_files);
@@ -144,8 +149,8 @@ pub async fn run_at_commands_locally(
"context_file".to_string(),
serde_json::to_string(&json_vec).unwrap_or("".to_string()),
);
- rebuilt_messages.push(message.clone());
stream_back_to_user.push_in_json(json!(message));
+ new_messages.push(message);
}
}
info!("postprocess_plain_text_messages + postprocess_context_files {:.3}s", t0.elapsed().as_secs_f32());
@@ -154,19 +159,19 @@ pub async fn run_at_commands_locally(
if content.trim().len() > 0 {
// stream back to the user, with at-commands replaced
msg.content = ChatContent::SimpleText(content);
- rebuilt_messages.push(msg.clone());
stream_back_to_user.push_in_json(json!(msg));
+ new_messages.push(msg);
}
}
- (rebuilt_messages, any_context_produced)
+ (new_messages, any_context_produced)
}
pub async fn run_at_commands_remotely(
ccx: Arc>,
model_id: &str,
maxgen: usize,
- original_messages: &Vec<ChatMessage>,
+ original_messages: Vec<ChatMessage>,
stream_back_to_user: &mut HasRagResults,
) -> Result<(Vec<ChatMessage>, bool), String> {
let (gcx, n_ctx, subchat_tool_parameters, postprocess_parameters, chat_id) = {
@@ -181,7 +186,7 @@ pub async fn run_at_commands_remotely(
};
let post = CommandExecutePost {
- messages: original_messages.clone(),
+ messages: original_messages,
n_ctx,
maxgen,
subchat_tool_parameters,
@@ -205,14 +210,13 @@ pub async fn run_at_commands_remotely(
pub async fn correct_at_arg(
ccx: Arc<AMutex<AtCommandsContext>>,
- param: Arc<AMutex<dyn AtParam>>,
+ param: &Box<dyn AtParam>,
arg: &mut AtCommandMember,
) {
- let param_lock = param.lock().await;
- if param_lock.is_value_valid(ccx.clone(), &arg.text).await {
+ if param.is_value_valid(ccx.clone(), &arg.text).await {
return;
}
- let completion = match param_lock.param_completion(ccx.clone(), &arg.text).await.get(0) {
+ let completion = match param.param_completion(ccx.clone(), &arg.text).await.get(0) {
Some(x) => x.clone(),
None => {
arg.ok = false;
@@ -220,7 +224,7 @@ pub async fn correct_at_arg(
return;
}
};
- if !param_lock.is_value_valid(ccx.clone(), &completion).await {
+ if !param.is_value_valid(ccx.clone(), &completion).await {
arg.ok = false; arg.reason = Some("incorrect argument; completion did not help".to_string());
return;
}
@@ -231,21 +235,18 @@ pub async fn execute_at_commands_in_query(
ccx: Arc<AMutex<AtCommandsContext>>,
query: &mut String,
) -> (Vec<ContextEnum>, Vec<AtCommandMember>) {
- let at_commands = {
- ccx.lock().await.at_commands.clone()
- };
+ let at_commands = ccx.lock().await.at_commands.clone();
let at_command_names = at_commands.keys().map(|x|x.clone()).collect::>();
let mut context_enums = vec![];
let mut highlight_members = vec![];
- let mut clips = vec![];
+ let mut clips: Vec<(String, usize, usize)> = vec![];
let words = parse_words_from_line(query);
for (w_idx, (word, pos1, pos2)) in words.iter().enumerate() {
let cmd = match at_commands.get(word) {
- Some(c) => c.clone(),
+ Some(c) => c,
None => { continue; }
};
- let cmd_lock = cmd.lock().await;
let args = words.iter().skip(w_idx + 1).map(|x|x.clone()).collect::>();
let mut cmd_member = AtCommandMember::new("cmd".to_string(), word.clone(), *pos1, *pos2);
@@ -256,7 +257,7 @@ pub async fn execute_at_commands_in_query(
arg_members.push(AtCommandMember::new("arg".to_string(), text.clone(), pos1, pos2));
}
- match cmd_lock.at_execute(ccx.clone(), &mut cmd_member, &mut arg_members).await {
+ match cmd.at_execute(ccx.clone(), &mut cmd_member, &mut arg_members).await {
Ok((res, text_on_clip)) => {
context_enums.extend(res);
clips.push((text_on_clip, cmd_member.pos1, arg_members.last().map(|x|x.pos2).unwrap_or(cmd_member.pos2)));
diff --git a/refact-agent/engine/src/background_tasks.rs b/refact-agent/engine/src/background_tasks.rs
index d4611c0ff..4e871b3ec 100644
--- a/refact-agent/engine/src/background_tasks.rs
+++ b/refact-agent/engine/src/background_tasks.rs
@@ -47,6 +47,7 @@ pub async fn start_background_tasks(gcx: Arc>, config_dir
tokio::spawn(crate::vecdb::vdb_highlev::vecdb_background_reload(gcx.clone())), // this in turn can create global_context::vec_db
tokio::spawn(crate::integrations::sessions::remove_expired_sessions_background_task(gcx.clone())),
tokio::spawn(crate::memories::memories_migration(gcx.clone(), config_dir.clone())),
+ tokio::spawn(crate::git::cleanup::git_shadow_cleanup_background_task(gcx.clone())),
tokio::spawn(crate::cloud::threads_sub::watch_threads_subscription(gcx.clone())),
]);
let ast = gcx.clone().read().await.ast_service.clone();
diff --git a/refact-agent/engine/src/call_validation.rs b/refact-agent/engine/src/call_validation.rs
index 4af2cca32..03b625e89 100644
--- a/refact-agent/engine/src/call_validation.rs
+++ b/refact-agent/engine/src/call_validation.rs
@@ -53,6 +53,8 @@ pub struct SamplingParameters {
pub reasoning_effort: Option, // OpenAI style reasoning
#[serde(default)]
pub thinking: Option, // Anthropic style reasoning
+ #[serde(default)]
+ pub enable_thinking: Option<bool>, // Qwen style reasoning
}
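A minimal sketch of how the new optional field behaves on deserialization; the struct here is a stand-in that mirrors only the added field, not the full SamplingParameters:

use serde::Deserialize;

#[derive(Deserialize)]
struct Params {
    #[serde(default)]
    enable_thinking: Option<bool>,  // mirrors the field added above
}

fn main() {
    let p: Params = serde_json::from_str("{}").unwrap();
    assert_eq!(p.enable_thinking, None);            // absent field -> None
    let p: Params = serde_json::from_str(r#"{"enable_thinking": true}"#).unwrap();
    assert_eq!(p.enable_thinking, Some(true));
}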
#[derive(Debug, Deserialize, Clone)]
diff --git a/refact-agent/engine/src/caps/self_hosted.rs b/refact-agent/engine/src/caps/self_hosted.rs
index 9d8b630e2..7d3355a1b 100644
--- a/refact-agent/engine/src/caps/self_hosted.rs
+++ b/refact-agent/engine/src/caps/self_hosted.rs
@@ -6,7 +6,7 @@ use serde::Deserialize;
use crate::caps::{
BaseModelRecord, ChatModelRecord, CodeAssistantCaps, CompletionModelRecord, DefaultModels,
- EmbeddingModelRecord, default_chat_scratchpad, default_completion_scratchpad,
+ EmbeddingModelRecord, CapsMetadata, default_chat_scratchpad, default_completion_scratchpad,
default_completion_scratchpad_patch, default_embedding_batch, default_hf_tokenizer_template,
default_rejection_threshold, relative_to_full_url, normalize_string, resolve_relative_urls
};
@@ -59,6 +59,10 @@ pub struct SelfHostedCapsChat {
pub endpoint: String,
pub models: IndexMap,
pub default_model: String,
+ #[serde(default)]
+ pub default_light_model: String,
+ #[serde(default)]
+ pub default_thinking_model: String,
}
#[derive(Debug, Deserialize, Clone, Default)]
@@ -88,7 +92,12 @@ pub struct SelfHostedCaps {
#[serde(default)]
pub customization: String,
+ #[serde(default)]
pub caps_version: i64,
+ #[serde(default)]
+ pub support_metadata: bool,
+ #[serde(default)]
+ pub metadata: CapsMetadata,
}
fn configure_base_model(
@@ -99,6 +108,7 @@ fn configure_base_model(
tokenizer_endpoints: &HashMap,
caps_url: &String,
cmdline_api_key: &str,
+ support_metadata: &bool,
) -> Result<(), String> {
base_model.name = model_name.to_string();
base_model.id = format!("{}/{}", cloud_name, model_name);
@@ -108,6 +118,7 @@ fn configure_base_model(
if let Some(tokenizer) = tokenizer_endpoints.get(&base_model.name) {
base_model.tokenizer = relative_to_full_url(caps_url, &tokenizer)?;
}
+ base_model.support_metadata = support_metadata.clone();
base_model.api_key = cmdline_api_key.to_string();
base_model.endpoint_style = "openai".to_string();
Ok(())
@@ -155,6 +166,7 @@ impl SelfHostedCapsModelRecord {
&self_hosted_caps.tokenizer_endpoints,
caps_url,
cmdline_api_key,
+ &self_hosted_caps.support_metadata,
)?;
let (scratchpad, scratchpad_patch) = self.get_completion_scratchpad();
@@ -199,6 +211,7 @@ impl SelfHostedCapsModelRecord {
&self_hosted_caps.tokenizer_endpoints,
caps_url,
cmdline_api_key,
+ &self_hosted_caps.support_metadata,
)?;
Ok(ChatModelRecord {
@@ -239,6 +252,7 @@ impl SelfHostedCapsEmbeddingModelRecord {
&self_hosted_caps.tokenizer_endpoints,
caps_url,
cmdline_api_key,
+ &self_hosted_caps.support_metadata,
)?;
Ok(embedding_model)
@@ -261,15 +275,23 @@ impl SelfHostedCaps {
defaults: DefaultModels {
completion_default_model: format!("{}/{}", self.cloud_name, self.completion.default_model),
chat_default_model: format!("{}/{}", self.cloud_name, self.chat.default_model),
- chat_thinking_model: String::new(),
- chat_light_model: format!("{}/{}", self.cloud_name, self.chat.default_model),
+ chat_thinking_model: if self.chat.default_thinking_model.is_empty() {
+ String::new()
+ } else {
+ format!("{}/{}", self.cloud_name, self.chat.default_thinking_model)
+ },
+ chat_light_model: if self.chat.default_light_model.is_empty() {
+ String::new()
+ } else {
+ format!("{}/{}", self.cloud_name, self.chat.default_light_model)
+ },
},
customization: self.customization.clone(),
caps_version: self.caps_version,
hf_tokenizer_template: default_hf_tokenizer_template(),
- metadata: crate::caps::CapsMetadata::default(),
+ metadata: self.metadata.clone(),
};
for (model_name, model_rec) in &self.completion.models {
@@ -319,7 +341,7 @@ impl SelfHostedCaps {
api_key: cmdline_api_key.to_string(),
tokenizer_api_key: cmdline_api_key.to_string(),
code_completion_n_ctx: 0,
- support_metadata: false,
+ support_metadata: self.support_metadata,
completion_models: IndexMap::new(),
chat_models: IndexMap::new(),
embedding_model: EmbeddingModelRecord::default(),
@@ -327,8 +349,16 @@ impl SelfHostedCaps {
defaults: DefaultModels {
completion_default_model: self.completion.default_model.clone(),
chat_default_model: self.chat.default_model.clone(),
- chat_thinking_model: String::new(),
- chat_light_model: String::new(),
+ chat_thinking_model: if self.chat.default_thinking_model.is_empty() {
+ String::new()
+ } else {
+ format!("{}/{}", self.cloud_name, self.chat.default_thinking_model)
+ },
+ chat_light_model: if self.chat.default_light_model.is_empty() {
+ String::new()
+ } else {
+ format!("{}/{}", self.cloud_name, self.chat.default_light_model)
+ },
},
running_models: Vec::new(),
};
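Both DefaultModels blocks above now apply the same empty-vs-prefixed rule to the optional light and thinking models. A small sketch of that rule as a standalone helper; the helper name and the model id are illustrative, not part of the change:

fn prefixed_or_empty(cloud_name: &str, model: &str) -> String {
    // mirrors the if/else blocks above: empty stays empty, otherwise "<cloud>/<model>"
    if model.is_empty() { String::new() } else { format!("{}/{}", cloud_name, model) }
}

fn main() {
    assert_eq!(prefixed_or_empty("refact", ""), "");
    assert_eq!(prefixed_or_empty("refact", "some-light-model"), "refact/some-light-model");
}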
diff --git a/refact-agent/engine/src/cloud/messages_req.rs b/refact-agent/engine/src/cloud/messages_req.rs
index 7b83144a6..04f16ee30 100644
--- a/refact-agent/engine/src/cloud/messages_req.rs
+++ b/refact-agent/engine/src/cloud/messages_req.rs
@@ -289,10 +289,14 @@ pub fn convert_messages_to_thread_messages(
} else if msg.role == "diff" {
let extra_message = match serde_json::from_str::>(&msg.content.content_text_only()) {
Ok(chunks) => {
- chunks.iter()
- .filter(|x| !x.application_details.is_empty())
- .map(|x| x.application_details.clone())
- .join("\n")
+ if chunks.is_empty() {
+ "Nothing has changed.".to_string()
+ } else {
+ chunks.iter()
+ .filter(|x| !x.application_details.is_empty())
+ .map(|x| x.application_details.clone())
+ .join("\n")
+ }
},
Err(_) => "".to_string()
};
diff --git a/refact-agent/engine/src/custom_error.rs b/refact-agent/engine/src/custom_error.rs
index 191c5b763..6733329c1 100644
--- a/refact-agent/engine/src/custom_error.rs
+++ b/refact-agent/engine/src/custom_error.rs
@@ -98,3 +98,9 @@ impl MapErrToString for Result {
self.map_err(|e| format!("{pref} {e}"))
}
}
+
+/// Logs the error with tracing::error!() and returns the type's Default value.
+pub fn trace_and_default<E: std::fmt::Display, T: Default>(e: E) -> T {
+ tracing::error!("{e}");
+ Default::default()
+}
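A minimal usage sketch of the helper above: callers such as fetch_counters(...).unwrap_or_else(trace_and_default) log the Err and fall back to the type's Default, which is why AstCounters gains #[derive(Default)] in this change. The fetch function below is illustrative, and trace_and_default is assumed to be in scope from crate::custom_error:

fn fetch() -> Result<i32, String> {
    Err("db temporarily unavailable".to_string())   // illustrative failure
}

fn main() {
    // the error goes through tracing::error!() and the call yields i32::default() == 0
    let n: i32 = fetch().unwrap_or_else(trace_and_default);
    assert_eq!(n, 0);
}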
diff --git a/refact-agent/engine/src/file_filter.rs b/refact-agent/engine/src/file_filter.rs
index 017eb42c4..4b75075c8 100644
--- a/refact-agent/engine/src/file_filter.rs
+++ b/refact-agent/engine/src/file_filter.rs
@@ -10,10 +10,10 @@ pub const SOURCE_FILE_EXTENSIONS: &[&str] = &[
"c", "cpp", "cc", "h", "hpp", "cs", "java", "py", "rb", "go", "rs", "swift",
"php", "js", "jsx", "ts", "tsx", "lua", "pl", "r", "sh", "bat", "cmd", "ps1",
"m", "kt", "kts", "groovy", "dart", "fs", "fsx", "fsi", "html", "htm", "css",
- "scss", "sass", "less", "json", "xml", "yml", "yaml", "md", "sql", "db", "sqlite",
- "mdf", "cfg", "conf", "ini", "toml", "dockerfile", "ipynb", "rmd", "xml", "kt",
- "xaml", "unity", "gd", "uproject", "uasset", "asm", "s", "tex", "makefile", "mk",
- "cmake", "gradle", "liquid"
+ "scss", "sass", "less", "json", "xml", "yml", "yaml", "md", "sql", "cfg",
+ "conf", "ini", "toml", "dockerfile", "ipynb", "rmd", "xml", "kt", "xaml",
+ "unity", "gd", "uproject", "asm", "s", "tex", "makefile", "mk", "cmake",
+ "gradle", "liquid"
];
pub fn is_valid_file(path: &PathBuf, allow_hidden_folders: bool, ignore_size_thresholds: bool) -> Result<(), Box> {
diff --git a/refact-agent/engine/src/forward_to_openai_endpoint.rs b/refact-agent/engine/src/forward_to_openai_endpoint.rs
index 0876c4f29..f6cec3d70 100644
--- a/refact-agent/engine/src/forward_to_openai_endpoint.rs
+++ b/refact-agent/engine/src/forward_to_openai_endpoint.rs
@@ -44,6 +44,9 @@ pub async fn forward_to_openai_style_endpoint(
data["reasoning_effort"] = serde_json::Value::String(reasoning_effort.to_string());
} else if let Some(thinking) = sampling_parameters.thinking.clone() {
data["thinking"] = thinking.clone();
+ } else if let Some(enable_thinking) = sampling_parameters.enable_thinking {
+ data["enable_thinking"] = serde_json::Value::Bool(enable_thinking);
+ data["temperature"] = serde_json::Value::from(sampling_parameters.temperature);
} else if let Some(temperature) = sampling_parameters.temperature {
data["temperature"] = serde_json::Value::from(temperature);
}
@@ -130,7 +133,10 @@ pub async fn forward_to_openai_style_endpoint_streaming(
data["reasoning_effort"] = serde_json::Value::String(reasoning_effort.to_string());
} else if let Some(thinking) = sampling_parameters.thinking.clone() {
data["thinking"] = thinking.clone();
- } else if let Some(temperature) = sampling_parameters.temperature {
+ } else if let Some(enable_thinking) = sampling_parameters.enable_thinking {
+ data["enable_thinking"] = serde_json::Value::Bool(enable_thinking);
+ data["temperature"] = serde_json::Value::from(sampling_parameters.temperature);
+ } else if let Some(temperature) = sampling_parameters.temperature {
data["temperature"] = serde_json::Value::from(temperature);
}
data["max_completion_tokens"] = serde_json::Value::from(sampling_parameters.max_new_tokens);
diff --git a/refact-agent/engine/src/git/checkpoints.rs b/refact-agent/engine/src/git/checkpoints.rs
index 5e9cf197a..19d0b1d12 100644
--- a/refact-agent/engine/src/git/checkpoints.rs
+++ b/refact-agent/engine/src/git/checkpoints.rs
@@ -1,4 +1,5 @@
use std::sync::Arc;
+use std::time::SystemTime;
use chrono::{DateTime, Utc};
use git2::{IndexAddOption, Oid, Repository};
use tokio::sync::RwLock as ARwLock;
@@ -15,6 +16,7 @@ use crate::files_correction::{deserialize_path, get_active_workspace_folder, get
use crate::global_context::GlobalContext;
use crate::git::{FileChange, FileChangeStatus, from_unix_glob_pattern_to_gitignore};
use crate::git::operations::{checkout_head_and_branch_to_commit, commit, get_commit_datetime, get_diff_statuses, get_diff_statuses_index_to_commit, get_or_create_branch, stage_changes, open_or_init_repo};
+use crate::git::cleanup::RECENT_COMMITS_DURATION;
#[derive(Default, Serialize, Deserialize, Clone, Debug)]
pub struct Checkpoint {
@@ -48,6 +50,8 @@ async fn open_shadow_repo_and_nested_repos(
} else {
Repository::open(&git_dir_path).map_err_to_string()
}?;
+ let filetime_now = filetime::FileTime::now();
+ filetime::set_file_times(&git_dir_path, filetime_now, filetime_now).map_err_to_string()?;
repo.set_workdir(path, false).map_err_to_string()?;
for blocklisted_rule in indexing_for_path.blocklist {
if let Err(e) = repo.add_ignore_rule(&from_unix_glob_pattern_to_gitignore(&blocklisted_rule)) {
@@ -63,7 +67,7 @@ async fn open_shadow_repo_and_nested_repos(
}
Ok(result)
}
-
+
let (cache_dir, vcs_roots) = {
let gcx_locked = gcx.read().await;
(gcx_locked.cache_dir.clone(), gcx_locked.documents_state.workspace_vcs_roots.clone())
@@ -118,9 +122,9 @@ pub async fn create_workspace_checkpoint(
let abort_flag: Arc = gcx.read().await.git_operations_abort_flag.clone();
let workspace_folder = get_active_workspace_folder(gcx.clone()).await
.ok_or_else(|| "No active workspace folder".to_string())?;
- let (repo, nested_repos, workspace_folder_hash) =
+ let (repo, nested_repos, workspace_folder_hash) =
open_shadow_repo_and_nested_repos(gcx.clone(), &workspace_folder, false).await?;
-
+
if let Some(prev_checkpoint) = prev_checkpoint {
if prev_checkpoint.workspace_hash() != workspace_folder_hash {
return Err("Can not create checkpoint for different workspace folder".to_string());
@@ -137,13 +141,13 @@ pub async fn create_workspace_checkpoint(
let (_, mut file_changes) = get_diff_statuses(git2::StatusShow::Workdir, &repo, false)?;
- let (nested_file_changes, flatened_nested_file_changes) =
+ let (nested_file_changes, flatened_nested_file_changes) =
get_file_changes_from_nested_repos(&repo, &nested_repos, false)?;
file_changes.extend(flatened_nested_file_changes);
stage_changes(&repo, &file_changes, &abort_flag)?;
let commit_oid = commit(&repo, &branch, &format!("Auto commit for chat {chat_id}"), "Refact Agent", "agent@refact.ai")?;
-
+
for (nested_repo, changes) in nested_file_changes {
stage_changes(&nested_repo, &changes, &abort_flag)?;
}
@@ -164,7 +168,19 @@ pub async fn preview_changes_for_workspace_checkpoint(
let commit_to_restore_oid = Oid::from_str(&checkpoint_to_restore.commit_hash).map_err_to_string()?;
let reverted_to = get_commit_datetime(&repo, &commit_to_restore_oid)?;
- let mut files_changed = get_diff_statuses_index_to_commit(&repo, &commit_to_restore_oid, true)?;
+ let mut files_changed = match get_diff_statuses_index_to_commit(&repo, &commit_to_restore_oid, true) {
+ Ok(files_changed) => files_changed,
+ Err(e) => {
+ let recent_cutoff_timestamp = SystemTime::now().checked_sub(RECENT_COMMITS_DURATION).unwrap()
+ .duration_since(std::time::UNIX_EPOCH).unwrap().as_secs();
+
+ if reverted_to.timestamp() < recent_cutoff_timestamp as i64 {
+ return Err("This checkpoint has expired and was removed".to_string());
+ } else {
+ return Err(e);
+ }
+ }
+ };
// Invert status since we got changes in reverse order so that if it fails it does not update the workspace
for change in &mut files_changed {
@@ -183,7 +199,7 @@ pub async fn restore_workspace_checkpoint(
) -> Result<(), String> {
let workspace_folder = get_active_workspace_folder(gcx.clone()).await
.ok_or_else(|| "No active workspace folder".to_string())?;
- let (repo, nested_repos, workspace_folder_hash) =
+ let (repo, nested_repos, workspace_folder_hash) =
open_shadow_repo_and_nested_repos(gcx.clone(), &workspace_folder, false).await?;
if checkpoint_to_restore.workspace_hash() != workspace_folder_hash {
return Err("Can not restore checkpoint for different workspace folder".to_string());
@@ -192,7 +208,7 @@ pub async fn restore_workspace_checkpoint(
let commit_to_restore_oid = Oid::from_str(&checkpoint_to_restore.commit_hash).map_err_to_string()?;
checkout_head_and_branch_to_commit(&repo, &format!("refact-{chat_id}"), &commit_to_restore_oid)?;
-
+
for nested_repo in &nested_repos {
let reset_index_result = nested_repo.index()
.and_then(|mut index| {
@@ -242,18 +258,18 @@ pub async fn init_shadow_repos_if_needed(gcx: Arc>) -> ()
let initial_commit_result: Result = (|| {
let (_, mut file_changes) = get_diff_statuses(git2::StatusShow::Workdir, &repo, false)?;
- let (nested_file_changes, all_nested_changes) =
+ let (nested_file_changes, all_nested_changes) =
get_file_changes_from_nested_repos(&repo, &nested_repos, false)?;
file_changes.extend(all_nested_changes);
stage_changes(&repo, &file_changes, &abort_flag)?;
-
+
let mut index = repo.index().map_err_to_string()?;
let tree_id = index.write_tree().map_err_to_string()?;
let tree = repo.find_tree(tree_id).map_err_to_string()?;
let signature = git2::Signature::now("Refact Agent", "agent@refact.ai").map_err_to_string()?;
let commit = repo.commit(Some("HEAD"), &signature, &signature, "Initial commit", &tree, &[]).map_err_to_string()?;
-
+
for (nested_repo, changes) in nested_file_changes {
stage_changes(&nested_repo, &changes, &abort_flag)?;
}
diff --git a/refact-agent/engine/src/git/cleanup.rs b/refact-agent/engine/src/git/cleanup.rs
new file mode 100644
index 000000000..5978da6d1
--- /dev/null
+++ b/refact-agent/engine/src/git/cleanup.rs
@@ -0,0 +1,314 @@
+use std::path::Path;
+use std::sync::atomic::{AtomicBool, Ordering};
+use std::sync::Arc;
+use std::time::SystemTime;
+use std::collections::HashSet;
+use tokio::time::Duration;
+use tokio::sync::RwLock as ARwLock;
+use git2::{Repository, Oid, ObjectType};
+
+use crate::ast::chunk_utils::official_text_hashing_function;
+use crate::custom_error::{trace_and_default, MapErrToString};
+use crate::files_correction::get_project_dirs;
+use crate::global_context::GlobalContext;
+
+const MAX_INACTIVE_REPO_DURATION: Duration = Duration::from_secs(7 * 24 * 60 * 60); // 1 week
+pub const RECENT_COMMITS_DURATION: Duration = Duration::from_secs(7 * 24 * 60 * 60); // 1 week
+const CLEANUP_INTERVAL_DURATION: Duration = Duration::from_secs(24 * 60 * 60); // 1 day
+
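+/// Background task that periodically prunes shadow git state: each pass removes
+/// shadow repositories that have been inactive for MAX_INACTIVE_REPO_DURATION,
+/// deletes old unreferenced objects from the repositories still in use, and
+/// then sleeps for CLEANUP_INTERVAL_DURATION before the next pass.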
+pub async fn git_shadow_cleanup_background_task(gcx: Arc<ARwLock<GlobalContext>>) {
+ loop {
+ // wait 2 mins before cleanup; lower priority than other startup tasks
+ tokio::time::sleep(tokio::time::Duration::from_secs(2 * 60)).await;
+
+ let (cache_dir, abort_flag) = {
+ let gcx_locked = gcx.read().await;
+ (gcx_locked.cache_dir.clone(), gcx_locked.git_operations_abort_flag.clone())
+ };
+ let workspace_folders = get_project_dirs(gcx.clone()).await;
+ let workspace_folder_hashes: Vec<_> = workspace_folders.into_iter()
+ .map(|f| official_text_hashing_function(&f.to_string_lossy())).collect();
+
+ let dirs_to_check: Vec<_> = [
+ cache_dir.join("shadow_git"),
+ cache_dir.join("shadow_git").join("nested")
+ ].into_iter().filter(|dir| dir.exists()).collect();
+
+ for dir in dirs_to_check {
+ match cleanup_inactive_shadow_repositories(&dir, &workspace_folder_hashes).await {
+ Ok(cleanup_count) => {
+ if cleanup_count > 0 {
+ tracing::info!("Git shadow cleanup: removed {} old repositories", cleanup_count);
+ }
+ }
+ Err(e) => {
+ tracing::error!("Git shadow cleanup failed: {}", e);
+ }
+ }
+ }
+
+ match cleanup_old_objects_from_repos(&cache_dir.join("shadow_git"), &workspace_folder_hashes, abort_flag).await {
+ Ok(objects_cleaned) => {
+ if objects_cleaned > 0 {
+ tracing::info!("Git object cleanup: removed {} old objects from active repositories", objects_cleaned);
+ }
+ }
+ Err(e) => {
+ tracing::error!("Git object cleanup failed: {}", e);
+ }
+ }
+
+ tokio::time::sleep(CLEANUP_INTERVAL_DURATION).await;
+ }
+}
+
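+/// Scans `dir` for shadow repositories that do not belong to a currently open
+/// workspace folder and whose directory mtime is older than
+/// MAX_INACTIVE_REPO_DURATION. Matches are first renamed with a `_to_remove`
+/// suffix and then deleted; returns the number of repositories removed.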
+async fn cleanup_inactive_shadow_repositories(dir: &Path, workspace_folder_hashes: &[String]) -> Result<usize, String> {
+ let mut inactive_repos = Vec::new();
+
+ let mut entries = tokio::fs::read_dir(dir).await
+ .map_err(|e| format!("Failed to read shadow_git directory: {}", e))?;
+
+ while let Some(entry) = entries.next_entry().await
+ .map_err(|e| format!("Failed to read directory entry: {}", e))? {
+
+ let path = entry.path();
+ let dir_name = path.file_name().unwrap_or_default().to_string_lossy().to_string();
+ if !path.is_dir() || !path.join(".git").exists() || workspace_folder_hashes.contains(&dir_name) {
+ continue;
+ }
+
+ if repo_is_inactive(&path).await.unwrap_or_else(trace_and_default) {
+ inactive_repos.push(path);
+ }
+ }
+
+ let mut repos_to_remove = Vec::new();
+ for repo_path in inactive_repos {
+ let dir_name = repo_path.file_name().unwrap_or_default().to_string_lossy();
+ if !dir_name.ends_with("_to_remove") {
+ let mut new_path = repo_path.clone();
+ new_path.set_file_name(format!("{dir_name}_to_remove"));
+ match tokio::fs::rename(&repo_path, &new_path).await {
+ Ok(()) => repos_to_remove.push(new_path),
+ Err(e) => {
+ tracing::warn!("Failed to rename repo {}: {}", repo_path.display(), e);
+ continue;
+ }
+ }
+ } else {
+ repos_to_remove.push(repo_path);
+ }
+ }
+
+ let mut cleanup_count = 0;
+ for repo in repos_to_remove {
+ match tokio::fs::remove_dir_all(&repo).await {
+ Ok(()) => {
+ tracing::info!("Removed old shadow git repository: {}", repo.display());
+ cleanup_count += 1;
+ }
+ Err(e) => tracing::warn!("Failed to remove shadow git repository {}: {}", repo.display(), e),
+ }
+ }
+
+ Ok(cleanup_count)
+}
+
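+/// A repository counts as inactive when the modification time of its directory
+/// is older than MAX_INACTIVE_REPO_DURATION.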
+async fn repo_is_inactive(
+ repo_dir: &Path,
+) -> Result<bool, String> {
+ let metadata = tokio::fs::metadata(repo_dir).await
+ .map_err_with_prefix(format!("Failed to get metadata for {}:", repo_dir.display()))?;
+
+ let mtime = metadata.modified()
+ .map_err_with_prefix(format!("Failed to get modified time for {}:", repo_dir.display()))?;
+
+ let duration_since_mtime = SystemTime::now().duration_since(mtime)
+ .map_err_with_prefix(format!("Failed to calculate age for {}:", repo_dir.display()))?;
+
+ Ok(duration_since_mtime > MAX_INACTIVE_REPO_DURATION)
+}
+
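+/// Prunes old loose objects from shadow repositories that are still active and
+/// belong to one of the currently open workspace folders.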
+async fn cleanup_old_objects_from_repos(dir: &Path, workspace_folder_hashes: &[String], abort_flag: Arc<AtomicBool>) -> Result<usize, String> {
+ let mut total_objects_removed = 0;
+
+ let mut entries = tokio::fs::read_dir(dir).await
+ .map_err(|e| format!("Failed to read shadow_git directory: {}", e))?;
+
+ while let Some(entry) = entries.next_entry().await
+ .map_err(|e| format!("Failed to read directory entry: {}", e))? {
+
+ let path = entry.path();
+ if !path.is_dir() || !path.join(".git").exists() || repo_is_inactive(&path).await.unwrap_or_else(trace_and_default) {
+ continue;
+ }
+
+ let dir_name = path.file_name().unwrap_or_default().to_string_lossy().to_string();
+ if !workspace_folder_hashes.contains(&dir_name) {
+ continue;
+ }
+
+ match cleanup_old_objects_from_single_repo(&path, abort_flag.clone()).await {
+ Ok(removed_count) => {
+ if removed_count > 0 {
+ tracing::info!("Cleaned {} old objects from repository: {}", removed_count, path.display());
+ total_objects_removed += removed_count;
+ }
+ }
+ Err(e) => {
+ tracing::warn!("Failed to cleanup objects from repository {}: {}", path.display(), e);
+ }
+ }
+ }
+
+ Ok(total_objects_removed)
+}
+
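+/// Walks every branch of the repository, classifies reachable objects as
+/// "recent" (commits within RECENT_COMMITS_DURATION, plus HEAD) or "old",
+/// and deletes loose objects that are referenced only by old commits.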
+pub async fn cleanup_old_objects_from_single_repo(repo_path: &Path, abort_flag: Arc<AtomicBool>) -> Result<usize, String> {
+ let repo = Repository::open(repo_path)
+ .map_err(|e| format!("Failed to open repository {}: {}", repo_path.display(), e))?;
+
+ let now = SystemTime::now();
+ let cutoff_time = now.checked_sub(RECENT_COMMITS_DURATION)
+ .ok_or("Failed to calculate cutoff time")?;
+
+ let (recent_objects, old_objects) = collect_objects_from_commits(&repo, cutoff_time, abort_flag)?;
+
+ let objects_to_remove: HashSet<_> = old_objects.difference(&recent_objects).collect();
+
+ if objects_to_remove.is_empty() {
+ return Ok(0);
+ }
+
+ remove_unreferenced_objects(repo_path, &objects_to_remove).await
+}
+
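+/// Collects object ids (hex strings) reachable from recent commits and from old
+/// commits. The two sets may overlap; the caller subtracts the recent set so
+/// that objects still referenced by recent history are never deleted.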
+fn collect_objects_from_commits(repo: &Repository, cutoff_time: SystemTime, abort_flag: Arc<AtomicBool>) -> Result<(HashSet<String>, HashSet<String>), String> {
+ let mut recent_objects = HashSet::new();
+ let mut old_objects = HashSet::new();
+
+ let head_oid = repo.head().ok()
+ .and_then(|head| head.target())
+ .and_then(|target| repo.find_commit(target).ok())
+ .map(|commit| commit.id());
+
+ let mut revwalk = repo.revwalk()
+ .map_err(|e| format!("Failed to create revwalk: {}", e))?;
+
+ let mut any_branch_pushed = false;
+ if let Ok(refs) = repo.references() {
+ for reference in refs {
+ if let Ok(reference) = reference {
+ if reference.is_branch() {
+ if let Some(target) = reference.target() {
+ if let Ok(_) = revwalk.push(target) {
+ any_branch_pushed = true;
+ }
+ }
+ }
+ }
+ }
+ }
+ if !any_branch_pushed {
+ if let Err(e) = revwalk.push_head() {
+ tracing::warn!("Failed to push HEAD and branches to revwalk: {}", e);
+ }
+ }
+
+ revwalk.set_sorting(git2::Sort::TIME)
+ .map_err(|e| format!("Failed to set revwalk sorting: {}", e))?;
+
+ for oid_result in revwalk {
+ if abort_flag.load(Ordering::SeqCst) {
+ return Err("collect_objects_from_commits aborted".to_string());
+ }
+
+ let oid = match oid_result {
+ Ok(oid) => oid,
+ Err(e) => { tracing::warn!("{e}"); continue; }
+ };
+
+ let commit = match repo.find_commit(oid) {
+ Ok(commit) => commit,
+ Err(e) => { tracing::warn!("{e}"); continue; }
+ };
+
+ let commit_time = SystemTime::UNIX_EPOCH + std::time::Duration::from_secs(commit.time().seconds() as u64);
+ let is_recent = commit_time >= cutoff_time || Some(oid) == head_oid;
+
+ let tree_oid = commit.tree_id();
+ let objects_set = if is_recent { &mut recent_objects } else { &mut old_objects };
+
+ objects_set.insert(oid.to_string());
+
+ walk_tree_objects(repo, &tree_oid, objects_set, abort_flag.clone());
+ }
+
+ Ok((recent_objects, old_objects))
+}
+
+
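+/// Recursively records the ids of all blobs and nested trees under `tree_oid`,
+/// skipping entries that are already present in `objects`.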
+pub fn walk_tree_objects(repo: &Repository, tree_oid: &Oid, objects: &mut HashSet<String>, abort_flag: Arc<AtomicBool>) {
+ let tree = match repo.find_tree(*tree_oid) {
+ Ok(t) => t,
+ Err(_) => return,
+ };
+
+ for entry in tree.iter() {
+ if abort_flag.load(Ordering::SeqCst) {
+ return;
+ }
+
+ let entry_oid = entry.id();
+ let entry_oid_str = entry_oid.to_string();
+
+ if objects.contains(&entry_oid_str) {
+ continue;
+ }
+
+ objects.insert(entry_oid_str);
+
+ // If this entry is a tree (subdirectory), recursively walk it
+ if entry.kind() == Some(ObjectType::Tree) {
+ walk_tree_objects(repo, &entry_oid, objects, abort_flag.clone());
+ }
+ }
+}
+
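+/// Deletes the loose object files (`.git/objects/xx/yyyy...`) for the given ids.
+/// Only blob and tree objects are removed; commit objects are left untouched.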
+async fn remove_unreferenced_objects(repo_path: &Path, objects_to_remove: &HashSet<&String>) -> Result<usize, String> {
+ let objects_dir = repo_path.join(".git").join("objects");
+ let repo = Repository::open(repo_path)
+ .map_err(|e| format!("Failed to open repository {}: {}", repo_path.display(), e))?;
+ let mut removed_count = 0;
+
+ for object_id in objects_to_remove {
+ if object_id.len() < 2 {
+ continue; // Invalid object ID
+ }
+
+ let oid = match Oid::from_str(object_id) {
+ Ok(oid) => oid,
+ Err(_) => continue,
+ };
+
+ let obj_type = match repo.find_object(oid, None) {
+ Ok(obj) => obj.kind(),
+ Err(_) => None,
+ };
+ if obj_type != Some(ObjectType::Blob) && obj_type != Some(ObjectType::Tree) {
+ continue;
+ }
+
+ let (dir_name, file_name) = object_id.split_at(2);
+ let object_path = objects_dir.join(dir_name).join(file_name);
+
+ if object_path.exists() {
+ match tokio::fs::remove_file(&object_path).await {
+ Ok(()) => removed_count += 1,
+ Err(e) => tracing::warn!("Failed to remove blob object file {}: {}", object_path.display(), e),
+ }
+ }
+ }
+
+ Ok(removed_count)
+}
diff --git a/refact-agent/engine/src/git/cleanup_tests.rs b/refact-agent/engine/src/git/cleanup_tests.rs
new file mode 100644
index 000000000..d47516477
--- /dev/null
+++ b/refact-agent/engine/src/git/cleanup_tests.rs
@@ -0,0 +1,279 @@
+use super::*;
+use std::{collections::HashSet, fs, sync::{atomic::AtomicBool, Arc}, time::SystemTime};
+use tempfile::TempDir;
+use git2::{Repository, Signature, Time};
+use std::collections::HashMap;
+
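+/// Builds a temporary repository with two old commits (authored ~20 days ago)
+/// and one recent commit (~2 days ago). Returns the temp dir, the commit hashes
+/// by label, and the blob ids of the old kept/removed files so tests can check
+/// which objects survive cleanup.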
+async fn create_test_repository() -> (TempDir, HashMap<String, String>, HashMap<String, String>) {
+ let temp_dir = TempDir::new().expect("Failed to create temp dir");
+ let repo_path = temp_dir.path();
+
+ let repo = Repository::init(repo_path).expect("Failed to init repo");
+ let mut config = repo.config().expect("Failed to get config");
+ config.set_str("user.name", "Test User").expect("Failed to set user name");
+ config.set_str("user.email", "test@example.com").expect("Failed to set user email");
+
+ let mut commit_hashes = HashMap::new();
+ let mut blob_ids = HashMap::new();
+
+ // Create signature for old commits (20 days ago)
+ let old_time = Time::new(
+ (SystemTime::now().duration_since(SystemTime::UNIX_EPOCH).unwrap().as_secs() as i64) - (20 * 24 * 3600),
+ 0
+ );
+ let old_signature = Signature::new("Test User", "test@example.com", &old_time)
+ .expect("Failed to create old signature");
+
+ // Create first old commit with TWO files: one that will be kept, one that will be removed
+ let old_kept_file_path = repo_path.join("old_kept_file.txt");
+ fs::write(&old_kept_file_path, "This file should be kept").expect("Failed to write old kept file");
+
+ let old_removed_file_path = repo_path.join("old_removed_file.txt");
+ fs::write(&old_removed_file_path, "This file should be removed").expect("Failed to write old removed file");
+
+ let mut index = repo.index().expect("Failed to get index");
+ index.add_path(std::path::Path::new("old_kept_file.txt")).expect("Failed to add old kept file");
+ index.add_path(std::path::Path::new("old_removed_file.txt")).expect("Failed to add old removed file");
+ index.write().expect("Failed to write index");
+
+ let tree_id = index.write_tree().expect("Failed to write tree");
+ let tree = repo.find_tree(tree_id).expect("Failed to find tree");
+
+ let old_commit_oid = repo.commit(
+ Some("HEAD"),
+ &old_signature,
+ &old_signature,
+ "Old commit - both files",
+ &tree,
+ &[]
+ ).expect("Failed to create old commit");
+ commit_hashes.insert("old_commit".to_string(), old_commit_oid.to_string());
+
+ // Record the blob IDs for both files from the first commit
+ let first_commit = repo.find_commit(old_commit_oid).expect("Failed to find first commit");
+ let first_tree = first_commit.tree().expect("Failed to get first tree");
+
+ let kept_entry = first_tree.get_name("old_kept_file.txt").expect("Failed to find old_kept_file.txt in first commit");
+ let removed_entry = first_tree.get_name("old_removed_file.txt").expect("Failed to find old_removed_file.txt in first commit");
+
+ blob_ids.insert("old_kept_blob".to_string(), kept_entry.id().to_string());
+ blob_ids.insert("old_removed_blob".to_string(), removed_entry.id().to_string());
+
+ // Create second old commit: delete old_removed_file.txt and add shared_file.txt
+ let shared_file_path = repo_path.join("shared_file.txt");
+ fs::write(&shared_file_path, "Shared content - version 1").expect("Failed to write shared file");
+
+ let mut index = repo.index().expect("Failed to get index for second commit");
+ // Remove the old_removed_file.txt from the index (this makes its blob unreferenced)
+ index.remove_path(std::path::Path::new("old_removed_file.txt")).expect("Failed to remove old_removed_file.txt");
+ // Add the shared file
+ index.add_path(std::path::Path::new("shared_file.txt")).expect("Failed to add shared file");
+ index.write().expect("Failed to write index for second commit");
+
+ let tree_id = index.write_tree().expect("Failed to write tree for second commit");
+ let tree = repo.find_tree(tree_id).expect("Failed to find tree for second commit");
+ let parent_commit = repo.find_commit(old_commit_oid).expect("Failed to find parent commit");
+
+ let old_commit2_oid = repo.commit(
+ Some("HEAD"),
+ &old_signature,
+ &old_signature,
+ "Another old commit - removed old_removed_file.txt",
+ &tree,
+ &[&parent_commit]
+ ).expect("Failed to create second old commit");
+ commit_hashes.insert("old_commit2".to_string(), old_commit2_oid.to_string());
+
+ // Create recent commit (2 days ago)
+ let recent_time = Time::new(
+ (SystemTime::now().duration_since(SystemTime::UNIX_EPOCH).unwrap().as_secs() as i64) - (2 * 24 * 3600),
+ 0
+ );
+ let recent_signature = Signature::new("Test User", "test@example.com", &recent_time)
+ .expect("Failed to create recent signature");
+
+ // Modify shared file and add new file
+ fs::write(&shared_file_path, "Shared content - version 2 (recent)").expect("Failed to update shared file");
+
+ let recent_file_path = repo_path.join("recent_file.txt");
+ fs::write(&recent_file_path, "This file should NOT be cleaned up").expect("Failed to write recent file");
+
+ index.add_path(std::path::Path::new("shared_file.txt")).expect("Failed to add updated shared file");
+ index.add_path(std::path::Path::new("recent_file.txt")).expect("Failed to add recent file");
+ index.write().expect("Failed to write index");
+
+ let tree_id = index.write_tree().expect("Failed to write tree");
+ let tree = repo.find_tree(tree_id).expect("Failed to find tree");
+ let parent_commit = repo.find_commit(old_commit2_oid).expect("Failed to find parent commit");
+
+ let recent_commit_oid = repo.commit(
+ Some("HEAD"),
+ &recent_signature,
+ &recent_signature,
+ "Recent commit - should be preserved",
+ &tree,
+ &[&parent_commit]
+ ).expect("Failed to create recent commit");
+ commit_hashes.insert("recent_commit".to_string(), recent_commit_oid.to_string());
+
+ (temp_dir, commit_hashes, blob_ids)
+}
+
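+/// Lists every loose object hash under `.git/objects`, reconstructed by joining
+/// the two-character directory prefix with the 38-character file name.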
+fn get_all_objects(repo_path: &std::path::Path) -> Result<HashSet<String>, String> {
+ let objects_dir = repo_path.join(".git").join("objects");
+ let mut objects = HashSet::new();
+
+ for entry in fs::read_dir(&objects_dir).map_err(|e| format!("Failed to read objects dir: {}", e))? {
+ let entry = entry.map_err(|e| format!("Failed to read dir entry: {}", e))?;
+ let path = entry.path();
+
+ if path.is_dir() && path.file_name().unwrap().to_str().unwrap().len() == 2 {
+ let prefix = path.file_name().unwrap().to_str().unwrap();
+
+ for obj_entry in fs::read_dir(&path).map_err(|e| format!("Failed to read object subdir: {}", e))? {
+ let obj_entry = obj_entry.map_err(|e| format!("Failed to read object entry: {}", e))?;
+ let obj_path = obj_entry.path();
+
+ if obj_path.is_file() {
+ let suffix = obj_path.file_name().unwrap().to_str().unwrap();
+ let full_hash = format!("{}{}", prefix, suffix);
+ if full_hash.len() == 40 {
+ objects.insert(full_hash);
+ }
+ }
+ }
+ }
+ }
+
+ Ok(objects)
+}
+
+fn get_objects_for_commit(repo: &Repository, commit_oid: &str) -> Result