Skip to content

Commit 661875a

Browse files
fix: increment max size for directory (#2228)
* fix: increment max size for directory

  * Adds user configuration to be respected.
  * Updates default max_files limit from 5000 to 10000
  * Refactors async client to use loaded configuration
  * Ensures configuration respects user tweaks

  🤖 Assisted by Amazon Q Developer

* fix: enable serde feature for chrono dependency

  Enables serde serialization/deserialization for DateTime types in the semantic search client.

  🤖 Assisted by Amazon Q Developer

---------

Co-authored-by: Kenneth S. <[email protected]>
1 parent 6d29006 commit 661875a

File tree

3 files changed

+14
-15
lines changed

3 files changed

+14
-15
lines changed

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ bstr = "1.12.0"
3535
bytes = "1.10.1"
3636
camino = { version = "1.1.3", features = ["serde1"] }
3737
cfg-if = "1.0.0"
38-
chrono = "0.4.41"
38+
chrono = { version = "0.4.41", features = ["serde"] }
3939
clap = { version = "4.5.32", features = ["deprecated", "derive", "string", "unicode", "wrap_help"] }
4040
clap_complete = "4.5.46"
4141
clap_complete_fig = "4.4.0"

crates/semantic-search-client/src/client/async_implementation.rs

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ use crate::client::{
2020
embedder_factory,
2121
utils,
2222
};
23+
use crate::config;
2324
use crate::config::SemanticSearchConfig;
2425
use crate::embedding::{
2526
EmbeddingType,
@@ -84,7 +85,7 @@ const MAX_CONCURRENT_OPERATIONS: usize = 3;
8485
impl AsyncSemanticSearchClient {
8586
/// Create a new async semantic search client
8687
pub async fn new(base_dir: impl AsRef<Path>) -> Result<Self> {
87-
Self::with_config_and_embedding_type(base_dir, SemanticSearchConfig::default(), EmbeddingType::default()).await
88+
Self::with_embedding_type(base_dir, EmbeddingType::default()).await
8889
}
8990

9091
/// Create a new semantic search client with the default base directory
@@ -103,23 +104,19 @@ impl AsyncSemanticSearchClient {
103104
///
104105
/// The default base directory path
105106
pub fn get_default_base_dir() -> PathBuf {
106-
crate::config::get_default_base_dir()
107+
config::get_default_base_dir()
107108
}
108109

109110
/// Create a new async semantic search client with custom configuration and embedding type
110-
pub async fn with_config_and_embedding_type(
111-
base_dir: impl AsRef<Path>,
112-
config: SemanticSearchConfig,
113-
embedding_type: EmbeddingType,
114-
) -> Result<Self> {
111+
pub async fn with_embedding_type(base_dir: impl AsRef<Path>, embedding_type: EmbeddingType) -> Result<Self> {
115112
let base_dir = base_dir.as_ref().to_path_buf();
116113
tokio::fs::create_dir_all(&base_dir).await?;
117114

118115
// Create models directory
119-
crate::config::ensure_models_dir(&base_dir)?;
116+
config::ensure_models_dir(&base_dir)?;
120117

121118
// Initialize the configuration
122-
if let Err(e) = crate::config::init_config(&base_dir) {
119+
if let Err(e) = config::init_config(&base_dir) {
123120
tracing::error!("Failed to initialize semantic search configuration: {}", e);
124121
}
125122

@@ -136,13 +133,15 @@ impl AsyncSemanticSearchClient {
136133

137134
// Start background worker - we'll need to create a new embedder for the worker
138135
let worker_embedder = embedder_factory::create_embedder(embedding_type)?;
136+
// Makes sure it respects configuration even if tweaked by user.
137+
let loaded_config = config::get_config().clone();
139138
let worker = BackgroundWorker {
140139
job_rx,
141140
contexts: contexts.clone(),
142141
volatile_contexts: volatile_contexts.clone(),
143142
active_operations: active_operations.clone(),
144143
embedder: worker_embedder,
145-
config: config.clone(),
144+
config: loaded_config.clone(),
146145
base_dir: base_dir.clone(),
147146
indexing_semaphore: Arc::new(Semaphore::new(MAX_CONCURRENT_OPERATIONS)),
148147
};
@@ -154,7 +153,7 @@ impl AsyncSemanticSearchClient {
154153
contexts,
155154
volatile_contexts,
156155
embedder,
157-
config,
156+
config: loaded_config,
158157
job_tx,
159158
active_operations,
160159
};

crates/semantic-search-client/src/config.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ impl Default for SemanticSearchConfig {
8282
model_name: "all-MiniLM-L6-v2".to_string(),
8383
timeout: 30000, // 30 seconds
8484
base_dir: get_default_base_dir(),
85-
max_files: 5000, // Default limit of 5000 files
85+
max_files: 10000, // Default limit of 10000 files
8686
}
8787
}
8888
}
@@ -154,7 +154,7 @@ pub fn get_model_file_path(base_dir: &Path, model_name: &str, file_name: &str) -
154154
/// Result indicating success or failure
155155
pub fn ensure_models_dir(base_dir: &Path) -> std::io::Result<()> {
156156
let models_dir = get_models_dir(base_dir);
157-
std::fs::create_dir_all(models_dir)
157+
fs::create_dir_all(models_dir)
158158
}
159159

160160
/// Initializes the global configuration.
@@ -282,7 +282,7 @@ mod tests {
282282
assert_eq!(config.chunk_overlap, 128);
283283
assert_eq!(config.default_results, 5);
284284
assert_eq!(config.model_name, "all-MiniLM-L6-v2");
285-
assert_eq!(config.max_files, 5000);
285+
assert_eq!(config.max_files, 10000);
286286
}
287287

288288
#[test]

0 commit comments

Comments
 (0)