Skip to content

Commit 7d4c7d7

Browse files
authored
feat: Make Warehouse Cache case-insensitive (lakekeeper#1473)
1 parent 2fe38bf commit 7d4c7d7

4 files changed

Lines changed: 49 additions & 9 deletions

File tree

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@ rust-version = "1.87.0"
1717
license = "Apache-2.0"
1818

1919
[workspace.dependencies]
20+
unicase = "2.8.1"
2021
anyhow = "^1.0"
2122
assert-json-diff = "2.0.2"
2223
async-channel = { version = "2.3.1" }

crates/lakekeeper/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -97,6 +97,7 @@ strum_macros = { workspace = true }
9797
thiserror = { workspace = true }
9898
time = { workspace = true }
9999
tokio = { workspace = true }
100+
unicase = { workspace = true }
100101
tokio-util = { workspace = true }
101102
tower = { workspace = true, optional = true }
102103
tower-http = { workspace = true, optional = true, features = [

crates/lakekeeper/src/service/catalog_store/warehouse_cache.rs

Lines changed: 46 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@ use std::{
55

66
use axum_prometheus::metrics;
77
use moka::{future::Cache, notification::RemovalCause};
8+
use unicase::UniCase;
89

910
#[cfg(feature = "router")]
1011
use crate::{
@@ -68,7 +69,7 @@ pub(crate) static WAREHOUSE_CACHE: LazyLock<Cache<WarehouseId, CachedWarehouse>>
6869
NAME_TO_ID_CACHE
6970
.invalidate(&(
7071
value.warehouse.project_id.clone(),
71-
value.warehouse.name.clone(),
72+
UniCase::new(value.warehouse.name.clone()),
7273
))
7374
.await;
7475
}
@@ -78,12 +79,14 @@ pub(crate) static WAREHOUSE_CACHE: LazyLock<Cache<WarehouseId, CachedWarehouse>>
7879
});
7980

8081
// Secondary index: (project_id, name) → warehouse_id
81-
static NAME_TO_ID_CACHE: LazyLock<Cache<(ProjectId, String), WarehouseId>> = LazyLock::new(|| {
82-
Cache::builder()
83-
.max_capacity(CONFIG.cache.warehouse.capacity)
84-
.initial_capacity(50)
85-
.build()
86-
});
82+
// Uses UniCase for case-insensitive warehouse name lookups
83+
static NAME_TO_ID_CACHE: LazyLock<Cache<(ProjectId, UniCase<String>), WarehouseId>> =
84+
LazyLock::new(|| {
85+
Cache::builder()
86+
.max_capacity(CONFIG.cache.warehouse.capacity)
87+
.initial_capacity(50)
88+
.build()
89+
});
8790

8891
#[derive(Debug, Clone)]
8992
pub(crate) struct CachedWarehouse {
@@ -131,7 +134,7 @@ pub(super) async fn warehouse_cache_insert(warehouse: Arc<ResolvedWarehouse>) {
131134
tracing::debug!("Inserting warehouse id {warehouse_id} into cache");
132135
tokio::join!(
133136
WAREHOUSE_CACHE.insert(warehouse_id, CachedWarehouse { warehouse }),
134-
NAME_TO_ID_CACHE.insert((project_id, name), warehouse_id),
137+
NAME_TO_ID_CACHE.insert((project_id, UniCase::new(name)), warehouse_id),
135138
);
136139
update_cache_size_metric();
137140
}
@@ -166,7 +169,7 @@ pub(super) async fn warehouse_cache_get_by_name(
166169
) -> Option<Arc<ResolvedWarehouse>> {
167170
update_cache_size_metric();
168171
let Some(warehouse_id) = NAME_TO_ID_CACHE
169-
.get(&(project_id.clone(), name.to_string()))
172+
.get(&(project_id.clone(), UniCase::new(name.to_string())))
170173
.await
171174
else {
172175
metrics::counter!(METRIC_WAREHOUSE_CACHE_MISSES, "cache_type" => "warehouse").increment(1);
@@ -708,4 +711,38 @@ mod tests {
708711
assert!(cached2.is_some());
709712
assert_eq!(cached2.unwrap().warehouse_id, warehouse2_id);
710713
}
714+
715+
#[tokio::test]
716+
async fn test_warehouse_cache_case_insensitive_lookup() {
717+
let warehouse_id = WarehouseId::new_random();
718+
let project_id = ProjectId::new_random();
719+
let name = "Test-Warehouse".to_string();
720+
let warehouse = test_warehouse(
721+
warehouse_id,
722+
name.clone(),
723+
project_id.clone(),
724+
Some(Utc::now()),
725+
0,
726+
);
727+
728+
// Insert warehouse with mixed-case name
729+
warehouse_cache_insert(warehouse.clone()).await;
730+
731+
// Verify we can retrieve it with different case variations
732+
let cached_lower = warehouse_cache_get_by_name("test-warehouse", &project_id).await;
733+
assert!(cached_lower.is_some());
734+
assert_eq!(cached_lower.unwrap().warehouse_id, warehouse_id);
735+
736+
let cached_upper = warehouse_cache_get_by_name("TEST-WAREHOUSE", &project_id).await;
737+
assert!(cached_upper.is_some());
738+
assert_eq!(cached_upper.unwrap().warehouse_id, warehouse_id);
739+
740+
let cached_mixed = warehouse_cache_get_by_name("TeSt-WaReHoUsE", &project_id).await;
741+
assert!(cached_mixed.is_some());
742+
assert_eq!(cached_mixed.unwrap().warehouse_id, warehouse_id);
743+
744+
let cached_exact = warehouse_cache_get_by_name(&name, &project_id).await;
745+
assert!(cached_exact.is_some());
746+
assert_eq!(cached_exact.unwrap().warehouse_id, warehouse_id);
747+
}
711748
}

0 commit comments

Comments
 (0)