Skip to content

Commit 71d389a

Browse files
authored
chore(cache): Add minimum threshold for table data disk cache size (#18227)
* feat(cache): Add minimum threshold for table data disk cache size. Introduce TABLE_DATA_DISK_CACHE_SIZE_THRESHOLD (1024 bytes) to better handle the disk cache enabling logic. Any configuration value at or below this threshold will disable the table data disk cache. This change temporarily addresses an issue with cloud platform deployment where setting the disk cache size to zero prevents the physical volume from being loaded. Additional logging for the on-disk cache configuration was added to improve diagnostics. * test: add UT
1 parent 8b375f0 commit 71d389a

File tree

1 file changed

+85
-1
lines changed

1 file changed

+85
-1
lines changed

src/query/storages/common/cache/src/manager.rs

Lines changed: 85 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,16 @@ use crate::Unit;
5252

5353
static DEFAULT_PARQUET_META_DATA_CACHE_ITEMS: usize = 3000;
5454

55+
// Minimum threshold for table data disk cache size (in bytes).
56+
// Any configuration value less than or equal to this threshold will be ignored,
57+
// and table data disk cache will not be enabled.
58+
// This threshold exists to accommodate the current cloud platform logic:
59+
// When attempting to disable table data cache in compute node configurations, setting the disk
60+
// cache size to zero prevents the physical volume from being loaded, so this threshold provides
61+
// a better approach.
62+
// Eventually, we should refactor the compute node configurations instead, to make those options more sensible.
63+
const TABLE_DATA_DISK_CACHE_SIZE_THRESHOLD: usize = 1024;
64+
5565
#[derive(Default)]
5666
struct CacheSlot<T> {
5767
cache: RwLock<Option<T>>,
@@ -154,6 +164,11 @@ impl CacheManager {
154164
) * 5
155165
};
156166

167+
info!(
168+
"[CacheManager] On-disk cache population queue size: {}",
169+
on_disk_cache_queue_size
170+
);
171+
157172
// setup table data cache
158173
let column_data_cache = {
159174
match config.data_cache_storage {
@@ -648,7 +663,10 @@ impl CacheManager {
648663
sync_data: bool,
649664
ee_mode: bool,
650665
) -> Result<Option<DiskCacheAccessor>> {
651-
if disk_cache_bytes_size == 0 || !ee_mode {
666+
if disk_cache_bytes_size <= TABLE_DATA_DISK_CACHE_SIZE_THRESHOLD || !ee_mode {
667+
info!(
668+
"[CacheManager] On-disk cache {cache_name} disabled, size {disk_cache_bytes_size}, threshold {TABLE_DATA_DISK_CACHE_SIZE_THRESHOLD}, ee mode {ee_mode}"
669+
);
652670
Ok(None)
653671
} else {
654672
let cache_holder = DiskCacheBuilder::try_build_disk_cache(
@@ -1104,4 +1122,70 @@ mod tests {
11041122

11051123
Ok(())
11061124
}
1125+
1126+
#[test]
// Checks the size gate of `CacheManager::new_on_disk_cache`: with EE mode on,
// a disk cache accessor is only returned when the requested size is strictly
// greater than `TABLE_DATA_DISK_CACHE_SIZE_THRESHOLD`.
fn test_disk_cache_size_threshold() -> Result<()> {
    use tempfile::TempDir;

    // Scratch directory backing the cache for the duration of the test.
    let temp_dir = TempDir::new().unwrap();
    let cache_path = temp_dir.path().to_path_buf();

    let cache_name = "test_threshold_cache".to_string();
    let key_reload_policy = DiskCacheKeyReloadPolicy::Fuzzy;

    // Every case shares the same settings; only the byte size varies.
    // EE mode is always on so that the size check is the deciding factor.
    let build = |disk_cache_bytes_size: usize| {
        CacheManager::new_on_disk_cache(
            cache_name.clone(),
            &cache_path,
            5,     // population queue size
            disk_cache_bytes_size,
            key_reload_policy.clone(),
            false, // sync_data
            true,  // ee_mode
        )
    };

    // (requested size, whether a cache is expected, message on failure)
    let cases = [
        (
            TABLE_DATA_DISK_CACHE_SIZE_THRESHOLD - 1,
            false,
            "Disk cache should be disabled when size is below threshold",
        ),
        (
            TABLE_DATA_DISK_CACHE_SIZE_THRESHOLD,
            false,
            "Disk cache should be disabled when size equals threshold",
        ),
        (
            TABLE_DATA_DISK_CACHE_SIZE_THRESHOLD + 1024,
            true,
            "Disk cache should be enabled when size is above threshold",
        ),
    ];

    for (requested_size, expect_enabled, failure_message) in cases {
        let accessor = build(requested_size)?;
        assert!(accessor.is_some() == expect_enabled, "{failure_message}");
    }

    Ok(())
}
11071191
}

0 commit comments

Comments
 (0)