Skip to content

Commit aab4064

Browse files
authored
chore: revert structured spill config (#19088)
* chore: drop structured spill config * test: remove structured spill behavior it * test: fix configs_table_basic formatting * test: add trailing newlines for configs_table_basic
1 parent 0bb0d0e commit aab4064

File tree

7 files changed

+150
-596
lines changed

7 files changed

+150
-596
lines changed

src/query/config/src/config.rs

Lines changed: 17 additions & 157 deletions
Original file line numberDiff line numberDiff line change
@@ -784,22 +784,6 @@ pub struct FsStorageConfig {
784784
default_value = "_data"
785785
)]
786786
pub data_path: String,
787-
788-
/// Percentage of disk space to reserve (0.0-100.0) - for spill usage
789-
#[clap(skip)]
790-
#[serde(default = "FsStorageConfig::default_reserved_space_percentage")]
791-
pub reserved_space_percentage: Option<OrderedFloat<f64>>,
792-
793-
/// Maximum bytes allowed for spill (0 = unlimited) - for spill usage
794-
#[clap(skip)]
795-
#[serde(default)]
796-
pub max_bytes: Option<u64>,
797-
}
798-
799-
impl FsStorageConfig {
800-
fn default_reserved_space_percentage() -> Option<OrderedFloat<f64>> {
801-
None // Use None as default, will use system default (10.0) if not specified
802-
}
803787
}
804788

805789
impl Default for FsStorageConfig {
@@ -812,8 +796,6 @@ impl From<InnerStorageFsConfig> for FsStorageConfig {
812796
fn from(inner: InnerStorageFsConfig) -> Self {
813797
Self {
814798
data_path: inner.root,
815-
reserved_space_percentage: None,
816-
max_bytes: None,
817799
}
818800
}
819801
}
@@ -3567,20 +3549,6 @@ impl Default for DiskCacheConfig {
35673549
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, Args)]
35683550
#[serde(default)]
35693551
pub struct SpillConfig {
3570-
/// Unified spill storage configuration
3571-
/// Auto-detects type based on storage.type field:
3572-
/// - "fs": Local filesystem spill
3573-
/// - "s3": S3 remote spill
3574-
/// - "azblob": Azure blob remote spill
3575-
/// - etc.
3576-
/// If not configured, uses main data storage with _spill prefix (default)
3577-
///
3578-
/// Note: Spill operator created based on this config will only use Standard S3 storage class,
3579-
/// regardless of any other configured storage class
3580-
#[clap(skip)]
3581-
pub storage: Option<StorageConfig>,
3582-
3583-
// Legacy fields for backward compatibility
35843552
/// Path of spill to local disk. disable if it's empty.
35853553
#[clap(long, value_name = "VALUE", default_value = "")]
35863554
pub spill_local_disk_path: String,
@@ -3593,6 +3561,10 @@ pub struct SpillConfig {
35933561
/// Allow space in bytes to spill to local disk.
35943562
pub spill_local_disk_max_bytes: u64,
35953563

3564+
// TODO: We need to fix StorageConfig so that it supports command line injections.
3565+
#[clap(skip)]
3566+
pub storage: Option<StorageConfig>,
3567+
35963568
/// Maximum percentage of the global local spill quota that a single sort
35973569
/// operator may use for one query.
35983570
///
@@ -3613,49 +3585,9 @@ pub struct SpillConfig {
36133585
pub result_set_spilling_disk_quota_ratio: u64,
36143586
}
36153587

3616-
impl SpillConfig {
3617-
/// Get the spill type based on configuration
3618-
/// Auto-detects from storage configuration
3619-
pub fn get_spill_type(&self) -> String {
3620-
// Check new unified storage configuration first (higher priority)
3621-
if let Some(ref storage) = self.storage {
3622-
match storage.typ.as_str() {
3623-
"fs" => "fs".to_string(),
3624-
"s3" => "s3".to_string(),
3625-
"azblob" => "azblob".to_string(),
3626-
"gcs" => "gcs".to_string(),
3627-
"oss" => "oss".to_string(),
3628-
"obs" => "obs".to_string(),
3629-
"cos" => "cos".to_string(),
3630-
"webhdfs" => "webhdfs".to_string(),
3631-
_ => "remote".to_string(), // fallback for other storage types
3632-
}
3633-
} else if !self.spill_local_disk_path.is_empty() {
3634-
// Fall back to legacy configuration
3635-
"fs".to_string() // legacy local disk maps to fs
3636-
} else {
3637-
"default".to_string()
3638-
}
3639-
}
3640-
3641-
/// Check if this is using legacy local disk configuration
3642-
pub fn is_legacy_local_disk(&self) -> bool {
3643-
!self.spill_local_disk_path.is_empty()
3644-
}
3645-
}
3646-
36473588
impl Default for SpillConfig {
36483589
fn default() -> Self {
3649-
Self {
3650-
storage: None,
3651-
spill_local_disk_path: String::new(),
3652-
spill_local_disk_reserved_space_percentage: OrderedFloat(10.0),
3653-
spill_local_disk_max_bytes: u64::MAX,
3654-
sort_spilling_disk_quota_ratio: 60,
3655-
window_partition_spilling_disk_quota_ratio: 60,
3656-
// TODO: keep 0 to avoid deleting local result-set spill dir before HTTP pagination finishes.
3657-
result_set_spilling_disk_quota_ratio: 0,
3658-
}
3590+
inner::SpillConfig::default().into()
36593591
}
36603592
}
36613593

@@ -3827,91 +3759,19 @@ mod cache_config_converters {
38273759
}
38283760

38293761
fn convert_local_spill_config(spill: SpillConfig) -> Result<inner::SpillConfig> {
3830-
// Determine configuration based on auto-detected spill type
3831-
let spill_type = spill.get_spill_type();
3832-
let (local_writeable_root, path, reserved_disk_ratio, global_bytes_limit, storage_params) =
3833-
match spill_type.as_str() {
3834-
"fs" => {
3835-
if let Some(ref storage) = spill.storage {
3836-
// Use new filesystem storage configuration (higher priority)
3837-
let fs_path = storage.fs.data_path.clone();
3838-
let reserved_ratio = storage
3839-
.fs
3840-
.reserved_space_percentage
3841-
.unwrap_or(OrderedFloat(10.0))
3842-
/ 100.0;
3843-
let max_bytes = storage.fs.max_bytes.unwrap_or(u64::MAX);
3844-
3845-
// Validate fs configuration
3846-
if fs_path.is_empty() {
3847-
return Err(ErrorCode::InvalidConfig(
3848-
"FS storage configured but data_path is empty. Either specify a path or remove the fs storage configuration to use default behavior."
3849-
));
3850-
} else {
3851-
(None, fs_path, reserved_ratio, max_bytes, None)
3852-
}
3853-
} else if spill.is_legacy_local_disk() {
3854-
// Fall back to legacy local disk configuration
3855-
(
3856-
None,
3857-
spill.spill_local_disk_path,
3858-
spill.spill_local_disk_reserved_space_percentage / 100.0,
3859-
spill.spill_local_disk_max_bytes,
3860-
None,
3861-
)
3862-
} else {
3863-
return Err(ErrorCode::InvalidConfig(
3864-
"FS storage configuration not found",
3865-
));
3866-
}
3867-
}
3868-
"s3" | "azblob" | "gcs" | "oss" | "obs" | "cos" | "webhdfs" | "remote" => {
3869-
// Use remote storage configuration for all remote storage types
3870-
let storage_params = spill
3871-
.storage
3872-
.map(|storage| {
3873-
let storage: InnerStorageConfig = storage.try_into()?;
3874-
Ok::<_, ErrorCode>(storage.params)
3875-
})
3876-
.transpose()?;
3877-
3878-
(
3879-
None,
3880-
String::new(),
3881-
OrderedFloat(0.3),
3882-
u64::MAX,
3883-
storage_params,
3884-
)
3885-
}
3886-
_ => {
3887-
// Default behavior for "default" type and any unrecognized types:
3888-
// do NOT implicitly reuse the data cache disk. Local spill is
3889-
// enabled only when explicitly configured via either
3890-
// - [spill.storage] with type = "fs", or
3891-
// - legacy spill_local_disk_path.
3892-
let storage_params = spill
3893-
.storage
3894-
.map(|storage| {
3895-
let storage: InnerStorageConfig = storage.try_into()?;
3896-
Ok::<_, ErrorCode>(storage.params)
3897-
})
3898-
.transpose()?;
3899-
3900-
(
3901-
None,
3902-
spill.spill_local_disk_path,
3903-
spill.spill_local_disk_reserved_space_percentage / 100.0,
3904-
spill.spill_local_disk_max_bytes,
3905-
storage_params,
3906-
)
3907-
}
3908-
};
3762+
let storage_params = spill
3763+
.storage
3764+
.map(|storage| {
3765+
let storage: InnerStorageConfig = storage.try_into()?;
3766+
Ok::<_, ErrorCode>(storage.params)
3767+
})
3768+
.transpose()?;
39093769

39103770
Ok(inner::SpillConfig {
3911-
local_writeable_root,
3912-
path,
3913-
reserved_disk_ratio,
3914-
global_bytes_limit,
3771+
local_writeable_root: None,
3772+
path: spill.spill_local_disk_path,
3773+
reserved_disk_ratio: spill.spill_local_disk_reserved_space_percentage / 100.0,
3774+
global_bytes_limit: spill.spill_local_disk_max_bytes,
39153775
storage_params,
39163776
sort_spilling_disk_quota_ratio: spill.sort_spilling_disk_quota_ratio,
39173777
window_partition_spilling_disk_quota_ratio: spill
@@ -3931,10 +3791,10 @@ mod cache_config_converters {
39313791
});
39323792

39333793
Self {
3934-
storage,
39353794
spill_local_disk_path: value.path,
39363795
spill_local_disk_reserved_space_percentage: value.reserved_disk_ratio * 100.0,
39373796
spill_local_disk_max_bytes: value.global_bytes_limit,
3797+
storage,
39383798
sort_spilling_disk_quota_ratio: value.sort_spilling_disk_quota_ratio,
39393799
window_partition_spilling_disk_quota_ratio: value
39403800
.window_partition_spilling_disk_quota_ratio,

src/query/config/src/mask.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -211,20 +211,20 @@ impl StorageConfig {
211211
impl SpillConfig {
212212
fn mask_display(&self) -> Self {
213213
let Self {
214-
ref storage,
215214
ref spill_local_disk_path,
216215
spill_local_disk_reserved_space_percentage,
217216
spill_local_disk_max_bytes,
217+
ref storage,
218218
sort_spilling_disk_quota_ratio,
219219
window_partition_spilling_disk_quota_ratio,
220220
result_set_spilling_disk_quota_ratio,
221221
} = *self;
222222

223223
Self {
224-
storage: storage.as_ref().map(|storage| storage.mask_display()),
225224
spill_local_disk_path: spill_local_disk_path.clone(),
226225
spill_local_disk_reserved_space_percentage,
227226
spill_local_disk_max_bytes,
227+
storage: storage.as_ref().map(|storage| storage.mask_display()),
228228
sort_spilling_disk_quota_ratio,
229229
window_partition_spilling_disk_quota_ratio,
230230
// TODO: keep 0 to avoid deleting local result-set spill dir before HTTP pagination finishes.
Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
// Copyright 2023 Datafuse Labs.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
use std::ffi::OsString;
16+
use std::io::Write;
17+
18+
use clap::Parser;
19+
use databend_common_config::Config;
20+
use databend_common_config::InnerConfig;
21+
use pretty_assertions::assert_eq;
22+
23+
/// It's required to make sure setting's default value is the same with clap.
24+
#[test]
25+
fn test_config_default() {
26+
let setting_default = InnerConfig::default();
27+
let config_default: InnerConfig = Config::parse_from(Vec::<OsString>::new())
28+
.try_into()
29+
.expect("parse from args must succeed");
30+
31+
assert_eq!(
32+
setting_default, config_default,
33+
"default setting is different from default config, please check again"
34+
)
35+
}
36+
37+
#[test]
38+
fn test_load_config() {
39+
// Create a comprehensive test configuration with multiple sections and data types
40+
let conf = r#"
41+
# Query configuration section
42+
[query]
43+
max_active_sessions = 256
44+
shutdown_wait_timeout_ms = 5000
45+
flight_record_quota_size = 1048576
46+
unknown_query_field = "should be ignored"
47+
48+
# Log configuration section
49+
[log]
50+
file.level = "INFO"
51+
file.dir = "/tmp/databend/logs"
52+
unknown_log_field = 123
53+
54+
# Storage configuration section
55+
[storage]
56+
type = "fs"
57+
58+
[storage.fs]
59+
data_path = "/tmp/databend/data"
60+
61+
# Meta configuration section
62+
[meta]
63+
endpoint = "localhost:9191"
64+
username = "databend"
65+
password = "databend123"
66+
"#;
67+
68+
let mut temp_file = tempfile::NamedTempFile::new().unwrap();
69+
temp_file.write_all(conf.as_bytes()).unwrap();
70+
let temp_path = temp_file.path().to_str().unwrap().to_string();
71+
72+
// Save the original environment variable (if it exists)
73+
let original_env = std::env::var("CONFIG_FILE").ok();
74+
75+
// Set the environment variable to our test config file
76+
std::env::set_var("CONFIG_FILE", &temp_path);
77+
78+
// Use the original load function (without a config_file parameter)
79+
let config = Config::load(false).unwrap();
80+
81+
// Restore the original environment variable
82+
match original_env {
83+
Some(val) => std::env::set_var("CONFIG_FILE", val),
84+
None => std::env::remove_var("CONFIG_FILE"),
85+
}
86+
87+
// Test query configuration values
88+
assert_eq!(
89+
config.query.max_active_sessions, 256,
90+
"max_active_sessions should be 256"
91+
);
92+
assert_eq!(
93+
config.query.shutdown_wait_timeout_ms, 5000,
94+
"shutdown_wait_timeout_ms should be 5000"
95+
);
96+
assert_eq!(
97+
config.query.flight_record_quota_size, 1048576,
98+
"flight_record_quota_size should be 1048576"
99+
);
100+
101+
// Test log configuration values
102+
assert_eq!(
103+
config.log.file.level, "INFO",
104+
"log.file.level should be INFO"
105+
);
106+
assert_eq!(
107+
config.log.file.dir,
108+
"/tmp/databend/logs",
109+
"log.file.dir should be /tmp/databend/logs"
110+
);
111+
112+
// Test storage configuration values
113+
assert_eq!(
114+
config.storage.fs.data_path, "/tmp/databend/data",
115+
"storage.fs.data_path should be /tmp/databend/data"
116+
);
117+
118+
// Test meta configuration values
119+
assert_eq!(
120+
config.meta.endpoint, "localhost:9191",
121+
"meta.endpoint should be localhost:9191"
122+
);
123+
assert_eq!(
124+
config.meta.username, "databend",
125+
"meta.username should be databend"
126+
);
127+
assert_eq!(
128+
config.meta.password, "databend123",
129+
"meta.password should be databend123"
130+
);
131+
}

0 commit comments

Comments
 (0)