diff --git a/Cargo.lock b/Cargo.lock index 07a476125133e..47ecbe468ade0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4323,6 +4323,7 @@ dependencies = [ "chrono", "databend-common-base", "databend-common-catalog", + "databend-common-column", "databend-common-exception", "databend-common-expression", "databend-common-functions", @@ -9450,8 +9451,7 @@ dependencies = [ [[package]] name = "jsonb" version = "0.5.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a452366d21e8d3cbca680c41388e01d6a88739afef7877961946a6da409f9ccd" +source = "git+https://github.com/b41sh/jsonb?rev=ef5482fe9c07d87daa84115b86fd9af55dea277c#ef5482fe9c07d87daa84115b86fd9af55dea277c" dependencies = [ "byteorder", "ethnum", diff --git a/Cargo.toml b/Cargo.toml index 5c4d541f3ba1b..e11528ecc6772 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -657,6 +657,7 @@ backtrace = { git = "https://github.com/rust-lang/backtrace-rs.git", rev = "7226 color-eyre = { git = "https://github.com/eyre-rs/eyre.git", rev = "e5d92c3" } deltalake = { git = "https://github.com/delta-io/delta-rs", rev = "9954bff" } display-more = { git = "https://github.com/databendlabs/display-more", tag = "v0.2.0" } +jsonb = { git = "https://github.com/b41sh/jsonb", rev = "ef5482fe9c07d87daa84115b86fd9af55dea277c" } map-api = { git = "https://github.com/databendlabs/map-api", tag = "v0.4.2" } openraft = { git = "https://github.com/databendlabs/openraft", tag = "v0.10.0-alpha.9" } orc-rust = { git = "https://github.com/datafuse-extras/orc-rust", rev = "d82aa6d" } diff --git a/src/meta/proto-conv/src/schema_from_to_protobuf_impl.rs b/src/meta/proto-conv/src/schema_from_to_protobuf_impl.rs index b877f1a6c8701..12d7cd3861002 100644 --- a/src/meta/proto-conv/src/schema_from_to_protobuf_impl.rs +++ b/src/meta/proto-conv/src/schema_from_to_protobuf_impl.rs @@ -484,6 +484,13 @@ impl FromToProtoEnum for ex::VariantDataType { variant_data_type::Dt::ArrayT(dt) => { ex::VariantDataType::Array(Box::new(ex::VariantDataType::from_pb_enum(*dt)?)) } + variant_data_type::Dt::DecimalT(dt) => { + ex::VariantDataType::Decimal(ex::types::decimal::DecimalDataType::from_pb(dt)?) 
+ } + variant_data_type::Dt::BinaryT(_) => ex::VariantDataType::Binary, + variant_data_type::Dt::DateT(_) => ex::VariantDataType::Date, + variant_data_type::Dt::TimestampT(_) => ex::VariantDataType::Timestamp, + variant_data_type::Dt::IntervalT(_) => ex::VariantDataType::Interval, }) } @@ -498,6 +505,14 @@ impl FromToProtoEnum for ex::VariantDataType { VariantDataType::Array(dt) => { pb::variant_data_type::Dt::ArrayT(Box::new(dt.to_pb_enum()?)) } + VariantDataType::Decimal(n) => { + let x = n.to_pb()?; + pb::variant_data_type::Dt::DecimalT(x) + } + VariantDataType::Binary => pb::variant_data_type::Dt::BinaryT(pb::Empty {}), + VariantDataType::Date => pb::variant_data_type::Dt::DateT(pb::Empty {}), + VariantDataType::Timestamp => pb::variant_data_type::Dt::TimestampT(pb::Empty {}), + VariantDataType::Interval => pb::variant_data_type::Dt::IntervalT(pb::Empty {}), }; Ok(pb::VariantDataType { dt: Some(dt) }) diff --git a/src/meta/protos/proto/virtual_schema.proto b/src/meta/protos/proto/virtual_schema.proto index c4d4c02eb7f17..4c7b5cf7280ad 100644 --- a/src/meta/protos/proto/virtual_schema.proto +++ b/src/meta/protos/proto/virtual_schema.proto @@ -32,6 +32,11 @@ message VariantDataType { Empty float64_t = 5; Empty string_t = 6; VariantDataType array_t = 7; + Decimal decimal_t = 8; + Empty binary_t = 9; + Empty date_t = 10; + Empty timestamp_t = 11; + Empty interval_t = 12; } } diff --git a/src/query/ee/tests/it/storages/fuse/operations/virtual_columns.rs b/src/query/ee/tests/it/storages/fuse/operations/virtual_columns.rs index 7a239a9e87195..888e3f222d078 100644 --- a/src/query/ee/tests/it/storages/fuse/operations/virtual_columns.rs +++ b/src/query/ee/tests/it/storages/fuse/operations/virtual_columns.rs @@ -34,10 +34,6 @@ async fn test_fuse_do_refresh_virtual_column() -> Result<()> { .default_session() .get_settings() .set_data_retention_time_in_days(0)?; - fixture - .default_session() - .get_settings() - .set_enable_experimental_virtual_column(1)?; fixture.create_default_database().await?; fixture.create_variant_table().await?; diff --git a/src/query/ee/tests/it/storages/fuse/operations/virtual_columns_builder.rs b/src/query/ee/tests/it/storages/fuse/operations/virtual_columns_builder.rs index 7dc9501e9aec2..80fa20ba87712 100644 --- a/src/query/ee/tests/it/storages/fuse/operations/virtual_columns_builder.rs +++ b/src/query/ee/tests/it/storages/fuse/operations/virtual_columns_builder.rs @@ -16,6 +16,8 @@ use std::str::FromStr; use databend_common_base::base::tokio; use databend_common_exception::Result; +use databend_common_expression::types::DecimalDataType; +use databend_common_expression::types::DecimalSize; use databend_common_expression::types::Int32Type; use databend_common_expression::types::VariantType; use databend_common_expression::ColumnId; @@ -32,11 +34,6 @@ use jsonb::OwnedJsonb; #[tokio::test(flavor = "multi_thread")] async fn test_virtual_column_builder() -> Result<()> { let fixture = TestFixture::setup_with_custom(EESetup::new()).await?; - - fixture - .default_session() - .get_settings() - .set_enable_experimental_virtual_column(1)?; fixture.create_default_database().await?; fixture.create_variant_table().await?; @@ -247,7 +244,10 @@ async fn test_virtual_column_builder() -> Result<()> { "['geo']['lat']", ) .unwrap(); - assert_eq!(meta_geo_lat.data_type, VariantDataType::Jsonb); + assert_eq!( + meta_geo_lat.data_type, + VariantDataType::Decimal(DecimalDataType::from(DecimalSize::new_unchecked(18, 1))) + ); let entries = vec![ Int32Type::from_data(vec![1, 2, 3, 4, 5, 6, 7, 
8]).into(), @@ -316,11 +316,6 @@ async fn test_virtual_column_builder() -> Result<()> { #[tokio::test(flavor = "multi_thread")] async fn test_virtual_column_builder_stream_write() -> Result<()> { let fixture = TestFixture::setup_with_custom(EESetup::new()).await?; - - fixture - .default_session() - .get_settings() - .set_enable_experimental_virtual_column(1)?; fixture.create_default_database().await?; fixture.create_variant_table().await?; diff --git a/src/query/expression/src/schema.rs b/src/query/expression/src/schema.rs index 1b3bea9bda1b2..6f7ab4136e608 100644 --- a/src/query/expression/src/schema.rs +++ b/src/query/expression/src/schema.rs @@ -149,7 +149,7 @@ pub struct DataSchema { pub(crate) metadata: BTreeMap, } -#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord, Serialize, Deserialize)] +#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord, Hash, Serialize, Deserialize)] pub enum VariantDataType { Jsonb, Boolean, @@ -158,6 +158,11 @@ pub enum VariantDataType { Float64, String, Array(Box), + Decimal(DecimalDataType), + Binary, + Date, + Timestamp, + Interval, } #[derive(Clone, Debug, Default, Eq, PartialEq, Serialize, Deserialize)] diff --git a/src/query/expression/src/types/decimal.rs b/src/query/expression/src/types/decimal.rs index ee890d0a77d76..2570255d46cca 100644 --- a/src/query/expression/src/types/decimal.rs +++ b/src/query/expression/src/types/decimal.rs @@ -1541,6 +1541,8 @@ impl Decimal for i256 { Copy, PartialEq, Eq, + PartialOrd, + Ord, Hash, Serialize, Deserialize, diff --git a/src/query/expression/src/utils/display.rs b/src/query/expression/src/utils/display.rs index 75bfddc42d4d3..9881a5d699316 100755 --- a/src/query/expression/src/utils/display.rs +++ b/src/query/expression/src/utils/display.rs @@ -734,6 +734,11 @@ impl Display for VariantDataType { VariantDataType::Float64 => write!(f, "Float64"), VariantDataType::String => write!(f, "String"), VariantDataType::Array(inner) => write!(f, "Array({inner})"), + VariantDataType::Decimal(inner) => write!(f, "Decimal({inner})"), + VariantDataType::Binary => write!(f, "Binary"), + VariantDataType::Date => write!(f, "Date"), + VariantDataType::Timestamp => write!(f, "Timestamp"), + VariantDataType::Interval => write!(f, "Interval"), } } } diff --git a/src/query/settings/src/settings_default.rs b/src/query/settings/src/settings_default.rs index 1fba150428237..b03a92b5be0f2 100644 --- a/src/query/settings/src/settings_default.rs +++ b/src/query/settings/src/settings_default.rs @@ -1079,13 +1079,6 @@ impl DefaultSettings { scope: SettingScope::Both, range: Some(SettingRange::String(vec!["None".into(), "LZ4".into(), "ZSTD".into()])), }), - ("enable_refresh_virtual_column_after_write", DefaultSettingValue { - value: UserSettingValue::UInt64(1), - desc: "Refresh virtual column after new data written", - mode: SettingMode::Both, - scope: SettingScope::Both, - range: Some(SettingRange::Numeric(0..=1)), - }), ("enable_refresh_aggregating_index_after_write", DefaultSettingValue { value: UserSettingValue::UInt64(1), desc: "Refresh aggregating index after new data written", @@ -1446,7 +1439,7 @@ impl DefaultSettings { range: Some(SettingRange::Numeric(1..=1024)), }), ("enable_experimental_virtual_column", DefaultSettingValue { - value: UserSettingValue::UInt64(0), + value: UserSettingValue::UInt64(1), desc: "Enables experimental virtual column", mode: SettingMode::Both, scope: SettingScope::Both, diff --git a/src/query/settings/src/settings_getter_setter.rs b/src/query/settings/src/settings_getter_setter.rs index 
3ba9d9b0fa2d0..9cf521c4267c1 100644 --- a/src/query/settings/src/settings_getter_setter.rs +++ b/src/query/settings/src/settings_getter_setter.rs @@ -789,10 +789,6 @@ impl Settings { } } - pub fn get_enable_refresh_virtual_column_after_write(&self) -> Result { - Ok(self.try_get_u64("enable_refresh_virtual_column_after_write")? != 0) - } - pub fn get_enable_refresh_aggregating_index_after_write(&self) -> Result { Ok(self.try_get_u64("enable_refresh_aggregating_index_after_write")? != 0) } diff --git a/src/query/storages/common/table_meta/src/meta/v2/segment.rs b/src/query/storages/common/table_meta/src/meta/v2/segment.rs index eacd685d9b539..d0a4e38b5b77c 100644 --- a/src/query/storages/common/table_meta/src/meta/v2/segment.rs +++ b/src/query/storages/common/table_meta/src/meta/v2/segment.rs @@ -18,6 +18,10 @@ use std::sync::Arc; use chrono::DateTime; use chrono::Utc; +use databend_common_expression::types::i256; +use databend_common_expression::types::Decimal; +use databend_common_expression::types::DecimalDataType; +use databend_common_expression::types::DecimalSize; use databend_common_expression::types::NumberDataType; use databend_common_expression::BlockMetaInfo; use databend_common_expression::BlockMetaInfoDowncast; @@ -85,7 +89,16 @@ pub struct VirtualColumnMeta { // 3 => int64 // 4 => float64 // 5 => string + // 6 => decimal64 + // 7 => decimal128 + // 8 => decimal256 + // 9 => binary + // 10 => date + // 11 => timestamp + // 12 => interval pub data_type: u8, + /// the scale size, only used for decimal type + pub scale: Option, /// virtual column statistics. pub column_stat: Option, } @@ -100,26 +113,63 @@ impl VirtualColumnMeta { } pub fn data_type(&self) -> TableDataType { + let scale = self.scale.unwrap_or_default(); match self.data_type { 1 => TableDataType::Nullable(Box::new(TableDataType::Boolean)), 2 => TableDataType::Nullable(Box::new(TableDataType::Number(NumberDataType::UInt64))), 3 => TableDataType::Nullable(Box::new(TableDataType::Number(NumberDataType::Int64))), 4 => TableDataType::Nullable(Box::new(TableDataType::Number(NumberDataType::Float64))), 5 => TableDataType::Nullable(Box::new(TableDataType::String)), + 6 => { + let size = DecimalSize::new_unchecked(i64::MAX_PRECISION, scale); + TableDataType::Nullable(Box::new(TableDataType::Decimal(DecimalDataType::from( + size, + )))) + } + 7 => { + let size = DecimalSize::new_unchecked(i128::MAX_PRECISION, scale); + TableDataType::Nullable(Box::new(TableDataType::Decimal(DecimalDataType::from( + size, + )))) + } + 8 => { + let size = DecimalSize::new_unchecked(i256::MAX_PRECISION, scale); + TableDataType::Nullable(Box::new(TableDataType::Decimal(DecimalDataType::from( + size, + )))) + } + 9 => TableDataType::Nullable(Box::new(TableDataType::Binary)), + 10 => TableDataType::Nullable(Box::new(TableDataType::Date)), + 11 => TableDataType::Nullable(Box::new(TableDataType::Timestamp)), + 12 => TableDataType::Nullable(Box::new(TableDataType::Interval)), _ => TableDataType::Nullable(Box::new(TableDataType::Variant)), } } - pub fn data_type_code(variant_type: &VariantDataType) -> u8 { - match variant_type { + pub fn data_type_code(variant_type: &VariantDataType) -> (u8, Option) { + let ty = match variant_type { VariantDataType::Jsonb => 0, VariantDataType::Boolean => 1, VariantDataType::UInt64 => 2, VariantDataType::Int64 => 3, VariantDataType::Float64 => 4, VariantDataType::String => 5, + VariantDataType::Decimal(ty) => match ty { + DecimalDataType::Decimal64(_) => 6, + DecimalDataType::Decimal128(_) => 7, + 
DecimalDataType::Decimal256(_) => 8, + }, + VariantDataType::Binary => 9, + VariantDataType::Date => 10, + VariantDataType::Timestamp => 11, + VariantDataType::Interval => 12, _ => unreachable!(), - } + }; + let scale = match variant_type { + VariantDataType::Decimal(ty) => Some(ty.scale()), + _ => None, + }; + (ty, scale) } } diff --git a/src/query/storages/fuse/Cargo.toml b/src/query/storages/fuse/Cargo.toml index f929694809eeb..19b8e6a01a2f1 100644 --- a/src/query/storages/fuse/Cargo.toml +++ b/src/query/storages/fuse/Cargo.toml @@ -10,6 +10,7 @@ edition = { workspace = true } databend-common-base = { workspace = true } databend-common-catalog = { workspace = true } +databend-common-column = { workspace = true } databend-common-exception = { workspace = true } databend-common-expression = { workspace = true } databend-common-functions = { workspace = true } diff --git a/src/query/storages/fuse/src/io/write/stream/block_builder.rs b/src/query/storages/fuse/src/io/write/stream/block_builder.rs index 5bc189f47c455..b67a8dfc2ab1d 100644 --- a/src/query/storages/fuse/src/io/write/stream/block_builder.rs +++ b/src/query/storages/fuse/src/io/write/stream/block_builder.rs @@ -475,12 +475,7 @@ impl StreamBlockProperties { .collect::>(); let inverted_index_builders = create_inverted_index_builders(&table.table_info.meta); - let virtual_column_builder = if ctx - .get_settings() - .get_enable_refresh_virtual_column_after_write() - .unwrap_or_default() - && table.support_virtual_columns() - { + let virtual_column_builder = if table.support_virtual_columns() { VirtualColumnBuilder::try_create(ctx.clone(), source_schema.clone()).ok() } else { None diff --git a/src/query/storages/fuse/src/io/write/virtual_column_builder.rs b/src/query/storages/fuse/src/io/write/virtual_column_builder.rs index 19f4b76c937f1..69ee81630d32e 100644 --- a/src/query/storages/fuse/src/io/write/virtual_column_builder.rs +++ b/src/query/storages/fuse/src/io/write/virtual_column_builder.rs @@ -12,27 +12,33 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use std::collections::BTreeMap; -use std::collections::BTreeSet; +use std::borrow::Cow; +use std::collections::HashMap; use std::collections::HashSet; -use std::collections::VecDeque; +use std::hash::Hash; use std::sync::Arc; use databend_common_catalog::table_context::TableContext; +use databend_common_column::types::months_days_micros; use databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_expression::infer_schema_type; -use databend_common_expression::types::BooleanType; +use databend_common_expression::types::binary::BinaryColumnBuilder; +use databend_common_expression::types::i256; use databend_common_expression::types::DataType; -use databend_common_expression::types::Float64Type; -use databend_common_expression::types::Int64Type; +use databend_common_expression::types::Decimal; +use databend_common_expression::types::DecimalDataType; +use databend_common_expression::types::DecimalScalar; +use databend_common_expression::types::DecimalSize; +use databend_common_expression::types::MutableBitmap; +use databend_common_expression::types::NullableColumn; use databend_common_expression::types::NumberDataType; -use databend_common_expression::types::StringType; -use databend_common_expression::types::UInt64Type; -use databend_common_expression::types::VariantType; +use databend_common_expression::types::NumberScalar; +use databend_common_expression::Column; +use databend_common_expression::ColumnBuilder; use databend_common_expression::ColumnId; use databend_common_expression::DataBlock; -use databend_common_expression::FromData; +use databend_common_expression::Scalar; use databend_common_expression::ScalarRef; use databend_common_expression::TableDataType; use databend_common_expression::TableField; @@ -40,6 +46,7 @@ use databend_common_expression::TableSchemaRef; use databend_common_expression::TableSchemaRefExt; use databend_common_expression::VariantDataType; use databend_common_expression::VIRTUAL_COLUMNS_LIMIT; +use databend_common_hashtable::StackHashMap; use databend_common_io::constants::DEFAULT_BLOCK_INDEX_BUFFER_SIZE; use databend_common_license::license::Feature; use databend_common_license::license_manager::LicenseManagerSwitch; @@ -49,11 +56,20 @@ use databend_storages_common_table_meta::meta::DraftVirtualColumnMeta; use databend_storages_common_table_meta::meta::Location; use databend_storages_common_table_meta::meta::StatisticsOfColumns; use databend_storages_common_table_meta::meta::VirtualColumnMeta; -use jsonb::from_slice; +use jsonb::keypath::KeyPath as JsonbKeyPath; +use jsonb::keypath::KeyPaths as JsonbKeyPaths; +use jsonb::Date as JsonbDate; +use jsonb::Decimal128 as JsonbDecimal128; +use jsonb::Decimal256 as JsonbDecimal256; +use jsonb::Decimal64 as JsonbDecimal64; +use jsonb::Interval as JsonbInterval; use jsonb::Number as JsonbNumber; use jsonb::RawJsonb; +use jsonb::Timestamp as JsonbTimestamp; use jsonb::Value as JsonbValue; use parquet::format::FileMetaData; +use siphasher::sip128::Hasher128; +use siphasher::sip128::SipHasher24; use crate::io::write::WriteSettings; use crate::io::TableMetaLocationGenerator; @@ -66,14 +82,14 @@ pub struct VirtualColumnState { } #[derive(Clone)] -#[allow(clippy::type_complexity)] pub struct VirtualColumnBuilder { // Variant fields variant_fields: Vec, // Variant field offsets variant_offsets: Vec, // Store virtual values across multiple blocks - virtual_values: Vec, Vec>>>>, + virtual_paths: Vec>, + virtual_values: Vec>, // Ignored fields ignored_fields: HashSet, // Total number of 
rows processed @@ -108,84 +124,105 @@ impl VirtualColumnBuilder { if variant_fields.is_empty() { return Err(ErrorCode::VirtualColumnError("Virtual column only support variant type, but this table don't have variant type fields")); } - let mut virtual_values = Vec::with_capacity(variant_fields.len()); + let mut virtual_paths = Vec::with_capacity(variant_fields.len()); for _ in 0..variant_fields.len() { - virtual_values.push(BTreeMap::new()); + virtual_paths.push(HashMap::with_capacity(32)); } + let virtual_values = Vec::with_capacity(32); + let ignored_fields = HashSet::new(); Ok(VirtualColumnBuilder { variant_offsets, variant_fields, + virtual_paths, virtual_values, - ignored_fields: HashSet::new(), + ignored_fields, total_rows: 0, }) } pub fn add_block(&mut self, block: &DataBlock) -> Result<()> { let num_rows = block.num_rows(); + + let mut hash_to_index = Vec::with_capacity(self.variant_fields.len()); + for virtual_paths in &self.virtual_paths { + let mut field_hash_to_index: StackHashMap = + StackHashMap::with_capacity(virtual_paths.len()); + for (virtual_path, index) in virtual_paths.iter() { + let borrowed_key_paths = virtual_path.to_borrowed_key_paths(); + + let mut hasher = SipHasher24::new(); + borrowed_key_paths.hash(&mut hasher); + let hash128 = hasher.finish128(); + let hash_value = hash128.into(); + unsafe { + match field_hash_to_index.insert_and_entry(hash_value) { + Ok(e) => { + let v = e.get_mut(); + *v = *index; + } + Err(e) => { + let v = e.get_mut(); + *v = *index; + } + } + } + } + hash_to_index.push(field_hash_to_index); + } + // use a tmp column id to generate statistics for virtual columns. - let mut paths = VecDeque::new(); - // use first 10 rows as sample to check whether the block is suitable for generating virtual columns - let sample_rows = num_rows.min(10); for (i, offset) in self.variant_offsets.iter().enumerate() { if self.ignored_fields.contains(&i) { continue; } let column = block.get_by_offset(*offset); - let mut field_virtual_values = BTreeMap::new(); - for row in 0..sample_rows { + for row in 0..num_rows { let val = unsafe { column.index_unchecked(row) }; if let ScalarRef::Variant(jsonb_bytes) = val { - let val = from_slice(jsonb_bytes).unwrap(); - paths.clear(); - Self::collect_virtual_values( - &val, - row, - self.virtual_values.len(), - &mut paths, - &mut field_virtual_values, - ); - } - } + let raw_jsonb = RawJsonb::new(jsonb_bytes); + + let key_values = raw_jsonb.extract_scalar_key_values().unwrap(); + for (key_paths, jsonb_value) in key_values { + let scalar = Self::jsonb_value_to_scalar(jsonb_value); + // Blocks are added repeatedly, so the actual rows need to add the rows of the previous blocks + let scalar_value = JsonbScalarValue { + row: self.total_rows + row, + scalar, + }; - if Self::check_sample_virtual_values(sample_rows, &mut field_virtual_values) { - self.ignored_fields.insert(i); - field_virtual_values.clear(); - continue; - } + // Calculate the hash value and use the hash value as the key + let mut hasher = SipHasher24::new(); + key_paths.hash(&mut hasher); + let hash128 = hasher.finish128(); + let hash_value = hash128.into(); - let virtual_field_num = self.virtual_values.iter().map(|v| v.len()).sum(); - for row in sample_rows..num_rows { - let val = unsafe { column.index_unchecked(row) }; - if let ScalarRef::Variant(jsonb_bytes) = val { - let val = from_slice(jsonb_bytes).unwrap(); - paths.clear(); - Self::collect_virtual_values( - &val, - row, - virtual_field_num, - &mut paths, - &mut field_virtual_values, - ); - } - } - - // Merge 
block virtual values into the global virtual values - for (key_path, values) in field_virtual_values { - if let Some(existing_values) = self.virtual_values[i].get_mut(&key_path) { - // Extend existing values with new values - existing_values.extend(values); - } else { - // Add padding for previous blocks if this is a new key path - let mut padded_values = Vec::with_capacity(self.total_rows + values.len()); - // Add None for all rows in previous blocks - for _ in 0..self.total_rows { - padded_values.push(None); + // Use hash value to lookup instead of key paths + if let Some(index) = hash_to_index[i].get(&hash_value) { + self.virtual_values[*index].push(scalar_value); + } else { + // The index was not found. Create a new key path + let index = self.virtual_values.len(); + let owned_key_paths = + OwnedKeyPaths::from_borrowed_key_paths(&key_paths); + + unsafe { + match hash_to_index[i].insert_and_entry(hash_value) { + Ok(e) => { + let v = e.get_mut(); + *v = index; + } + Err(e) => { + let v = e.get_mut(); + *v = index; + } + } + } + + self.virtual_paths[i].insert(owned_key_paths, index); + self.virtual_values.push(vec![scalar_value]); + } } - // Add values from current block - padded_values.extend(values); - self.virtual_values[i].insert(key_path, padded_values); } } } @@ -195,27 +232,99 @@ impl VirtualColumnBuilder { Ok(()) } + fn jsonb_value_to_scalar(value: JsonbValue<'_>) -> Scalar { + match value { + JsonbValue::Null => Scalar::Null, + JsonbValue::Bool(v) => Scalar::Boolean(v), + JsonbValue::String(s) => Scalar::String(s.to_string()), + JsonbValue::Number(n) => match n { + JsonbNumber::Int64(v) => Scalar::Number(NumberScalar::Int64(v)), + JsonbNumber::UInt64(v) => Scalar::Number(NumberScalar::UInt64(v)), + JsonbNumber::Float64(v) => Scalar::Number(NumberScalar::Float64(v.into())), + JsonbNumber::Decimal64(v) => Scalar::Decimal(DecimalScalar::Decimal64( + v.value, + DecimalSize::new_unchecked(i64::MAX_PRECISION, v.scale), + )), + JsonbNumber::Decimal128(v) => Scalar::Decimal(DecimalScalar::Decimal128( + v.value, + DecimalSize::new_unchecked(i128::MAX_PRECISION, v.scale), + )), + JsonbNumber::Decimal256(v) => Scalar::Decimal(DecimalScalar::Decimal256( + i256(v.value), + DecimalSize::new_unchecked(i256::MAX_PRECISION, v.scale), + )), + }, + JsonbValue::Binary(v) => Scalar::Binary(v.to_vec()), + JsonbValue::Date(v) => Scalar::Date(v.value), + JsonbValue::Timestamp(v) => Scalar::Timestamp(v.value), + JsonbValue::TimestampTz(v) => Scalar::Timestamp(v.value), + JsonbValue::Interval(v) => { + Scalar::Interval(months_days_micros::new(v.months, v.days, v.micros)) + } + _ => unreachable!(), + } + } + + fn scalar_to_jsonb_value(scalar: ScalarRef<'_>) -> JsonbValue<'_> { + match scalar { + ScalarRef::Null => JsonbValue::Null, + ScalarRef::Boolean(v) => JsonbValue::Bool(v), + ScalarRef::String(s) => JsonbValue::String(Cow::Borrowed(s)), + ScalarRef::Number(NumberScalar::Int64(n)) => JsonbValue::Number(JsonbNumber::Int64(n)), + ScalarRef::Number(NumberScalar::UInt64(n)) => { + JsonbValue::Number(JsonbNumber::UInt64(n)) + } + ScalarRef::Number(NumberScalar::Float64(n)) => { + JsonbValue::Number(JsonbNumber::Float64(n.0)) + } + ScalarRef::Decimal(DecimalScalar::Decimal64(v, size)) => { + JsonbValue::Number(JsonbNumber::Decimal64(JsonbDecimal64 { + value: v, + scale: size.scale(), + })) + } + ScalarRef::Decimal(DecimalScalar::Decimal128(v, size)) => { + JsonbValue::Number(JsonbNumber::Decimal128(JsonbDecimal128 { + value: v, + scale: size.scale(), + })) + } + 
ScalarRef::Decimal(DecimalScalar::Decimal256(v, size)) => { + JsonbValue::Number(JsonbNumber::Decimal256(JsonbDecimal256 { + value: v.0, + scale: size.scale(), + })) + } + ScalarRef::Binary(v) => JsonbValue::Binary(v), + ScalarRef::Date(v) => JsonbValue::Date(JsonbDate { value: v }), + ScalarRef::Timestamp(v) => JsonbValue::Timestamp(JsonbTimestamp { value: v }), + ScalarRef::Interval(v) => JsonbValue::Interval(JsonbInterval { + months: v.months(), + days: v.days(), + micros: v.microseconds(), + }), + _ => unreachable!(), + } + } + #[async_backtrace::framed] pub fn finalize( &mut self, write_settings: &WriteSettings, location: &Location, ) -> Result { - let mut virtual_values = Vec::with_capacity(self.variant_fields.len()); - for _ in 0..self.variant_fields.len() { - virtual_values.push(BTreeMap::new()); + let mut virtual_values = Vec::with_capacity(self.virtual_values.len()); + for _ in 0..self.virtual_values.len() { + virtual_values.push(Vec::new()); } std::mem::swap(&mut self.virtual_values, &mut virtual_values); + let total_rows = self.total_rows; self.total_rows = 0; self.ignored_fields.clear(); - let mut virtual_field_num = 0; // Process the collected virtual values - for field_virtual_values in &mut virtual_values { - Self::discard_virtual_values(total_rows, virtual_field_num, field_virtual_values); - virtual_field_num += field_virtual_values.len(); - } + let virtual_field_num = self.discard_virtual_values(total_rows, &mut virtual_values); // If after discarding, no virtual values remain, return empty state if virtual_field_num == 0 { @@ -231,20 +340,20 @@ impl VirtualColumnBuilder { }); } - let mut virtual_column_names = Vec::new(); - let mut virtual_fields = Vec::new(); - let mut virtual_columns = Vec::new(); + let mut virtual_column_names = Vec::with_capacity(virtual_field_num); + let mut virtual_fields = Vec::with_capacity(virtual_field_num); + let mut virtual_columns = Vec::with_capacity(virtual_field_num); + // use a tmp column id to generate statistics for virtual columns. 
let mut tmp_column_id = 0; - - for (source_field, field_virtual_values) in - self.variant_fields.iter().zip(virtual_values.into_iter()) + for (source_field, field_virtual_paths) in + self.variant_fields.iter().zip(self.virtual_paths.iter()) { - let value_types = Self::inference_data_type(&field_virtual_values); - for ((key_paths, vals), val_type) in field_virtual_values - .into_iter() - .zip(value_types.into_iter()) - { + for (field_virtual_path, index) in field_virtual_paths { + if virtual_values[*index].is_empty() { + continue; + } + let val_type = Self::inference_data_type(&virtual_values[*index]); let virtual_type = match val_type { VariantDataType::Jsonb => DataType::Nullable(Box::new(DataType::Variant)), VariantDataType::Boolean => DataType::Nullable(Box::new(DataType::Boolean)), @@ -258,54 +367,81 @@ impl VirtualColumnBuilder { DataType::Nullable(Box::new(DataType::Number(NumberDataType::Float64))) } VariantDataType::String => DataType::Nullable(Box::new(DataType::String)), - _ => todo!(), + VariantDataType::Decimal(ty) => { + DataType::Nullable(Box::new(DataType::Decimal(ty.size()))) + } + VariantDataType::Binary => DataType::Nullable(Box::new(DataType::Binary)), + VariantDataType::Date => DataType::Nullable(Box::new(DataType::Date)), + VariantDataType::Timestamp => DataType::Nullable(Box::new(DataType::Timestamp)), + VariantDataType::Interval => DataType::Nullable(Box::new(DataType::Interval)), + _ => unreachable!(), }; - // create column - let column = match val_type { - VariantDataType::Jsonb => VariantType::from_opt_data(vals), - VariantDataType::Boolean => BooleanType::from_opt_data( - vals.into_iter() - .map(|v| v.map(|v| RawJsonb::new(&v).as_bool().unwrap().unwrap())) - .collect(), - ), - VariantDataType::UInt64 => UInt64Type::from_opt_data( - vals.into_iter() - .map(|v| v.map(|v| RawJsonb::new(&v).as_u64().unwrap().unwrap())) - .collect(), - ), - VariantDataType::Int64 => Int64Type::from_opt_data( - vals.into_iter() - .map(|v| v.map(|v| RawJsonb::new(&v).as_i64().unwrap().unwrap())) - .collect(), - ), - VariantDataType::Float64 => Float64Type::from_opt_data( - vals.into_iter() - .map(|v| v.map(|v| RawJsonb::new(&v).as_f64().unwrap().unwrap())) - .collect(), - ), - VariantDataType::String => StringType::from_opt_data( - vals.into_iter() - .map(|v| { - v.map(|v| RawJsonb::new(&v).as_str().unwrap().unwrap().to_string()) - }) - .collect(), - ), - _ => todo!(), + let mut last_row = 0; + let first_row = virtual_values[*index][0].row; + let column = if matches!(val_type, VariantDataType::Jsonb) { + let mut bitmap = MutableBitmap::from_len_zeroed(total_rows); + let mut builder = BinaryColumnBuilder::with_capacity(total_rows, 0); + for _ in 0..first_row { + builder.commit_row(); + } + for val in &virtual_values[*index] { + if val.row - last_row > 1 { + for _ in last_row..val.row { + builder.commit_row(); + } + } + bitmap.set(val.row, true); + let jsonb_value = Self::scalar_to_jsonb_value(val.scalar.as_ref()); + jsonb_value.write_to_vec(&mut builder.data); + builder.commit_row(); + last_row = val.row; + } + if last_row < total_rows - 1 { + for _ in last_row..total_rows { + builder.commit_row(); + } + } + let nullable_column = NullableColumn { + column: Column::Variant(builder.build()), + validity: bitmap.into(), + }; + Column::Nullable(Box::new(nullable_column)) + } else { + let mut builder = ColumnBuilder::with_capacity(&virtual_type, total_rows); + if first_row != 0 { + let default_len = first_row; + builder.push_repeat(&ScalarRef::Null, default_len); + last_row = first_row; + } + 
for val in &virtual_values[*index] { + if val.row - last_row > 1 { + let default_len = val.row - last_row - 1; + builder.push_repeat(&ScalarRef::Null, default_len); + } + builder.push(val.scalar.as_ref()); + last_row = val.row; + } + if last_row < total_rows - 1 { + let default_len = total_rows - last_row - 1; + builder.push_repeat(&ScalarRef::Null, default_len); + } + builder.build() }; + let virtual_table_type = infer_schema_type(&virtual_type).unwrap(); virtual_columns.push(column.into()); let mut key_name = String::new(); - for path in key_paths { + for path in &field_virtual_path.paths { key_name.push('['); match path { - KeyPath::Index(idx) => { + OwnedKeyPath::Index(idx) => { key_name.push_str(&format!("{idx}")); } - KeyPath::Name(name) => { + OwnedKeyPath::Name(name) => { key_name.push('\''); - key_name.push_str(&name); + key_name.push_str(name); key_name.push('\''); } } @@ -368,215 +504,100 @@ impl VirtualColumnBuilder { }) } - fn collect_virtual_values<'a>( - val: &JsonbValue<'a>, - row: usize, - virtual_field_num: usize, - paths: &mut VecDeque, - virtual_values: &mut BTreeMap, Vec>>>, - ) { - if virtual_values.len() + virtual_field_num > VIRTUAL_COLUMNS_LIMIT { - return; - } - match val { - JsonbValue::Object(obj) => { - for (key, val) in obj { - paths.push_back(KeyPath::Name(key.clone())); - Self::collect_virtual_values( - val, - row, - virtual_field_num, - paths, - virtual_values, - ); - paths.pop_back(); - } - return; - } - JsonbValue::Array(arr) => { - for (i, val) in arr.iter().enumerate() { - paths.push_back(KeyPath::Index(i as u32)); - Self::collect_virtual_values( - val, - row, - virtual_field_num, - paths, - virtual_values, - ); - paths.pop_back(); - } - return; - } - _ => {} - } - - // ignore root scalar values - if paths.is_empty() { - return; - } - - // only collect leaf node scalar values. - let path_names = paths.iter().cloned().collect(); - - if let Some(vals) = virtual_values.get_mut(&path_names) { - while vals.len() < row { - vals.push(None); - } - let buf = val.to_vec(); - vals.push(Some(buf)); - } else { - let mut vals = Vec::with_capacity(row + 1); - for _ in 0..row { - vals.push(None); - } - let buf = val.to_vec(); - vals.push(Some(buf)); - virtual_values.insert(path_names, vals); - } - } - - fn check_sample_virtual_values( - sample_rows: usize, - virtual_values: &mut BTreeMap, Vec>>>, - ) -> bool { - // All values are NULL or scalar Variant value. - if virtual_values.is_empty() { - return true; - } - // Fill in the NULL values, keeping each column the same length. - for (_, vals) in virtual_values.iter_mut() { - while vals.len() < sample_rows { - vals.push(None); - } - } - - let mut most_null_count = 0; - for (_, value) in virtual_values.iter() { - let null_count = value.iter().filter(|x| x.is_none()).count(); - let null_percentage = null_count as f64 / value.len() as f64; - if null_percentage > 0.7 { - most_null_count += 1; - } - } - let most_null_percentage = most_null_count as f64 / virtual_values.len() as f64; - most_null_percentage > 0.5 - } - fn discard_virtual_values( + &mut self, num_rows: usize, - virtual_field_num: usize, - virtual_values: &mut BTreeMap, Vec>>>, - ) { + virtual_values: &mut [Vec] + ) -> usize { if virtual_values.is_empty() { - return; - } - // Fill in the NULL values, keeping each column the same length. - for (_, vals) in virtual_values.iter_mut() { - while vals.len() < num_rows { - vals.push(None); - } + return 0; } // 1. Discard virtual columns with most values are Null values. 
- let mut keys_to_remove_none = Vec::new(); - for (key, value) in virtual_values.iter() { - let null_count = value.iter().filter(|x| x.is_none()).count(); - let null_percentage = null_count as f64 / value.len() as f64; + // let mut keys_to_remove_none = Vec::new(); + for values in virtual_values.iter_mut() { + if values.is_empty() { + continue; + } + let not_null_count = values + .iter() + .filter(|x| !matches!(x.scalar, Scalar::Null)) + .count(); + let null_count = num_rows - not_null_count; + let null_percentage = null_count as f64 / num_rows as f64; if null_percentage > 0.7 { - keys_to_remove_none.push(key.clone()); + values.clear(); } } - for key in keys_to_remove_none { - virtual_values.remove(&key); - } // 2. Discard names with the same prefix and ensure that the values of the virtual columns are leaf nodes // for example, we have following variant values. // {"k1":{"k2":"val"}} // {"k1":100} // we should not create virtual column for `k1`. - let mut keys_to_remove_prefix = Vec::new(); - let mut keys: Vec> = virtual_values.keys().cloned().collect(); - keys.sort_by_key(|k| k.len()); - - for i in 0..keys.len() { - let key1 = &keys[i]; - for key2 in keys.iter().skip(i + 1) { - if key2.starts_with(key1) { - keys_to_remove_prefix.push(key1.clone()); - break; + for virtual_paths in &self.virtual_paths { + for (virtual_path, index) in virtual_paths { + if virtual_values[*index].is_empty() { + continue; + } + for other_virtual_path in virtual_paths.keys() { + if virtual_path.is_prefix_path(other_virtual_path) { + virtual_values[*index].clear(); + } } } } - keys_to_remove_prefix.sort(); - keys_to_remove_prefix.dedup(); - - for key in keys_to_remove_prefix { - virtual_values.remove(&key); - } - // 3. Discard redundant virtual values to avoid generating too much virtual fields. 
- if virtual_field_num + virtual_values.len() > VIRTUAL_COLUMNS_LIMIT { - let redundant_num = virtual_field_num + virtual_values.len() - VIRTUAL_COLUMNS_LIMIT; - for _ in 0..redundant_num { - let _ = virtual_values.pop_last(); + let virtual_values_count = virtual_values.iter().filter(|x| !x.is_empty()).count(); + if virtual_values_count > VIRTUAL_COLUMNS_LIMIT { + let mut redundant_num = virtual_values_count - VIRTUAL_COLUMNS_LIMIT; + for virtual_value in virtual_values.iter_mut().rev() { + if redundant_num == 0 { + break; + } + if !virtual_value.is_empty() { + virtual_value.clear(); + redundant_num -= 1; + } } + VIRTUAL_COLUMNS_LIMIT + } else { + virtual_values_count } } - fn inference_data_type( - virtual_values: &BTreeMap, Vec>>>, - ) -> Vec { - let mut val_types = Vec::with_capacity(virtual_values.len()); - let mut val_type_set = BTreeSet::new(); - for (_, vals) in virtual_values.iter() { - val_type_set.clear(); - let mut max_u64 = u64::MIN; - let mut min_i64 = i64::MAX; - for buf in vals.iter().flatten() { - let val = from_slice(buf).unwrap(); - let ty = match val { - JsonbValue::Bool(_) => VariantDataType::Boolean, - JsonbValue::Number(JsonbNumber::UInt64(n)) => { - if n >= max_u64 { - max_u64 = n; - } - VariantDataType::UInt64 - } - JsonbValue::Number(JsonbNumber::Int64(n)) => { - if n <= min_i64 { - min_i64 = n; - } - VariantDataType::Int64 - } - JsonbValue::Number(JsonbNumber::Float64(_)) => VariantDataType::Float64, - JsonbValue::String(_) => VariantDataType::String, - _ => VariantDataType::Jsonb, - }; - if !val_type_set.contains(&ty) { - val_type_set.insert(ty); + fn inference_data_type(virtual_values: &[JsonbScalarValue]) -> VariantDataType { + let mut val_type_set = HashSet::new(); + for val in virtual_values.iter() { + let ty = match val.scalar { + Scalar::Boolean(_) => VariantDataType::Boolean, + Scalar::Number(NumberScalar::UInt64(_)) => VariantDataType::UInt64, + Scalar::Number(NumberScalar::Int64(_)) => VariantDataType::Int64, + Scalar::Number(NumberScalar::Float64(_)) => VariantDataType::Float64, + Scalar::Decimal(decimal_scalar) => { + let size = decimal_scalar.size(); + VariantDataType::Decimal(DecimalDataType::from(size)) } + Scalar::String(_) => VariantDataType::String, + Scalar::Binary(_) => VariantDataType::Binary, + Scalar::Date(_) => VariantDataType::Date, + Scalar::Timestamp(_) => VariantDataType::Timestamp, + Scalar::Interval(_) => VariantDataType::Interval, + _ => VariantDataType::Jsonb, + }; + if !val_type_set.contains(&ty) { + val_type_set.insert(ty); } - // Try to combine Uint64 and Int64 into one type - if val_type_set.len() == 2 - && val_type_set.contains(&VariantDataType::UInt64) - && val_type_set.contains(&VariantDataType::Int64) - { - if min_i64 >= 0 { - val_type_set.remove(&VariantDataType::Int64); - } else if max_u64 <= i64::MAX as u64 { - val_type_set.remove(&VariantDataType::UInt64); - } + if val_type_set.len() == 2 { + return VariantDataType::Jsonb; } - let common_type = if val_type_set.len() != 1 { - VariantDataType::Jsonb - } else { - val_type_set.pop_first().unwrap() - }; - val_types.push(common_type); } - val_types + if val_type_set.len() == 1 { + val_type_set.into_iter().next().unwrap() + } else { + VariantDataType::Jsonb + } } fn file_meta_to_virtual_column_metas( @@ -616,13 +637,15 @@ impl VirtualColumnBuilder { ); let num_values = chunk_meta.num_values as u64; - let variant_type_code = VirtualColumnMeta::data_type_code(&variant_type); + let (variant_type_code, scale) = + VirtualColumnMeta::data_type_code(&variant_type); let column_stat = 
columns_statistics.remove(&tmp_column_id); let virtual_column_meta = VirtualColumnMeta { offset: col_start as u64, len: col_len as u64, num_values, data_type: variant_type_code, + scale, column_stat, }; @@ -645,11 +668,71 @@ impl VirtualColumnBuilder { } } -/// Represents a valid key path. -#[derive(Debug, Clone, PartialEq, Eq, Ord, PartialOrd)] -enum KeyPath { - /// represents the index of an Array - Index(u32), +/// Represents a set of key path chains. +#[derive(Debug, Clone, Eq, Hash, PartialEq)] +pub struct OwnedKeyPaths { + pub paths: Vec, +} + +#[derive(Debug, Clone, Eq, Hash, PartialEq)] +pub enum OwnedKeyPath { + /// represents the index of an Array. + Index(i32), /// represents the field name of an Object. Name(String), } + +impl OwnedKeyPath { + fn to_borrowed_key_path(&self) -> JsonbKeyPath<'_> { + match self { + OwnedKeyPath::Index(idx) => JsonbKeyPath::Index(*idx), + OwnedKeyPath::Name(name) => JsonbKeyPath::Name(Cow::Borrowed(name.as_str())), + } + } + + fn from_borrowed_key_path<'a>(key_path: &JsonbKeyPath<'a>) -> OwnedKeyPath { + match key_path { + JsonbKeyPath::Index(idx) => OwnedKeyPath::Index(*idx), + JsonbKeyPath::QuotedName(name) => OwnedKeyPath::Name(name.to_string()), + JsonbKeyPath::Name(name) => OwnedKeyPath::Name(name.to_string()), + } + } +} + +impl OwnedKeyPaths { + fn to_borrowed_key_paths(&self) -> JsonbKeyPaths<'_> { + let paths = self + .paths + .iter() + .map(|path| path.to_borrowed_key_path()) + .collect::>(); + JsonbKeyPaths { paths } + } + + fn from_borrowed_key_paths(key_paths: &JsonbKeyPaths) -> OwnedKeyPaths { + let paths = key_paths + .paths + .iter() + .map(|path| OwnedKeyPath::from_borrowed_key_path(path)) + .collect::>(); + OwnedKeyPaths { paths } + } + + fn is_prefix_path(&self, other: &OwnedKeyPaths) -> bool { + if self.paths.len() >= other.paths.len() { + return false; + } + for (self_path, other_path) in self.paths.iter().zip(other.paths.iter()) { + if !self_path.eq(other_path) { + return false; + } + } + true + } +} + +#[derive(Debug, Clone)] +struct JsonbScalarValue { + row: usize, + scalar: Scalar, +} diff --git a/src/query/storages/fuse/src/operations/common/processors/transform_merge_commit_meta.rs b/src/query/storages/fuse/src/operations/common/processors/transform_merge_commit_meta.rs index abfd4743a0969..be03324b32c12 100644 --- a/src/query/storages/fuse/src/operations/common/processors/transform_merge_commit_meta.rs +++ b/src/query/storages/fuse/src/operations/common/processors/transform_merge_commit_meta.rs @@ -17,7 +17,6 @@ use std::collections::BTreeSet; use databend_common_exception::Result; use databend_common_expression::DataBlock; -use databend_common_expression::VariantDataType; use databend_common_expression::VirtualDataField; use databend_common_expression::VirtualDataSchema; use databend_common_pipeline_transforms::processors::AccumulatingTransform; @@ -124,8 +123,7 @@ impl TransformMergeCommitMeta { if l_field.source_column_id == r_field.source_column_id && l_field.name == r_field.name { - let mut combined_data_types: BTreeSet = - BTreeSet::new(); + let mut combined_data_types = BTreeSet::new(); for dt in &l_field.data_types { combined_data_types.insert(dt.clone()); } diff --git a/src/query/storages/fuse/src/operations/common/processors/transform_serialize_block.rs b/src/query/storages/fuse/src/operations/common/processors/transform_serialize_block.rs index 34c2437eaf7f4..7df15bd56ca2c 100644 --- a/src/query/storages/fuse/src/operations/common/processors/transform_serialize_block.rs +++ 
b/src/query/storages/fuse/src/operations/common/processors/transform_serialize_block.rs @@ -164,12 +164,7 @@ impl TransformSerializeBlock { let inverted_index_builders = create_inverted_index_builders(&table.table_info.meta); - let virtual_column_builder = if ctx - .get_settings() - .get_enable_refresh_virtual_column_after_write() - .unwrap_or_default() - && table.support_virtual_columns() - { + let virtual_column_builder = if table.support_virtual_columns() { VirtualColumnBuilder::try_create(ctx.clone(), source_schema.clone()).ok() } else { None diff --git a/tests/sqllogictests/suites/ee/01_ee_system/01_0002_virtual_column.test b/tests/sqllogictests/suites/ee/01_ee_system/01_0002_virtual_column.test index 9f4d1601f21d0..c6fd6cf75246b 100644 --- a/tests/sqllogictests/suites/ee/01_ee_system/01_0002_virtual_column.test +++ b/tests/sqllogictests/suites/ee/01_ee_system/01_0002_virtual_column.test @@ -21,9 +21,6 @@ CREATE DATABASE test_virtual_column statement ok USE test_virtual_column -statement ok -set enable_experimental_virtual_column = 1; - statement ok drop table if exists t1 @@ -448,7 +445,7 @@ SELECT count(*) FROM fuse_virtual_column('test_virtual_column', 'tweets'); 6 statement ok -set enable_refresh_virtual_column_after_write = 0; +set enable_experimental_virtual_column = 0; statement ok INSERT INTO tweets FROM (SELECT * FROM tweets); @@ -458,6 +455,12 @@ SELECT count(*) FROM fuse_virtual_column('test_virtual_column', 'tweets'); ---- 6 +statement error +REFRESH VIRTUAL COLUMN FOR tweets; + +statement ok +set enable_experimental_virtual_column = 1; + statement ok REFRESH VIRTUAL COLUMN FOR tweets; @@ -494,9 +497,6 @@ select id, data['id'], data['create'], data['text'], data['user']['id'], data['r 13 13 "4/18" "u" 13 6 {"lat":3.0} 3.0 NULL NULL 13 13 "4/18" "u" 13 6 {"lat":3.0} 3.0 NULL NULL -statement ok -set enable_refresh_virtual_column_after_write = 1; - statement ok CREATE OR REPLACE TABLE STUDENT_GRADES ( SCHOOL_ID VARCHAR, @@ -677,9 +677,6 @@ FROM test_stream; 9 "Richard" 33 "Austin" "hiking" "cycling" 10 "Lisa" 26 "Chicago" "gaming" "reading" -statement ok -set enable_experimental_virtual_column = 0; - statement ok DROP DATABASE test_virtual_column diff --git a/tests/sqllogictests/suites/ee/04_ee_inverted_index/04_0000_inverted_index_base.test b/tests/sqllogictests/suites/ee/04_ee_inverted_index/04_0000_inverted_index_base.test index eee5357f86d79..f5fcdff3c61f4 100644 --- a/tests/sqllogictests/suites/ee/04_ee_inverted_index/04_0000_inverted_index_base.test +++ b/tests/sqllogictests/suites/ee/04_ee_inverted_index/04_0000_inverted_index_base.test @@ -476,7 +476,7 @@ idx2 INVERTED books(title, author, description)index_record='"basic"' tokenizer= query TII select name, index_size, inverted_index_size from system.tables where name='t1' and database='test_inverted_index'; ---- -t1 2828 2390 +t1 5316 2390 query III select row_count, bloom_filter_size, inverted_index_size from fuse_block('test_inverted_index', 't1') @@ -486,12 +486,12 @@ select row_count, bloom_filter_size, inverted_index_size from fuse_block('test_i query IIII select block_count, row_count, index_size, inverted_index_size from fuse_segment('test_inverted_index', 't1'); ---- -1 10 2828 2390 +1 10 5316 2390 query IIII select block_count, row_count, index_size, inverted_index_size from fuse_snapshot('test_inverted_index', 't1'); ---- -1 10 2828 2390 +1 10 5316 2390 statement ok CREATE TABLE t2 (id int, body string) diff --git a/tests/sqllogictests/suites/mode/standalone/ee/explain_virtual_column.test 
b/tests/sqllogictests/suites/mode/standalone/ee/explain_virtual_column.test index 7a3a7e80aaba6..867dad261db6f 100644 --- a/tests/sqllogictests/suites/mode/standalone/ee/explain_virtual_column.test +++ b/tests/sqllogictests/suites/mode/standalone/ee/explain_virtual_column.test @@ -24,9 +24,6 @@ CREATE DATABASE test_virtual_db statement ok USE test_virtual_db -statement ok -set enable_experimental_virtual_column = 1; - statement ok drop table if exists t1 @@ -743,9 +740,6 @@ drop table data_main statement ok drop table data_staging -statement ok -set enable_experimental_virtual_column = 0; - statement ok USE default
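The `VirtualColumnMeta` change above extends the one-byte `data_type` code with values 6-8 for decimals and persists only the scale next to it, so the reader falls back to the widest precision of the matching physical width when it rebuilds the column type. Below is a minimal sketch of that round trip in plain std Rust; the enum, the 18/38/76 constants and the function names are illustrative stand-ins for databend's `DecimalDataType`/`DecimalSize`, not the actual API. Storing only the scale appears sufficient because the precision can always be widened to the type's maximum without losing values.

```rust
// Simplified model of the (type_code, scale) encoding used by VirtualColumnMeta.
// All names and constants here are stand-ins for illustration only.
#[derive(Debug, Clone, Copy, PartialEq)]
enum DecimalWidth {
    Decimal64,  // code 6, assumed max precision 18 (i64)
    Decimal128, // code 7, assumed max precision 38 (i128)
    Decimal256, // code 8, assumed max precision 76 (i256)
}

fn encode(width: DecimalWidth, scale: u8) -> (u8, Option<u8>) {
    let code = match width {
        DecimalWidth::Decimal64 => 6,
        DecimalWidth::Decimal128 => 7,
        DecimalWidth::Decimal256 => 8,
    };
    (code, Some(scale))
}

/// Decodes back to (precision, scale). The precision is not stored, so the
/// reader uses the widest precision the physical type can hold.
fn decode(code: u8, scale: Option<u8>) -> Option<(u8, u8)> {
    let scale = scale.unwrap_or_default();
    match code {
        6 => Some((18, scale)),
        7 => Some((38, scale)),
        8 => Some((76, scale)),
        _ => None, // non-decimal codes carry no scale
    }
}

fn main() {
    let (code, scale) = encode(DecimalWidth::Decimal64, 1);
    assert_eq!((code, scale), (6, Some(1)));
    // Matches the builder test above, which expects Decimal(18, 1) for ['geo']['lat'].
    assert_eq!(decode(code, scale), Some((18, 1)));
}
```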
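`add_block` now hashes each borrowed `KeyPaths` with 128-bit SipHash and keys the per-field lookup on that hash, allocating an `OwnedKeyPaths` only the first time a path is seen. A minimal sketch of the same idea, assuming the `siphasher` crate (which the builder imports) but using `std::collections::HashMap` and plain `Vec<String>` paths in place of `StackHashMap` and the jsonb key-path types:

```rust
use std::collections::HashMap;
use std::hash::Hash;

use siphasher::sip128::{Hasher128, SipHasher24};

/// 128-bit SipHash of any hashable key, as used for the per-field path lookup.
fn hash128<T: Hash>(value: &T) -> u128 {
    let mut hasher = SipHasher24::new();
    value.hash(&mut hasher);
    hasher.finish128().into()
}

/// Maps each distinct key path to a stable column index, allocating the owned
/// path only on first sight. `hash_to_index` plays the role of StackHashMap.
fn index_for_path(
    borrowed: &[&str],
    hash_to_index: &mut HashMap<u128, usize>,
    owned_paths: &mut Vec<Vec<String>>,
) -> usize {
    let h = hash128(&borrowed);
    *hash_to_index.entry(h).or_insert_with(|| {
        owned_paths.push(borrowed.iter().map(|s| s.to_string()).collect());
        owned_paths.len() - 1
    })
}

fn main() {
    let mut hash_to_index = HashMap::new();
    let mut owned_paths = Vec::new();
    let a = index_for_path(&["user", "id"], &mut hash_to_index, &mut owned_paths);
    let b = index_for_path(&["user", "id"], &mut hash_to_index, &mut owned_paths);
    let c = index_for_path(&["geo", "lat"], &mut hash_to_index, &mut owned_paths);
    assert_eq!(a, b); // repeated path reuses the same index, no new allocation
    assert_ne!(a, c);
    assert_eq!(owned_paths.len(), 2);
}
```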
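Because values are now collected as sparse `(row, scalar)` pairs per path, `finalize` pads NULLs before the first hit, in the gaps between hits, and after the last one when it builds each nullable column. A simplified model of that densification, using `Vec<Option<i64>>` in place of `ColumnBuilder::push_repeat`:

```rust
/// Expands sparse (row, value) pairs, sorted by row, into a dense nullable
/// vector of length `total_rows`, padding NULL for every missing row.
fn densify(total_rows: usize, sparse: &[(usize, i64)]) -> Vec<Option<i64>> {
    let mut out = Vec::with_capacity(total_rows);
    for &(row, value) in sparse {
        // pad the gap since the previous value (or the start of the block)
        while out.len() < row {
            out.push(None);
        }
        out.push(Some(value));
    }
    // pad the tail after the last value
    while out.len() < total_rows {
        out.push(None);
    }
    out
}

fn main() {
    // rows 1 and 4 carry a value for this key path; rows 0, 2, 3 and 5 do not
    let dense = densify(6, &[(1, 10), (4, 40)]);
    assert_eq!(dense, vec![None, Some(10), None, None, Some(40), None]);
}
```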
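`discard_virtual_values` keeps only leaf paths: when both `['k1']` and `['k1']['k2']` were collected (for example `{"k1":100}` in one row and `{"k1":{"k2":"v"}}` in another), the shorter path's values are cleared via `OwnedKeyPaths::is_prefix_path`. A standalone sketch of that strict-prefix test over simple string segments:

```rust
/// True when `a` is a strict prefix of `b`, i.e. `a` points at a non-leaf node
/// relative to `b`, so its collected values should be discarded.
fn is_prefix_path(a: &[&str], b: &[&str]) -> bool {
    a.len() < b.len() && a.iter().zip(b.iter()).all(|(x, y)| x == y)
}

fn main() {
    let k1 = ["k1"];
    let k1_k2 = ["k1", "k2"];
    assert!(is_prefix_path(&k1, &k1_k2)); // ['k1'] is not a leaf, discard it
    assert!(!is_prefix_path(&k1_k2, &k1));
    assert!(!is_prefix_path(&k1_k2, &k1_k2)); // equal paths are not prefixes
}
```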
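`inference_data_type` now works on the collected scalars directly and falls back to `Jsonb` as soon as a second distinct type appears, dropping the old Int64/UInt64 merging. A tiny model of that rule, with string tags standing in for `VariantDataType` variants:

```rust
use std::collections::HashSet;

/// Returns the single scalar type seen for a path, or "Jsonb" when the values
/// are mixed. Tags are illustrative stand-ins for VariantDataType variants.
fn infer_type(tags: &[&'static str]) -> &'static str {
    let mut seen = HashSet::new();
    for tag in tags {
        seen.insert(*tag);
        if seen.len() == 2 {
            return "Jsonb"; // mixed types: keep the raw variant encoding
        }
    }
    seen.into_iter().next().unwrap_or("Jsonb")
}

fn main() {
    assert_eq!(infer_type(&["Int64", "Int64"]), "Int64");
    assert_eq!(infer_type(&["Int64", "String"]), "Jsonb");
}
```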