Skip to content

Commit 697506a

Browse files
authored
feat(query): Support fuse_virtual_column function to show virtual column size (#17910)
1 parent 6892f0c commit 697506a

File tree

11 files changed: 338 additions and 6 deletions.

src/query/service/src/table_functions/table_function_factory.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ use databend_common_storages_fuse::table_functions::FuseTimeTravelSizeFunc;
3030
use databend_common_storages_fuse::table_functions::FuseVacuumDropAggregatingIndex;
3131
use databend_common_storages_fuse::table_functions::FuseVacuumDropInvertedIndex;
3232
use databend_common_storages_fuse::table_functions::FuseVacuumTemporaryTable;
33+
use databend_common_storages_fuse::table_functions::FuseVirtualColumnFunc;
3334
use databend_common_storages_fuse::table_functions::SetCacheCapacity;
3435
use databend_common_storages_fuse::table_functions::TableFunctionTemplate;
3536
use databend_common_storages_iceberg::IcebergInspectTable;
@@ -191,6 +192,14 @@ impl TableFunctionFactory {
191192
),
192193
);
193194

195+
creators.insert(
196+
"fuse_virtual_column".to_string(),
197+
(
198+
next_id(),
199+
Arc::new(TableFunctionTemplate::<FuseVirtualColumnFunc>::create),
200+
),
201+
);
202+
194203
creators.insert(
195204
"fuse_statistic".to_string(),
196205
(

src/query/storages/common/table_meta/src/meta/column_oriented_segment/block_meta.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,15 @@ use super::ColumnOrientedSegment;
2121
use crate::meta::BlockMeta;
2222
use crate::meta::ColumnMeta;
2323
use crate::meta::ColumnMetaV0;
24+
use crate::meta::VirtualBlockMeta;
25+
2426
/// Common read-only accessors over a block's metadata.
///
/// Implemented in this file for both `BlockMeta` and `ColumnOrientedBlockMeta`.
pub trait AbstractBlockMeta: Send + Sync + 'static + Sized {
2527
fn block_size(&self) -> u64;
2628
fn file_size(&self) -> u64;
2729
fn row_count(&self) -> u64;
2830
/// Returns the block's location path as an owned `String`.
fn location_path(&self) -> String;
2931
/// NOTE(review): presumably returns metadata only for the columns listed in `col_ids` — confirm.
fn col_metas(&self, col_ids: &HashSet<ColumnId>) -> HashMap<ColumnId, ColumnMeta>;
32+
/// Returns the virtual-column metadata attached to this block, if any.
///
/// `BlockMeta` returns a clone of its `virtual_block_meta` field, while
/// `ColumnOrientedBlockMeta` currently always returns `None`.
fn virtual_block_meta(&self) -> Option<VirtualBlockMeta>;
3033
}
3134

3235
impl AbstractBlockMeta for BlockMeta {
@@ -54,6 +57,10 @@ impl AbstractBlockMeta for BlockMeta {
5457
fn location_path(&self) -> String {
5558
self.location.0.to_string()
5659
}
60+
61+
/// Returns an owned clone of this block's `virtual_block_meta` field.
fn virtual_block_meta(&self) -> Option<VirtualBlockMeta> {
62+
self.virtual_block_meta.clone()
63+
}
5764
}
5865

5966
impl AbstractBlockMeta for ColumnOrientedBlockMeta {
@@ -98,6 +105,11 @@ impl AbstractBlockMeta for ColumnOrientedBlockMeta {
98105
.unwrap()
99106
.to_string()
100107
}
108+
109+
/// Placeholder implementation: always returns `None`.
fn virtual_block_meta(&self) -> Option<VirtualBlockMeta> {
110+
// TODO: return the real virtual block metadata; until then callers always see `None`.
111+
None
112+
}
101113
}
102114

103115
pub struct ColumnOrientedBlockMeta {

src/query/storages/fuse/src/io/write/block_writer.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -222,12 +222,14 @@ impl BlockBuilder {
222222
.as_ref()
223223
.map(|v| v.size)
224224
.unwrap_or_default(),
225+
ngram_filter_index_size: bloom_index_state
226+
.as_ref()
227+
.map(|v| v.ngram_size)
228+
.unwrap_or_default(),
225229
compression: self.write_settings.table_compression.into(),
226230
inverted_index_size,
227231
virtual_block_meta: None,
228232
create_on: Some(Utc::now()),
229-
// TODO(kould): ngram index
230-
ngram_filter_index_size: None,
231233
};
232234

233235
let serialized = BlockSerialization {

src/query/storages/fuse/src/io/write/bloom_index_writer.rs

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,13 +43,33 @@ use crate::FuseStorageFormat;
4343
/// State derived from a `BloomIndex` by `BloomIndexState::from_bloom_index`.
pub struct BloomIndexState {
4444
/// NOTE(review): presumably the parquet-encoded index bytes — confirm against `from_bloom_index`.
pub(crate) data: Vec<u8>,
4545
pub(crate) size: u64,
46+
/// Sum of `memory_size()` over the index-block columns whose filter-schema field name
/// starts with `"Ngram"`; `None` when the filter schema contains no such field.
///
/// NOTE(review): this is an in-memory size — confirm it is meant to be comparable with `size`.
pub(crate) ngram_size: Option<u64>,
4647
/// The `location` argument passed to `from_bloom_index`, stored unchanged.
pub(crate) location: Location,
4748
/// Cloned from the source `BloomIndex`'s `column_distinct_count`.
pub(crate) column_distinct_count: HashMap<ColumnId, usize>,
4849
}
4950

5051
impl BloomIndexState {
5152
pub fn from_bloom_index(bloom_index: &BloomIndex, location: Location) -> Result<Self> {
5253
let index_block = bloom_index.serialize_to_data_block()?;
54+
// Calculate ngram index size
55+
let ngram_indexes = &bloom_index
56+
.filter_schema
57+
.fields()
58+
.iter()
59+
.enumerate()
60+
.filter(|(_, f)| f.name.starts_with("Ngram"))
61+
.map(|(i, _)| i)
62+
.collect::<Vec<_>>();
63+
let ngram_size = if !ngram_indexes.is_empty() {
64+
let mut ngram_size = 0;
65+
for i in ngram_indexes {
66+
let column = index_block.get_by_offset(*i);
67+
ngram_size += column.value.memory_size() as u64;
68+
}
69+
Some(ngram_size)
70+
} else {
71+
None
72+
};
5373
let mut data = Vec::with_capacity(DEFAULT_BLOCK_INDEX_BUFFER_SIZE);
5474
let _ = blocks_to_parquet(
5575
&bloom_index.filter_schema,
@@ -61,6 +81,7 @@ impl BloomIndexState {
6181
Ok(Self {
6282
data,
6383
size: data_size,
84+
ngram_size,
6485
location,
6586
column_distinct_count: bloom_index.column_distinct_count.clone(),
6687
})

src/query/storages/fuse/src/table_functions/fuse_block.rs

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,14 @@ impl TableMetaFunc for FuseBlock {
6565
"inverted_index_size",
6666
TableDataType::Nullable(Box::new(TableDataType::Number(NumberDataType::UInt64))),
6767
),
68+
TableField::new(
69+
"ngram_index_size",
70+
TableDataType::Nullable(Box::new(TableDataType::Number(NumberDataType::UInt64))),
71+
),
72+
TableField::new(
73+
"virtual_column_size",
74+
TableDataType::Nullable(Box::new(TableDataType::Number(NumberDataType::UInt64))),
75+
),
6876
])
6977
}
7078

@@ -86,6 +94,8 @@ impl TableMetaFunc for FuseBlock {
8694
let mut bloom_filter_location = vec![];
8795
let mut bloom_filter_size = Vec::with_capacity(len);
8896
let mut inverted_index_size = Vec::with_capacity(len);
97+
let mut ngram_index_size = Vec::with_capacity(len);
98+
let mut virtual_column_size = Vec::with_capacity(len);
8999

90100
let segments_io = SegmentsIO::create(ctx.clone(), tbl.operator.clone(), tbl.schema());
91101

@@ -113,6 +123,13 @@ impl TableMetaFunc for FuseBlock {
113123
);
114124
bloom_filter_size.push(block.bloom_filter_index_size);
115125
inverted_index_size.push(block.inverted_index_size);
126+
ngram_index_size.push(block.ngram_filter_index_size);
127+
virtual_column_size.push(
128+
block
129+
.virtual_block_meta
130+
.as_ref()
131+
.map(|m| m.virtual_column_size),
132+
);
116133

117134
row_num += 1;
118135
if row_num >= limit {
@@ -157,6 +174,14 @@ impl TableMetaFunc for FuseBlock {
157174
DataType::Nullable(Box::new(DataType::Number(NumberDataType::UInt64))),
158175
Value::Column(UInt64Type::from_opt_data(inverted_index_size)),
159176
),
177+
BlockEntry::new(
178+
DataType::Nullable(Box::new(DataType::Number(NumberDataType::UInt64))),
179+
Value::Column(UInt64Type::from_opt_data(ngram_index_size)),
180+
),
181+
BlockEntry::new(
182+
DataType::Nullable(Box::new(DataType::Number(NumberDataType::UInt64))),
183+
Value::Column(UInt64Type::from_opt_data(virtual_column_size)),
184+
),
160185
],
161186
row_num,
162187
))

src/query/storages/fuse/src/table_functions/fuse_segment.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,11 @@ impl TableMetaFunc for FuseSegment {
5757
"bytes_compressed",
5858
TableDataType::Number(NumberDataType::UInt64),
5959
),
60+
TableField::new("index_size", TableDataType::Number(NumberDataType::UInt64)),
61+
TableField::new(
62+
"virtual_block_count",
63+
TableDataType::Nullable(Box::new(TableDataType::Number(NumberDataType::UInt64))),
64+
),
6065
])
6166
}
6267

@@ -75,6 +80,8 @@ impl TableMetaFunc for FuseSegment {
7580
let mut row_count: Vec<u64> = Vec::with_capacity(len);
7681
let mut compressed: Vec<u64> = Vec::with_capacity(len);
7782
let mut uncompressed: Vec<u64> = Vec::with_capacity(len);
83+
let mut index_size: Vec<u64> = Vec::with_capacity(len);
84+
let mut virtual_block_count: Vec<Option<u64>> = Vec::with_capacity(len);
7885
let mut file_location: Vec<String> = Vec::with_capacity(len);
7986

8087
let segments_io = SegmentsIO::create(ctx.clone(), tbl.operator.clone(), tbl.schema());
@@ -95,6 +102,8 @@ impl TableMetaFunc for FuseSegment {
95102
row_count.push(segment.summary.row_count);
96103
compressed.push(segment.summary.compressed_byte_size);
97104
uncompressed.push(segment.summary.uncompressed_byte_size);
105+
index_size.push(segment.summary.index_size);
106+
virtual_block_count.push(segment.summary.virtual_block_count);
98107
file_location.push(segment_locations[idx].0.clone());
99108

100109
row_num += 1;
@@ -116,6 +125,8 @@ impl TableMetaFunc for FuseSegment {
116125
UInt64Type::from_data(row_count),
117126
UInt64Type::from_data(uncompressed),
118127
UInt64Type::from_data(compressed),
128+
UInt64Type::from_data(index_size),
129+
UInt64Type::from_opt_data(virtual_block_count),
119130
]))
120131
}
121132
}

0 commit comments

Comments
 (0)