Skip to content

Commit 68c1e85

Browse files
authored
fix: incorrect index size during merge segments (#10817)
1 parent 90f689d commit 68c1e85

File tree

5 files changed

+58
-2
lines changed

5 files changed

+58
-2
lines changed

src/query/service/tests/it/storages/fuse/operations/commit.rs

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ use common_catalog::table_context::StageAttachment;
3333
use common_catalog::table_context::TableContext;
3434
use common_exception::ErrorCode;
3535
use common_exception::Result;
36+
use common_expression::BlockThresholds;
3637
use common_expression::DataBlock;
3738
use common_expression::FunctionContext;
3839
use common_io::prelude::FormatSettings;
@@ -71,15 +72,22 @@ use common_settings::ChangeValue;
7172
use common_settings::Settings;
7273
use common_storage::DataOperator;
7374
use common_storage::StageFileInfo;
75+
use common_storages_fuse::io::SegmentWriter;
76+
use common_storages_fuse::io::TableMetaLocationGenerator;
7477
use common_storages_fuse::operations::AppendOperationLogEntry;
78+
use common_storages_fuse::statistics::reducers::reduce_block_metas;
7579
use common_storages_fuse::FuseTable;
7680
use common_storages_fuse::FUSE_TBL_SNAPSHOT_PREFIX;
7781
use databend_query::sessions::QueryContext;
7882
use futures::TryStreamExt;
83+
use rand::thread_rng;
84+
use rand::Rng;
7985
use storages_common_table_meta::meta::SegmentInfo;
8086
use storages_common_table_meta::meta::Statistics;
8187
use walkdir::WalkDir;
8288

89+
use crate::storages::fuse::block_writer::BlockWriter;
90+
use crate::storages::fuse::operations::mutation::CompactSegmentTestFixture;
8391
use crate::storages::fuse::table_test_fixture::execute_query;
8492
use crate::storages::fuse::table_test_fixture::TestFixture;
8593

@@ -301,6 +309,48 @@ async fn test_abort_on_error() -> Result<()> {
301309
Ok(())
302310
}
303311

312+
#[tokio::test(flavor = "multi_thread")]
313+
async fn test_merge_segments() -> common_exception::Result<()> {
314+
let fixture = TestFixture::new().await;
315+
let ctx = fixture.ctx();
316+
317+
let operator = ctx.get_data_operator()?.operator();
318+
let data_accessor = operator.clone();
319+
let location_gen = TableMetaLocationGenerator::with_prefix("test/".to_owned());
320+
let block_writer = BlockWriter::new(&data_accessor, &location_gen);
321+
let segment_writer = SegmentWriter::new(&data_accessor, &location_gen);
322+
323+
let mut rand = thread_rng();
324+
let number_of_segments: usize = rand.gen_range(1..10);
325+
let mut block_number_of_segments = Vec::with_capacity(number_of_segments);
326+
let mut rows_per_blocks = Vec::with_capacity(number_of_segments);
327+
for _ in 0..number_of_segments {
328+
block_number_of_segments.push(rand.gen_range(10..30));
329+
rows_per_blocks.push(rand.gen_range(1..8));
330+
}
331+
332+
let threshold = BlockThresholds {
333+
max_rows_per_block: 5,
334+
min_rows_per_block: 4,
335+
max_bytes_per_block: 1024,
336+
};
337+
338+
let (locations, block_metas, segment_infos) = CompactSegmentTestFixture::gen_segments(
339+
&block_writer,
340+
&segment_writer,
341+
&block_number_of_segments,
342+
&rows_per_blocks,
343+
threshold,
344+
)
345+
.await?;
346+
347+
let expect = reduce_block_metas(&block_metas, threshold)?;
348+
let iter = locations.iter().zip(segment_infos.iter());
349+
let (_, results) = FuseTable::merge_segments(iter)?;
350+
assert_eq!(expect, results);
351+
Ok(())
352+
}
353+
304354
struct CtxDelegation {
305355
ctx: Arc<dyn TableContext>,
306356
catalog: Arc<FakedCatalog>,

src/query/service/tests/it/storages/fuse/operations/mutation/block_compact_mutator.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,7 @@ async fn test_safety() -> Result<()> {
171171
&segment_writer,
172172
&block_number_of_segments,
173173
&rows_per_blocks,
174+
threshold,
174175
)
175176
.await?;
176177

src/query/service/tests/it/storages/fuse/operations/mutation/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,3 +18,4 @@ mod recluster_mutator;
1818
mod segments_compact_mutator;
1919

2020
pub use deletion::do_deletion;
21+
pub use segments_compact_mutator::CompactSegmentTestFixture;

src/query/service/tests/it/storages/fuse/operations/mutation/segments_compact_mutator.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ use common_exception::ErrorCode;
2222
use common_exception::Result;
2323
use common_expression::types::number::NumberColumn;
2424
use common_expression::types::number::NumberScalar;
25+
use common_expression::BlockThresholds;
2526
use common_expression::Column;
2627
use common_expression::DataBlock;
2728
use common_expression::Scalar;
@@ -666,6 +667,7 @@ impl CompactSegmentTestFixture {
666667
&segment_writer,
667668
num_block_of_segments,
668669
&rows_per_block,
670+
BlockThresholds::default(),
669671
)
670672
.await?;
671673
self.input_blocks = blocks;
@@ -682,6 +684,7 @@ impl CompactSegmentTestFixture {
682684
segment_writer: &SegmentWriter<'_>,
683685
block_num_of_segments: &[usize],
684686
rows_per_blocks: &[usize],
687+
thresholds: BlockThresholds,
685688
) -> Result<(Vec<Location>, Vec<BlockMeta>, Vec<SegmentInfo>)> {
686689
let mut locations = vec![];
687690
let mut collected_blocks = vec![];
@@ -693,7 +696,7 @@ impl CompactSegmentTestFixture {
693696
{
694697
let (schema, blocks) =
695698
TestFixture::gen_sample_blocks_ex(*num_blocks, *rows_per_block, 1);
696-
let mut stats_acc = StatisticsAccumulator::default();
699+
let mut stats_acc = StatisticsAccumulator::new(thresholds);
697700
for block in blocks {
698701
let block = block?;
699702
let col_stats = gen_columns_statistics(&block, None, &schema)?;

src/query/storages/fuse/src/operations/commit.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -466,9 +466,10 @@ impl FuseTable {
466466
let stats = &segment_info.summary;
467467
acc.row_count += stats.row_count;
468468
acc.block_count += stats.block_count;
469+
acc.perfect_block_count += stats.perfect_block_count;
469470
acc.uncompressed_byte_size += stats.uncompressed_byte_size;
470471
acc.compressed_byte_size += stats.compressed_byte_size;
471-
acc.index_size = stats.index_size;
472+
acc.index_size += stats.index_size;
472473
acc.col_stats = if acc.col_stats.is_empty() {
473474
stats.col_stats.clone()
474475
} else {

0 commit comments

Comments
 (0)