Skip to content

Commit 8c0db6a

Browse files
committed
update
1 parent 54c122c commit 8c0db6a

File tree

13 files changed

+400
-34
lines changed

13 files changed

+400
-34
lines changed

src/common/storage/src/meta_hll.rs

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -117,18 +117,6 @@ impl MetaHLL {
117117
pub fn num_empty_registers(&self) -> usize {
118118
self.registers.iter().filter(|x| **x == 0).count()
119119
}
120-
121-
pub fn from_hll(hll: MetaHLL12) -> Self {
122-
let registers = hll.get_registers();
123-
let mut new_registers = vec![0; M];
124-
let group_size = registers.len() / M;
125-
for i in 0..M {
126-
for j in 0..group_size {
127-
new_registers[i] = new_registers[i].max(registers[i * group_size + j]);
128-
}
129-
}
130-
Self::with_registers(new_registers)
131-
}
132120
}
133121

134122
/// Helper function sigma as defined in
@@ -180,6 +168,20 @@ fn hll_tau(x: f64) -> f64 {
180168
}
181169
}
182170

171+
/// Builds a `MetaHLL` from a `MetaHLL12` by folding its registers down to `M` entries.
///
/// The source registers are split into `M` consecutive groups of
/// `registers.len() / M` entries; each output register is the maximum
/// value found in its group.
impl From<MetaHLL12> for MetaHLL {
172+
fn from(value: MetaHLL12) -> Self {
173+
let registers = value.get_registers();
174+
let mut new_registers = vec![0; M];
175+
// Integer division: registers past `M * group_size` are never read.
// NOTE(review): if `registers.len() < M`, `group_size` is 0 and every output
// register stays 0 -- confirm `MetaHLL12` always exposes at least `M` registers.
let group_size = registers.len() / M;
176+
for i in 0..M {
177+
for j in 0..group_size {
178+
// Keep the largest register value seen in group `i`.
new_registers[i] = new_registers[i].max(registers[i * group_size + j]);
179+
}
180+
}
181+
Self::with_registers(new_registers)
182+
}
183+
}
184+
183185
#[derive(serde::Serialize, borsh::BorshSerialize)]
184186
enum MetaHLLVariantRef<'a> {
185187
Empty,
@@ -393,7 +395,7 @@ mod tests {
393395
hll.add_object(&i);
394396
}
395397

396-
let hll = MetaHLL::from_hll(hll);
398+
let hll = MetaHLL::from(hll);
397399
let count = hll.count();
398400
let error_rate = 1.04 / ((M as f64).sqrt());
399401
let diff = count as f64 / 100_000f64;

src/query/service/src/interpreters/interpreter_table_analyze.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,11 @@ impl Interpreter for AnalyzeTableInterpreter {
136136
let table_statistics = table
137137
.read_table_snapshot_statistics(Some(&snapshot))
138138
.await?;
139+
if let Some(table_statistics) = &table_statistics {
140+
if table_statistics.snapshot_id == snapshot.snapshot_id {
141+
return Ok(PipelineBuildResult::create());
142+
}
143+
}
139144

140145
// plan sql
141146
let (is_full, temporal_str) = if let Some(table_statistics) = &table_statistics {

src/query/service/src/pipelines/builders/builder_mutation_source.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ impl PipelineBuilder {
8080
Vec::with_capacity(mutation_source.partitions.partitions.len());
8181
for part in &mutation_source.partitions.partitions {
8282
// Safe to downcast because we know the partition is lazy
83-
let part: &FuseLazyPartInfo = FuseLazyPartInfo::from_part(part)?;
83+
let part = FuseLazyPartInfo::from_part(part)?;
8484
segment_locations.push(SegmentLocation {
8585
segment_idx: part.segment_index,
8686
location: part.segment_location.clone(),

src/query/service/tests/it/storages/fuse/operations/table_analyze.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -219,7 +219,7 @@ async fn test_table_analyze_without_prev_table_seq() -> Result<()> {
219219
let hll: HashMap<ColumnId, MetaHLL12> =
220220
HashMap::from([(0, borsh_deserialize_from_slice(&col)?)]);
221221
let table_statistics =
222-
TableSnapshotStatistics::new(hll, HashMap::new(), snapshot_1.snapshot_id);
222+
TableSnapshotStatistics::new(hll, HashMap::new(), snapshot_1.snapshot_id, 14);
223223
let table_statistics_location = location_gen.snapshot_statistics_location_from_uuid(
224224
&table_statistics.snapshot_id,
225225
table_statistics.format_version(),

src/query/storages/common/table_meta/src/meta/current/mod.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,14 +25,13 @@ pub use v2::SegmentStatistics;
2525
pub use v2::Statistics;
2626
pub use v2::VirtualBlockMeta;
2727
pub use v2::VirtualColumnMeta;
28-
pub use v3::TableSnapshotStatistics;
2928
pub use v4::CompactSegmentInfo;
3029
pub use v4::RawBlockMeta;
3130
pub use v4::SegmentInfo;
3231
pub use v4::TableSnapshot;
3332
pub use v4::TableSnapshotLite;
33+
pub use v4::TableSnapshotStatistics;
3434

3535
use super::v0;
3636
use super::v2;
37-
use super::v3;
3837
use super::v4;

src/query/storages/common/table_meta/src/meta/v3/table_snapshot_statistics.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ impl From<v2::TableSnapshotStatistics> for TableSnapshotStatistics {
6868
Self {
6969
format_version: TableSnapshotStatistics::VERSION,
7070
snapshot_id: value.snapshot_id,
71-
hll: HashMap::new(),
71+
hll: value.hll,
7272
histograms: HashMap::new(),
7373
}
7474
}

src/query/storages/common/table_meta/src/meta/v4/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,3 +21,4 @@ pub use segment::RawBlockMeta;
2121
pub use segment::SegmentInfo;
2222
pub use snapshot::TableSnapshot;
2323
pub use snapshot::TableSnapshotLite;
24+
pub use table_snapshot_statistics::TableSnapshotStatistics;

src/query/storages/common/table_meta/src/meta/v4/table_snapshot_statistics.rs

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,12 @@ use databend_common_expression::ColumnId;
1818
use databend_common_storage::Histogram;
1919
use databend_common_storage::MetaHLL;
2020

21+
use crate::meta::v1;
22+
use crate::meta::v2;
23+
use crate::meta::v3;
2124
use crate::meta::FormatVersion;
2225
use crate::meta::SnapshotId;
26+
use crate::meta::Versioned;
2327

2428
#[derive(serde::Serialize, serde::Deserialize, Clone, Debug)]
2529
pub struct TableSnapshotStatistics {
@@ -31,3 +35,69 @@ pub struct TableSnapshotStatistics {
3135
pub hll: HashMap<ColumnId, MetaHLL>,
3236
pub histograms: HashMap<ColumnId, Histogram>,
3337
}
38+
39+
// Constructor and read accessors for the v4 `TableSnapshotStatistics`.
impl TableSnapshotStatistics {
40+
/// Creates statistics for `snapshot_id` from the given per-column HLL sketches,
/// per-column histograms and row count.
///
/// `format_version` is always set to `TableSnapshotStatistics::VERSION`.
pub fn new(
41+
hll: HashMap<ColumnId, MetaHLL>,
42+
histograms: HashMap<ColumnId, Histogram>,
43+
snapshot_id: SnapshotId,
44+
row_count: usize,
45+
) -> Self {
46+
Self {
47+
format_version: TableSnapshotStatistics::VERSION,
48+
snapshot_id,
49+
hll,
50+
histograms,
51+
row_count,
52+
}
53+
}
54+
55+
/// Returns the format version stored in this value.
pub fn format_version(&self) -> u64 {
56+
self.format_version
57+
}
58+
59+
/// Returns, for every column that has an HLL sketch, the sketch's `count()`
/// converted to `u64` (presumably the estimated number of distinct values).
pub fn column_distinct_values(&self) -> HashMap<ColumnId, u64> {
60+
self.hll
61+
.iter()
62+
// `hll` is a `(&ColumnId, &MetaHLL)` pair from the map iterator.
.map(|hll| (*hll.0, hll.1.count() as u64))
63+
.collect()
64+
}
65+
}
66+
67+
/// Upgrades v1 statistics to the current format.
///
/// Only `snapshot_id` is carried over; the HLL and histogram maps start empty
/// and `row_count` is set to 0.
impl From<v1::TableSnapshotStatistics> for TableSnapshotStatistics {
68+
fn from(value: v1::TableSnapshotStatistics) -> Self {
69+
Self {
70+
format_version: TableSnapshotStatistics::VERSION,
71+
snapshot_id: value.snapshot_id,
72+
// NOTE(review): 0 is used as a placeholder; presumably v1 did not record a row count.
row_count: 0,
73+
hll: HashMap::new(),
74+
histograms: HashMap::new(),
75+
}
76+
}
77+
}
78+
79+
/// Upgrades v2 statistics to the current format.
///
/// `snapshot_id` is carried over and every per-column HLL sketch is converted
/// with `Into`; histograms start empty and `row_count` is set to 0.
impl From<v2::TableSnapshotStatistics> for TableSnapshotStatistics {
80+
fn from(value: v2::TableSnapshotStatistics) -> Self {
81+
// Convert each sketch to the value type of this struct's `hll` map, keeping the column id.
let hll = value.hll.into_iter().map(|(k, v)| (k, v.into())).collect();
82+
Self {
83+
format_version: TableSnapshotStatistics::VERSION,
84+
snapshot_id: value.snapshot_id,
85+
// NOTE(review): 0 is used as a placeholder; presumably v2 did not record a row count.
row_count: 0,
86+
hll,
87+
histograms: HashMap::new(),
88+
}
89+
}
90+
}
91+
92+
/// Upgrades v3 statistics to the current format.
///
/// `snapshot_id` and `histograms` are carried over unchanged, every per-column
/// HLL sketch is converted with `Into`, and `row_count` is set to 0.
impl From<v3::TableSnapshotStatistics> for TableSnapshotStatistics {
93+
fn from(value: v3::TableSnapshotStatistics) -> Self {
94+
// Convert each sketch to the value type of this struct's `hll` map, keeping the column id.
let hll = value.hll.into_iter().map(|(k, v)| (k, v.into())).collect();
95+
Self {
96+
format_version: TableSnapshotStatistics::VERSION,
97+
snapshot_id: value.snapshot_id,
98+
// NOTE(review): 0 is used as a placeholder; presumably v3 did not record a row count.
row_count: 0,
99+
hll,
100+
histograms: value.histograms,
101+
}
102+
}
103+
}

src/query/storages/common/table_meta/src/meta/versions.rs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,13 +105,15 @@ impl SnapshotVersion {
105105
impl Versioned<0> for v1::TableSnapshotStatistics {}
106106
impl Versioned<2> for v2::TableSnapshotStatistics {}
107107
impl Versioned<3> for v3::TableSnapshotStatistics {}
108+
impl Versioned<4> for v4::TableSnapshotStatistics {}
108109

109110
impl Versioned<2> for DataBlock {}
110111

111112
/// Tags which versioned `TableSnapshotStatistics` layout a value uses.
///
/// Each variant carries only a `PhantomData` marker for the matching
/// versioned struct, so no statistics data is stored in the enum itself.
pub enum TableSnapshotStatisticsVersion {
112113
// `V0` is backed by the `v1` struct, which is registered as `Versioned<0>`.
V0(PhantomData<v1::TableSnapshotStatistics>),
113114
V2(PhantomData<v2::TableSnapshotStatistics>),
114115
V3(PhantomData<v3::TableSnapshotStatistics>),
116+
V4(PhantomData<v4::TableSnapshotStatistics>),
115117
}
116118

117119
impl TableSnapshotStatisticsVersion {
@@ -120,6 +122,7 @@ impl TableSnapshotStatisticsVersion {
120122
TableSnapshotStatisticsVersion::V0(a) => Self::ver(a),
121123
TableSnapshotStatisticsVersion::V2(a) => Self::ver(a),
122124
TableSnapshotStatisticsVersion::V3(a) => Self::ver(a),
125+
TableSnapshotStatisticsVersion::V4(a) => Self::ver(a),
123126
}
124127
}
125128

@@ -206,8 +209,11 @@ mod converters {
206209
3 => Ok(TableSnapshotStatisticsVersion::V3(testify_version::<_, 3>(
207210
PhantomData,
208211
))),
212+
4 => Ok(TableSnapshotStatisticsVersion::V4(testify_version::<_, 4>(
213+
PhantomData,
214+
))),
209215
_ => Err(ErrorCode::Internal(format!(
210-
"unknown table snapshot statistics version {value}, versions supported: 0, 2, 3"
216+
"unknown table snapshot statistics version {value}, versions supported: 0, 2, 3, 4"
211217
))),
212218
}
213219
}

src/query/storages/common/table_meta/src/readers/versioned_reader.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,11 @@ impl VersionedReader<TableSnapshotStatistics> for TableSnapshotStatisticsVersion
4343
let ts = load_json(reader, v)?;
4444
TableSnapshotStatistics::from(ts)
4545
}
46-
TableSnapshotStatisticsVersion::V3(v) => load_json(reader, v)?,
46+
TableSnapshotStatisticsVersion::V3(v) => {
47+
let ts = load_json(reader, v)?;
48+
TableSnapshotStatistics::from(ts)
49+
}
50+
TableSnapshotStatisticsVersion::V4(v) => load_json(reader, v)?,
4751
};
4852
Ok(r)
4953
}

0 commit comments

Comments
 (0)