Skip to content

Commit 923fc36

Browse files
committed
update
1 parent fd3ad03 commit 923fc36

File tree

16 files changed

+84
-16
lines changed

16 files changed

+84
-16
lines changed

Cargo.lock

Lines changed: 3 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -491,7 +491,7 @@ sha1 = "0.10.5"
491491
sha2 = "0.10.8"
492492
simdutf8 = "0.1.4"
493493
similar = "2.7.0"
494-
simple_hll = { version = "0.0.1", features = ["serde_borsh"] }
494+
simple_hll = { version = "0.0.4", features = ["serde_borsh"] }
495495
simsearch = "0.2"
496496
siphasher = "0.3"
497497
sled = { version = "0.34", default-features = false }

src/common/storage/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ prometheus-client = { workspace = true }
3737
regex = { workspace = true }
3838
reqwest = { workspace = true }
3939
serde = { workspace = true }
40+
simple_hll = { workspace = true, features = ["serde_borsh"] }
4041
thiserror = { workspace = true }
4142
url = { workspace = true }
4243

src/common/storage/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ pub use histogram::HistogramBucket;
8585
pub use histogram::DEFAULT_HISTOGRAM_BUCKETS;
8686
pub use merge::MutationStatus;
8787
pub use meta_hll::MetaHLL;
88+
pub use meta_hll::MetaHLL12;
8889
pub use multi_table_insert::MultiTableInsertStatus;
8990
pub use statistics::Datum;
9091
pub use statistics::F64;

src/common/storage/src/meta_hll.rs

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ use std::hash::Hash;
1818

1919
use ahash::RandomState;
2020

21+
/// Alias for `simple_hll::HyperLogLog` instantiated with the const parameter `12`.
pub type MetaHLL12 = simple_hll::HyperLogLog<12>;
22+
2123
const P: usize = 7_usize;
2224
const Q: usize = 64 - P;
2325
const M: usize = 1 << P;
@@ -51,6 +53,11 @@ impl MetaHLL {
5153
}
5254
}
5355

56+
/// Creates a `MetaHLL` directly from an existing register vector.
///
/// # Panics
///
/// Panics if `registers.len()` is not equal to `M`.
pub fn with_registers(registers: Vec<u8>) -> Self {
57+
assert_eq!(registers.len(), M);
58+
Self { registers }
59+
}
60+
5461
/// Adds a hash to the MetaHLL.
5562
/// The hash value is determined by the caller.
5663
#[inline]
@@ -67,7 +74,7 @@ impl MetaHLL {
6774

6875
/// Adds an object to the MetaHLL.
6976
/// Though different types can be passed into this method, the caller should note that
/// only the hash of the object is added to the sketch.
70-
pub fn add_object<T: Hash>(&mut self, obj: &T) {
77+
pub fn add_object<T: ?Sized + Hash>(&mut self, obj: &T) {
7178
let hash = SEED.hash_one(obj);
7279
self.add_hash(hash);
7380
}
@@ -110,6 +117,18 @@ impl MetaHLL {
110117
/// Returns the number of registers whose value is zero.
pub fn num_empty_registers(&self) -> usize {
111118
self.registers.iter().filter(|x| **x == 0).count()
112119
}
120+
121+
/// Builds a `MetaHLL` from a `MetaHLL12` by folding its registers down to `M` registers.
///
/// The source registers are split into `M` contiguous groups of
/// `registers.len() / M` entries each, and every output register takes the
/// maximum value found in its group.
///
/// If the source has fewer than `M` registers the group size is zero and all
/// output registers stay zero; if the source length is not a multiple of `M`,
/// the trailing registers are not visited.
///
/// NOTE(review): folding by a plain maximum over adjacent registers assumes the
/// source sketch's index/rank bit layout makes contiguous registers map onto a
/// single coarser register without adjusting the stored ranks — confirm
/// against the `simple_hll` implementation.
pub fn from_hll(hll: MetaHLL12) -> Self {
122+
let registers = hll.get_registers();
123+
let mut new_registers = vec![0; M];
124+
// Number of source registers folded into each output register.
let group_size = registers.len() / M;
125+
for i in 0..M {
126+
for j in 0..group_size {
127+
new_registers[i] = new_registers[i].max(registers[i * group_size + j]);
128+
}
129+
}
130+
// `with_registers` asserts that the vector holds exactly `M` entries.
Self::with_registers(new_registers)
131+
}
113132
}
114133

115134
/// Helper function sigma as defined in
@@ -366,4 +385,20 @@ mod tests {
366385
}
367386
compare_with_delta(hll.count(), 1000);
368387
}
388+
389+
// Checks that a `MetaHLL` converted from a `MetaHLL12` that was fed 100_000
// distinct integers still reports a count close to 100_000.
#[test]
390+
fn test_from_hll() {
391+
let mut hll = MetaHLL12::new();
392+
for i in 0..100_000 {
393+
hll.add_object(&i);
394+
}
395+
396+
let hll = MetaHLL::from_hll(hll);
397+
let count = hll.count();
398+
// Tolerance for the relative error, computed as 1.04 / sqrt(M).
let error_rate = 1.04 / ((M as f64).sqrt());
399+
// Ratio of the estimated count to the true number of inserted values.
let diff = count as f64 / 100_000f64;
400+
401+
assert!(diff >= 1.0 - error_rate);
402+
assert!(diff <= 1.0 + error_rate);
403+
}
369404
}

src/query/service/tests/it/storages/fuse/operations/table_analyze.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ use databend_common_expression::types::number::NumberScalar;
2323
use databend_common_expression::ColumnId;
2424
use databend_common_expression::Scalar;
2525
use databend_common_io::prelude::borsh_deserialize_from_slice;
26+
use databend_common_storage::MetaHLL12;
2627
use databend_common_storages_fuse::io::MetaReaders;
2728
use databend_common_storages_fuse::io::MetaWriter;
2829
use databend_common_storages_fuse::statistics::reducers::merge_statistics_mut;
@@ -33,7 +34,6 @@ use databend_query::sql::plans::Plan;
3334
use databend_query::sql::Planner;
3435
use databend_query::test_kits::*;
3536
use databend_storages_common_cache::LoadParams;
36-
use databend_storages_common_table_meta::meta::MetaHLL12;
3737
use databend_storages_common_table_meta::meta::SegmentInfo;
3838
use databend_storages_common_table_meta::meta::Statistics;
3939
use databend_storages_common_table_meta::meta::TableSnapshot;

src/query/storages/common/table_meta/Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@ parquet = { workspace = true }
3030
rmp-serde = { workspace = true }
3131
serde = { workspace = true }
3232
serde_json = { workspace = true }
33-
simple_hll = { workspace = true, features = ["serde_borsh"] }
3433
snap = { workspace = true, optional = true }
3534
typetag = { workspace = true }
3635
zstd = { workspace = true }

src/query/storages/common/table_meta/src/meta/current/mod.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ pub use v2::ColumnStatistics;
2121
pub use v2::DraftVirtualBlockMeta;
2222
pub use v2::DraftVirtualColumnMeta;
2323
pub use v2::ExtendedBlockMeta;
24-
pub use v2::MetaHLL12;
2524
pub use v2::SegmentStatistics;
2625
pub use v2::Statistics;
2726
pub use v2::VirtualBlockMeta;

src/query/storages/common/table_meta/src/meta/v2/mod.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,5 +32,4 @@ pub use statistics::AdditionalStatsMeta;
3232
pub use statistics::ClusterStatistics;
3333
pub use statistics::ColumnStatistics;
3434
pub use statistics::Statistics;
35-
pub use table_snapshot_statistics::MetaHLL12;
3635
pub use table_snapshot_statistics::TableSnapshotStatistics;

src/query/storages/common/table_meta/src/meta/v2/table_snapshot_statistics.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
use std::collections::HashMap;
1616

1717
use databend_common_expression::ColumnId;
18+
use databend_common_storage::MetaHLL12;
1819
use serde::Deserialize;
1920
use serde::Serialize;
2021

@@ -23,8 +24,6 @@ use crate::meta::FormatVersion;
2324
use crate::meta::SnapshotId;
2425
use crate::meta::Versioned;
2526

26-
pub type MetaHLL12 = simple_hll::HyperLogLog<12>;
27-
2827
#[derive(Serialize, Deserialize, Clone, Debug)]
2928
pub struct TableSnapshotStatistics {
3029
/// format version of snapshot

0 commit comments

Comments
 (0)