1515use std:: collections:: HashMap ;
1616use std:: sync:: Arc ;
1717
18+ use common_arrow:: parquet:: metadata:: FileMetaData ;
19+ use common_arrow:: parquet:: metadata:: ThriftFileMetaData ;
1820use common_base:: base:: tokio;
21+ use common_cache:: Cache ;
1922use common_expression:: type_check:: check;
2023use common_expression:: types:: number:: Int32Type ;
2124use common_expression:: types:: number:: NumberScalar ;
@@ -35,6 +38,7 @@ use common_expression::Scalar;
3538use common_expression:: TableDataType ;
3639use common_expression:: TableField ;
3740use common_expression:: TableSchema ;
41+ use common_expression:: TableSchemaRefExt ;
3842use common_functions:: aggregates:: eval_aggr;
3943use common_functions:: scalars:: BUILTIN_FUNCTIONS ;
4044use common_sql:: evaluator:: BlockOperator ;
@@ -50,11 +54,19 @@ use databend_query::storages::fuse::statistics::ClusterStatsGenerator;
5054use databend_query:: storages:: fuse:: statistics:: StatisticsAccumulator ;
5155use opendal:: Operator ;
5256use rand:: Rng ;
57+ use storages_common_cache:: InMemoryCacheBuilder ;
58+ use storages_common_cache:: InMemoryItemCacheHolder ;
59+ use storages_common_index:: BloomIndexMetaMini ;
5360use storages_common_table_meta:: meta:: BlockMeta ;
5461use storages_common_table_meta:: meta:: ClusterStatistics ;
5562use storages_common_table_meta:: meta:: ColumnStatistics ;
5663use storages_common_table_meta:: meta:: Compression ;
5764use storages_common_table_meta:: meta:: Statistics ;
65+ use sysinfo:: get_current_pid;
66+ use sysinfo:: ProcessExt ;
67+ use sysinfo:: System ;
68+ use sysinfo:: SystemExt ;
69+ use uuid:: Uuid ;
5870
5971use crate :: storages:: fuse:: block_writer:: BlockWriter ;
6072use crate :: storages:: fuse:: table_test_fixture:: TestFixture ;
@@ -236,7 +248,7 @@ async fn test_accumulator() -> common_exception::Result<()> {
236248 let block = item?;
237249 let col_stats = gen_columns_statistics ( & block, None , & schema) ?;
238250 let block_writer = BlockWriter :: new ( & operator, & loc_generator) ;
239- let block_meta = block_writer
251+ let ( block_meta, _index_meta ) = block_writer
240252 . write ( FuseStorageFormat :: Parquet , & schema, block, col_stats, None )
241253 . await ?;
242254 stats_acc. add_with_block_meta ( block_meta) ;
@@ -553,3 +565,105 @@ fn test_reduce_block_meta() -> common_exception::Result<()> {
553565
554566 Ok ( ( ) )
555567}
568+
569+ fn populate_cache < T > ( cache : & InMemoryItemCacheHolder < T > , item : T , num_cache : usize )
570+ where T : Clone {
571+ let mut c = cache. write ( ) ;
572+ for _ in 0 ..num_cache {
573+ let uuid = Uuid :: new_v4 ( ) ;
574+ ( * c) . put (
575+ format ! ( "{}" , uuid. simple( ) ) ,
576+ std:: sync:: Arc :: new ( item. clone ( ) ) ,
577+ ) ;
578+ }
579+ }
580+
581+ async fn setup ( ) -> common_exception:: Result < ThriftFileMetaData > {
582+ let fields = ( 0 ..23 )
583+ . into_iter ( )
584+ . map ( |_| TableField :: new ( "id" , TableDataType :: Number ( NumberDataType :: Int32 ) ) )
585+ . collect :: < Vec < _ > > ( ) ;
586+
587+ let schema = TableSchemaRefExt :: create ( fields) ;
588+
589+ let mut columns = vec ! [ ] ;
590+ for _ in 0 ..schema. fields ( ) . len ( ) {
591+ // values do not matter
592+ let column = Int32Type :: from_data ( vec ! [ 1 ] ) ;
593+ columns. push ( column)
594+ }
595+
596+ let block = DataBlock :: new_from_columns ( columns) ;
597+ let operator = Operator :: new ( opendal:: services:: Memory :: default ( ) ) ?. finish ( ) ;
598+ let loc_generator = TableMetaLocationGenerator :: with_prefix ( "/" . to_owned ( ) ) ;
599+ let col_stats = gen_columns_statistics ( & block, None , & schema) ?;
600+ let block_writer = BlockWriter :: new ( & operator, & loc_generator) ;
601+ let ( _block_meta, thrift_file_meta) = block_writer
602+ . write ( FuseStorageFormat :: Parquet , & schema, block, col_stats, None )
603+ . await ?;
604+
605+ Ok ( thrift_file_meta. unwrap ( ) )
606+ }
607+
608+ fn show_memory_usage ( case : & str , base_memory_usage : u64 , num_cache_items : usize ) {
609+ let sys = System :: new_all ( ) ;
610+ let pid = get_current_pid ( ) . unwrap ( ) ;
611+ let process = sys. process ( pid) . unwrap ( ) ;
612+ {
613+ let memory_after = process. memory ( ) ;
614+ let delta = memory_after - base_memory_usage;
615+ let delta_gb = ( delta as f64 ) / 1024.0 / 1024.0 / 1024.0 ;
616+ eprintln ! (
617+ " cache type: {}, number of cached items {}, mem usage(B):{:+}, mem usage(GB){:+}" ,
618+ case, num_cache_items, delta, delta_gb
619+ ) ;
620+ }
621+ }
622+
623+ #[ tokio:: test( flavor = "multi_thread" ) ]
624+ #[ ignore]
625+ async fn test_index_meta_cache_size_file_meta_data ( ) -> common_exception:: Result < ( ) > {
626+ let thrift_file_meta = setup ( ) . await ?;
627+
628+ let cache_number = 300_000 ;
629+
630+ let meta: FileMetaData = FileMetaData :: try_from_thrift ( thrift_file_meta. clone ( ) ) ?;
631+
632+ let sys = System :: new_all ( ) ;
633+ let pid = get_current_pid ( ) . unwrap ( ) ;
634+ let process = sys. process ( pid) . unwrap ( ) ;
635+ let base_memory_usage = process. memory ( ) ;
636+
637+ let cache = InMemoryCacheBuilder :: new_item_cache :: < FileMetaData > ( cache_number as u64 ) ;
638+
639+ populate_cache ( & cache, meta, cache_number) ;
640+ show_memory_usage ( "FileMetaData" , base_memory_usage, cache_number) ;
641+
642+ drop ( cache) ;
643+
644+ Ok ( ( ) )
645+ }
646+
647+ #[ tokio:: test( flavor = "multi_thread" ) ]
648+ #[ ignore]
649+ async fn test_index_meta_cache_size_bloom_meta ( ) -> common_exception:: Result < ( ) > {
650+ let thrift_file_meta = setup ( ) . await ?;
651+
652+ let cache_number = 300_000 ;
653+
654+ let meta: FileMetaData = FileMetaData :: try_from_thrift ( thrift_file_meta. clone ( ) ) ?;
655+ let bloom_index_meta = BloomIndexMetaMini :: try_from ( meta. clone ( ) ) ?;
656+
657+ let sys = System :: new_all ( ) ;
658+ let pid = get_current_pid ( ) . unwrap ( ) ;
659+ let process = sys. process ( pid) . unwrap ( ) ;
660+ let base_memory_usage = process. memory ( ) ;
661+
662+ let cache = InMemoryCacheBuilder :: new_item_cache :: < BloomIndexMetaMini > ( cache_number as u64 ) ;
663+ populate_cache ( & cache, bloom_index_meta, cache_number) ;
664+ show_memory_usage ( "BloomIndexMetaMini" , base_memory_usage, cache_number) ;
665+
666+ drop ( cache) ;
667+
668+ Ok ( ( ) )
669+ }
0 commit comments