|
| 1 | +// Copyright 2023 Datafuse Labs. |
| 2 | +// |
| 3 | +// Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | +// you may not use this file except in compliance with the License. |
| 5 | +// You may obtain a copy of the License at |
| 6 | +// |
| 7 | +// http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | +// |
| 9 | +// Unless required by applicable law or agreed to in writing, software |
| 10 | +// distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | +// See the License for the specific language governing permissions and |
| 13 | +// limitations under the License. |
| 14 | + |
| 15 | +use common_arrow::parquet::metadata::FileMetaData; |
| 16 | +use common_arrow::parquet::metadata::ThriftFileMetaData; |
| 17 | +use common_base::base::tokio; |
| 18 | +use common_cache::Cache; |
| 19 | +use common_expression::types::Int32Type; |
| 20 | +use common_expression::types::NumberDataType; |
| 21 | +use common_expression::DataBlock; |
| 22 | +use common_expression::FromData; |
| 23 | +use common_expression::TableDataType; |
| 24 | +use common_expression::TableField; |
| 25 | +use common_expression::TableSchemaRefExt; |
| 26 | +use common_storages_fuse::io::TableMetaLocationGenerator; |
| 27 | +use common_storages_fuse::statistics::gen_columns_statistics; |
| 28 | +use common_storages_fuse::FuseStorageFormat; |
| 29 | +use opendal::Operator; |
| 30 | +use storages_common_cache::InMemoryCacheBuilder; |
| 31 | +use storages_common_cache::InMemoryItemCacheHolder; |
| 32 | +use storages_common_index::BloomIndexMeta; |
| 33 | +use sysinfo::get_current_pid; |
| 34 | +use sysinfo::ProcessExt; |
| 35 | +use sysinfo::System; |
| 36 | +use sysinfo::SystemExt; |
| 37 | +use uuid::Uuid; |
| 38 | + |
| 39 | +use crate::storages::fuse::block_writer::BlockWriter; |
| 40 | + |
| 41 | +// NOTE: |
| 42 | +// |
| 43 | +// Memory usage is observed at the *process* level, so please do not combine these |
| 44 | +// cases into one test function. |
| 45 | +// |
| 46 | +// By default, these cases are ignored (in CI). |
| 47 | +// |
| 48 | +// Please run the following two cases individually (each in a separate process). |
| 49 | + |
| 50 | +#[tokio::test(flavor = "multi_thread")] |
| 51 | +#[ignore] |
| 52 | +async fn test_index_meta_cache_size_file_meta_data() -> common_exception::Result<()> { |
| 53 | + let thrift_file_meta = setup().await?; |
| 54 | + |
| 55 | + let cache_number = 300_000; |
| 56 | + |
| 57 | + let meta: FileMetaData = FileMetaData::try_from_thrift(thrift_file_meta)?; |
| 58 | + |
| 59 | + let sys = System::new_all(); |
| 60 | + let pid = get_current_pid().unwrap(); |
| 61 | + let process = sys.process(pid).unwrap(); |
| 62 | + let base_memory_usage = process.memory(); |
| 63 | + let scenario = "FileMetaData"; |
| 64 | + |
| 65 | + eprintln!( |
| 66 | + "scenario {}, pid {}, base memory {}", |
| 67 | + scenario, pid, base_memory_usage |
| 68 | + ); |
| 69 | + |
| 70 | + let cache = InMemoryCacheBuilder::new_item_cache::<FileMetaData>(cache_number as u64); |
| 71 | + |
| 72 | + populate_cache(&cache, meta, cache_number); |
| 73 | + show_memory_usage(scenario, base_memory_usage, cache_number); |
| 74 | + |
| 75 | + drop(cache); |
| 76 | + |
| 77 | + Ok(()) |
| 78 | +} |
| 79 | + |
| 80 | +#[tokio::test(flavor = "multi_thread")] |
| 81 | +#[ignore] |
| 82 | +async fn test_index_meta_cache_size_bloom_meta() -> common_exception::Result<()> { |
| 83 | + let thrift_file_meta = setup().await?; |
| 84 | + |
| 85 | + let cache_number = 300_000; |
| 86 | + |
| 87 | + let bloom_index_meta = BloomIndexMeta::try_from(thrift_file_meta)?; |
| 88 | + |
| 89 | + let sys = System::new_all(); |
| 90 | + let pid = get_current_pid().unwrap(); |
| 91 | + let process = sys.process(pid).unwrap(); |
| 92 | + let base_memory_usage = process.memory(); |
| 93 | + |
| 94 | + let scenario = "BloomIndexMeta(mini)"; |
| 95 | + eprintln!( |
| 96 | + "scenario {}, pid {}, base memory {}", |
| 97 | + scenario, pid, base_memory_usage |
| 98 | + ); |
| 99 | + |
| 100 | + let cache = InMemoryCacheBuilder::new_item_cache::<BloomIndexMeta>(cache_number as u64); |
| 101 | + populate_cache(&cache, bloom_index_meta, cache_number); |
| 102 | + show_memory_usage("BloomIndexMeta(Mini)", base_memory_usage, cache_number); |
| 103 | + |
| 104 | + drop(cache); |
| 105 | + |
| 106 | + Ok(()) |
| 107 | +} |
| 108 | + |
| 109 | +fn populate_cache<T>(cache: &InMemoryItemCacheHolder<T>, item: T, num_cache: usize) |
| 110 | +where T: Clone { |
| 111 | + let mut c = cache.write(); |
| 112 | + for _ in 0..num_cache { |
| 113 | + let uuid = Uuid::new_v4(); |
| 114 | + (*c).put( |
| 115 | + format!("{}", uuid.simple()), |
| 116 | + std::sync::Arc::new(item.clone()), |
| 117 | + ); |
| 118 | + } |
| 119 | +} |
| 120 | + |
| 121 | +async fn setup() -> common_exception::Result<ThriftFileMetaData> { |
| 122 | + let fields = (0..23) |
| 123 | + .map(|_| TableField::new("id", TableDataType::Number(NumberDataType::Int32))) |
| 124 | + .collect::<Vec<_>>(); |
| 125 | + |
| 126 | + let schema = TableSchemaRefExt::create(fields); |
| 127 | + |
| 128 | + let mut columns = vec![]; |
| 129 | + for _ in 0..schema.fields().len() { |
| 130 | + // values do not matter |
| 131 | + let column = Int32Type::from_data(vec![1]); |
| 132 | + columns.push(column) |
| 133 | + } |
| 134 | + |
| 135 | + let block = DataBlock::new_from_columns(columns); |
| 136 | + let operator = Operator::new(opendal::services::Memory::default())?.finish(); |
| 137 | + let loc_generator = TableMetaLocationGenerator::with_prefix("/".to_owned()); |
| 138 | + let col_stats = gen_columns_statistics(&block, None, &schema)?; |
| 139 | + let block_writer = BlockWriter::new(&operator, &loc_generator); |
| 140 | + let (_block_meta, thrift_file_meta) = block_writer |
| 141 | + .write(FuseStorageFormat::Parquet, &schema, block, col_stats, None) |
| 142 | + .await?; |
| 143 | + |
| 144 | + Ok(thrift_file_meta.unwrap()) |
| 145 | +} |
| 146 | + |
| 147 | +fn show_memory_usage(case: &str, base_memory_usage: u64, num_cache_items: usize) { |
| 148 | + let sys = System::new_all(); |
| 149 | + let pid = get_current_pid().unwrap(); |
| 150 | + let process = sys.process(pid).unwrap(); |
| 151 | + { |
| 152 | + let memory_after = process.memory(); |
| 153 | + let delta = memory_after - base_memory_usage; |
| 154 | + let delta_gb = (delta as f64) / 1024.0 / 1024.0 / 1024.0; |
| 155 | + eprintln!( |
| 156 | + " |
| 157 | + cache item type : {}, |
| 158 | + number of cached items {}, |
| 159 | + mem usage(B):{:+}, |
| 160 | + mem usage(GB){:+} |
| 161 | + ", |
| 162 | + case, num_cache_items, delta, delta_gb |
| 163 | + ); |
| 164 | + } |
| 165 | +} |
0 commit comments