|
| 1 | +use std::{collections::HashMap, mem::size_of_val, sync::Arc}; |
| 2 | + |
| 3 | +use log::{debug, info}; |
| 4 | +use rand::{ |
| 5 | + Rng, |
| 6 | + distr::{Alphanumeric, SampleString}, |
| 7 | +}; |
| 8 | +use rand_chacha::{ChaCha8Rng, rand_core::SeedableRng}; |
| 9 | +use rand_distr::Zipf; |
| 10 | +use rayon::prelude::*; |
| 11 | + |
| 12 | +use crate::{Protocol, fs::write_hashmap_to_file}; |
| 13 | + |
/// Seed fed to the ChaCha8 generator that samples benchmark entries.
const SEED: u64 = 12_312;
| 15 | + |
| 16 | +/// Generate random string of specified length |
| 17 | +pub(crate) fn generate_random_str(len: usize) -> String { |
| 18 | + Alphanumeric.sample_string(&mut rand::rng(), len) |
| 19 | +} |
| 20 | + |
| 21 | +/// Generate test dictionary for memcached with random keys and values |
| 22 | +pub(crate) fn generate_memcached_test_dict( |
| 23 | + key_size: usize, |
| 24 | + value_size: usize, |
| 25 | + nums: usize, |
| 26 | +) -> HashMap<String, String> { |
| 27 | + // random generate dict for memcached test |
| 28 | + (0..nums) |
| 29 | + .into_par_iter() |
| 30 | + .map(|_| { |
| 31 | + ( |
| 32 | + generate_random_str(key_size), |
| 33 | + generate_random_str(value_size), |
| 34 | + ) |
| 35 | + }) |
| 36 | + .collect() |
| 37 | +} |
| 38 | + |
| 39 | +/// Generate test dict and write to disk |
| 40 | +/// # Arguments |
| 41 | +/// * `key_size` - key size |
| 42 | +/// * `value_size` - value size |
| 43 | +/// * `nums` - number of entries |
| 44 | +/// * `dict_path` - dict path to store |
| 45 | +/// # Returns |
| 46 | +/// * `Result` - Result<HashMap<String, String>, anyhow::Error> |
| 47 | +/// # Example |
| 48 | +/// ```rust |
| 49 | +/// let test_dict = generate_test_dict_write_to_disk(16, 32, 100000, "test_dict.yml.zst"); |
| 50 | +/// ``` |
| 51 | +pub(crate) fn generate_test_dict_write_to_disk( |
| 52 | + key_size: usize, |
| 53 | + value_size: usize, |
| 54 | + nums: usize, |
| 55 | + dict_path: &str, |
| 56 | +) -> anyhow::Result<HashMap<String, String>> { |
| 57 | + let test_dict = generate_memcached_test_dict(key_size, value_size, nums); |
| 58 | + debug!("test dict len: {}", test_dict.len()); |
| 59 | + if let Some((key, value)) = test_dict.iter().next() { |
| 60 | + debug!("test dict key size: {}", size_of_val(key.as_str())); |
| 61 | + debug!("test dict value size: {}", size_of_val(value.as_str())); |
| 62 | + } else { |
| 63 | + return Err(anyhow::anyhow!("test dict is empty")); |
| 64 | + } |
| 65 | + write_hashmap_to_file(&test_dict, dict_path)?; |
| 66 | + info!("write test dict to path {}", dict_path); |
| 67 | + Ok(test_dict) |
| 68 | +} |
| 69 | + |
| 70 | +/// Generate test entries with Zipf distribution for benchmarking |
| 71 | +pub(crate) fn generate_test_entries( |
| 72 | + test_dict: Arc<HashMap<Arc<String>, Arc<String>>>, |
| 73 | + nums: usize, |
| 74 | +) -> Vec<(Arc<String>, Arc<String>, Protocol)> { |
| 75 | + let mut rng = ChaCha8Rng::seed_from_u64(SEED); |
| 76 | + let zipf = Zipf::new((test_dict.len() - 1) as f64, 0.99).unwrap(); |
| 77 | + |
| 78 | + let keys: Vec<Arc<String>> = test_dict.keys().cloned().collect(); |
| 79 | + (0..nums) |
| 80 | + .map(|idx| { |
| 81 | + let key = &keys[rng.sample(zipf) as usize]; |
| 82 | + let value = test_dict.get(key).unwrap(); |
| 83 | + // every 31 element is tcp. udp:tcp = 30:1 |
| 84 | + let protocol = if idx % 31 == 30 { |
| 85 | + Protocol::Tcp |
| 86 | + } else { |
| 87 | + Protocol::Udp |
| 88 | + }; |
| 89 | + (key.clone(), value.clone(), protocol) |
| 90 | + }) |
| 91 | + .collect() |
| 92 | +} |
| 93 | + |
| 94 | +/// Analyze the statistics of test entries |
| 95 | +#[allow(dead_code)] |
| 96 | +pub(crate) fn test_entries_statistics( |
| 97 | + test_entries: Arc<Vec<(&String, &String, Protocol)>>, |
| 98 | +) { |
| 99 | + let mut udp_count: usize = 0; |
| 100 | + let mut tcp_count: usize = 0; |
| 101 | + |
| 102 | + // analyze the key distribution base on the frequency |
| 103 | + let mut key_frequency = HashMap::new(); |
| 104 | + |
| 105 | + // only get the first element in the tuple |
| 106 | + test_entries.iter().for_each(|(key, _, proto)| { |
| 107 | + *key_frequency.entry(key.to_string()).or_insert(0) += 1; |
| 108 | + if *proto == Protocol::Udp { |
| 109 | + udp_count += 1; |
| 110 | + } else { |
| 111 | + tcp_count += 1; |
| 112 | + } |
| 113 | + }); |
| 114 | + |
| 115 | + // sort by frequency |
| 116 | + let mut key_frequency: Vec<_> = key_frequency.into_iter().collect(); |
| 117 | + key_frequency.sort_by(|a, b| a.1.cmp(&b.1)); |
| 118 | + |
| 119 | + // Display the frequency of each item |
| 120 | + for (key, count) in &key_frequency { |
| 121 | + if *count < key_frequency.len() / 1000 { |
| 122 | + continue; |
| 123 | + } |
| 124 | + info!("{}: {}", key, count); |
| 125 | + } |
| 126 | + |
| 127 | + info!("tcp count: {}, udp count: {}", tcp_count, udp_count); |
| 128 | +} |
0 commit comments