|
1 | | -use clap::Parser; |
2 | | -use lmdb_rs::{ |
3 | | - codec::types::{Bytes, Str}, |
4 | | - env::Env, |
5 | | -}; |
6 | | -use memmap2::MmapOptions; |
7 | | -use std::fs::File; |
8 | | -use std::path::PathBuf; |
9 | | - |
10 | | -#[derive(Parser, Debug)] |
11 | | -#[command(author, version, about, long_about = None)] |
12 | | -struct Args { |
13 | | - /// Path to the original (larger) LMDB data file |
14 | | - #[arg(value_name = "OLD_FILE")] |
15 | | - old_path: PathBuf, |
16 | | - |
17 | | - /// Path to the new (smaller) LMDB data file |
18 | | - #[arg(value_name = "NEW_FILE")] |
19 | | - new_path: PathBuf, |
20 | | -} |
21 | | - |
22 | | -fn main() -> Result<(), Box<dyn std::error::Error>> { |
23 | | - let args = Args::parse(); |
24 | | - |
25 | | - println!("Comparing:"); |
26 | | - println!(" Old: {:?}", args.old_path); |
27 | | - println!(" New: {:?}", args.new_path); |
28 | | - |
29 | | - let old_file = File::open(&args.old_path)?; |
30 | | - let old_mmap = unsafe { MmapOptions::new().map(&old_file)? }; |
31 | | - let old_env = Env::new(&old_mmap)?; |
32 | | - |
33 | | - let new_file = File::open(&args.new_path)?; |
34 | | - let new_mmap = unsafe { MmapOptions::new().map(&new_file)? }; |
35 | | - let new_env = Env::new(&new_mmap)?; |
36 | | - |
37 | | - let old_txn = old_env.read_txn()?; |
38 | | - let new_txn = new_env.read_txn()?; |
39 | | - |
40 | | - let db_names = vec!["blocks", "dw", "main"]; |
41 | | - |
42 | | - for name in db_names { |
43 | | - println!("\n--- Database: {} ---", name); |
44 | | - |
45 | | - // Open in Old |
46 | | - let old_db = match old_env.open_database::<Str, Bytes>(&old_txn, Some(name))? { |
47 | | - Some(db) => db, |
48 | | - None => { |
49 | | - println!(" [WARN] Database '{}' missing in OLD file. Skipping.", name); |
50 | | - continue; |
51 | | - } |
52 | | - }; |
53 | | - |
54 | | - // Open in New |
55 | | - let new_db = match new_env.open_database::<Str, Bytes>(&new_txn, Some(name))? { |
56 | | - Some(db) => db, |
57 | | - None => { |
58 | | - println!(" [CRITICAL] Database '{}' completely MISSING in NEW file!", name); |
59 | | - continue; |
60 | | - } |
61 | | - }; |
62 | | - |
63 | | - let mut missing_keys_count = 0; |
64 | | - let mut missing_bytes = 0usize; |
65 | | - let mut changed_value_count = 0; |
66 | | - let mut value_size_delta: i64 = 0; |
67 | | - let mut inspected_keys = 0; |
68 | | - |
69 | | - // Iterate Old items |
70 | | - let iter = old_db.iter(&old_txn)?; |
71 | | - for item in iter { |
72 | | - let (key, old_val) = item?; |
73 | | - inspected_keys += 1; |
74 | | - |
75 | | - match new_db.get(&new_txn, key) { |
76 | | - Ok(Some(new_val)) => { |
77 | | - // Key exists, check value |
78 | | - if old_val.len() != new_val.len() { |
79 | | - changed_value_count += 1; |
80 | | - let diff = (new_val.len() as i64) - (old_val.len() as i64); |
81 | | - value_size_delta += diff; |
82 | | - |
83 | | - // Verbose detail for significant changes (optional, maybe flag gated?) |
84 | | - // For now just aggregate |
85 | | - } else if old_val != new_val { |
86 | | - // Same length, different content (unlikely to affect size, but good to know) |
87 | | - // not tracking separately for size reduction task |
88 | | - } |
89 | | - } |
90 | | - Ok(None) => { |
91 | | - // Key missing in New |
92 | | - missing_keys_count += 1; |
93 | | - missing_bytes += old_val.len(); |
94 | | - // println!(" [MISSING] Key: '{}' (Lost {} bytes)", key, old_val.len()); |
95 | | - // Too spammy if 500kb is lost in small chunks. |
96 | | - // Maybe print first few? |
97 | | - if missing_keys_count <= 10 { |
98 | | - println!(" [MISSING] Key: '{}' (Lost {} bytes)", key, old_val.len()); |
99 | | - } |
100 | | - } |
101 | | - Err(e) => { |
102 | | - println!(" [ERROR] checking key '{}': {}", key, e); |
103 | | - } |
104 | | - } |
105 | | - } |
106 | | - |
107 | | - println!(" Summary for '{}':", name); |
108 | | - println!(" Total Keys in Old: {}", inspected_keys); |
109 | | - println!(" Missing Keys: {}", missing_keys_count); |
110 | | - println!(" Missing Bytes: {} (from missing keys)", missing_bytes); |
111 | | - println!(" Changed Values: {}", changed_value_count); |
112 | | - println!(" Size Delta: {} bytes (from changed values)", value_size_delta); |
113 | | - |
114 | | - let net_change = (value_size_delta) - (missing_bytes as i64); |
115 | | - println!(" NET CHANGE: {} bytes", net_change); |
116 | | - } |
117 | | - |
118 | | - Ok(()) |
119 | | -} |
| 1 | +use clap::Parser; |
| 2 | +use lmdb_rs::{ |
| 3 | + codec::types::{Bytes, Str}, |
| 4 | + env::Env, |
| 5 | +}; |
| 6 | +use memmap2::MmapOptions; |
| 7 | +use std::fs::File; |
| 8 | +use std::path::PathBuf; |
| 9 | + |
| 10 | +#[derive(Parser, Debug)] |
| 11 | +#[command(author, version, about, long_about = None)] |
| 12 | +struct Args { |
| 13 | + /// Path to the original (larger) LMDB data file |
| 14 | + #[arg(value_name = "OLD_FILE")] |
| 15 | + old_path: PathBuf, |
| 16 | + |
| 17 | + /// Path to the new (smaller) LMDB data file |
| 18 | + #[arg(value_name = "NEW_FILE")] |
| 19 | + new_path: PathBuf, |
| 20 | +} |
| 21 | + |
| 22 | +fn main() -> Result<(), Box<dyn std::error::Error>> { |
| 23 | + let args = Args::parse(); |
| 24 | + |
| 25 | + println!("Comparing:"); |
| 26 | + println!(" Old: {:?}", args.old_path); |
| 27 | + println!(" New: {:?}", args.new_path); |
| 28 | + |
| 29 | + let old_file = File::open(&args.old_path)?; |
| 30 | + let old_mmap = unsafe { MmapOptions::new().map(&old_file)? }; |
| 31 | + let old_env = Env::new(&old_mmap)?; |
| 32 | + |
| 33 | + let new_file = File::open(&args.new_path)?; |
| 34 | + let new_mmap = unsafe { MmapOptions::new().map(&new_file)? }; |
| 35 | + let new_env = Env::new(&new_mmap)?; |
| 36 | + |
| 37 | + let old_txn = old_env.read_txn()?; |
| 38 | + let new_txn = new_env.read_txn()?; |
| 39 | + |
| 40 | + let db_names = vec!["blocks", "dw", "main"]; |
| 41 | + |
| 42 | + for name in db_names { |
| 43 | + println!("\n--- Database: {} ---", name); |
| 44 | + |
| 45 | + // Open in Old |
| 46 | + let old_db = match old_env.open_database::<Str, Bytes>(&old_txn, Some(name))? { |
| 47 | + Some(db) => db, |
| 48 | + None => { |
| 49 | + println!( |
| 50 | + " [WARN] Database '{}' missing in OLD file. Skipping.", |
| 51 | + name |
| 52 | + ); |
| 53 | + continue; |
| 54 | + } |
| 55 | + }; |
| 56 | + |
| 57 | + // Open in New |
| 58 | + let new_db = match new_env.open_database::<Str, Bytes>(&new_txn, Some(name))? { |
| 59 | + Some(db) => db, |
| 60 | + None => { |
| 61 | + println!( |
| 62 | + " [CRITICAL] Database '{}' completely MISSING in NEW file!", |
| 63 | + name |
| 64 | + ); |
| 65 | + continue; |
| 66 | + } |
| 67 | + }; |
| 68 | + |
| 69 | + let mut missing_keys_count = 0; |
| 70 | + let mut missing_bytes = 0usize; |
| 71 | + let mut changed_value_count = 0; |
| 72 | + let mut value_size_delta: i64 = 0; |
| 73 | + let mut inspected_keys = 0; |
| 74 | + |
| 75 | + // Iterate Old items |
| 76 | + let iter = old_db.iter(&old_txn)?; |
| 77 | + for item in iter { |
| 78 | + let (key, old_val) = item?; |
| 79 | + inspected_keys += 1; |
| 80 | + |
| 81 | + match new_db.get(&new_txn, key) { |
| 82 | + Ok(Some(new_val)) => { |
| 83 | + // Key exists, check value |
| 84 | + if old_val.len() != new_val.len() { |
| 85 | + changed_value_count += 1; |
| 86 | + let diff = (new_val.len() as i64) - (old_val.len() as i64); |
| 87 | + value_size_delta += diff; |
| 88 | + |
| 89 | + // Verbose detail for significant changes (optional, maybe flag gated?) |
| 90 | + // For now just aggregate |
| 91 | + } else if old_val != new_val { |
| 92 | + // Same length, different content (unlikely to affect size, but good to know) |
| 93 | + // not tracking separately for size reduction task |
| 94 | + } |
| 95 | + } |
| 96 | + Ok(None) => { |
| 97 | + // Key missing in New |
| 98 | + missing_keys_count += 1; |
| 99 | + missing_bytes += old_val.len(); |
| 100 | + // println!(" [MISSING] Key: '{}' (Lost {} bytes)", key, old_val.len()); |
| 101 | + // Too spammy if 500kb is lost in small chunks. |
| 102 | + // Maybe print first few? |
| 103 | + if missing_keys_count <= 10 { |
| 104 | + println!(" [MISSING] Key: '{}' (Lost {} bytes)", key, old_val.len()); |
| 105 | + } |
| 106 | + } |
| 107 | + Err(e) => { |
| 108 | + println!(" [ERROR] checking key '{}': {}", key, e); |
| 109 | + } |
| 110 | + } |
| 111 | + } |
| 112 | + |
| 113 | + println!(" Summary for '{}':", name); |
| 114 | + println!(" Total Keys in Old: {}", inspected_keys); |
| 115 | + println!(" Missing Keys: {}", missing_keys_count); |
| 116 | + println!( |
| 117 | + " Missing Bytes: {} (from missing keys)", |
| 118 | + missing_bytes |
| 119 | + ); |
| 120 | + println!(" Changed Values: {}", changed_value_count); |
| 121 | + println!( |
| 122 | + " Size Delta: {} bytes (from changed values)", |
| 123 | + value_size_delta |
| 124 | + ); |
| 125 | + |
| 126 | + let net_change = (value_size_delta) - (missing_bytes as i64); |
| 127 | + println!(" NET CHANGE: {} bytes", net_change); |
| 128 | + } |
| 129 | + |
| 130 | + Ok(()) |
| 131 | +} |
0 commit comments