diff --git a/examples/sst_file_info_example.rs b/examples/sst_file_info_example.rs
new file mode 100644
index 000000000..6dc1abfee
--- /dev/null
+++ b/examples/sst_file_info_example.rs
@@ -0,0 +1,251 @@
+// Copyright 2025 Contributors to rust-rocksdb
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// This example demonstrates how to use the range-based SST file metadata API
+// to get information about SST files that contain data within a specific key range.
+
+use rocksdb::{DBOptions, FlushOptions, SstFileInfo, Writable, DB};
+use std::env;
+use std::path::Path;
+
+/// Prints a one-line summary of a single SST file.
+///
+/// `index` is the zero-based position of the file in the result list; it is
+/// displayed one-based.
+fn print_file_info(file: &SstFileInfo, index: usize) {
+    // Keys are arbitrary bytes, so decode them lossily; this is for display only.
+    let smallest_str = String::from_utf8_lossy(&file.smallest_key);
+    let largest_str = String::from_utf8_lossy(&file.largest_key);
+
+    println!(
+        "  File {}: {} (Level {}, {} bytes, keys: {}..{}, entries: {}, deletions: {})",
+        index + 1,
+        file.name,
+        file.level,
+        file.size,
+        smallest_str,
+        largest_str,
+        file.num_entries,
+        file.num_deletions
+    );
+}
+
+/// Looks up the SST files of the default column family that overlap
+/// `[start_key, end_key)` and prints per-file details plus aggregate statistics.
+///
+/// `None` for either bound leaves the range unbounded on that side. A lookup
+/// error is reported on stderr and the function returns without printing statistics.
+fn analyze_range_files(
+    db: &DB,
+    start_key: Option<&[u8]>,
+    end_key: Option<&[u8]>,
+    description: &str,
+) {
+    println!("\n=== {} ===", description);
+
+    let files = match db.get_sst_files_in_range_default(start_key, end_key) {
+        Ok(files) => files,
+        Err(e) => {
+            eprintln!("Error getting files in range: {}", e);
+            return;
+        }
+    };
+
+    if files.is_empty() {
+        println!("No SST files found in the specified range.");
+        return;
+    }
+
+    println!("Found {} SST file(s) in range:", files.len());
+    for (i, file) in files.iter().enumerate() {
+        print_file_info(file, i);
+    }
+
+    // Calculate total size and statistics
+    let total_size: usize = files.iter().map(|f| f.size).sum();
+    let total_entries: u64 = files.iter().map(|f| f.num_entries).sum();
+    let total_deletions: u64 = files.iter().map(|f| f.num_deletions).sum();
+    // Avoid dividing by zero when the files report no entries.
+    let deletion_ratio = if total_entries > 0 {
+        (total_deletions as f64 / total_entries as f64) * 100.0
+    } else {
+        0.0
+    };
+    println!("Total size: {} bytes", total_size);
+    println!("Total entries: {}", total_entries);
+    println!("Total deletions: {}", total_deletions);
+    println!("Deletion ratio: {:.2}%", deletion_ratio);
+
+    // Show level distribution. A BTreeMap keeps the levels in ascending order,
+    // so the output does not change from run to run.
+    let mut level_counts = std::collections::BTreeMap::new();
+    for file in &files {
+        *level_counts.entry(file.level).or_insert(0) += 1;
+    }
+    println!("Files by level: {:?}", level_counts);
+}
+
+/// Writes five `<prefix>_NNN` keys and then flushes the memtable, so that each
+/// key group ends up in an SST file of its own.
+fn insert_group(
+    db: &DB,
+    prefix: &str,
+    flush_opts: &FlushOptions,
+) -> Result<(), Box<dyn std::error::Error>> {
+    for i in 0..5 {
+        let key = format!("{}_{:03}", prefix, i);
+        let value = format!("{}_data_{}", prefix, i);
+        db.put(key.as_bytes(), value.as_bytes())?;
+        println!("Inserted: {} -> {}", key, value);
+    }
+    db.flush(flush_opts)?;
+    println!("Flushed {} data", prefix);
+    Ok(())
+}
+
+fn main() -> Result<(), Box<dyn std::error::Error>> {
+    // Get the database path from command line arguments or use a default
+    let db_path = env::args()
+        .nth(1)
+        .unwrap_or_else(|| "/tmp/rocksdb_sst_file_info_example".to_string());
+
+    // This example never deletes anything on disk, so a directory left over from
+    // an earlier run is reused and its SST files appear in the listings below.
+    if Path::new(&db_path).exists() {
+        println!("Note: reusing existing database directory {}", db_path);
+    }
+
+    // Open the database with the options configured here
+    let mut opts = DBOptions::new();
+    opts.create_if_missing(true);
+    let db = DB::open(opts, &db_path)?;
+
+    println!("Database opened at: {}", db_path);
+
+    // Insert data in different key ranges to create multiple SST files
+    println!("\nInserting data in different key ranges...");
+    let mut flush_opts = FlushOptions::default();
+    flush_opts.set_wait(true);
+
+    for &prefix in &["user", "order", "product"] {
+        insert_group(&db, prefix, &flush_opts)?;
+    }
+
+    // Now analyze different key ranges
+    analyze_range_files(&db, None, None, "All SST Files");
+
+    analyze_range_files(&db, Some(b"user_000"), Some(b"user_999"), "User Data Range");
+
+    analyze_range_files(
+        &db,
+        Some(b"order_000"),
+        Some(b"order_999"),
+        "Order Data Range",
+    );
+
+    analyze_range_files(
+        &db,
+        Some(b"product_000"),
+        Some(b"product_999"),
+        "Product Data Range",
+    );
+
+    analyze_range_files(
+        &db,
+        Some(b"order_001"),
+        Some(b"order_003"),
+        "Specific Order Range",
+    );
+
+    analyze_range_files(
+        &db,
+        Some(b"system_"),
+        Some(b"system_999"),
+        "Non-existent Range",
+    );
+
+    // Demonstrate the overlap checking functionality
+    println!("\n=== Overlap Analysis ===");
+
+    // The probe ranges are the same for every file, so build them once.
+    let test_ranges = [
+        (
+            Some(b"user_001" as &[u8]),
+            Some(b"user_003" as &[u8]),
+            "user_001..user_003",
+        ),
+        (
+            Some(b"user_002" as &[u8]),
+            Some(b"user_005" as &[u8]),
+            "user_002..user_005",
+        ),
+        (
+            Some(b"order_000" as &[u8]),
+            Some(b"order_999" as &[u8]),
+            "order_000..order_999",
+        ),
+        (
+            Some(b"user_000" as &[u8]),
+            Some(b"user_999" as &[u8]),
+            "user_000..user_999",
+        ),
+    ];
+
+    match db.get_sst_files_in_range_default(Some(b"user_000"), Some(b"user_999")) {
+        Ok(files) => {
+            for file in &files {
+                println!("\nAnalyzing file: {}", file.name);
+
+                for (start, end, desc) in &test_ranges {
+                    let overlaps = file.overlaps_with_range(*start, *end);
+                    let contained = file.is_contained_in_range(*start, *end);
+                    println!(
+                        "    Range {}: overlaps={}, contained={}",
+                        desc, overlaps, contained
+                    );
+                }
+            }
+        }
+        Err(e) => {
+            eprintln!("Error: {}", e);
+        }
+    }
+
+    // Describe the lookup strategy used for levels 1 and above
+    println!("\n=== Binary Search Optimization ===");
+    println!("For levels 1 and above, SST files are non-overlapping and sorted by smallest_key.");
+    println!(
+        "The API uses binary search to find the first overlapping file, then iterates forward"
+    );
+    println!(
+        "until it encounters a file whose smallest_key >= end_key. This provides O(log n + k)"
+    );
+    println!(
+        "performance where k is the number of overlapping files, instead of O(n) for each level."
+    );
+
+    // Query one more range through the same API
+    analyze_range_files(
+        &db,
+        Some(b"order_002"),
+        Some(b"order_004"),
+        "Specific Order Range (Binary Search Optimized)",
+    );
+
+    // Close the database. The files on disk are intentionally left in place.
+    drop(db);
+    println!("\nExample completed successfully!");
+
+    Ok(())
+}
diff --git a/librocksdb_sys/crocksdb/c.cc b/librocksdb_sys/crocksdb/c.cc
index 5cdc9e990..bae31ff7f 100644
--- a/librocksdb_sys/crocksdb/c.cc
+++ b/librocksdb_sys/crocksdb/c.cc
@@ -5886,6 +5886,16 @@ const char* crocksdb_sst_file_meta_data_largestkey(
   return meta->rep.largestkey.data();
 }
 
+uint64_t crocksdb_sst_file_meta_data_num_entries(
+    const crocksdb_sst_file_meta_data_t* meta) {
+  return meta->rep.num_entries;
+}
+
+uint64_t crocksdb_sst_file_meta_data_num_deletions(
+    const crocksdb_sst_file_meta_data_t* meta) {
+  return meta->rep.num_deletions;
+}
+
 crocksdb_compaction_options_t* crocksdb_compaction_options_create() {
   return new crocksdb_compaction_options_t();
 }
diff --git a/librocksdb_sys/crocksdb/crocksdb/c.h b/librocksdb_sys/crocksdb/crocksdb/c.h
index e60e69eee..b66e95223 100644
--- a/librocksdb_sys/crocksdb/crocksdb/c.h
+++ b/librocksdb_sys/crocksdb/crocksdb/c.h
@@ -2007,8 +2007,9 @@ crocksdb_slicetransform_create(
     unsigned char (*in_domain)(void*, const char* key,
size_t length),
     unsigned char (*in_range)(void*, const char* key, size_t length),
     const char* (*name)(void*));
+
 extern C_ROCKSDB_LIBRARY_API crocksdb_slicetransform_t*
-    crocksdb_slicetransform_create_fixed_prefix(size_t);
+crocksdb_slicetransform_create_fixed_prefix(size_t);
 extern C_ROCKSDB_LIBRARY_API crocksdb_slicetransform_t*
 crocksdb_slicetransform_create_noop();
 extern C_ROCKSDB_LIBRARY_API void crocksdb_slicetransform_destroy(
@@ -2306,6 +2307,10 @@ crocksdb_sst_file_meta_data_smallestkey(const crocksdb_sst_file_meta_data_t*,
                                         size_t*);
 extern C_ROCKSDB_LIBRARY_API const char* crocksdb_sst_file_meta_data_largestkey(
     const crocksdb_sst_file_meta_data_t*, size_t*);
+extern C_ROCKSDB_LIBRARY_API uint64_t
+crocksdb_sst_file_meta_data_num_entries(const crocksdb_sst_file_meta_data_t*);
+extern C_ROCKSDB_LIBRARY_API uint64_t
+crocksdb_sst_file_meta_data_num_deletions(const crocksdb_sst_file_meta_data_t*);
 
 /* CompactFiles */
 extern C_ROCKSDB_LIBRARY_API crocksdb_compaction_options_t*
diff --git a/librocksdb_sys/src/lib.rs b/librocksdb_sys/src/lib.rs
index e0370cb30..fe875f1c0 100644
--- a/librocksdb_sys/src/lib.rs
+++ b/librocksdb_sys/src/lib.rs
@@ -2590,6 +2590,8 @@ extern "C" {
         meta: *const DBSstFileMetaData,
         len: *mut size_t,
     ) -> *const c_char;
+    pub fn crocksdb_sst_file_meta_data_num_entries(meta: *const DBSstFileMetaData) -> u64;
+    pub fn crocksdb_sst_file_meta_data_num_deletions(meta: *const DBSstFileMetaData) -> u64;
 
     pub fn crocksdb_livefiles(db: *mut DBInstance) -> *mut DBLivefiles;
     pub fn crocksdb_livefiles_count(lf: *const DBLivefiles) -> size_t;
diff --git a/src/lib.rs b/src/lib.rs
index 10926dc39..aa01ff302 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -53,7 +53,7 @@ pub use librocksdb_sys::{
 };
 pub use logger::Logger;
 pub use merge_operator::MergeOperands;
-pub use metadata::{ColumnFamilyMetaData, LevelMetaData, SstFileMetaData};
+pub use metadata::{ColumnFamilyMetaData, LevelMetaData, SstFileInfo, SstFileMetaData};
 pub use perf_context::{
     get_perf_level, set_perf_flags, set_perf_level, IOStatsContext, PerfContext, PerfFlag,
     PerfFlags, PerfLevel,
diff --git a/src/metadata.rs b/src/metadata.rs
index 7f2891667..6069ae8da 100644
--- a/src/metadata.rs
+++ b/src/metadata.rs
@@ -121,6 +121,16 @@ impl<'a> SstFileMetaData<'a> {
             slice::from_raw_parts(ptr as *const u8, len)
         }
     }
+
+    /// Returns the `num_entries` value RocksDB reports for this SST file.
+    pub fn get_num_entries(&self) -> u64 {
+        unsafe { crocksdb_ffi::crocksdb_sst_file_meta_data_num_entries(self.inner) }
+    }
+
+    /// Returns the `num_deletions` value RocksDB reports for this SST file.
+    pub fn get_num_deletions(&self) -> u64 {
+        unsafe { crocksdb_ffi::crocksdb_sst_file_meta_data_num_deletions(self.inner) }
+    }
 }
 
 pub struct LiveFiles {
@@ -172,3 +182,93 @@ impl Drop for LiveFiles {
         }
     }
 }
+
+/// Represents metadata for an SST file with its key range information.
+///
+/// The range helpers compare keys bytewise. That matches RocksDB's default comparator;
+/// for a column family with a custom comparator the answers may not reflect its key order.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct SstFileInfo {
+    /// The name/path of the SST file
+    pub name: String,
+    /// The size of the file in bytes
+    pub size: usize,
+    /// The level where this file resides
+    pub level: usize,
+    /// The smallest key in this file
+    pub smallest_key: Vec<u8>,
+    /// The largest key in this file
+    pub largest_key: Vec<u8>,
+    /// The number of entries in this file
+    pub num_entries: u64,
+    /// The number of deletions in this file
+    pub num_deletions: u64,
+}
+
+/// Returns `true` if the closed key interval `[smallest, largest]` shares at least one key
+/// with the half-open range `[start_key, end_key)`.
+///
+/// A `None` bound leaves the range open on that side. A range whose start is not smaller
+/// than its end contains no keys, so nothing overlaps it.
+pub(crate) fn key_range_overlaps(
+    smallest: &[u8],
+    largest: &[u8],
+    start_key: Option<&[u8]>,
+    end_key: Option<&[u8]>,
+) -> bool {
+    if let (Some(start), Some(end)) = (start_key, end_key) {
+        if start >= end {
+            return false;
+        }
+    }
+
+    // The interval must not end before the (inclusive) start of the range ...
+    let reaches_start = start_key.map_or(true, |start| largest >= start);
+    // ... and must not begin at or after its (exclusive) end.
+    let begins_before_end = end_key.map_or(true, |end| smallest < end);
+    reaches_start && begins_before_end
+}
+
+impl SstFileInfo {
+    /// Copies the metadata of `meta`, a file that resides on `level`, into an owned value.
+    pub(crate) fn from_metadata(level: usize, meta: &SstFileMetaData) -> SstFileInfo {
+        SstFileInfo {
+            name: meta.get_name(),
+            size: meta.get_size(),
+            level,
+            smallest_key: meta.get_smallestkey().to_vec(),
+            largest_key: meta.get_largestkey().to_vec(),
+            num_entries: meta.get_num_entries(),
+            num_deletions: meta.get_num_deletions(),
+        }
+    }
+
+    /// Check if this SST file overlaps with the given key range.
+    ///
+    /// # Arguments
+    /// * `start_key` - The start of the key range (inclusive), None means no lower bound
+    /// * `end_key` - The end of the key range (exclusive), None means no upper bound
+    ///
+    /// # Returns
+    /// `true` if the file overlaps with the range, `false` otherwise. An empty range
+    /// (`start_key >= end_key`) overlaps no file.
+    pub fn overlaps_with_range(&self, start_key: Option<&[u8]>, end_key: Option<&[u8]>) -> bool {
+        key_range_overlaps(&self.smallest_key, &self.largest_key, start_key, end_key)
+    }
+
+    /// Check if this SST file is completely contained within the given key range.
+    ///
+    /// # Arguments
+    /// * `start_key` - The start of the key range (inclusive), None means no lower bound
+    /// * `end_key` - The end of the key range (exclusive), None means no upper bound
+    ///
+    /// # Returns
+    /// `true` if the file is completely contained within the range, `false` otherwise
+    pub fn is_contained_in_range(&self, start_key: Option<&[u8]>, end_key: Option<&[u8]>) -> bool {
+        // Every key of the file lies in `[smallest_key, largest_key]`, so checking the two
+        // extremes against the bounds is sufficient.
+        let starts_inside = start_key.map_or(true, |start| self.smallest_key.as_slice() >= start);
+        let ends_inside = end_key.map_or(true, |end| self.largest_key.as_slice() < end);
+        starts_inside && ends_inside
+    }
+}
diff --git a/src/rocksdb.rs b/src/rocksdb.rs
index 6073ff6ba..198eba119 100644
--- a/src/rocksdb.rs
+++ b/src/rocksdb.rs
@@ -2112,6 +2112,119 @@ impl DB {
         }
     }
 
+    /// Get metadata for SST files that overlap with the specified key range.
+    ///
+    /// For level 0: Files may overlap, so we check all files.
+    /// For levels 1+: Files are non-overlapping and sorted by smallest_key, so we use binary search.
+    ///
+    /// Keys are compared bytewise, which matches RocksDB's default comparator.
+    ///
+    /// # Arguments
+    /// * `cf` - The column family handle
+    /// * `start_key` - The start of the key range (inclusive), None means no lower bound
+    /// * `end_key` - The end of the key range (exclusive), None means no upper bound
+    ///
+    /// # Returns
+    /// A vector of `SstFileInfo` containing metadata for files that overlap with the range,
+    /// ordered by level. An empty range (`start_key >= end_key`) yields an empty vector.
+    pub fn get_sst_files_in_range(
+        &self,
+        cf: &CFHandle,
+        start_key: Option<&[u8]>,
+        end_key: Option<&[u8]>,
+    ) -> Vec<crate::metadata::SstFileInfo> {
+        let cf_metadata = self.get_column_family_meta_data(cf);
+        let mut sst_files = Vec::new();
+
+        for (level_index, level_metadata) in cf_metadata.get_levels().iter().enumerate() {
+            let files = level_metadata.get_files();
+
+            // Level 0 files may overlap each other, so all of them are checked. Deeper
+            // levels are sorted and non-overlapping: binary search finds the first
+            // candidate and the scan stops at the first file starting at or after end_key.
+            let sorted_level = level_index > 0;
+            let first_candidate = if sorted_level {
+                Self::find_first_overlapping_file(&files, start_key)
+            } else {
+                0
+            };
+
+            for file_metadata in &files[first_candidate..] {
+                let smallest_key = file_metadata.get_smallestkey();
+                if sorted_level && end_key.map_or(false, |end| smallest_key >= end) {
+                    break;
+                }
+
+                // Test the borrowed keys first so that keys are only copied for the
+                // files that are actually returned.
+                let overlaps = crate::metadata::key_range_overlaps(
+                    smallest_key,
+                    file_metadata.get_largestkey(),
+                    start_key,
+                    end_key,
+                );
+                if overlaps {
+                    sst_files.push(crate::metadata::SstFileInfo::from_metadata(
+                        level_index,
+                        file_metadata,
+                    ));
+                }
+            }
+        }
+
+        sst_files
+    }
+
+    /// Find the index of the first file that could overlap with a range starting at `start_key`.
+    ///
+    /// `files` must be sorted by smallest_key and non-overlapping (levels 1+). Uses binary
+    /// search and returns the index to start iterating forward from: the last file whose
+    /// smallest_key is `<= start_key`, or 0 if there is no such file or no lower bound.
+    fn find_first_overlapping_file(
+        files: &[crate::metadata::SstFileMetaData],
+        start_key: Option<&[u8]>,
+    ) -> usize {
+        let start_key = match start_key {
+            Some(key) => key,
+            None => return 0,
+        };
+
+        // Binary search for the first file whose smallest_key > start_key
+        let mut left = 0usize;
+        let mut right = files.len();
+        while left < right {
+            let mid = left + (right - left) / 2;
+            if files[mid].get_smallestkey() <= start_key {
+                left = mid + 1;
+            } else {
+                right = mid;
+            }
+        }
+
+        // Only the file just before that position can still reach start_key with its
+        // largest_key; when there is none, start from the first file.
+        left.saturating_sub(1)
+    }
+
+    /// Get metadata for SST files that overlap with the specified key range in the default column family.
+    ///
+    /// # Arguments
+    /// * `start_key` - The start of the key range (inclusive), None means no lower bound
+    /// * `end_key` - The end of the key range (exclusive), None means no upper bound
+    ///
+    /// # Returns
+    /// A vector of `SstFileInfo` containing metadata for files that overlap with the range
+    pub fn get_sst_files_in_range_default(
+        &self,
+        start_key: Option<&[u8]>,
+        end_key: Option<&[u8]>,
+    ) -> Result<Vec<crate::metadata::SstFileInfo>, String> {
+        let cf_handle = self
+            .cf_handle("default")
+            .ok_or_else(|| "Default column family not found".to_string())?;
+        Ok(self.get_sst_files_in_range(cf_handle, start_key, end_key))
+    }
+
     pub fn compact_files_cf(
         &self,
         cf: &CFHandle,
diff --git a/tests/cases/mod.rs b/tests/cases/mod.rs
index b9c22e974..20081a1cb 100644
--- a/tests/cases/mod.rs
+++ b/tests/cases/mod.rs
@@ -17,6 +17,7 @@ mod test_rate_limiter;
 mod test_read_only;
 mod test_rocksdb_options;
 mod test_slice_transform;
+mod test_sst_file_info;
 mod test_statistics;
 mod test_table_properties;
 mod test_table_properties_rc;
diff --git a/tests/cases/test_sst_file_info.rs b/tests/cases/test_sst_file_info.rs
new file mode 100644
index 000000000..d4648043d
--- /dev/null
+++ b/tests/cases/test_sst_file_info.rs
@@ -0,0 +1,415 @@
+// Copyright 2025 Contributors to rust-rocksdb
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use rocksdb::{CFHandle, ColumnFamilyOptions, DBOptions, FlushOptions, SstFileInfo, Writable, DB};
+
+use super::tempdir_with_prefix;
+
+/// Returns flush options that make a flush wait until it has completed.
+fn waiting_flush_options() -> FlushOptions {
+    let mut fopts = FlushOptions::default();
+    fopts.set_wait(true);
+    fopts
+}
+
+/// Returns column family options with automatic compaction disabled, so that the files
+/// written by the flushes in a test are not merged or moved behind its back.
+fn no_auto_compaction_options() -> ColumnFamilyOptions {
+    let mut cf_opts = ColumnFamilyOptions::new();
+    cf_opts.set_disable_auto_compactions(true);
+    cf_opts
+}
+
+/// Opens (creating it if necessary) a database at `path` whose default column family
+/// uses `cf_opts`.
+fn open_with_default_cf(path: &str, cf_opts: ColumnFamilyOptions) -> DB {
+    let mut opts = DBOptions::new();
+    opts.create_if_missing(true);
+    DB::open_cf(opts, path, vec![("default", cf_opts)]).unwrap()
+}
+
+/// Writes `key{i:03}` -> `value_{i}` for every `i` in `keys` and flushes them into one file.
+fn put_keys_and_flush(db: &DB, keys: std::ops::Range<u32>) {
+    for i in keys {
+        let key = format!("key{:03}", i);
+        let value = format!("value_{}", i);
+        db.put(key.as_bytes(), value.as_bytes()).unwrap();
+    }
+    db.flush(&waiting_flush_options()).unwrap();
+}
+
+/// Returns the number of SST files of `cf` that overlap `[start, end)`.
+fn count_files_in_range(db: &DB, cf: &CFHandle, start: &[u8], end: &[u8]) -> usize {
+    db.get_sst_files_in_range(cf, Some(start), Some(end)).len()
+}
+
+/// Asserts that the key range of `file` intersects `[start, end)`.
+///
+/// The keys are compared as slices: `Vec<u8>` cannot be ordered against a byte string
+/// literal directly because `PartialOrd` is only implemented between two `Vec`s.
+fn assert_overlaps(file: &SstFileInfo, start: &[u8], end: &[u8]) {
+    assert!(file.smallest_key.as_slice() < end);
+    assert!(file.largest_key.as_slice() >= start);
+}
+
+/// Asserts that a range lookup returns exactly the files that a full listing filtered with
+/// `SstFileInfo::overlaps_with_range` selects, in the same order.
+fn assert_matches_full_scan(db: &DB, cf: &CFHandle, start: &[u8], end: &[u8]) {
+    let expected: Vec<String> = db
+        .get_sst_files_in_range(cf, None, None)
+        .into_iter()
+        .filter(|file| file.overlaps_with_range(Some(start), Some(end)))
+        .map(|file| file.name)
+        .collect();
+    let actual: Vec<String> = db
+        .get_sst_files_in_range(cf, Some(start), Some(end))
+        .into_iter()
+        .map(|file| file.name)
+        .collect();
+    assert_eq!(actual, expected);
+}
+
+#[test]
+fn test_sst_files_in_range_basic() {
+    let path = tempdir_with_prefix("_rust_rocksdb_test_sst_files_range_basic");
+    let db = open_with_default_cf(path.path().to_str().unwrap(), no_auto_compaction_options());
+    let cf_handle = db.cf_handle("default").unwrap();
+    let fopts = waiting_flush_options();
+
+    // Insert data with different key ranges
+    // File 1: keys 0-2
+    db.put(b"key000", b"value0").unwrap();
+    db.put(b"key001", b"value1").unwrap();
+    db.put(b"key002", b"value2").unwrap();
+    db.flush(&fopts).unwrap();
+
+    // File 2: keys 5-7
+    db.put(b"key005", b"value5").unwrap();
+    db.put(b"key006", b"value6").unwrap();
+    db.put(b"key007", b"value7").unwrap();
+    db.flush(&fopts).unwrap();
+
+    // File 3: keys 10-12
+    db.put(b"key010", b"value10").unwrap();
+    db.put(b"key011", b"value11").unwrap();
+    db.put(b"key012", b"value12").unwrap();
+    db.flush(&fopts).unwrap();
+
+    // Test 1: Get files in range [key001, key006) - should match file 1 and file 2
+    let files_in_range = db.get_sst_files_in_range(cf_handle, Some(b"key001"), Some(b"key006"));
+    assert_eq!(files_in_range.len(), 2);
+
+    // Verify the files contain the expected keys
+    for file in &files_in_range {
+        assert_overlaps(file, b"key001", b"key006");
+        assert!(file.name.contains(".sst"));
+        assert!(file.size > 0);
+        assert_eq!(file.level, 0); // Without compaction every flushed file stays in level 0
+    }
+
+    // Test 2: Get files in range [key005, key011) - should match file 2 and file 3
+    assert_eq!(count_files_in_range(&db, cf_handle, b"key005", b"key011"), 2);
+
+    // Test 3: Get files in range [key003, key004) - should match no files
+    assert_eq!(count_files_in_range(&db, cf_handle, b"key003", b"key004"), 0);
+
+    // Test 4: Get files in range [key000, key013) - should match all files
+    assert_eq!(count_files_in_range(&db, cf_handle, b"key000", b"key013"), 3);
+
+    // Test 5: Get files with no range bounds - should match all files
+    let files_in_range = db.get_sst_files_in_range(cf_handle, None, None);
+    assert_eq!(files_in_range.len(), 3);
+}
+
+#[test]
+fn test_sst_files_in_range_default_cf() {
+    let path = tempdir_with_prefix("_rust_rocksdb_test_sst_files_range_default");
+    let db = DB::open_default(path.path().to_str().unwrap()).unwrap();
+    let fopts = waiting_flush_options();
+
+    // Insert some data
+    db.put(b"a", b"value_a").unwrap();
+    db.put(b"b", b"value_b").unwrap();
+    db.flush(&fopts).unwrap();
+
+    db.put(b"c", b"value_c").unwrap();
+    db.put(b"d", b"value_d").unwrap();
+    db.flush(&fopts).unwrap();
+
+    // Test getting files in range using default CF method
+    let files_in_range = db
+        .get_sst_files_in_range_default(Some(b"b"), Some(b"d"))
+        .unwrap();
+    assert_eq!(files_in_range.len(), 2);
+
+    // Verify file properties
+    for file in &files_in_range {
+        assert_overlaps(file, b"b", b"d");
+        assert!(!file.name.is_empty());
+        assert!(file.size > 0);
+    }
+}
+
+#[test]
+fn test_sst_files_in_range_with_column_families() {
+    let path = tempdir_with_prefix("_rust_rocksdb_test_sst_files_range_cf");
+    let mut opts = DBOptions::new();
+    opts.create_if_missing(true);
+    opts.create_missing_column_families(true);
+
+    let cf_opts = no_auto_compaction_options();
+    let db = DB::open_cf(
+        opts,
+        path.path().to_str().unwrap(),
+        vec![
+            ("default", ColumnFamilyOptions::new()),
+            ("cf1", cf_opts.clone()),
+            ("cf2", cf_opts),
+        ],
+    )
+    .unwrap();
+
+    let cf1_handle = db.cf_handle("cf1").unwrap();
+    let cf2_handle = db.cf_handle("cf2").unwrap();
+    let fopts = waiting_flush_options();
+
+    // Insert data into different column families
+    db.put_cf(cf1_handle, b"key1", b"value1").unwrap();
+    db.put_cf(cf1_handle, b"key2", b"value2").unwrap();
+    db.flush_cf(cf1_handle, &fopts).unwrap();
+
+    db.put_cf(cf2_handle, b"key3", b"value3").unwrap();
+    db.put_cf(cf2_handle, b"key4", b"value4").unwrap();
+    db.flush_cf(cf2_handle, &fopts).unwrap();
+
+    // Test getting files from specific column families
+    let cf1_files = db.get_sst_files_in_range(cf1_handle, Some(b"key1"), Some(b"key3"));
+    assert_eq!(cf1_files.len(), 1);
+    assert_overlaps(&cf1_files[0], b"key1", b"key3");
+
+    let cf2_files = db.get_sst_files_in_range(cf2_handle, Some(b"key3"), Some(b"key5"));
+    assert_eq!(cf2_files.len(), 1);
+    assert_overlaps(&cf2_files[0], b"key3", b"key5");
+
+    // Files from cf1 should not overlap with cf2 range
+    assert_eq!(count_files_in_range(&db, cf1_handle, b"key3", b"key5"), 0);
+}
+
+#[test]
+fn test_sst_file_info_overlap_methods() {
+    let path = tempdir_with_prefix("_rust_rocksdb_test_sst_file_info_overlap");
+    let db = DB::open_default(path.path().to_str().unwrap()).unwrap();
+
+    // Insert data to create an SST file
+    db.put(b"key001", b"value1").unwrap();
+    db.put(b"key002", b"value2").unwrap();
+    db.put(b"key003", b"value3").unwrap();
+    db.flush(&waiting_flush_options()).unwrap();
+
+    // Get the SST file info
+    let files = db.get_sst_files_in_range_default(None, None).unwrap();
+    assert_eq!(files.len(), 1);
+    let file_info = &files[0];
+
+    // Test overlaps_with_range
+    assert!(file_info.overlaps_with_range(Some(b"key001"), Some(b"key004")));
+    assert!(file_info.overlaps_with_range(Some(b"key002"), Some(b"key005")));
+    assert!(file_info.overlaps_with_range(Some(b"key000"), Some(b"key002")));
+    assert!(!file_info.overlaps_with_range(Some(b"key000"), Some(b"key001")));
+    assert!(!file_info.overlaps_with_range(Some(b"key004"), Some(b"key005")));
+
+    // Test is_contained_in_range
+    assert!(file_info.is_contained_in_range(Some(b"key000"), Some(b"key005")));
+    assert!(file_info.is_contained_in_range(Some(b"key001"), Some(b"key004")));
+    assert!(!file_info.is_contained_in_range(Some(b"key001"), Some(b"key003")));
+    assert!(!file_info.is_contained_in_range(Some(b"key002"), Some(b"key005")));
+
+    // Test with no bounds
+    assert!(file_info.overlaps_with_range(None, None));
+    assert!(file_info.is_contained_in_range(None, None));
+}
+
+#[test]
+fn test_sst_files_in_range_empty_database() {
+    let path = tempdir_with_prefix("_rust_rocksdb_test_sst_files_range_empty");
+    let db = DB::open_default(path.path().to_str().unwrap()).unwrap();
+
+    // Empty database should have no SST files
+    let files_in_range = db
+        .get_sst_files_in_range_default(Some(b"a"), Some(b"z"))
+        .unwrap();
+    assert_eq!(files_in_range.len(), 0);
+}
+
+#[test]
+fn test_sst_files_in_range_edge_cases() {
+    let path = tempdir_with_prefix("_rust_rocksdb_test_sst_files_range_edge");
+    let db = DB::open_default(path.path().to_str().unwrap()).unwrap();
+
+    // Insert data with specific keys
+    db.put(b"a", b"value_a").unwrap();
+    db.put(b"z", b"value_z").unwrap();
+    db.flush(&waiting_flush_options()).unwrap();
+
+    // Test exact boundary matches
+    let files_exact_start = db.get_sst_files_in_range_default(Some(b"a"), None).unwrap();
+    assert_eq!(files_exact_start.len(), 1);
+
+    let files_exact_end = db.get_sst_files_in_range_default(None, Some(b"z")).unwrap();
+    assert_eq!(files_exact_end.len(), 1);
+
+    // Test range that exactly matches file boundaries
+    let files_exact_range = db
+        .get_sst_files_in_range_default(Some(b"a"), Some(b"z"))
+        .unwrap();
+    assert_eq!(files_exact_range.len(), 1);
+
+    // Test single key range
+    let files_single_key = db
+        .get_sst_files_in_range_default(Some(b"a"), Some(b"b"))
+        .unwrap();
+    assert_eq!(files_single_key.len(), 1);
+
+    // The start bound is inclusive: a range starting at the largest key still matches
+    let files_from_largest = db.get_sst_files_in_range_default(Some(b"z"), None).unwrap();
+    assert_eq!(files_from_largest.len(), 1);
+
+    // The end bound is exclusive: a range ending at the smallest key matches nothing
+    let files_before_smallest = db.get_sst_files_in_range_default(None, Some(b"a")).unwrap();
+    assert_eq!(files_before_smallest.len(), 0);
+}
+
+#[test]
+fn test_sst_files_in_range_binary_search_optimization() {
+    // Automatic compaction is disabled and nothing is compacted manually, so all five files
+    // stay in level 0 and this test covers the range filtering of level 0 files only. The
+    // binary search used for levels 1+ is covered by test_sst_files_in_range_after_compaction.
+    let path = tempdir_with_prefix("_rust_rocksdb_test_sst_files_range_binary_search");
+    let mut cf_opts = no_auto_compaction_options();
+    cf_opts.set_target_file_size_base(1024); // Small target size to create multiple files
+    let db = open_with_default_cf(path.path().to_str().unwrap(), cf_opts);
+    let cf_handle = db.cf_handle("default").unwrap();
+
+    // Create five files with distinct key ranges. Every key of a range is written so that
+    // the files really span key000-key099, key100-key199, ..., key400-key499.
+    for first_key in &[0u32, 100, 200, 300, 400] {
+        put_keys_and_flush(&db, *first_key..*first_key + 100);
+    }
+
+    // Range [150, 250) should only match files 2 and 3
+    let files_in_range = db.get_sst_files_in_range(cf_handle, Some(b"key150"), Some(b"key250"));
+    assert_eq!(files_in_range.len(), 2);
+
+    // Verify the files are from the expected ranges
+    for file in &files_in_range {
+        assert_overlaps(file, b"key150", b"key250");
+        assert!(file.smallest_key.as_slice() >= &b"key100"[..]);
+        assert!(file.largest_key.as_slice() <= &b"key299"[..]);
+
+        assert!(file.num_entries > 0); // Each file should have entries
+        assert_eq!(file.num_deletions, 0); // This test never deletes a key
+    }
+
+    // Range [50, 150) should only match files 1 and 2
+    assert_eq!(count_files_in_range(&db, cf_handle, b"key050", b"key150"), 2);
+
+    // Range [250, 350) should only match files 3 and 4
+    assert_eq!(count_files_in_range(&db, cf_handle, b"key250", b"key350"), 2);
+
+    // Range [350, 450) should only match files 4 and 5
+    assert_eq!(count_files_in_range(&db, cf_handle, b"key350", b"key450"), 2);
+
+    // Range [450, 550) should only match file 5
+    assert_eq!(count_files_in_range(&db, cf_handle, b"key450", b"key550"), 1);
+
+    // Range [25, 75) should only match file 1
+    assert_eq!(count_files_in_range(&db, cf_handle, b"key025", b"key075"), 1);
+
+    // Range that spans all files
+    assert_eq!(count_files_in_range(&db, cf_handle, b"key000", b"key500"), 5);
+
+    // Range that matches no files
+    assert_eq!(count_files_in_range(&db, cf_handle, b"key500", b"key600"), 0);
+}
+
+#[test]
+fn test_sst_files_in_range_after_compaction() {
+    // A manual compaction moves the data out of level 0 into a sorted, non-overlapping level,
+    // which is where get_sst_files_in_range relies on binary search. How RocksDB lays out the
+    // compacted files is not fixed, so each lookup is compared with a filtered full listing.
+    let path = tempdir_with_prefix("_rust_rocksdb_test_sst_files_range_compacted");
+    let mut cf_opts = no_auto_compaction_options();
+    cf_opts.set_target_file_size_base(1024);
+    let db = open_with_default_cf(path.path().to_str().unwrap(), cf_opts);
+    let cf_handle = db.cf_handle("default").unwrap();
+
+    for first_key in &[0u32, 100, 200, 300, 400] {
+        put_keys_and_flush(&db, *first_key..*first_key + 100);
+    }
+    db.compact_range(None, None);
+
+    let all_files = db.get_sst_files_in_range(cf_handle, None, None);
+    assert!(!all_files.is_empty());
+    assert!(all_files.iter().any(|file| file.level > 0));
+
+    assert_matches_full_scan(&db, cf_handle, b"key000", b"key500");
+    assert_matches_full_scan(&db, cf_handle, b"key000", b"key001");
+    assert_matches_full_scan(&db, cf_handle, b"key150", b"key250");
+    assert_matches_full_scan(&db, cf_handle, b"key199", b"key200");
+    assert_matches_full_scan(&db, cf_handle, b"key450", b"key550");
+    assert_matches_full_scan(&db, cf_handle, b"key500", b"key600");
+    assert_matches_full_scan(&db, cf_handle, b"a", b"key000");
+
+    // Every key from key000 to key499 exists, so these ranges cannot come back empty
+    assert!(count_files_in_range(&db, cf_handle, b"key000", b"key500") > 0);
+    assert!(count_files_in_range(&db, cf_handle, b"key250", b"key251") > 0);
+}
+
+#[test]
+fn test_sst_file_info_entries_and_deletions() {
+    let path = tempdir_with_prefix("_rust_rocksdb_test_sst_file_info_entries_deletions");
+    let db = open_with_default_cf(path.path().to_str().unwrap(), no_auto_compaction_options());
+    let cf_handle = db.cf_handle("default").unwrap();
+
+    // Insert some data
+    for i in 0..10 {
+        let key = format!("key{:03}", i);
+        let value = format!("value_{}", i);
+        db.put(key.as_bytes(), value.as_bytes()).unwrap();
+    }
+
+    // Delete some keys to create deletions
+    for i in 2..5 {
+        let key = format!("key{:03}", i);
+        db.delete(key.as_bytes()).unwrap();
+    }
+
+    db.flush(&waiting_flush_options()).unwrap();
+
+    // Get SST file info
+    let files = db.get_sst_files_in_range(cf_handle, None, None);
+    assert!(!files.is_empty());
+
+    for file in &files {
+        assert!(file.num_entries > 0);
+        assert!(file.num_entries >= file.num_deletions); // Entries should be >= deletions
+
+        // Keys 2, 3 and 4 were deleted, but the exact counts depend on RocksDB's internal
+        // organization, so they are printed rather than asserted.
+        println!(
+            "File {}: entries={}, deletions={}",
+            file.name, file.num_entries, file.num_deletions
+        );
+    }
+}