Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
244 changes: 244 additions & 0 deletions examples/sst_file_info_example.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,244 @@
// Copyright 2025 Contributors to rust-rocksdb
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// This example demonstrates how to use the range-based SST file metadata API
// to get information about SST files that contain data within a specific key range.

use rocksdb::{DBOptions, FlushOptions, SstFileInfo, Writable, DB};
use std::env;
use std::path::Path;

fn print_file_info(file: &SstFileInfo, index: usize) {
let smallest_str = String::from_utf8_lossy(&file.smallest_key);
let largest_str = String::from_utf8_lossy(&file.largest_key);

println!(
" File {}: {} (Level {}, {} bytes, keys: {}..{}, entries: {}, deletions: {})",
index + 1,
file.name,
file.level,
file.size,
smallest_str,
largest_str,
file.num_entries,
file.num_deletions
);
}

fn analyze_range_files(
db: &DB,
start_key: Option<&[u8]>,
end_key: Option<&[u8]>,
description: &str,
) {
println!("\n=== {} ===", description);

match db.get_sst_files_in_range_default(start_key, end_key) {
Ok(files) => {
if files.is_empty() {
println!("No SST files found in the specified range.");
} else {
println!("Found {} SST file(s) in range:", files.len());
for (i, file) in files.iter().enumerate() {
print_file_info(file, i);
}

// Calculate total size and statistics
let total_size: usize = files.iter().map(|f| f.size).sum();
let total_entries: u64 = files.iter().map(|f| f.num_entries).sum();
let total_deletions: u64 = files.iter().map(|f| f.num_deletions).sum();
println!("Total size: {} bytes", total_size);
println!("Total entries: {}", total_entries);
println!("Total deletions: {}", total_deletions);
println!(
"Deletion ratio: {:.2}%",
if total_entries > 0 {
(total_deletions as f64 / total_entries as f64) * 100.0
} else {
0.0
}
);

// Show level distribution
let mut level_counts = std::collections::HashMap::new();
for file in &files {
*level_counts.entry(file.level).or_insert(0) += 1;
}
println!("Files by level: {:?}", level_counts);
}
}
Err(e) => {
println!("Error getting files in range: {}", e);
}
}
}

fn main() -> Result<(), Box<dyn std::error::Error>> {
// Get the database path from command line arguments or use a default
let db_path = env::args()
.nth(1)
.unwrap_or_else(|| "/tmp/rocksdb_sst_file_info_example".to_string());

// Remove the existing database if it exists
// Note: In a real application, you might want to handle cleanup differently

// Open the database
let mut opts = DBOptions::new();
opts.create_if_missing(true);
let db = DB::open_default(&db_path)?;

println!("Database opened at: {}", db_path);

// Insert data in different key ranges to create multiple SST files
println!("\nInserting data in different key ranges...");
let mut flush_opts = FlushOptions::default();
flush_opts.set_wait(true);

// Group 1: Keys starting with "user_"
for i in 0..5 {
let key = format!("user_{:03}", i);
let value = format!("user_data_{}", i);
db.put(key.as_bytes(), value.as_bytes())?;
println!("Inserted: {} -> {}", key, value);
}
db.flush(&flush_opts)?;
println!("Flushed user data");

// Group 2: Keys starting with "order_"
for i in 0..5 {
let key = format!("order_{:03}", i);
let value = format!("order_data_{}", i);
db.put(key.as_bytes(), value.as_bytes())?;
println!("Inserted: {} -> {}", key, value);
}
db.flush(&flush_opts)?;
println!("Flushed order data");

// Group 3: Keys starting with "product_"
for i in 0..5 {
let key = format!("product_{:03}", i);
let value = format!("product_data_{}", i);
db.put(key.as_bytes(), value.as_bytes())?;
println!("Inserted: {} -> {}", key, value);
}
db.flush(&flush_opts)?;
println!("Flushed product data");

// Now analyze different key ranges
analyze_range_files(&db, None, None, "All SST Files");

analyze_range_files(&db, Some(b"user_000"), Some(b"user_999"), "User Data Range");

analyze_range_files(
&db,
Some(b"order_000"),
Some(b"order_999"),
"Order Data Range",
);

analyze_range_files(
&db,
Some(b"product_000"),
Some(b"product_999"),
"Product Data Range",
);

analyze_range_files(
&db,
Some(b"order_001"),
Some(b"order_003"),
"Specific Order Range",
);

analyze_range_files(
&db,
Some(b"system_"),
Some(b"system_999"),
"Non-existent Range",
);

// Demonstrate the overlap checking functionality
println!("\n=== Overlap Analysis ===");
match db.get_sst_files_in_range_default(Some(b"user_000"), Some(b"user_999")) {
Ok(files) => {
for file in &files {
println!("\nAnalyzing file: {}", file.name);

// Test different overlap scenarios
let test_ranges = [
(
Some(b"user_001" as &[u8]),
Some(b"user_003" as &[u8]),
"user_001..user_003",
),
(
Some(b"user_002" as &[u8]),
Some(b"user_005" as &[u8]),
"user_002..user_005",
),
(
Some(b"order_000" as &[u8]),
Some(b"order_999" as &[u8]),
"order_000..order_999",
),
(
Some(b"user_000" as &[u8]),
Some(b"user_999" as &[u8]),
"user_000..user_999",
),
];

for (start, end, desc) in &test_ranges {
let overlaps = file.overlaps_with_range(*start, *end);
let contained = file.is_contained_in_range(*start, *end);
println!(
" Range {}: overlaps={}, contained={}",
desc, overlaps, contained
);
}
}
}
Err(e) => {
println!("Error: {}", e);
}
}

// Demonstrate binary search optimization
println!("\n=== Binary Search Optimization ===");
println!("For levels 1 and above, SST files are non-overlapping and sorted by smallest_key.");
println!(
"The API uses binary search to find the first overlapping file, then iterates forward"
);
println!(
"until it encounters a file whose smallest_key >= end_key. This provides O(log n + k)"
);
println!(
"performance where k is the number of overlapping files, instead of O(n) for each level."
);

// Show performance benefit by analyzing a specific range
analyze_range_files(
&db,
Some(b"order_002"),
Some(b"order_004"),
"Specific Order Range (Binary Search Optimized)",
);

// Clean up
drop(db);
// Note: In a real application, you might want to handle cleanup differently
println!("\nExample completed successfully!");

Ok(())
}
10 changes: 10 additions & 0 deletions librocksdb_sys/crocksdb/c.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5886,6 +5886,16 @@ const char* crocksdb_sst_file_meta_data_largestkey(
return meta->rep.largestkey.data();
}

uint64_t crocksdb_sst_file_meta_data_num_entries(
const crocksdb_sst_file_meta_data_t* meta) {
return meta->rep.num_entries;
}

uint64_t crocksdb_sst_file_meta_data_num_deletions(
const crocksdb_sst_file_meta_data_t* meta) {
return meta->rep.num_deletions;
}

crocksdb_compaction_options_t* crocksdb_compaction_options_create() {
return new crocksdb_compaction_options_t();
}
Expand Down
7 changes: 6 additions & 1 deletion librocksdb_sys/crocksdb/crocksdb/c.h
Original file line number Diff line number Diff line change
Expand Up @@ -2007,8 +2007,9 @@ crocksdb_slicetransform_create(
unsigned char (*in_domain)(void*, const char* key, size_t length),
unsigned char (*in_range)(void*, const char* key, size_t length),
const char* (*name)(void*));

extern C_ROCKSDB_LIBRARY_API crocksdb_slicetransform_t*
crocksdb_slicetransform_create_fixed_prefix(size_t);
crocksdb_slicetransform_create_fixed_prefix(size_t);
extern C_ROCKSDB_LIBRARY_API crocksdb_slicetransform_t*
crocksdb_slicetransform_create_noop();
extern C_ROCKSDB_LIBRARY_API void crocksdb_slicetransform_destroy(
Expand Down Expand Up @@ -2306,6 +2307,10 @@ crocksdb_sst_file_meta_data_smallestkey(const crocksdb_sst_file_meta_data_t*,
size_t*);
extern C_ROCKSDB_LIBRARY_API const char* crocksdb_sst_file_meta_data_largestkey(
const crocksdb_sst_file_meta_data_t*, size_t*);
extern C_ROCKSDB_LIBRARY_API uint64_t
crocksdb_sst_file_meta_data_num_entries(const crocksdb_sst_file_meta_data_t*);
extern C_ROCKSDB_LIBRARY_API uint64_t
crocksdb_sst_file_meta_data_num_deletions(const crocksdb_sst_file_meta_data_t*);

/* CompactFiles */
extern C_ROCKSDB_LIBRARY_API crocksdb_compaction_options_t*
Expand Down
2 changes: 2 additions & 0 deletions librocksdb_sys/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2590,6 +2590,8 @@ extern "C" {
meta: *const DBSstFileMetaData,
len: *mut size_t,
) -> *const c_char;
pub fn crocksdb_sst_file_meta_data_num_entries(meta: *const DBSstFileMetaData) -> u64;
pub fn crocksdb_sst_file_meta_data_num_deletions(meta: *const DBSstFileMetaData) -> u64;

pub fn crocksdb_livefiles(db: *mut DBInstance) -> *mut DBLivefiles;
pub fn crocksdb_livefiles_count(lf: *const DBLivefiles) -> size_t;
Expand Down
2 changes: 1 addition & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ pub use librocksdb_sys::{
};
pub use logger::Logger;
pub use merge_operator::MergeOperands;
pub use metadata::{ColumnFamilyMetaData, LevelMetaData, SstFileMetaData};
pub use metadata::{ColumnFamilyMetaData, LevelMetaData, SstFileInfo, SstFileMetaData};
pub use perf_context::{
get_perf_level, set_perf_flags, set_perf_level, IOStatsContext, PerfContext, PerfFlag,
PerfFlags, PerfLevel,
Expand Down
81 changes: 81 additions & 0 deletions src/metadata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,14 @@ impl<'a> SstFileMetaData<'a> {
slice::from_raw_parts(ptr as *const u8, len)
}
}

pub fn get_num_entries(&self) -> u64 {
unsafe { crocksdb_ffi::crocksdb_sst_file_meta_data_num_entries(self.inner) }
}

pub fn get_num_deletions(&self) -> u64 {
unsafe { crocksdb_ffi::crocksdb_sst_file_meta_data_num_deletions(self.inner) }
}
}

pub struct LiveFiles {
Expand Down Expand Up @@ -172,3 +180,76 @@ impl Drop for LiveFiles {
}
}
}

/// Represents metadata for an SST file with its key range information.
#[derive(Debug, Clone)]
pub struct SstFileInfo {
/// The name/path of the SST file
pub name: String,
/// The size of the file in bytes
pub size: usize,
/// The level where this file resides
pub level: usize,
/// The smallest key in this file
pub smallest_key: Vec<u8>,
/// The largest key in this file
pub largest_key: Vec<u8>,
/// The number of entries in this file
pub num_entries: u64,
/// The number of deletions in this file
pub num_deletions: u64,
}

impl SstFileInfo {
/// Check if this SST file overlaps with the given key range.
///
/// # Arguments
/// * `start_key` - The start of the key range (inclusive), None means no lower bound
/// * `end_key` - The end of the key range (exclusive), None means no upper bound
///
/// # Returns
/// `true` if the file overlaps with the range, `false` otherwise
pub fn overlaps_with_range(&self, start_key: Option<&[u8]>, end_key: Option<&[u8]>) -> bool {
// Check if file's largest key is before the start of the range
if let Some(start) = start_key {
if self.largest_key.as_slice() < start {
return false;
}
}

// Check if file's smallest key is at or after the end of the range
if let Some(end) = end_key {
if self.smallest_key.as_slice() >= end {
return false;
}
}

true
}

/// Check if this SST file is completely contained within the given key range.
///
/// # Arguments
/// * `start_key` - The start of the key range (inclusive), None means no lower bound
/// * `end_key` - The end of the key range (exclusive), None means no upper bound
///
/// # Returns
/// `true` if the file is completely contained within the range, `false` otherwise
pub fn is_contained_in_range(&self, start_key: Option<&[u8]>, end_key: Option<&[u8]>) -> bool {
// Check if file's smallest key is at or after the start of the range
if let Some(start) = start_key {
if self.smallest_key.as_slice() < start {
return false;
}
}

// Check if file's largest key is before the end of the range
if let Some(end) = end_key {
if self.largest_key.as_slice() >= end {
return false;
}
}

true
}
}
Loading