diff --git a/Cargo.toml b/Cargo.toml index ccc35b556..810ae3437 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,29 +16,48 @@ categories = ["data-structures", "database-implementations", "algorithms"] name = "lsm_tree" path = "src/lib.rs" +[[bin]] +name = "lsm" +path = "src/tool.rs" +required-features = ["tool"] + [features] default = [] lz4 = ["dep:lz4_flex"] bytes_1 = ["dep:bytes"] metrics = [] +tool = ["dep:clap", "dep:shlex", "dep:rustyline", "dep:parse-size", "dep:humansize", + "dep:tracing", "dep:tracing-log", "dep:tracing-panic", "dep:tracing-subscriber"] [dependencies] bytes = { version = "1", optional = true } byteorder = { package = "byteorder-lite", version = "0.1.0" } byteview = "~0.10.0" +clap = { version = "4", features = ["derive"], optional = true } crossbeam-skiplist = "0.1.3" enum_dispatch = "0.3.13" +humansize = { version = "2.1", optional = true } interval-heap = "0.0.5" log = "0.4.27" lz4_flex = { version = "0.11.5", optional = true, default-features = false } +parse-size = { version = "1.0", optional = true } quick_cache = { version = "0.6.16", default-features = false, features = [] } rustc-hash = "2.1.1" +rustyline = { version = "15", optional = true } self_cell = "1.2.0" sfa = "~1.0.0" +shlex = { version = "1", optional = true } tempfile = "3.20.0" +tracing = { version = "0.1", optional = true } +tracing-log = { version = "0.2", optional = true } +tracing-panic = { version = "0.1.2", optional = true } +tracing-subscriber = { version = "0.3", features = ["env-filter", "fmt", "registry"], optional = true } varint-rs = "2.2.0" xxhash-rust = { version = "0.8.15", features = ["xxh3"] } +[target.'cfg(unix)'.dev-dependencies] +rexpect = "0.5" + [dev-dependencies] criterion = { version = "0.8.0", features = ["html_reports"] } fs_extra = "1.3.0" diff --git a/README.md b/README.md index b18a3469c..c987a92bb 100644 --- a/README.md +++ b/README.md @@ -62,6 +62,176 @@ Uses [`bytes`](https://github.com/tokio-rs/bytes) as the underlying `Slice` type *Disabled by default.* +### tool + +Enables the `lsm` CLI binary for interacting with LSM trees from the command line. + +*Disabled by default.* + +## CLI Tool + +The crate includes an optional CLI tool (`lsm`) for inspecting and manipulating LSM trees. + +### Installation + +```bash +cargo install lsm-tree --features tool +``` + +Or build from source: + +```bash +cargo build --release --features tool +``` + +### Usage + +The tool can be used either with direct commands or in interactive shell mode. + +#### Direct Commands + +```bash +# Set a key-value pair +lsm /path/to/db set mykey "my value" + +# Get a value +lsm /path/to/db get mykey + +# Delete a key +lsm /path/to/db del mykey + +# List all keys (aliases: list, ls) +lsm /path/to/db scan + +# List keys with a prefix +lsm /path/to/db scan "user:" + +# List keys in a range [start, end) +lsm /path/to/db range a z + +# Count items +lsm /path/to/db count + +# Show database info +lsm /path/to/db info + +# Flush memtable to disk +lsm /path/to/db flush + +# Run compaction +lsm /path/to/db compact +``` + +#### Interactive Shell + +Start an interactive shell by running without a command: + +```bash +lsm /path/to/db +``` + +The shell supports all the above commands plus: + +- `begin` - Start a batch/transaction +- `commit` - Commit the current batch +- `rollback` - Discard the current batch +- `exit` / `quit` - Exit (flushes data first) +- `abort` - Exit without flushing +- `help` - Show available commands + +#### Batch Operations + +The shell supports batching multiple operations into an atomic unit: + +``` +lsm> begin +OK (batch started) +lsm> set key1 value1 +OK (batched, ready to commit) +lsm> set key2 value2 +OK (batched, ready to commit) +lsm> del key3 +OK (batched, ready to commit) +lsm> commit +OK (batch committed, ready to flush) +``` + +While a batch is active: +- `get` reads from the batch first, then falls back to the tree +- `scan` and `range` warn that they ignore uncommitted batch operations +- `info` shows the pending batch operations +- `rollback` discards all batched operations + +#### Long Scan + +Use `-l` / `--long` to show internal entry details including sequence numbers, value types, and tombstones: + +``` +lsm> scan -l +=== Active Memtable === +key1 = value1 [seqno=0, type=Value] +key2 [seqno=1, type=Tombstone] + +=== Persisted (on disk) === +key3 = value3 [seqno=2, type=Value] + +(3 total items, 2 in memtable, 1 persisted, 1 tombstones) +``` + +#### Blob Trees with Indirect Items + +A blob tree uses key-value separation, storing large values in separate blob files and keeping indirect references (indirections) in the main LSM-tree. This improves performance for large values by reducing write amplification and improving compaction efficiency. + +To create a blob tree, use the `--blob-tree` flag along with `--separation-threshold` (or `-t`) to specify the size threshold in bytes. Values larger than this threshold will be stored as indirect items: + +```bash +# Create a blob tree with 1 KiB separation threshold +lsm --blob-tree --separation-threshold 1024 /path/to/db set largekey "very large value..." + +# Or using the short form +lsm -b -t 1KiB /path/to/db set largekey "very large value..." + +# In interactive mode +lsm --blob-tree -t 1024 /path/to/db +lsm> set largekey "very large value..." +OK (set) +lsm> flush +OK (flushed) +lsm> scan -l +=== Active Memtable === +=== Persisted (on disk) === +largekey = very large value... [seqno=0, type=Indirection] +``` + +After flushing, values that exceed the separation threshold will appear as `type=Indirection` in verbose scan output, indicating they are stored in separate blob files rather than inline in the table. + +#### Weak Tombstones + +A weak tombstone is a special type of deletion marker that provides a "single deletion" semantic. Unlike regular tombstones, weak tombstones are designed to be removed during compaction when they encounter the key they mark for deletion, making them useful for scenarios where you want to delete a key but don't need the tombstone to persist indefinitely. + +To delete a key with a weak tombstone, use the `--weak` (or `-w`) flag: + +```bash +# Delete with weak tombstone from command line +lsm /path/to/db del --weak mykey + +# Or using the short form +lsm /path/to/db del -w mykey + +# In interactive shell +lsm> del --weak mykey +OK +lsm> scan -l +=== Active Memtable === +mykey [seqno=0, type=WeakTombstone] +``` + +**Important notes:** +- Weak deletes are **not supported in batches** - they always execute immediately +- Weak tombstones appear as `type=WeakTombstone` in long scan output +- Weak tombstones are typically removed during compaction when they encounter the deleted key +- Use weak tombstones when you want single-deletion semantics rather than persistent deletion markers + ## Run unit benchmarks ```bash diff --git a/src/tool.rs b/src/tool.rs new file mode 100644 index 000000000..f4c9c57e9 --- /dev/null +++ b/src/tool.rs @@ -0,0 +1,898 @@ +// Copyright (c) 2024-present, fjall-rs +// This source code is licensed under both the Apache 2.0 and MIT License +// (found in the LICENSE-* files in the repository) + +//! CLI tool for interacting with LSM trees + +use clap::{ArgAction, CommandFactory, Parser, Subcommand}; +use humansize::{SizeFormatter, BINARY}; +use lsm_tree::config::KvSeparationOptions; +use lsm_tree::{AbstractTree, AnyTree, Config, Guard, SeqNo, SequenceNumberCounter, ValueType}; +use rustyline::DefaultEditor; +use std::cell::RefCell; +use std::collections::HashMap; +use std::io::{self, BufRead, IsTerminal, Write}; +use std::path::PathBuf; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::sync::LazyLock; +use tracing_subscriber::{ + filter::{EnvFilter, LevelFilter}, + prelude::*, + registry::Registry, +}; + +macro_rules! die { + ($fmt:literal, $($arg:tt)*) => {{ + eprintln!($fmt, $($arg)*); + std::process::exit(1); + }}; + + ($msg:literal) => {{ + eprintln!($msg); + std::process::exit(1); + }}; + + () => {{ + eprintln!("Program terminated unexpectedly"); + std::process::exit(1); + }}; +} + +#[allow(unused_imports)] +use tracing::{debug, error, info, trace, warn}; + +pub fn init_tracing(quiet: bool, verbose: u8) -> (bool, LevelFilter) { + let is_verbose = !quiet && verbose > 0; + + let level_filter = if quiet { + LevelFilter::ERROR + } else { + match verbose { + 0 => LevelFilter::WARN, + 1 => LevelFilter::INFO, + 2 => LevelFilter::DEBUG, + _ => LevelFilter::TRACE, + } + }; + + // Bridge log crate macros to tracing (for library code that uses log::*) + tracing_log::LogTracer::init().expect("Failed to set log tracer"); + + let registry = Registry::default(); + + let env_filter = EnvFilter::builder() + .with_default_directive(level_filter.into()) + .with_env_var("LSM_LOG") + .from_env_lossy() + .add_directive( + "rustyline=warn" + .parse() + .expect("Failed to parse rustyline directive"), + ); + + let subscriber = registry.with(env_filter).with( + tracing_subscriber::fmt::layer() + .with_writer(std::io::stderr) + .compact(), + ); + + if tracing::subscriber::set_global_default(subscriber).is_err() { + die!("INTERNAL ERROR: setting default tracing::subscriber failed"); + } + + let prev_hook = std::panic::take_hook(); + std::panic::set_hook(Box::new(move |info| { + tracing_panic::panic_hook(info); + prev_hook(info); // daisy-chain to old panic hook + })); + + (is_verbose, level_filter) +} + +fn parse_size_as_u32(s: &str) -> Result { + let cfg = parse_size::Config::new().with_binary(); + cfg.parse_size(s) + .map(|size| size as u32) + .map_err(|e| e.to_string()) +} + +static DEFAULT_KV_SEPARATION_OPTIONS: LazyLock = + LazyLock::new(|| KvSeparationOptions::default()); +static DEFAULT_SEPARATION_THRESHOLD: LazyLock = LazyLock::new(|| { + SizeFormatter::new(DEFAULT_KV_SEPARATION_OPTIONS.separation_threshold, BINARY).to_string() +}); + +/// CLI tool for interacting with LSM trees +#[derive(Parser, Debug)] +#[command(name = "lsm")] +#[command(about = "CLI tool for interacting with LSM trees")] +struct ToolArgs { + /// Suppress all output except for errors. This overrides the -v flag. + #[arg(short, long, global = true)] + quiet: bool, + + /// Turn on verbose output. Supply -v multiple times to increase verbosity. + #[arg(short, long, action = ArgAction::Count, global = true)] + verbose: u8, + + /// Path to the LSM tree directory (will be created if it doesn't exist) + lsm_path: PathBuf, + + /// Key/Value Separation Threshold (e.g., "1KiB", "4096") + #[arg( + short = 't', long, + default_value = &**DEFAULT_SEPARATION_THRESHOLD, + value_parser = parse_size_as_u32, + value_name = "THRESHOLD", + )] + separation_threshold: u32, + + /// Open as Blob Tree + #[arg(short, long, default_value_t = false)] + blob_tree: bool, + + /// Command to run (if omitted, starts interactive shell) + #[command(subcommand)] + command: Option, +} + +#[derive(Subcommand, Debug, Clone)] +enum ToolCommand { + /// Get the value for a key + Get { + /// The key to look up + key: String, + }, + /// Set a key-value pair + Set { + /// The key to set + key: String, + /// The value to store + value: String, + }, + /// Delete a key + Del { + /// The key to delete + key: String, + /// Use weak tombstone instead of regular tombstone + #[arg(short = 'w', long = "weak")] + weak: bool, + }, + /// List all keys, optionally filtered by prefix + #[command(visible_alias = "list", visible_alias = "ls")] + Scan { + /// Optional prefix to filter keys + prefix: Option, + + /// Show internal key fields (seqno, value_type) + #[arg(short = 'l', long = "long")] + long: bool, + }, + #[command(hide = true, alias = "ll")] + ScanLong { + /// Optional prefix to filter keys + prefix: Option, + }, + /// List keys in a range [start, end) + Range { + /// Start of the range (inclusive) + start: String, + /// End of the range (exclusive) + end: String, + }, + /// Count the number of items + Count, + /// Flush memtable to disk + Flush, + /// Run major compaction + Compact, + /// Show tree statistics + Info, +} + +// Internal shell commands, include all external tool commands +#[derive(Parser, Debug)] +#[command(name = "")] +#[command(no_binary_name = true)] +#[command(disable_version_flag = true)] +#[command(help_template = " +{version} + +Available Commands: + +{subcommands} + +Use `help COMMAND` or `COMMAND --help` for more details. + +")] + +struct ShellArgs { + #[command(subcommand)] + command: ShellCommand, +} + +// Shell commands (including ones not available from CLI) +#[derive(Subcommand, Debug, Clone)] +enum ShellCommand { + #[command(flatten)] + ToolCmd(ToolCommand), + + /// Exit the current shell (with implicit flush) + #[command(visible_alias = "quit")] + Exit, + /// Abort the curent shell (without flush) + Abort, + /// Begin a new batch (transaction) + Begin, + /// Commit the current batch + Commit, + /// Rollback (discard) the current batch + Rollback, +} + +/// A pending operation in a batch +#[derive(Debug, Clone)] +enum BatchOp { + Set { key: String, value: String }, + Del { key: String }, +} + +/// A batch of pending operations +#[derive(Debug, Default)] +struct Batch { + /// Operations in order they were added + ops: Vec, + /// Current state of keys in the batch (for reads) + state: HashMap>, +} + +struct Session { + tree: AnyTree, + seqno: AtomicU64, + path: PathBuf, + batch: RefCell>, +} + +impl Session { + fn open(path: PathBuf, separation_threshold: u32, blob_tree: bool) -> lsm_tree::Result { + let config = Config::new( + &path, + SequenceNumberCounter::default(), + SequenceNumberCounter::default(), + ); + + let config = if blob_tree { + let kv_opts = KvSeparationOptions::default().separation_threshold(separation_threshold); + config.with_kv_separation(Some(kv_opts)) + } else { + config + }; + + let tree = config.open()?; + + // Start seqno from highest existing seqno + 1, or 0 if empty + let start_seqno = tree.get_highest_seqno().map(|s| s + 1).unwrap_or(0); + + Ok(Self { + tree, + seqno: AtomicU64::new(start_seqno), + path, + batch: RefCell::new(None), + }) + } + + fn next_seqno(&self) -> SeqNo { + self.seqno.fetch_add(1, Ordering::SeqCst) + } + + fn current_seqno(&self) -> SeqNo { + self.seqno.load(Ordering::SeqCst) + } + + fn has_batch(&self) -> bool { + self.batch.borrow().is_some() + } + + fn begin_batch(&self) -> bool { + let mut batch = self.batch.borrow_mut(); + if batch.is_some() { + false + } else { + *batch = Some(Batch::default()); + true + } + } + + fn rollback_batch(&self) -> bool { + let mut batch = self.batch.borrow_mut(); + if batch.is_some() { + *batch = None; + true + } else { + false + } + } + + fn commit_batch(&self) -> bool { + let mut batch_ref = self.batch.borrow_mut(); + if let Some(batch) = batch_ref.take() { + for op in batch.ops { + match op { + BatchOp::Set { key, value } => { + let seqno = self.next_seqno(); + self.tree.insert(key.as_bytes(), value.as_bytes(), seqno); + } + BatchOp::Del { key } => { + let seqno = self.next_seqno(); + self.tree.remove(key.as_bytes(), seqno); + } + } + } + true + } else { + false + } + } + + fn batch_set(&self, key: String, value: String) { + let mut batch = self.batch.borrow_mut(); + if let Some(ref mut b) = *batch { + b.state.insert(key.clone(), Some(value.clone())); + b.ops.push(BatchOp::Set { key, value }); + } + } + + fn batch_del(&self, key: String) { + let mut batch = self.batch.borrow_mut(); + if let Some(ref mut b) = *batch { + b.state.insert(key.clone(), None); + b.ops.push(BatchOp::Del { key }); + } + } + + fn batch_get(&self, key: &str) -> Option> { + let batch = self.batch.borrow(); + if let Some(ref b) = *batch { + b.state.get(key).cloned() + } else { + None + } + } + + fn batch_len(&self) -> usize { + let batch = self.batch.borrow(); + if let Some(ref b) = *batch { + b.ops.len() + } else { + 0 + } + } +} + +fn print_info(session: &Session) { + // If there's an active batch, show its contents + let batch = session.batch.borrow(); + if let Some(ref b) = *batch { + println!("Active batch ({} operations):", b.ops.len()); + for (i, op) in b.ops.iter().enumerate() { + match op { + BatchOp::Set { key, value } => { + let value_display = if value.len() > 50 { + format!("{}...", &value[..50]) + } else { + value.clone() + }; + println!(" {}. SET {} = {}", i + 1, key, value_display); + } + BatchOp::Del { key } => { + println!(" {}. DEL {}", i + 1, key); + } + } + } + println!(); + } + drop(batch); + + println!("Path: {}", session.path.display()); + println!("Tables: {}", session.tree.table_count()); + println!("Approximate items: {}", session.tree.approximate_len()); + println!("Disk space: {} bytes", session.tree.disk_space()); + println!("Sealed memtables: {}", session.tree.sealed_memtable_count()); + println!("Current seqno: {}", session.current_seqno()); + if let Some(seqno) = session.tree.get_highest_seqno() { + println!("Highest seqno: {}", seqno); + } + if let Some(seqno) = session.tree.get_highest_persisted_seqno() { + println!("Highest persisted seqno: {}", seqno); + } + println!("L0 runs: {}", session.tree.l0_run_count()); + for level in 0..7 { + if let Some(count) = session.tree.level_table_count(level) { + if count > 0 { + println!(" L{} tables: {}", level, count); + } + } + } +} + +fn handle_get(session: &Session, key: &str) { + // Check batch first if one exists + if let Some(batch_value) = session.batch_get(key) { + match batch_value { + Some(value) => println!("{}", value), + None => println!("(deleted in batch)"), + } + return; + } + + match session.tree.get(key.as_bytes(), SeqNo::MAX) { + Ok(Some(value)) => match std::str::from_utf8(&value) { + Ok(s) => println!("{}", s), + Err(_) => println!("{:?}", value.as_ref()), + }, + Ok(None) => println!("(not found)"), + Err(e) => eprintln!("Error: {}", e), + } +} + +fn handle_set(session: &Session, key: &str, value: &str, flush: bool) { + // If batch exists, add to batch instead + if session.has_batch() { + session.batch_set(key.to_string(), value.to_string()); + println!("OK (batched, ready to commit)"); + return; + } + + let seqno = session.next_seqno(); + session.tree.insert(key.as_bytes(), value.as_bytes(), seqno); + if flush { + if let Err(e) = session.tree.flush_active_memtable(0) { + eprintln!("Error flushing: {}", e); + return; + } + } + println!("OK (set)"); +} + +fn handle_del(session: &Session, key: &str, weak: bool, flush: bool) { + // If batch exists and not weak delete, add to batch instead + if session.has_batch() && !weak { + session.batch_del(key.to_string()); + println!("OK (batched, ready to commit)"); + return; + } + + // Note: weak delete is not supported in batches, so we always do direct delete + let seqno = session.next_seqno(); + if weak { + session.tree.remove_weak(key.as_bytes(), seqno); + } else { + session.tree.remove(key.as_bytes(), seqno); + } + if flush { + if let Err(e) = session.tree.flush_active_memtable(0) { + eprintln!("Error flushing: {}", e); + return; + } + } + println!("OK"); +} + +fn handle_scan(session: &Session, prefix: Option<&str>, long: bool) { + if session.has_batch() { + eprintln!("Warning: scan ignores uncommitted batch operations"); + } + + if long { + // Long mode: iterate over memtable directly to see all internal values + // including tombstones, then show persisted entries + handle_scan_long(session, prefix); + } else { + // Normal mode: use regular iterator (filters tombstones) + handle_scan_normal(session, prefix); + } +} + +fn handle_scan_normal(session: &Session, prefix: Option<&str>) { + let iter: Box + Send> = match prefix { + Some(p) => session.tree.prefix(p.as_bytes(), SeqNo::MAX, None), + None => session.tree.iter(SeqNo::MAX, None), + }; + + let mut count = 0; + for item in iter { + match item.into_inner() { + Ok((key, value)) => { + let key_str = String::from_utf8_lossy(&key); + let value_str = match std::str::from_utf8(&value) { + Ok(s) => s.to_string(), + Err(_) => format!("{:?}", value.as_ref()), + }; + println!("{} = {}", key_str, value_str); + count += 1; + } + Err(e) => { + eprintln!("Error reading item: {}", e); + } + } + } + println!("OK ({} items)", count); +} + +fn format_value_type(vt: ValueType) -> &'static str { + match vt { + ValueType::Value => "Value", + ValueType::Tombstone => "Tombstone", + ValueType::WeakTombstone => "WeakTombstone", + ValueType::Indirection => "Indirection", + } +} + +fn handle_scan_long(session: &Session, prefix: Option<&str>) { + let mut count = 0; + let mut tombstone_count = 0; + + // First, scan the active memtable (includes tombstones) + let memtable = session.tree.active_memtable(); + println!("=== Active Memtable ==="); + for item in memtable.iter() { + let key = &item.key.user_key; + let key_str = String::from_utf8_lossy(key); + + // Apply prefix filter if specified + if let Some(p) = prefix { + if !key_str.starts_with(p) { + continue; + } + } + + let value_type = format_value_type(item.key.value_type); + let value_len = item.value.len(); + let is_tombstone = item.key.value_type.is_tombstone(); + + if is_tombstone { + println!( + "{} [len={}, seqno={}, type={}]", + key_str, value_len, item.key.seqno, value_type + ); + tombstone_count += 1; + } else { + let value_str = match std::str::from_utf8(&item.value) { + Ok(s) => { + if s.len() > 50 { + format!("{}...", &s[..50]) + } else { + s.to_string() + } + } + Err(_) => format!("{:?}", item.value.as_ref()), + }; + println!( + "{} = {} [len={}, seqno={}, type={}]", + key_str, value_str, value_len, item.key.seqno, value_type + ); + } + count += 1; + } + + // Then show persisted entries (from disk) + println!("\n=== Persisted (on disk) ==="); + let iter: Box + Send> = match prefix { + Some(p) => session.tree.prefix(p.as_bytes(), SeqNo::MAX, None), + None => session.tree.iter(SeqNo::MAX, None), + }; + + let mut persisted_count = 0; + for item in iter { + match item.into_inner() { + Ok((key, value)) => { + // Skip if this key is in the memtable (already shown) + if memtable.get(&key, SeqNo::MAX).is_some() { + continue; + } + + let key_str = String::from_utf8_lossy(&key); + let value_str = match std::str::from_utf8(&value) { + Ok(s) => s.to_string(), + Err(_) => format!("{:?}", value.as_ref()), + }; + + // Get internal entry for metadata + match session.tree.get_internal_entry(&key, SeqNo::MAX) { + Ok(Some(internal)) => { + let value_type = format_value_type(internal.key.value_type); + let value_len = value.len(); + println!( + "{} = {} [len={}, seqno={}, type={}]", + key_str, value_str, value_len, internal.key.seqno, value_type + ); + } + Ok(None) => { + println!("{} = {} [no internal entry]", key_str, value_str); + } + Err(e) => { + println!("{} = {} [error: {}]", key_str, value_str, e); + } + } + persisted_count += 1; + count += 1; + } + Err(e) => { + eprintln!("Error reading item: {}", e); + } + } + } + + println!( + "\n({} total items, {} in memtable, {} persisted, {} tombstones)", + count, + count - persisted_count, + persisted_count, + tombstone_count + ); + + // Also show tree-level tombstone statistics + let tree_tombstones = session.tree.tombstone_count(); + let tree_weak_tombstones = session.tree.weak_tombstone_count(); + if tree_tombstones > 0 || tree_weak_tombstones > 0 { + println!( + "Tree statistics: {} tombstones, {} weak tombstones (approximate)", + tree_tombstones, tree_weak_tombstones + ); + } +} + +fn handle_range(session: &Session, start: &str, end: &str) { + if session.has_batch() { + eprintln!("Warning: range ignores uncommitted batch operations"); + } + + let iter = session + .tree + .range(start.as_bytes()..end.as_bytes(), SeqNo::MAX, None); + + let mut count = 0; + for item in iter { + match item.into_inner() { + Ok((key, value)) => { + let key_str = String::from_utf8_lossy(&key); + let value_str = match std::str::from_utf8(&value) { + Ok(s) => s.to_string(), + Err(_) => format!("{:?}", value.as_ref()), + }; + println!("{} = {}", key_str, value_str); + count += 1; + } + Err(e) => { + eprintln!("Error reading item: {}", e); + } + } + } + println!("({} items)", count); +} + +fn handle_count(session: &Session) { + match session.tree.len(SeqNo::MAX, None) { + Ok(count) => println!("{}", count), + Err(e) => eprintln!("Error: {}", e), + } +} + +fn handle_flush(session: &Session) { + if session.has_batch() { + eprintln!( + "WARNING: Active batch ({} operations) still pending", + session.batch_len() + ); + } + match session.tree.flush_active_memtable(0) { + Ok(()) => println!("OK (flushed)"), + Err(e) => eprintln!("Error: {}", e), + } +} + +fn handle_compact(session: &Session) { + match session.tree.major_compact(64 * 1024 * 1024, 0) { + Ok(()) => println!("OK"), + Err(e) => eprintln!("Error: {}", e), + } +} + +/// Result of executing a command +enum CommandResult { + Continue, + Exit, +} + +/// Execute a parsed command +fn execute_command(session: &Session, cmd: ToolCommand, auto_flush: bool) -> CommandResult { + match cmd { + ToolCommand::Get { key } => handle_get(session, &key), + ToolCommand::Set { key, value } => handle_set(session, &key, &value, auto_flush), + ToolCommand::Del { key, weak } => handle_del(session, &key, weak, auto_flush), + ToolCommand::Scan { prefix, long } => handle_scan(session, prefix.as_deref(), long), + ToolCommand::ScanLong { prefix } => handle_scan_long(session, prefix.as_deref()), + ToolCommand::Range { start, end } => handle_range(session, &start, &end), + ToolCommand::Count => handle_count(session), + ToolCommand::Flush => handle_flush(session), + ToolCommand::Compact => handle_compact(session), + ToolCommand::Info => print_info(session), + } + CommandResult::Continue +} + +/// Execute a shell-only command +fn execute_shell_command(session: &Session, cmd: ShellCommand, auto_flush: bool) -> CommandResult { + match cmd { + ShellCommand::ToolCmd(tool_cmd) => execute_command(session, tool_cmd, auto_flush), + ShellCommand::Exit => { + if session.has_batch() { + eprintln!("Warning: discarding uncommitted batch"); + session.rollback_batch(); + } + handle_flush(session); + CommandResult::Exit + } + ShellCommand::Abort => { + if session.has_batch() { + eprintln!("Warning: discarding uncommitted batch"); + } + CommandResult::Exit + } + ShellCommand::Begin => { + if session.begin_batch() { + println!("OK (batch started)"); + } else { + eprintln!("Error: batch already active"); + } + CommandResult::Continue + } + ShellCommand::Commit => { + if session.commit_batch() { + println!("OK (batch committed, ready to flush)"); + } else { + eprintln!("Error: no active batch"); + } + CommandResult::Continue + } + ShellCommand::Rollback => { + if session.rollback_batch() { + println!("OK (batch rolled back)"); + } else { + eprintln!("Error: no active batch"); + } + CommandResult::Continue + } + } +} + +/// Parse and run a shell command line +fn run_shell_command(session: &Session, line: &str) -> CommandResult { + let line = line.trim(); + if line.is_empty() { + return CommandResult::Continue; + } + + let tokens = match shlex::split(line) { + Some(t) if !t.is_empty() => t, + Some(_) => return CommandResult::Continue, + None => { + eprintln!("error: unclosed quote"); + return CommandResult::Continue; + } + }; + + // Parse remaining commands + match ShellArgs::try_parse_from(&tokens) { + Ok(args) => execute_shell_command(session, args.command, false), + Err(e) => { + // Print clap's error message + eprintln!("{}", e); + CommandResult::Continue + } + } +} + +fn run_shell(session: &Session) { + if io::stdin().is_terminal() { + run_shell_interactive(session); + } else { + run_shell_non_interactive(session); + } +} + +fn run_shell_interactive(session: &Session) { + println!("Welcome to the LSM-tree shell"); + println!("Type 'help' for available commands, 'exit' to quit.\n"); + + let mut rl = match DefaultEditor::new() { + Ok(editor) => editor, + Err(e) => { + eprintln!("Error initializing line editor: {}", e); + return; + } + }; + + loop { + match rl.readline("lsm> ") { + Ok(line) => { + rl.add_history_entry(&line); + if let CommandResult::Exit = run_shell_command(session, &line) { + break; + } + } + Err(rustyline::error::ReadlineError::Interrupted) => { + // Ignore Ctrl+C, just show a new prompt + continue; + } + Err(rustyline::error::ReadlineError::Eof) => { + println!(); + break; + } + Err(e) => { + eprintln!("Error reading input: {}", e); + break; + } + } + } +} + +fn run_shell_non_interactive(session: &Session) { + let stdin = io::stdin(); + let mut stdout = io::stdout(); + + loop { + if stdout.flush().is_err() { + die!("can't flush stdout"); + } + + let mut line = String::new(); + match stdin.lock().read_line(&mut line) { + Ok(0) => { + // EOF + break; + } + Ok(_) => { + if let CommandResult::Exit = run_shell_command(session, &line) { + break; + } + } + Err(e) => { + die!("Error reading input: {}", e); + } + } + } +} + +fn main() { + let args = ToolArgs::parse(); + let (verbose, level_filter) = init_tracing(args.quiet, args.verbose); + + let cmd = ToolArgs::command(); + + info!( + "starting {} ({} {}), log level: {level_filter}", + cmd.get_name(), + env!("CARGO_PKG_NAME"), + env!("CARGO_PKG_VERSION") + ); + + let session = match Session::open(args.lsm_path, args.separation_threshold, args.blob_tree) { + Ok(s) => s, + Err(e) => { + let note = if verbose { + "" + } else { + ". Note: Use -v (one or multiple times) for more information" + }; + die!("Error opening tree: {}{}", e, note); + } + }; + + match args.command { + Some(cmd) => { + execute_command(&session, cmd, true); + } + None => run_shell(&session), + } +} diff --git a/tests/tool_tests.rs b/tests/tool_tests.rs new file mode 100644 index 000000000..4046fc07f --- /dev/null +++ b/tests/tool_tests.rs @@ -0,0 +1,1794 @@ +// Copyright (c) 2024-present, fjall-rs +// This source code is licensed under both the Apache 2.0 and MIT License +// (found in the LICENSE-* files in the repository) + +#![cfg(feature = "tool")] + +//! Integration tests for the `lsm` CLI tool binary. +//! +//! These tests run the actual binary and verify its behavior. + +use std::io::Write; +use std::path::PathBuf; +use std::process::{Command, Stdio}; + +/// Get the path to the lsm binary +fn lsm_binary() -> std::path::PathBuf { + let mut path = std::env::current_exe().unwrap(); + path.pop(); // remove test binary name + path.pop(); // remove deps + path.push("lsm"); + path +} + +/// Run the lsm binary with flags and CLI arguments +fn run_cli(db_path: &std::path::Path, flags: &[&str], args: &[&str]) -> (String, String, bool) { + let mut cmd_args = flags.to_vec(); + cmd_args.push(db_path.to_str().unwrap()); + cmd_args.extend(args); + + let output = Command::new(lsm_binary()) + .args(&cmd_args) + .output() + .expect("Failed to execute lsm binary"); + + let stdout = String::from_utf8_lossy(&output.stdout).to_string(); + let stderr = String::from_utf8_lossy(&output.stderr).to_string(); + (stdout, stderr, output.status.success()) +} + +/// Run the lsm binary in shell mode with piped input +fn run_shell(db_path: &std::path::Path, input: &str) -> (String, String, bool) { + let mut child = Command::new(lsm_binary()) + .arg(db_path) + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .spawn() + .expect("Failed to spawn lsm binary"); + + { + let stdin = child.stdin.as_mut().expect("Failed to open stdin"); + stdin + .write_all(input.as_bytes()) + .expect("Failed to write to stdin"); + } + + let output = child.wait_with_output().expect("Failed to read output"); + let stdout = String::from_utf8_lossy(&output.stdout).to_string(); + let stderr = String::from_utf8_lossy(&output.stderr).to_string(); + (stdout, stderr, output.status.success()) +} + +/// Create a temporary directory for test databases +fn temp_db() -> tempfile::TempDir { + tempfile::tempdir().expect("Failed to create temp dir") +} + +// ============================================================================ +// CLI Command Tests +// ============================================================================ + +#[test] +fn test_cli_set_and_get() { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + // Set a value + let (stdout, stderr, success) = run_cli(&db_path, &[], &["set", "mykey", "myvalue"]); + assert!(success, "set failed: {}", stderr); + assert!(stdout.contains("OK"), "Expected OK in output: {}", stdout); + + // Get the value back + let (stdout, stderr, success) = run_cli(&db_path, &[], &["get", "mykey"]); + assert!(success, "get failed: {}", stderr); + assert!( + stdout.trim() == "myvalue", + "Expected 'myvalue', got: {}", + stdout + ); +} + +#[test] +fn test_cli_get_nonexistent() { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + let (stdout, _, success) = run_cli(&db_path, &[], &["get", "nonexistent"]); + assert!(success); + assert!( + stdout.contains("(not found)"), + "Expected '(not found)': {}", + stdout + ); +} + +#[test] +fn test_cli_del() { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + // Set then delete + run_cli(&db_path, &[], &["set", "key1", "value1"]); + let (stdout, stderr, success) = run_cli(&db_path, &[], &["del", "key1"]); + assert!(success, "del failed: {}", stderr); + assert!(stdout.contains("OK"), "Expected OK: {}", stdout); + + // Verify deleted + let (stdout, _, _) = run_cli(&db_path, &[], &["get", "key1"]); + assert!( + stdout.contains("(not found)"), + "Key should be deleted: {}", + stdout + ); +} + +#[test] +fn test_cli_scan() { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + // Add some data + run_cli(&db_path, &[], &["set", "apple", "red"]); + run_cli(&db_path, &[], &["set", "banana", "yellow"]); + run_cli(&db_path, &[], &["set", "cherry", "red"]); + + // Scan all + let (stdout, _, success) = run_cli(&db_path, &[], &["scan"]); + assert!(success); + assert!(stdout.contains("apple = red")); + assert!(stdout.contains("banana = yellow")); + assert!(stdout.contains("cherry = red")); + assert!(stdout.contains("(3 items)")); +} + +#[test] +fn test_cli_scan_with_prefix() { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + run_cli(&db_path, &[], &["set", "user:1", "alice"]); + run_cli(&db_path, &[], &["set", "user:2", "bob"]); + run_cli(&db_path, &[], &["set", "item:1", "widget"]); + + // Scan with prefix + let (stdout, _, success) = run_cli(&db_path, &[], &["scan", "user:"]); + assert!(success); + assert!(stdout.contains("user:1 = alice")); + assert!(stdout.contains("user:2 = bob")); + assert!(!stdout.contains("item:1")); + assert!(stdout.contains("(2 items)")); +} + +#[test] +fn test_cli_scan_aliases() { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + run_cli(&db_path, &[], &["set", "key1", "val1"]); + + // Test 'list' alias + let (stdout1, _, success1) = run_cli(&db_path, &[], &["list"]); + assert!(success1); + assert!(stdout1.contains("key1 = val1")); + + // Test 'ls' alias + let (stdout2, _, success2) = run_cli(&db_path, &[], &["ls"]); + assert!(success2); + assert!(stdout2.contains("key1 = val1")); +} + +#[test] +fn test_cli_range() { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + run_cli(&db_path, &[], &["set", "a", "1"]); + run_cli(&db_path, &[], &["set", "b", "2"]); + run_cli(&db_path, &[], &["set", "c", "3"]); + run_cli(&db_path, &[], &["set", "d", "4"]); + + // Range [b, d) should include b and c + let (stdout, _, success) = run_cli(&db_path, &[], &["range", "b", "d"]); + assert!(success); + assert!(stdout.contains("b = 2")); + assert!(stdout.contains("c = 3")); + assert!(!stdout.contains("a = 1")); + assert!(!stdout.contains("d = 4")); + assert!(stdout.contains("(2 items)")); +} + +#[test] +fn test_cli_count() { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + run_cli(&db_path, &[], &["set", "k1", "v1"]); + run_cli(&db_path, &[], &["set", "k2", "v2"]); + run_cli(&db_path, &[], &["set", "k3", "v3"]); + + let (stdout, _, success) = run_cli(&db_path, &[], &["count"]); + assert!(success); + assert!(stdout.trim() == "3", "Expected 3, got: {}", stdout); +} + +#[test] +fn test_cli_info() { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + run_cli(&db_path, &[], &["set", "key", "value"]); + + let (stdout, _, success) = run_cli(&db_path, &[], &["info"]); + assert!(success); + assert!(stdout.contains("Path:")); + assert!(stdout.contains("Tables:")); + assert!(stdout.contains("Approximate items:")); +} + +#[test] +fn test_cli_flush() { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + run_cli(&db_path, &[], &["set", "key", "value"]); + let (stdout, _, success) = run_cli(&db_path, &[], &["flush"]); + assert!(success); + assert!( + stdout.contains("OK (flushed)"), + "Expected flushed: {}", + stdout + ); +} + +// ============================================================================ +// Shell Mode Tests +// ============================================================================ + +#[test] +fn test_shell_basic_commands() { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + let (stdout, stderr, success) = + run_shell(&db_path, "set foo bar\nget foo\nscan\ncount\nexit\n"); + + assert!(success, "Shell failed: {}", stderr); + assert!(stdout.contains("OK (set)"), "set failed: {}", stdout); + assert!(stdout.contains("bar"), "get failed: {}", stdout); + assert!(stdout.contains("foo = bar"), "scan failed: {}", stdout); + assert!(stdout.contains("1"), "count failed: {}", stdout); +} + +#[test] +fn test_shell_quoted_values() { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + let (stdout, _, success) = run_shell( + &db_path, + "set mykey \"hello world with spaces\"\nget mykey\nexit\n", + ); + + assert!(success); + assert!(stdout.contains("hello world with spaces")); +} + +#[test] +fn test_shell_single_quotes() { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + let (stdout, _, success) = run_shell( + &db_path, + "set mykey 'single quoted value'\nget mykey\nexit\n", + ); + + assert!(success); + assert!(stdout.contains("single quoted value")); +} + +#[test] +fn test_shell_exit_flushes() { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + // Set value and exit (should flush) + run_shell(&db_path, "set persistent_key persistent_value\nexit\n"); + + // Reopen and verify data persisted + let (stdout, _, success) = run_cli(&db_path, &[], &["get", "persistent_key"]); + assert!(success); + assert!( + stdout.contains("persistent_value"), + "Data should persist after exit: {}", + stdout + ); +} + +#[test] +fn test_shell_abort_no_flush() { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + // Set value and abort (should NOT flush) + run_shell(&db_path, "set temp_key temp_value\nabort\n"); + + // Reopen and verify data did NOT persist + let (stdout, _, _) = run_cli(&db_path, &[], &["get", "temp_key"]); + assert!( + stdout.contains("(not found)"), + "Data should not persist after abort: {}", + stdout + ); +} + +#[test] +fn test_shell_quit_alias() { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + let (stdout, _, success) = run_shell(&db_path, "set k v\nquit\n"); + assert!(success); + assert!(stdout.contains("OK (flushed)")); +} + +// ============================================================================ +// Batch Operation Tests +// ============================================================================ + +#[test] +fn test_batch_begin_commit() { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + let (stdout, stderr, success) = run_shell( + &db_path, + "begin\nset k1 v1\nset k2 v2\nset k3 v3\ndel k2\ncommit\nscan\nexit\n", + ); + + assert!(success, "Shell failed: {}", stderr); + assert!( + stdout.contains("OK (batch started)"), + "begin failed: {}", + stdout + ); + assert!( + stdout.contains("OK (batched, ready to commit)"), + "batched set failed: {}", + stdout + ); + assert!( + stdout.contains("OK (batch committed"), + "commit failed: {}", + stdout + ); + assert!(stdout.contains("k1 = v1")); + assert!(!stdout.contains("k2 = v2")); + assert!(stdout.contains("k3 = v3")); +} + +#[test] +fn test_batch_rollback() { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + let (stdout, _, success) = run_shell( + &db_path, + "set existing before\nbegin\nset k1 v1\nrollback\nscan\nexit\n", + ); + + assert!(success); + assert!(stdout.contains("OK (batch rolled back)")); + // k1 should not exist after rollback + assert!(!stdout.contains("k1 = v1")); + // existing key should still be there + assert!(stdout.contains("existing = before")); +} + +#[test] +fn test_batch_get_reads_from_batch() { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + let (stdout, _, success) = run_shell( + &db_path, + "begin\nset newkey newvalue\nget newkey\nrollback\nexit\n", + ); + + assert!(success); + // get should return the batched value + assert!( + stdout.contains("newvalue"), + "Should read from batch: {}", + stdout + ); +} + +#[test] +fn test_batch_del_in_batch() { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + let (stdout, _, success) = run_shell( + &db_path, + "set mykey myvalue\nflush\nbegin\ndel mykey\nget mykey\nrollback\nexit\n", + ); + + assert!(success); + assert!( + stdout.contains("(deleted in batch)"), + "Should show deleted in batch: {}", + stdout + ); +} + +#[test] +fn test_batch_info_shows_operations() { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + let (stdout, _, success) = + run_shell(&db_path, "begin\nset k1 v1\ndel k2\ninfo\nrollback\nexit\n"); + + assert!(success); + assert!( + stdout.contains("Active batch (2 operations)"), + "Should show batch info: {}", + stdout + ); + assert!(stdout.contains("SET k1 = v1")); + assert!(stdout.contains("DEL k2")); +} + +#[test] +fn test_batch_double_begin_error() { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + let (_, stderr, success) = run_shell(&db_path, "begin\nbegin\nrollback\nexit\n"); + + assert!(success); + assert!( + stderr.contains("batch already active"), + "Should error on double begin: {}", + stderr + ); +} + +#[test] +fn test_commit_without_batch_error() { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + let (_, stderr, success) = run_shell(&db_path, "commit\nexit\n"); + + assert!(success); + assert!( + stderr.contains("no active batch"), + "Should error: {}", + stderr + ); +} + +#[test] +fn test_rollback_without_batch_error() { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + let (_, stderr, success) = run_shell(&db_path, "rollback\nexit\n"); + + assert!(success); + assert!( + stderr.contains("no active batch"), + "Should error: {}", + stderr + ); +} + +#[test] +fn test_flush_warns_about_pending_batch() { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + let (_, stderr, success) = run_shell(&db_path, "begin\nset k v\nflush\nrollback\nexit\n"); + + assert!(success); + assert!( + stderr.contains("WARNING: Active batch"), + "Should warn about pending batch: {}", + stderr + ); +} + +#[test] +fn test_scan_warns_about_batch() { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + let (_, stderr, success) = run_shell(&db_path, "begin\nset k v\nscan\nrollback\nexit\n"); + + assert!(success); + assert!( + stderr.contains("scan ignores uncommitted batch"), + "Should warn: {}", + stderr + ); +} + +#[test] +fn test_range_warns_about_batch() { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + let (_, stderr, success) = run_shell(&db_path, "begin\nset k v\nrange a z\nrollback\nexit\n"); + + assert!(success); + assert!( + stderr.contains("range ignores uncommitted batch"), + "Should warn: {}", + stderr + ); +} + +#[test] +fn test_exit_warns_about_uncommitted_batch() { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + let (_, stderr, success) = run_shell(&db_path, "begin\nset k v\nexit\n"); + + assert!(success); + assert!( + stderr.contains("discarding uncommitted batch"), + "Should warn: {}", + stderr + ); +} + +// ============================================================================ +// Scan Long/Verbose Mode Tests +// ============================================================================ + +#[test] +fn test_scan_long_flag() { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + let (stdout, _, success) = run_shell(&db_path, "set k1 v1\nscan -l\nexit\n"); + + assert!(success); + assert!( + stdout.contains("=== Active Memtable ==="), + "Should show memtable section: {}", + stdout + ); + assert!(stdout.contains("seqno="), "Should show seqno: {}", stdout); + assert!( + stdout.contains("type=Value"), + "Should show type: {}", + stdout + ); +} + +#[test] +fn test_scan_memtable_match_no_match() { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + let (stdout, _, success) = run_shell(&db_path, "set k1 v1\nset x2 v2\nscan -l k\nexit\n"); + + assert!(success); + assert!( + stdout.contains("=== Active Memtable ==="), + "Should show memtable section: {}", + stdout + ); + assert!( + stdout.contains("k1 = v1"), + "Should show 'k1 = v1' as we scanned for k: {}", + stdout + ); + assert!( + !stdout.contains("x2 = v2"), + "Should not contain 'x2 = v2' as we scanned for k: {}", + stdout + ); +} + +#[test] +fn test_scan_long_shows_tombstones() { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + let (stdout, _, success) = run_shell(&db_path, "set k1 v1\ndel k1\nscan -l\nexit\n"); + + assert!(success); + assert!( + stdout.contains("type=Tombstone"), + "Should show tombstone: {}", + stdout + ); +} + +#[test] +fn test_ll_alias_for_scan_long() { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + let (stdout, _, success) = run_shell(&db_path, "set k1 v1\nll\nexit\n"); + + assert!(success); + assert!( + stdout.contains("=== Active Memtable ==="), + "ll should be scan long: {}", + stdout + ); +} + +// ============================================================================ +// Data Persistence Tests +// ============================================================================ + +#[test] +fn test_data_persists_across_sessions() { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + // First session: write and flush + run_shell(&db_path, "set persistent data\nflush\nabort\n"); + + // Second session: read + let (stdout, _, success) = run_cli(&db_path, &[], &["get", "persistent"]); + assert!(success); + assert!(stdout.contains("data"), "Data should persist: {}", stdout); +} + +#[test] +fn test_unflushed_data_lost_on_abort() { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + // Write without flush and abort + run_shell(&db_path, "set unflushed value\nabort\n"); + + // Verify lost + let (stdout, _, _) = run_cli(&db_path, &[], &["get", "unflushed"]); + assert!( + stdout.contains("(not found)"), + "Unflushed data should be lost: {}", + stdout + ); +} + +// ============================================================================ +// Error Handling Tests +// ============================================================================ + +#[test] +fn test_unclosed_quote_error() { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + let (_, stderr, success) = run_shell(&db_path, "set key \"unclosed\nexit\n"); + + assert!(success); + assert!( + stderr.contains("unclosed quote"), + "Should error on unclosed quote: {}", + stderr + ); +} + +#[test] +fn test_unknown_command_error() { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + let (_, stderr, success) = run_shell(&db_path, "notacommand\nexit\n"); + + assert!(success); + assert!( + stderr.contains("unrecognized subcommand") || stderr.contains("error"), + "Should error on unknown command: {}", + stderr + ); +} + +#[test] +fn test_missing_argument_error() { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + let (_, stderr, success) = run_shell(&db_path, "set onlykey\nexit\n"); + + assert!(success); + assert!( + stderr.contains("required") || stderr.contains("VALUE"), + "Should error on missing argument: {}", + stderr + ); +} + +// ============================================================================ +// Help Tests +// ============================================================================ + +#[test] +fn test_cli_help() { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + let output = Command::new(lsm_binary()) + .arg(db_path) + .arg("--help") + .output() + .expect("Failed to run"); + + let stdout = String::from_utf8_lossy(&output.stdout); + assert!(stdout.contains("CLI tool for interacting with LSM trees")); + assert!(stdout.contains("get")); + assert!(stdout.contains("set")); + assert!(stdout.contains("scan")); +} + +#[test] +fn test_shell_help_command() { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + // The shell uses clap's built-in help, which outputs to stderr + let (stdout, stderr, success) = run_shell(&db_path, "help\nexit\n"); + + assert!(success); + // Help output goes to stdout or stderr depending on clap version + let combined = format!("{}{}", stdout, stderr); + assert!( + combined.contains("Available Commands") + || combined.contains("get") + || combined.contains("Commands:"), + "Help should show commands. stdout: {}, stderr: {}", + stdout, + stderr + ); +} + +// ============================================================================ +// Edge Cases +// ============================================================================ + +#[test] +fn test_empty_database_scan() { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + let (stdout, _, success) = run_cli(&db_path, &[], &["scan"]); + assert!(success); + assert!(stdout.contains("(0 items)")); +} + +#[test] +fn test_empty_database_count() { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + let (stdout, _, success) = run_cli(&db_path, &[], &["count"]); + assert!(success); + assert!(stdout.trim() == "0"); +} + +#[test] +fn test_special_characters_in_key() { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + let (stdout, _, success) = run_shell( + &db_path, + "set \"key:with:colons\" value\nget \"key:with:colons\"\nexit\n", + ); + + assert!(success); + assert!(stdout.contains("value")); +} + +#[test] +fn test_special_characters_in_value() { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + let (stdout, _, success) = run_shell( + &db_path, + "set mykey \"value with = equals and : colons\"\nget mykey\nexit\n", + ); + + assert!(success); + assert!(stdout.contains("value with = equals and : colons")); +} + +#[test] +fn test_overwrite_key() { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + let (stdout, _, success) = run_shell( + &db_path, + "set mykey original\nset mykey updated\nget mykey\nexit\n", + ); + + assert!(success); + assert!(stdout.contains("updated")); + // Should not contain 'original' in the get output (might be in set output) + let lines: Vec<&str> = stdout.lines().collect(); + let get_output = lines.iter().find(|l| !l.contains("OK")).unwrap(); + assert!( + get_output.contains("updated") && !get_output.contains("original"), + "Should show updated value: {}", + get_output + ); +} + +#[test] +fn test_delete_nonexistent_key() { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + // Deleting a non-existent key should succeed (creates a tombstone) + let (stdout, _, success) = run_cli(&db_path, &[], &["del", "nonexistent"]); + assert!(success); + assert!(stdout.contains("OK")); +} + +#[test] +fn test_compact_empty_database() { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + let (stdout, _, success) = run_cli(&db_path, &[], &["compact"]); + assert!(success); + assert!(stdout.contains("OK")); +} + +#[test] +fn test_multiple_flush_calls() { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + let (stdout, _, success) = run_shell(&db_path, "set k1 v1\nflush\nflush\nflush\nexit\n"); + + assert!(success); + // Multiple flushes should all succeed + let flush_count = stdout.matches("OK (flushed)").count(); + assert!( + flush_count >= 3, + "Should have multiple flush OKs: {}", + stdout + ); +} + +#[test] +fn test_long_value_truncated_in_verbose_scan() { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + // Create a value longer than 50 characters + let long_value = "a".repeat(100); + + // Use scan -l (verbose/long mode) which truncates long values + let (stdout, _, success) = run_shell( + &db_path, + &format!("set mykey {}\nscan -l\nexit\n", long_value), + ); + + assert!(success); + // Value should be truncated with "..." in verbose scan output + assert!( + stdout.contains("..."), + "Long value should be truncated with '...': {}", + stdout + ); + // Should show exactly 50 'a' characters followed by "..." + assert!( + stdout.contains(&format!("{}...", "a".repeat(50))), + "Should show first 50 chars followed by '...': {}", + stdout + ); +} + +#[test] +fn test_long_value_truncated_in_batch_info() { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + // Create a value longer than 50 characters + let long_value = "x".repeat(75); + + let (stdout, _, success) = run_shell( + &db_path, + &format!("begin\nset batchkey {}\ninfo\nrollback\nexit\n", long_value), + ); + + assert!(success); + // Value should be truncated with "..." in batch info output + assert!( + stdout.contains("..."), + "Long value in batch info should be truncated with '...': {}", + stdout + ); + // Should show exactly 50 'x' characters followed by "..." + assert!( + stdout.contains(&format!("{}...", "x".repeat(50))), + "Should show first 50 chars followed by '...': {}", + stdout + ); +} + +#[test] +fn test_shell_empty_lines() { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + // Send empty lines interspersed with commands + let (stdout, _, success) = run_shell(&db_path, "\n\nset key1 value1\n\n\nget key1\n\nexit\n"); + + assert!(success); + // Commands should still work despite empty lines + assert!(stdout.contains("OK"), "Set should succeed: {}", stdout); + assert!( + stdout.contains("value1"), + "Get should return value: {}", + stdout + ); +} + +#[test] +fn test_shell_whitespace_only_lines() { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + // Send whitespace-only lines interspersed with commands + let (stdout, _, success) = run_shell( + &db_path, + " \n\t\nset key1 value1\n \n\t\t\nget key1\nexit\n", + ); + + assert!(success); + // Commands should still work despite whitespace-only lines + assert!(stdout.contains("OK"), "Set should succeed: {}", stdout); + assert!( + stdout.contains("value1"), + "Get should return value: {}", + stdout + ); +} + +#[test] +fn test_shell_comment_lines() { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + // Send comment lines (shlex treats # as start of comment, resulting in zero tokens) + let (stdout, _, success) = run_shell( + &db_path, + "# This is a comment\nset key1 value1\n# Another comment\nget key1\n#\nexit\n", + ); + + assert!(success); + // Commands should still work, comments should be ignored + assert!(stdout.contains("OK"), "Set should succeed: {}", stdout); + assert!( + stdout.contains("value1"), + "Get should return value: {}", + stdout + ); +} + +#[test] +fn test_batch_abort_without_commit() { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + // Begin a batch, set some keys, then abort without committing + let (stdout, stderr, success) = run_shell( + &db_path, + "begin\nset key1 val1\nset key2 val2\nset key3 val3\nabort\n", + ); + + assert!(success); + // Should see batch started + assert!( + stdout.contains("batch started"), + "Should see batch started: {}", + stdout + ); + // Should see batched confirmations + assert!( + stdout.contains("batched"), + "Should see batched confirmations: {}", + stdout + ); + // Should warn about uncommitted batch on abort + assert!( + stderr.contains("uncommitted") || stdout.contains("uncommitted"), + "Should warn about uncommitted batch: stdout={}, stderr={}", + stdout, + stderr + ); + + // Verify data was NOT persisted - open a new session and check + let (stdout2, _, success2) = run_shell(&db_path, "get key1\nget key2\nget key3\nexit\n"); + + assert!(success2); + // All keys should not be found + assert!( + stdout2.contains("(not found)"), + "Uncommitted batch data should not persist: {}", + stdout2 + ); +} + +#[test] +fn test_blob_tree_with_separation_threshold() { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + // Create a value of 128 bytes (exceeds 64 byte threshold) + let large_value = "x".repeat(128); + + // Set value with blob tree mode and 64 byte separation threshold + let (stdout, stderr, success) = run_cli( + &db_path, + &["--blob-tree", "--separation-threshold", "64"], + &["set", "largekey", &large_value], + ); + assert!(success, "set failed: stderr={}", stderr); + assert!(stdout.contains("OK"), "Expected OK in output: {}", stdout); + + // Get the value back + let (stdout, stderr, success) = run_cli( + &db_path, + &["--blob-tree", "--separation-threshold", "64"], + &["get", "largekey"], + ); + assert!(success, "get failed: stderr={}", stderr); + assert!( + stdout.trim() == large_value, + "Expected value of length {}, got: {}", + large_value.len(), + stdout.trim().len() + ); + assert!( + stdout.trim() == large_value, + "Expected '{}', got: '{}'", + large_value, + stdout.trim() + ); + + // Scan with -l to see internal details - should show Indirection type after flush + let (stdout, stderr, success) = run_cli( + &db_path, + &["--blob-tree", "--separation-threshold", "64"], + &["scan", "-l"], + ); + assert!(success, "scan failed: stderr={}", stderr); + assert!( + stdout.contains("type=Indirection"), + "Expected type=Indirection in output: {}", + stdout + ); +} + +#[test] +fn test_weak_tombstone() { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + // Use shell mode to keep same session across commands + let (stdout, _, success) = run_shell( + &db_path, + "set weakkey weakvalue\ndel --weak weakkey\nscan -l\nflush\nexit\n", + ); + assert!(success, "shell commands failed: {}", stdout); + + // Verify weak tombstone appears in scan -l output + // Weak tombstones should show in memtable section or tree statistics + assert!( + stdout.contains("weak tombstones") || stdout.contains("type=WeakTombstone"), + "Expected weak tombstone indication in output: {}", + stdout + ); + + // After flush, verify weak tombstone count in a new session + let (stdout, _, success) = run_shell(&db_path, "scan -l\nexit\n"); + assert!(success, "scan failed: {}", stdout); + assert!( + stdout.contains("weak tombstones"), + "Expected weak tombstones count after flush: {}", + stdout + ); +} + +// ============================================================================ +// Command line parsting Tests +// ============================================================================ + +#[test] +fn test_parse_size() { + let db = temp_db(); + let db_path = db.path().join("test.db"); + let (stdout, stderr, success) = run_cli(&db_path, &["-t", "kaboom"], &[]); + assert!(!success, "Expected command to fail"); + assert!( + stderr.contains("invalid digit found in string"), + "Expected parsing error for non-numeric argument: {}", + stderr + ); +} + +#[test] +fn test_verbose() { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + let (stdout, stderr, success) = run_cli(&db_path, &["-q"], &["info"]); + assert!(success, "Expected command to succeed"); + + let (stdout, stderr, success) = run_cli(&db_path, &["-v"], &["info"]); + assert!(success, "Expected command to succeed"); + assert!( + stderr.contains("log level: info"), + "Expected 'log level: info' statement: {}", + stderr + ); + + let (stdout, stderr, success) = run_cli(&db_path, &["-vv"], &["info"]); + assert!(success, "Expected command to succeed"); + assert!( + stderr.contains("log level: debug"), + "Expected 'log level: debug' statement: {}", + stderr + ); + + let (stdout, stderr, success) = run_cli(&db_path, &["-vvv"], &["info"]); + assert!(success, "Expected command to succeed"); + assert!( + stderr.contains("log level: trace"), + "Expected 'log level: trace' statement: {}", + stderr + ); +} + +#[test] +fn test_cli_error() { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + // Create DB as Blob Tree + let (stdout, stderr, success) = run_cli(&db_path, &["-b"], &["info"]); + assert!(success, "Expected command to succeed"); + + // Open DB as regular tree + let (stdout, stderr, success) = run_cli(&db_path, &[], &["info"]); + assert!(!success, "Expected command to fail"); + assert!( + stderr.contains("LsmTreeError: Unrecoverable"), + "Expected 'LsmTreeError: Unrecoverable': {}", + stderr + ); + assert!( + stderr.contains("Note: Use -v (one or multiple times) for more information"), + "Expected 'Note: Use -v (one or multiple times) for more information': {}", + stderr + ); + + // Open DB as regular tree + let (stdout, stderr, success) = run_cli(&db_path, &["-v"], &["info"]); + assert!(!success, "Expected command to fail"); + assert!( + stderr.contains("LsmTreeError: Unrecoverable"), + "Expected 'LsmTreeError: Unrecoverable': {}", + stderr + ); +} + +// ============================================================================ +// Interactive shell tests using rexpect +// +// These tests verify the interactive shell behavior including: +// - Prompt display +// - Command execution +// - Ctrl+C handling +// - Ctrl+D handling +// - History navigation (if supported) +// ============================================================================ + +#[cfg(unix)] +mod tests_rexpect_unix_only { + use super::{lsm_binary, temp_db}; + use rexpect::session::PtySession; + + /// Spawn an interactive shell session + fn spawn_shell(db_path: &std::path::Path) -> Result { + let binary = lsm_binary(); + let db_path_str = db_path.to_str().unwrap(); + // Use sh -c to execute the command with arguments + let command = format!("sh -c '{} {}'", binary.to_str().unwrap(), db_path_str); + rexpect::spawn(&command, Some(5000)) + } + + #[test] + fn test_interactive_prompt() -> Result<(), rexpect::error::Error> { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + let mut p = spawn_shell(&db_path).expect("Failed to spawn shell"); + + // Wait for welcome message and prompt + p.exp_string("Welcome to the LSM-tree shell") + .expect("Failed to see welcome message"); + p.exp_string("Type 'help' for available commands") + .expect("Failed to see help message"); + p.exp_regex("lsm> ").expect("Failed to see prompt"); + + // Send exit command + p.send_line("exit")?; + p.exp_string("OK (flushed)")?; + p.exp_eof()?; + + Ok(()) + } + + #[test] + fn test_interactive_basic_commands() -> Result<(), rexpect::error::Error> { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + let mut p = spawn_shell(&db_path).expect("Failed to spawn shell"); + + // Skip welcome messages + p.exp_regex("lsm> ")?; + + // Test set command + p.send_line("set testkey testvalue")?; + p.exp_string("OK (set)")?; + p.exp_regex("lsm> ")?; + + // Test get command + p.send_line("get testkey")?; + p.exp_string("testvalue")?; + p.exp_regex("lsm> ")?; + + // Test scan command + p.send_line("scan")?; + p.exp_string("testkey = testvalue")?; + p.exp_string("OK (1 items)")?; + p.exp_regex("lsm> ")?; + + // Test count command + p.send_line("count")?; + p.exp_string("1")?; + p.exp_regex("lsm> ")?; + + // Exit + p.send_line("exit")?; + p.exp_eof()?; + + Ok(()) + } + + #[test] + fn test_interactive_delete() -> Result<(), rexpect::error::Error> { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + let mut p = spawn_shell(&db_path).expect("Failed to spawn shell"); + + p.exp_regex("lsm> ")?; + + // Set a key + p.send_line("set key1 value1")?; + p.exp_string("OK (set)")?; + p.exp_regex("lsm> ")?; + + // Delete it + p.send_line("del key1")?; + p.exp_string("OK")?; + p.exp_regex("lsm> ")?; + + // Verify it's gone + p.send_line("get key1")?; + p.exp_string("(not found)")?; + p.exp_regex("lsm> ")?; + + p.send_line("exit")?; + p.exp_eof()?; + + Ok(()) + } + + #[test] + fn test_interactive_batch_operations() -> Result<(), rexpect::error::Error> { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + let mut p = spawn_shell(&db_path).expect("Failed to spawn shell"); + + p.exp_regex("lsm> ")?; + + // Begin batch + p.send_line("begin")?; + p.exp_string("OK (batch started)")?; + p.exp_regex("lsm> ")?; + + // Add operations to batch + p.send_line("set batch1 val1")?; + p.exp_string("OK (batched, ready to commit)")?; + p.exp_regex("lsm> ")?; + + p.send_line("set batch2 val2")?; + p.exp_string("OK (batched, ready to commit)")?; + p.exp_regex("lsm> ")?; + + // Get from batch + p.send_line("get batch1")?; + p.exp_string("val1")?; + p.exp_regex("lsm> ")?; + + // Commit batch + p.send_line("commit")?; + p.exp_string("OK (batch committed")?; + p.exp_regex("lsm> ")?; + + // Verify committed + p.send_line("scan")?; + p.exp_string("batch1 = val1")?; + p.exp_string("batch2 = val2")?; + p.exp_regex("lsm> ")?; + + p.send_line("exit")?; + p.exp_eof()?; + + Ok(()) + } + + #[test] + fn test_interactive_batch_rollback() -> Result<(), rexpect::error::Error> { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + let mut p = spawn_shell(&db_path).expect("Failed to spawn shell"); + + p.exp_regex("lsm> ")?; + + // Set a value outside batch + p.send_line("set existing value")?; + p.exp_string("OK (set)")?; + p.exp_regex("lsm> ")?; + + // Begin batch + p.send_line("begin")?; + p.exp_string("OK (batch started)")?; + p.exp_regex("lsm> ")?; + + // Add to batch + p.send_line("set batchkey batchvalue")?; + p.exp_string("OK (batched, ready to commit)")?; + p.exp_regex("lsm> ")?; + + // Rollback + p.send_line("rollback")?; + p.exp_string("OK (batch rolled back)")?; + p.exp_regex("lsm> ")?; + + // Verify batch key is gone, existing key remains + p.send_line("get batchkey")?; + p.exp_string("(not found)")?; + p.exp_regex("lsm> ")?; + + p.send_line("get existing")?; + p.exp_string("value")?; + p.exp_regex("lsm> ")?; + + p.send_line("exit")?; + p.exp_eof()?; + + Ok(()) + } + + #[test] + fn test_interactive_info_command() -> Result<(), rexpect::error::Error> { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + let mut p = spawn_shell(&db_path).expect("Failed to spawn shell"); + + p.exp_regex("lsm> ")?; + + // Add some data + p.send_line("set key1 value1")?; + p.exp_string("OK (set)")?; + p.exp_regex("lsm> ")?; + + // Run info + p.send_line("info")?; + p.exp_string("Path:")?; + p.exp_string("Tables:")?; + p.exp_string("Approximate items:")?; + p.exp_string("Disk space:")?; + p.exp_regex("lsm> ")?; + + p.send_line("exit")?; + p.exp_eof()?; + + Ok(()) + } + + #[test] + fn test_interactive_flush() -> Result<(), rexpect::error::Error> { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + let mut p = spawn_shell(&db_path).expect("Failed to spawn shell"); + + p.exp_regex("lsm> ")?; + + // Set a value + p.send_line("set flushkey flushvalue")?; + p.exp_string("OK (set)")?; + p.exp_regex("lsm> ")?; + + // Flush + p.send_line("flush")?; + p.exp_string("OK (flushed)")?; + p.exp_regex("lsm> ")?; + + p.send_line("exit")?; + p.exp_eof()?; + + Ok(()) + } + + #[test] + fn test_interactive_quit_alias() -> Result<(), rexpect::error::Error> { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + let mut p = spawn_shell(&db_path).expect("Failed to spawn shell"); + + p.exp_regex("lsm> ")?; + + // Use quit instead of exit + p.send_line("quit")?; + p.exp_string("OK (flushed)")?; + p.exp_eof()?; + + Ok(()) + } + + #[test] + fn test_interactive_abort() -> Result<(), rexpect::error::Error> { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + let mut p = spawn_shell(&db_path).expect("Failed to spawn shell"); + + p.exp_regex("lsm> ")?; + + // Set a value + p.send_line("set abortkey abortvalue")?; + p.exp_string("OK (set)")?; + p.exp_regex("lsm> ")?; + + // Abort (should not flush) + p.send_line("abort")?; + p.exp_eof()?; + + // Verify data was not persisted by opening a new session + let mut p2 = spawn_shell(&db_path).expect("Failed to spawn shell"); + p2.exp_regex("lsm> ")?; + p2.send_line("get abortkey")?; + p2.exp_string("(not found)")?; + p2.send_line("exit")?; + p2.exp_eof()?; + + Ok(()) + } + + #[test] + fn test_interactive_empty_lines() -> Result<(), rexpect::error::Error> { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + let mut p = spawn_shell(&db_path).expect("Failed to spawn shell"); + + p.exp_regex("lsm> ")?; + + // Send empty line + p.send_line("")?; + p.exp_regex("lsm> ")?; + + // Send command after empty line + p.send_line("set key value")?; + p.exp_string("OK (set)")?; + p.exp_regex("lsm> ")?; + + p.send_line("exit")?; + p.exp_eof()?; + + Ok(()) + } + + #[test] + fn test_interactive_range_command() -> Result<(), rexpect::error::Error> { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + let mut p = spawn_shell(&db_path).expect("Failed to spawn shell"); + + p.exp_regex("lsm> ")?; + + // Add keys + p.send_line("set a 1")?; + p.exp_string("OK (set)")?; + p.exp_regex("lsm> ")?; + + p.send_line("set b 2")?; + p.exp_string("OK (set)")?; + p.exp_regex("lsm> ")?; + + p.send_line("set c 3")?; + p.exp_string("OK (set)")?; + p.exp_regex("lsm> ")?; + + // Range query + p.send_line("range a c")?; + p.exp_string("a = 1")?; + p.exp_string("b = 2")?; + p.exp_string("(2 items)")?; + p.exp_regex("lsm> ")?; + + p.send_line("exit")?; + p.exp_eof()?; + + Ok(()) + } + + #[test] + fn test_interactive_scan_with_prefix() -> Result<(), rexpect::error::Error> { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + let mut p = spawn_shell(&db_path).expect("Failed to spawn shell"); + + p.exp_regex("lsm> ")?; + + // Add keys with different prefixes + p.send_line("set user:1 alice")?; + p.exp_string("OK (set)")?; + p.exp_regex("lsm> ")?; + + p.send_line("set user:2 bob")?; + p.exp_string("OK (set)")?; + p.exp_regex("lsm> ")?; + + p.send_line("set item:1 widget")?; + p.exp_string("OK (set)")?; + p.exp_regex("lsm> ")?; + + // Scan with prefix + p.send_line("scan user:")?; + p.exp_string("user:1 = alice")?; + p.exp_string("user:2 = bob")?; + p.exp_string("(2 items)")?; + p.exp_regex("lsm> ")?; + + p.send_line("exit")?; + p.exp_eof()?; + + Ok(()) + } + + #[test] + fn test_interactive_scan_long() -> Result<(), rexpect::error::Error> { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + let mut p = spawn_shell(&db_path).expect("Failed to spawn shell"); + + p.exp_regex("lsm> ")?; + + // Set a value + p.send_line("set testkey testvalue")?; + p.exp_string("OK (set)")?; + p.exp_regex("lsm> ")?; + + // Scan with long flag + p.send_line("scan -l")?; + p.exp_string("=== Active Memtable ===")?; + p.exp_string("seqno=")?; + p.exp_string("type=Value")?; + p.exp_regex("lsm> ")?; + + p.send_line("exit")?; + p.exp_eof()?; + + Ok(()) + } + + #[test] + fn test_interactive_help_command() -> Result<(), rexpect::error::Error> { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + let mut p = spawn_shell(&db_path).expect("Failed to spawn shell"); + + p.exp_regex("lsm> ")?; + + // Send help command + p.send_line("help")?; + // Help output may go to stdout or stderr, but should contain command info + // We'll just check that we get back to the prompt + p.exp_regex("lsm> ")?; + + p.send_line("exit")?; + p.exp_eof()?; + + Ok(()) + } + + #[test] + fn test_interactive_error_handling() -> Result<(), rexpect::error::Error> { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + let mut p = spawn_shell(&db_path).expect("Failed to spawn shell"); + + p.exp_regex("lsm> ")?; + + // Try invalid command + p.send_line("notacommand")?; + // Should show error but continue + p.exp_regex("lsm> ")?; + + // Try command with missing args + p.send_line("set onlykey")?; + // Should show error but continue + p.exp_regex("lsm> ")?; + + p.send_line("exit")?; + p.exp_eof()?; + + Ok(()) + } + + #[test] + fn test_interactive_batch_info() -> Result<(), rexpect::error::Error> { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + let mut p = spawn_shell(&db_path).expect("Failed to spawn shell"); + + p.exp_regex("lsm> ")?; + + // Begin batch + p.send_line("begin")?; + p.exp_string("OK (batch started)")?; + p.exp_regex("lsm> ")?; + + // Add operations + p.send_line("set k1 v1")?; + p.exp_string("OK (batched, ready to commit)")?; + p.exp_regex("lsm> ")?; + + p.send_line("del k2")?; + p.exp_string("OK (batched, ready to commit)")?; + p.exp_regex("lsm> ")?; + + // Check info shows batch + p.send_line("info")?; + p.exp_string("Active batch (2 operations)")?; + p.exp_string("SET k1 = v1")?; + p.exp_string("DEL k2")?; + p.exp_regex("lsm> ")?; + + p.send_line("rollback")?; + p.exp_string("OK (batch rolled back)")?; + p.exp_regex("lsm> ")?; + + p.send_line("exit")?; + p.exp_eof()?; + + Ok(()) + } + + #[test] + fn test_interactive_exit_with_batch_warning() -> Result<(), rexpect::error::Error> { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + let mut p = spawn_shell(&db_path).expect("Failed to spawn shell"); + + p.exp_regex("lsm> ")?; + + // Begin batch + p.send_line("begin")?; + p.exp_string("OK (batch started)")?; + p.exp_regex("lsm> ")?; + + // Add operation + p.send_line("set k v")?; + p.exp_string("OK (batched, ready to commit)")?; + p.exp_regex("lsm> ")?; + + // Exit with uncommitted batch (should warn) + p.send_line("exit")?; + // Warning goes to stderr, but we should still see flush message + p.exp_string("OK (flushed)")?; + p.exp_eof()?; + + Ok(()) + } + + #[test] + fn test_interactive_weak_delete() -> Result<(), rexpect::error::Error> { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + let mut p = spawn_shell(&db_path).expect("Failed to spawn shell"); + + p.exp_regex("lsm> ")?; + + // Set a value + p.send_line("set weakkey weakvalue")?; + p.exp_string("OK (set)")?; + p.exp_regex("lsm> ")?; + + // Weak delete + p.send_line("del --weak weakkey")?; + p.exp_string("OK")?; + p.exp_regex("lsm> ")?; + + // Verify with scan -l + p.send_line("scan -l")?; + p.exp_string("type=WeakTombstone")?; + p.exp_regex("lsm> ")?; + + p.send_line("exit")?; + p.exp_eof()?; + + Ok(()) + } + + #[test] + fn test_interactive_compact() -> Result<(), rexpect::error::Error> { + let db = temp_db(); + let db_path = db.path().join("test.db"); + + let mut p = spawn_shell(&db_path).expect("Failed to spawn shell"); + + p.exp_regex("lsm> ")?; + + // Add some data + p.send_line("set k1 v1")?; + p.exp_string("OK (set)")?; + p.exp_regex("lsm> ")?; + + p.send_line("set k2 v2")?; + p.exp_string("OK (set)")?; + p.exp_regex("lsm> ")?; + + // Flush to create tables + p.send_line("flush")?; + p.exp_string("OK (flushed)")?; + p.exp_regex("lsm> ")?; + + // Compact + p.send_line("compact")?; + p.exp_string("OK")?; + p.exp_regex("lsm> ")?; + + p.send_line("exit")?; + p.exp_eof()?; + + Ok(()) + } +}