diff --git a/Rakefile b/Rakefile index 70112c3..24cbdf5 100644 --- a/Rakefile +++ b/Rakefile @@ -10,13 +10,13 @@ spec = Gem::Specification.new do |s| s.authors = ["William Morgan"] s.summary = "a Ruby binding to LevelDB" s.homepage = "http://github.com/wmorgan/leveldb-ruby" - s.files = %w(README LICENSE ext/leveldb/extconf.rb ext/leveldb/platform.rb lib/leveldb.rb ext/leveldb/leveldb.cc leveldb/Makefile leveldb/build_detect_platform) - Find.find("leveldb") { |x| s.files += [x] if x =~ /\.(cc|h)$/} + s.files = %w(README LICENSE ext/leveldb/extconf.rb ext/leveldb/platform.rb lib/leveldb.rb ext/leveldb/leveldb.cc) s.extensions = %w(ext/leveldb/extconf.rb) s.executables = [] s.extra_rdoc_files = %w(README ext/leveldb/leveldb.cc) s.rdoc_options = %w(-c utf8 --main README --title LevelDB) s.description = "LevelDB-Ruby is a Ruby binding to LevelDB." + s.licenses = ["BSD-3-Clause"] end task :rdoc do |t| diff --git a/ext/leveldb/extconf.rb b/ext/leveldb/extconf.rb index 614511e..e5b4ee5 100644 --- a/ext/leveldb/extconf.rb +++ b/ext/leveldb/extconf.rb @@ -2,13 +2,9 @@ require 'fileutils' require "./platform.rb" -Dir.chdir "../../leveldb" -system "OPT=\"-O2 -DNDEBUG -fPIC\" make libleveldb.a" or abort -Dir.chdir "../ext/leveldb" - set_platform_specific_variables! -$CFLAGS << " -I../../leveldb/include" -$LIBS << " -L../../leveldb -lleveldb" +$INCFLAGS << ' ' << `pkg-config --cflags-only-I leveldb`.strip << ' ' +$LIBS << ' ' << `pkg-config --libs leveldb`.strip << ' ' create_makefile "leveldb/leveldb" diff --git a/leveldb/.gitignore b/leveldb/.gitignore deleted file mode 100644 index 46769e0..0000000 --- a/leveldb/.gitignore +++ /dev/null @@ -1,5 +0,0 @@ -build_config.mk -*.a -*.o -*_test -db_bench diff --git a/leveldb/AUTHORS b/leveldb/AUTHORS deleted file mode 100644 index 27a9407..0000000 --- a/leveldb/AUTHORS +++ /dev/null @@ -1,8 +0,0 @@ -# Names should be added to this file like so: -# Name or Organization - -Google Inc. - -# Initial version authors: -Jeffrey Dean -Sanjay Ghemawat diff --git a/leveldb/LICENSE b/leveldb/LICENSE deleted file mode 100644 index 8e80208..0000000 --- a/leveldb/LICENSE +++ /dev/null @@ -1,27 +0,0 @@ -Copyright (c) 2011 The LevelDB Authors. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - * Neither the name of Google Inc. nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/leveldb/Makefile b/leveldb/Makefile deleted file mode 100644 index c648a28..0000000 --- a/leveldb/Makefile +++ /dev/null @@ -1,200 +0,0 @@ -# Copyright (c) 2011 The LevelDB Authors. All rights reserved. -# Use of this source code is governed by a BSD-style license that can be -# found in the LICENSE file. See the AUTHORS file for names of contributors. - -# Inherit some settings from environment variables, if available -INSTALL_PATH ?= $(CURDIR) - -#----------------------------------------------- -# Uncomment exactly one of the lines labelled (A), (B), and (C) below -# to switch between compilation modes. - -OPT ?= -O2 -DNDEBUG # (A) Production use (optimized mode) -# OPT ?= -g2 # (B) Debug mode, w/ full line-level debugging symbols -# OPT ?= -O2 -g2 -DNDEBUG # (C) Profiling mode: opt, but w/debugging symbols -#----------------------------------------------- - -# detect what platform we're building on -$(shell ./build_detect_platform build_config.mk) -# this file is generated by the previous line to set build flags and sources -include build_config.mk - -CFLAGS += -I. -I./include $(PLATFORM_CCFLAGS) $(OPT) -CXXFLAGS += -I. -I./include $(PLATFORM_CXXFLAGS) $(OPT) - -LDFLAGS += $(PLATFORM_LDFLAGS) - -LIBOBJECTS = $(SOURCES:.cc=.o) -MEMENVOBJECTS = $(MEMENV_SOURCES:.cc=.o) - -TESTUTIL = ./util/testutil.o -TESTHARNESS = ./util/testharness.o $(TESTUTIL) - -TESTS = \ - arena_test \ - bloom_test \ - c_test \ - cache_test \ - coding_test \ - corruption_test \ - crc32c_test \ - db_test \ - dbformat_test \ - env_test \ - filename_test \ - filter_block_test \ - log_test \ - memenv_test \ - skiplist_test \ - table_test \ - version_edit_test \ - version_set_test \ - write_batch_test - -PROGRAMS = db_bench $(TESTS) -BENCHMARKS = db_bench_sqlite3 db_bench_tree_db - -LIBRARY = libleveldb.a -MEMENVLIBRARY = libmemenv.a - -default: all - -# Should we build shared libraries? -ifneq ($(PLATFORM_SHARED_EXT),) - -ifneq ($(PLATFORM_SHARED_VERSIONED),true) -SHARED1 = libleveldb.$(PLATFORM_SHARED_EXT) -SHARED2 = $(SHARED1) -SHARED3 = $(SHARED1) -SHARED = $(SHARED1) -else -# Update db.h if you change these. -SHARED_MAJOR = 1 -SHARED_MINOR = 5 -SHARED1 = libleveldb.$(PLATFORM_SHARED_EXT) -SHARED2 = $(SHARED1).$(SHARED_MAJOR) -SHARED3 = $(SHARED1).$(SHARED_MAJOR).$(SHARED_MINOR) -SHARED = $(SHARED1) $(SHARED2) $(SHARED3) -$(SHARED1): $(SHARED3) - ln -fs $(SHARED3) $(SHARED1) -$(SHARED2): $(SHARED3) - ln -fs $(SHARED3) $(SHARED2) -endif - -$(SHARED3): - $(CXX) $(LDFLAGS) $(PLATFORM_SHARED_LDFLAGS)$(SHARED2) $(CXXFLAGS) $(PLATFORM_SHARED_CFLAGS) $(SOURCES) -o $(SHARED3) - -endif # PLATFORM_SHARED_EXT - -all: $(SHARED) $(LIBRARY) - -check: all $(PROGRAMS) $(TESTS) - for t in $(TESTS); do echo "***** Running $$t"; ./$$t || exit 1; done - -clean: - -rm -f $(PROGRAMS) $(BENCHMARKS) $(LIBRARY) $(SHARED) $(MEMENVLIBRARY) */*.o */*/*.o ios-x86/*/*.o ios-arm/*/*.o build_config.mk - -rm -rf ios-x86/* ios-arm/* - -$(LIBRARY): $(LIBOBJECTS) - rm -f $@ - $(AR) -rs $@ $(LIBOBJECTS) - -db_bench: db/db_bench.o $(LIBOBJECTS) $(TESTUTIL) - $(CXX) db/db_bench.o $(LIBOBJECTS) $(TESTUTIL) -o $@ $(LDFLAGS) - -db_bench_sqlite3: doc/bench/db_bench_sqlite3.o $(LIBOBJECTS) $(TESTUTIL) - $(CXX) doc/bench/db_bench_sqlite3.o $(LIBOBJECTS) $(TESTUTIL) -o $@ $(LDFLAGS) -lsqlite3 - -db_bench_tree_db: doc/bench/db_bench_tree_db.o $(LIBOBJECTS) $(TESTUTIL) - $(CXX) doc/bench/db_bench_tree_db.o $(LIBOBJECTS) $(TESTUTIL) -o $@ $(LDFLAGS) -lkyotocabinet - -arena_test: util/arena_test.o $(LIBOBJECTS) $(TESTHARNESS) - $(CXX) util/arena_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS) - -bloom_test: util/bloom_test.o $(LIBOBJECTS) $(TESTHARNESS) - $(CXX) util/bloom_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS) - -c_test: db/c_test.o $(LIBOBJECTS) $(TESTHARNESS) - $(CXX) db/c_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS) - -cache_test: util/cache_test.o $(LIBOBJECTS) $(TESTHARNESS) - $(CXX) util/cache_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS) - -coding_test: util/coding_test.o $(LIBOBJECTS) $(TESTHARNESS) - $(CXX) util/coding_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS) - -corruption_test: db/corruption_test.o $(LIBOBJECTS) $(TESTHARNESS) - $(CXX) db/corruption_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS) - -crc32c_test: util/crc32c_test.o $(LIBOBJECTS) $(TESTHARNESS) - $(CXX) util/crc32c_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS) - -db_test: db/db_test.o $(LIBOBJECTS) $(TESTHARNESS) - $(CXX) db/db_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS) - -dbformat_test: db/dbformat_test.o $(LIBOBJECTS) $(TESTHARNESS) - $(CXX) db/dbformat_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS) - -env_test: util/env_test.o $(LIBOBJECTS) $(TESTHARNESS) - $(CXX) util/env_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS) - -filename_test: db/filename_test.o $(LIBOBJECTS) $(TESTHARNESS) - $(CXX) db/filename_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS) - -filter_block_test: table/filter_block_test.o $(LIBOBJECTS) $(TESTHARNESS) - $(CXX) table/filter_block_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS) - -log_test: db/log_test.o $(LIBOBJECTS) $(TESTHARNESS) - $(CXX) db/log_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS) - -table_test: table/table_test.o $(LIBOBJECTS) $(TESTHARNESS) - $(CXX) table/table_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS) - -skiplist_test: db/skiplist_test.o $(LIBOBJECTS) $(TESTHARNESS) - $(CXX) db/skiplist_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS) - -version_edit_test: db/version_edit_test.o $(LIBOBJECTS) $(TESTHARNESS) - $(CXX) db/version_edit_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS) - -version_set_test: db/version_set_test.o $(LIBOBJECTS) $(TESTHARNESS) - $(CXX) db/version_set_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS) - -write_batch_test: db/write_batch_test.o $(LIBOBJECTS) $(TESTHARNESS) - $(CXX) db/write_batch_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS) - -$(MEMENVLIBRARY) : $(MEMENVOBJECTS) - rm -f $@ - $(AR) -rs $@ $(MEMENVOBJECTS) - -memenv_test : helpers/memenv/memenv_test.o $(MEMENVLIBRARY) $(LIBRARY) $(TESTHARNESS) - $(CXX) helpers/memenv/memenv_test.o $(MEMENVLIBRARY) $(LIBRARY) $(TESTHARNESS) -o $@ $(LDFLAGS) - -ifeq ($(PLATFORM), IOS) -# For iOS, create universal object files to be used on both the simulator and -# a device. -PLATFORMSROOT=/Applications/Xcode.app/Contents/Developer/Platforms -SIMULATORROOT=$(PLATFORMSROOT)/iPhoneSimulator.platform/Developer -DEVICEROOT=$(PLATFORMSROOT)/iPhoneOS.platform/Developer -IOSVERSION=$(shell defaults read $(PLATFORMSROOT)/iPhoneOS.platform/version CFBundleShortVersionString) - -.cc.o: - mkdir -p ios-x86/$(dir $@) - $(SIMULATORROOT)/usr/bin/$(CXX) $(CXXFLAGS) -isysroot $(SIMULATORROOT)/SDKs/iPhoneSimulator$(IOSVERSION).sdk -arch i686 -c $< -o ios-x86/$@ - mkdir -p ios-arm/$(dir $@) - $(DEVICEROOT)/usr/bin/$(CXX) $(CXXFLAGS) -isysroot $(DEVICEROOT)/SDKs/iPhoneOS$(IOSVERSION).sdk -arch armv6 -arch armv7 -c $< -o ios-arm/$@ - lipo ios-x86/$@ ios-arm/$@ -create -output $@ - -.c.o: - mkdir -p ios-x86/$(dir $@) - $(SIMULATORROOT)/usr/bin/$(CC) $(CFLAGS) -isysroot $(SIMULATORROOT)/SDKs/iPhoneSimulator$(IOSVERSION).sdk -arch i686 -c $< -o ios-x86/$@ - mkdir -p ios-arm/$(dir $@) - $(DEVICEROOT)/usr/bin/$(CC) $(CFLAGS) -isysroot $(DEVICEROOT)/SDKs/iPhoneOS$(IOSVERSION).sdk -arch armv6 -arch armv7 -c $< -o ios-arm/$@ - lipo ios-x86/$@ ios-arm/$@ -create -output $@ - -else -.cc.o: - $(CXX) $(CXXFLAGS) -c $< -o $@ - -.c.o: - $(CC) $(CFLAGS) -c $< -o $@ -endif diff --git a/leveldb/NEWS b/leveldb/NEWS deleted file mode 100644 index 3fd9924..0000000 --- a/leveldb/NEWS +++ /dev/null @@ -1,17 +0,0 @@ -Release 1.2 2011-05-16 ----------------------- - -Fixes for larger databases (tested up to one billion 100-byte entries, -i.e., ~100GB). - -(1) Place hard limit on number of level-0 files. This fixes errors -of the form "too many open files". - -(2) Fixed memtable management. Before the fix, a heavy write burst -could cause unbounded memory usage. - -A fix for a logging bug where the reader would incorrectly complain -about corruption. - -Allow public access to WriteBatch contents so that users can easily -wrap a DB. diff --git a/leveldb/README b/leveldb/README deleted file mode 100644 index 3618ade..0000000 --- a/leveldb/README +++ /dev/null @@ -1,51 +0,0 @@ -leveldb: A key-value store -Authors: Sanjay Ghemawat (sanjay@google.com) and Jeff Dean (jeff@google.com) - -The code under this directory implements a system for maintaining a -persistent key/value store. - -See doc/index.html for more explanation. -See doc/impl.html for a brief overview of the implementation. - -The public interface is in include/*.h. Callers should not include or -rely on the details of any other header files in this package. Those -internal APIs may be changed without warning. - -Guide to header files: - -include/db.h - Main interface to the DB: Start here - -include/options.h - Control over the behavior of an entire database, and also - control over the behavior of individual reads and writes. - -include/comparator.h - Abstraction for user-specified comparison function. If you want - just bytewise comparison of keys, you can use the default comparator, - but clients can write their own comparator implementations if they - want custom ordering (e.g. to handle different character - encodings, etc.) - -include/iterator.h - Interface for iterating over data. You can get an iterator - from a DB object. - -include/write_batch.h - Interface for atomically applying multiple updates to a database. - -include/slice.h - A simple module for maintaining a pointer and a length into some - other byte array. - -include/status.h - Status is returned from many of the public interfaces and is used - to report success and various kinds of errors. - -include/env.h - Abstraction of the OS environment. A posix implementation of - this interface is in util/env_posix.cc - -include/table.h -include/table_builder.h - Lower-level modules that most clients probably won't use directly diff --git a/leveldb/TODO b/leveldb/TODO deleted file mode 100644 index 9130b6a..0000000 --- a/leveldb/TODO +++ /dev/null @@ -1,13 +0,0 @@ -ss -- Stats - -db -- Maybe implement DB::BulkDeleteForRange(start_key, end_key) - that would blow away files whose ranges are entirely contained - within [start_key..end_key]? For Chrome, deletion of obsolete - object stores, etc. can be done in the background anyway, so - probably not that important. - -After a range is completely deleted, what gets rid of the -corresponding files if we do no future changes to that range. Make -the conditions for triggering compactions fire in more situations? diff --git a/leveldb/build_detect_platform b/leveldb/build_detect_platform deleted file mode 100755 index 959a7d6..0000000 --- a/leveldb/build_detect_platform +++ /dev/null @@ -1,179 +0,0 @@ -#!/bin/sh -# -# Detects OS we're compiling on and outputs a file specified by the first -# argument, which in turn gets read while processing Makefile. -# -# The output will set the following variables: -# CC C Compiler path -# CXX C++ Compiler path -# PLATFORM_LDFLAGS Linker flags -# PLATFORM_SHARED_EXT Extension for shared libraries -# PLATFORM_SHARED_LDFLAGS Flags for building shared library -# PLATFORM_SHARED_CFLAGS Flags for compiling objects for shared library -# PLATFORM_CCFLAGS C compiler flags -# PLATFORM_CXXFLAGS C++ compiler flags. Will contain: -# PLATFORM_SHARED_VERSIONED Set to 'true' if platform supports versioned -# shared libraries, empty otherwise. -# -# The PLATFORM_CCFLAGS and PLATFORM_CXXFLAGS might include the following: -# -# -DLEVELDB_CSTDATOMIC_PRESENT if is present -# -DLEVELDB_PLATFORM_POSIX for Posix-based platforms -# -DSNAPPY if the Snappy library is present -# - -OUTPUT=$1 -if test -z "$OUTPUT"; then - echo "usage: $0 " >&2 - exit 1 -fi - -# Delete existing output, if it exists -rm -f $OUTPUT -touch $OUTPUT - -if test -z "$CC"; then - CC=cc -fi - -if test -z "$CXX"; then - CXX=g++ -fi - -# Detect OS -if test -z "$TARGET_OS"; then - TARGET_OS=`uname -s` -fi - -COMMON_FLAGS= -CROSS_COMPILE= -PLATFORM_CCFLAGS= -PLATFORM_CXXFLAGS= -PLATFORM_LDFLAGS= -PLATFORM_SHARED_EXT="so" -PLATFORM_SHARED_LDFLAGS="-shared -Wl,-soname -Wl," -PLATFORM_SHARED_CFLAGS="-fPIC" -PLATFORM_SHARED_VERSIONED=true - -# On GCC, we pick libc's memcmp over GCC's memcmp via -fno-builtin-memcmp -case "$TARGET_OS" in - Darwin) - PLATFORM=OS_MACOSX - COMMON_FLAGS="-fno-builtin-memcmp -DOS_MACOSX" - PLATFORM_SHARED_EXT=dylib - PLATFORM_SHARED_LDFLAGS="-dynamiclib -install_name " - PORT_FILE=port/port_posix.cc - ;; - Linux) - PLATFORM=OS_LINUX - COMMON_FLAGS="-fno-builtin-memcmp -pthread -DOS_LINUX" - PLATFORM_LDFLAGS="-pthread" - PORT_FILE=port/port_posix.cc - ;; - SunOS) - PLATFORM=OS_SOLARIS - COMMON_FLAGS="-fno-builtin-memcmp -D_REENTRANT -DOS_SOLARIS" - PLATFORM_LDFLAGS="-lpthread -lrt" - PORT_FILE=port/port_posix.cc - ;; - FreeBSD) - PLATFORM=OS_FREEBSD - COMMON_FLAGS="-fno-builtin-memcmp -D_REENTRANT -DOS_FREEBSD" - PLATFORM_LDFLAGS="-lpthread" - PORT_FILE=port/port_posix.cc - ;; - NetBSD) - PLATFORM=OS_NETBSD - COMMON_FLAGS="-fno-builtin-memcmp -D_REENTRANT -DOS_NETBSD" - PLATFORM_LDFLAGS="-lpthread -lgcc_s" - PORT_FILE=port/port_posix.cc - ;; - OpenBSD) - PLATFORM=OS_OPENBSD - COMMON_FLAGS="-fno-builtin-memcmp -D_REENTRANT -DOS_OPENBSD" - PLATFORM_LDFLAGS="-pthread" - PORT_FILE=port/port_posix.cc - ;; - DragonFly) - PLATFORM=OS_DRAGONFLYBSD - COMMON_FLAGS="-fno-builtin-memcmp -D_REENTRANT -DOS_DRAGONFLYBSD" - PLATFORM_LDFLAGS="-lpthread" - PORT_FILE=port/port_posix.cc - ;; - OS_ANDROID_CROSSCOMPILE) - PLATFORM=OS_ANDROID - COMMON_FLAGS="-fno-builtin-memcmp -D_REENTRANT -DOS_ANDROID -DLEVELDB_PLATFORM_POSIX" - PLATFORM_LDFLAGS="" # All pthread features are in the Android C library - PORT_FILE=port/port_posix.cc - CROSS_COMPILE=true - ;; - *) - echo "Unknown platform!" >&2 - exit 1 -esac - -# We want to make a list of all cc files within util, db, table, and helpers -# except for the test and benchmark files. By default, find will output a list -# of all files matching either rule, so we need to append -print to make the -# prune take effect. -DIRS="util db table" -set -f # temporarily disable globbing so that our patterns aren't expanded -PRUNE_TEST="-name *test*.cc -prune" -PRUNE_BENCH="-name *_bench.cc -prune" -PORTABLE_FILES=`find $DIRS $PRUNE_TEST -o $PRUNE_BENCH -o -name '*.cc' -print | sort | tr "\n" " "` -set +f # re-enable globbing - -# The sources consist of the portable files, plus the platform-specific port -# file. -echo "SOURCES=$PORTABLE_FILES $PORT_FILE" >> $OUTPUT -echo "MEMENV_SOURCES=helpers/memenv/memenv.cc" >> $OUTPUT - -if [ "$CROSS_COMPILE" = "true" ]; then - # Cross-compiling; do not try any compilation tests. - true -else - # If -std=c++0x works, use . Otherwise use port_posix.h. - $CXX $CFLAGS -std=c++0x -x c++ - -o /dev/null 2>/dev/null < - int main() {} -EOF - if [ "$?" = 0 ]; then - COMMON_FLAGS="$COMMON_FLAGS -DLEVELDB_PLATFORM_POSIX -DLEVELDB_CSTDATOMIC_PRESENT" - PLATFORM_CXXFLAGS="-std=c++0x" - else - COMMON_FLAGS="$COMMON_FLAGS -DLEVELDB_PLATFORM_POSIX" - fi - - # Test whether Snappy library is installed - # http://code.google.com/p/snappy/ - $CXX $CFLAGS -x c++ - -o /dev/null 2>/dev/null < - int main() {} -EOF - if [ "$?" = 0 ]; then - COMMON_FLAGS="$COMMON_FLAGS -DSNAPPY" - PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lsnappy" - fi - - # Test whether tcmalloc is available - $CXX $CFLAGS -x c++ - -o /dev/null -ltcmalloc 2>/dev/null <> $OUTPUT -echo "CXX=$CXX" >> $OUTPUT -echo "PLATFORM=$PLATFORM" >> $OUTPUT -echo "PLATFORM_LDFLAGS=$PLATFORM_LDFLAGS" >> $OUTPUT -echo "PLATFORM_CCFLAGS=$PLATFORM_CCFLAGS" >> $OUTPUT -echo "PLATFORM_CXXFLAGS=$PLATFORM_CXXFLAGS" >> $OUTPUT -echo "PLATFORM_SHARED_CFLAGS=$PLATFORM_SHARED_CFLAGS" >> $OUTPUT -echo "PLATFORM_SHARED_EXT=$PLATFORM_SHARED_EXT" >> $OUTPUT -echo "PLATFORM_SHARED_LDFLAGS=$PLATFORM_SHARED_LDFLAGS" >> $OUTPUT -echo "PLATFORM_SHARED_VERSIONED=$PLATFORM_SHARED_VERSIONED" >> $OUTPUT diff --git a/leveldb/db/builder.cc b/leveldb/db/builder.cc deleted file mode 100644 index f419882..0000000 --- a/leveldb/db/builder.cc +++ /dev/null @@ -1,88 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "db/builder.h" - -#include "db/filename.h" -#include "db/dbformat.h" -#include "db/table_cache.h" -#include "db/version_edit.h" -#include "leveldb/db.h" -#include "leveldb/env.h" -#include "leveldb/iterator.h" - -namespace leveldb { - -Status BuildTable(const std::string& dbname, - Env* env, - const Options& options, - TableCache* table_cache, - Iterator* iter, - FileMetaData* meta) { - Status s; - meta->file_size = 0; - iter->SeekToFirst(); - - std::string fname = TableFileName(dbname, meta->number); - if (iter->Valid()) { - WritableFile* file; - s = env->NewWritableFile(fname, &file); - if (!s.ok()) { - return s; - } - - TableBuilder* builder = new TableBuilder(options, file); - meta->smallest.DecodeFrom(iter->key()); - for (; iter->Valid(); iter->Next()) { - Slice key = iter->key(); - meta->largest.DecodeFrom(key); - builder->Add(key, iter->value()); - } - - // Finish and check for builder errors - if (s.ok()) { - s = builder->Finish(); - if (s.ok()) { - meta->file_size = builder->FileSize(); - assert(meta->file_size > 0); - } - } else { - builder->Abandon(); - } - delete builder; - - // Finish and check for file errors - if (s.ok()) { - s = file->Sync(); - } - if (s.ok()) { - s = file->Close(); - } - delete file; - file = NULL; - - if (s.ok()) { - // Verify that the table is usable - Iterator* it = table_cache->NewIterator(ReadOptions(), - meta->number, - meta->file_size); - s = it->status(); - delete it; - } - } - - // Check for input iterator errors - if (!iter->status().ok()) { - s = iter->status(); - } - - if (s.ok() && meta->file_size > 0) { - // Keep it - } else { - env->DeleteFile(fname); - } - return s; -} - -} // namespace leveldb diff --git a/leveldb/db/builder.h b/leveldb/db/builder.h deleted file mode 100644 index 62431fc..0000000 --- a/leveldb/db/builder.h +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#ifndef STORAGE_LEVELDB_DB_BUILDER_H_ -#define STORAGE_LEVELDB_DB_BUILDER_H_ - -#include "leveldb/status.h" - -namespace leveldb { - -struct Options; -struct FileMetaData; - -class Env; -class Iterator; -class TableCache; -class VersionEdit; - -// Build a Table file from the contents of *iter. The generated file -// will be named according to meta->number. On success, the rest of -// *meta will be filled with metadata about the generated table. -// If no data is present in *iter, meta->file_size will be set to -// zero, and no Table file will be produced. -extern Status BuildTable(const std::string& dbname, - Env* env, - const Options& options, - TableCache* table_cache, - Iterator* iter, - FileMetaData* meta); - -} // namespace leveldb - -#endif // STORAGE_LEVELDB_DB_BUILDER_H_ diff --git a/leveldb/db/c.cc b/leveldb/db/c.cc deleted file mode 100644 index 2dde400..0000000 --- a/leveldb/db/c.cc +++ /dev/null @@ -1,581 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "leveldb/c.h" - -#include -#include -#include "leveldb/cache.h" -#include "leveldb/comparator.h" -#include "leveldb/db.h" -#include "leveldb/env.h" -#include "leveldb/filter_policy.h" -#include "leveldb/iterator.h" -#include "leveldb/options.h" -#include "leveldb/status.h" -#include "leveldb/write_batch.h" - -using leveldb::Cache; -using leveldb::Comparator; -using leveldb::CompressionType; -using leveldb::DB; -using leveldb::Env; -using leveldb::FileLock; -using leveldb::FilterPolicy; -using leveldb::Iterator; -using leveldb::Logger; -using leveldb::NewBloomFilterPolicy; -using leveldb::NewLRUCache; -using leveldb::Options; -using leveldb::RandomAccessFile; -using leveldb::Range; -using leveldb::ReadOptions; -using leveldb::SequentialFile; -using leveldb::Slice; -using leveldb::Snapshot; -using leveldb::Status; -using leveldb::WritableFile; -using leveldb::WriteBatch; -using leveldb::WriteOptions; - -extern "C" { - -struct leveldb_t { DB* rep; }; -struct leveldb_iterator_t { Iterator* rep; }; -struct leveldb_writebatch_t { WriteBatch rep; }; -struct leveldb_snapshot_t { const Snapshot* rep; }; -struct leveldb_readoptions_t { ReadOptions rep; }; -struct leveldb_writeoptions_t { WriteOptions rep; }; -struct leveldb_options_t { Options rep; }; -struct leveldb_cache_t { Cache* rep; }; -struct leveldb_seqfile_t { SequentialFile* rep; }; -struct leveldb_randomfile_t { RandomAccessFile* rep; }; -struct leveldb_writablefile_t { WritableFile* rep; }; -struct leveldb_logger_t { Logger* rep; }; -struct leveldb_filelock_t { FileLock* rep; }; - -struct leveldb_comparator_t : public Comparator { - void* state_; - void (*destructor_)(void*); - int (*compare_)( - void*, - const char* a, size_t alen, - const char* b, size_t blen); - const char* (*name_)(void*); - - virtual ~leveldb_comparator_t() { - (*destructor_)(state_); - } - - virtual int Compare(const Slice& a, const Slice& b) const { - return (*compare_)(state_, a.data(), a.size(), b.data(), b.size()); - } - - virtual const char* Name() const { - return (*name_)(state_); - } - - // No-ops since the C binding does not support key shortening methods. - virtual void FindShortestSeparator(std::string*, const Slice&) const { } - virtual void FindShortSuccessor(std::string* key) const { } -}; - -struct leveldb_filterpolicy_t : public FilterPolicy { - void* state_; - void (*destructor_)(void*); - const char* (*name_)(void*); - char* (*create_)( - void*, - const char* const* key_array, const size_t* key_length_array, - int num_keys, - size_t* filter_length); - unsigned char (*key_match_)( - void*, - const char* key, size_t length, - const char* filter, size_t filter_length); - - virtual ~leveldb_filterpolicy_t() { - (*destructor_)(state_); - } - - virtual const char* Name() const { - return (*name_)(state_); - } - - virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const { - std::vector key_pointers(n); - std::vector key_sizes(n); - for (int i = 0; i < n; i++) { - key_pointers[i] = keys[i].data(); - key_sizes[i] = keys[i].size(); - } - size_t len; - char* filter = (*create_)(state_, &key_pointers[0], &key_sizes[0], n, &len); - dst->append(filter, len); - free(filter); - } - - virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const { - return (*key_match_)(state_, key.data(), key.size(), - filter.data(), filter.size()); - } -}; - -struct leveldb_env_t { - Env* rep; - bool is_default; -}; - -static bool SaveError(char** errptr, const Status& s) { - assert(errptr != NULL); - if (s.ok()) { - return false; - } else if (*errptr == NULL) { - *errptr = strdup(s.ToString().c_str()); - } else { - // TODO(sanjay): Merge with existing error? - free(*errptr); - *errptr = strdup(s.ToString().c_str()); - } - return true; -} - -static char* CopyString(const std::string& str) { - char* result = reinterpret_cast(malloc(sizeof(char) * str.size())); - memcpy(result, str.data(), sizeof(char) * str.size()); - return result; -} - -leveldb_t* leveldb_open( - const leveldb_options_t* options, - const char* name, - char** errptr) { - DB* db; - if (SaveError(errptr, DB::Open(options->rep, std::string(name), &db))) { - return NULL; - } - leveldb_t* result = new leveldb_t; - result->rep = db; - return result; -} - -void leveldb_close(leveldb_t* db) { - delete db->rep; - delete db; -} - -void leveldb_put( - leveldb_t* db, - const leveldb_writeoptions_t* options, - const char* key, size_t keylen, - const char* val, size_t vallen, - char** errptr) { - SaveError(errptr, - db->rep->Put(options->rep, Slice(key, keylen), Slice(val, vallen))); -} - -void leveldb_delete( - leveldb_t* db, - const leveldb_writeoptions_t* options, - const char* key, size_t keylen, - char** errptr) { - SaveError(errptr, db->rep->Delete(options->rep, Slice(key, keylen))); -} - - -void leveldb_write( - leveldb_t* db, - const leveldb_writeoptions_t* options, - leveldb_writebatch_t* batch, - char** errptr) { - SaveError(errptr, db->rep->Write(options->rep, &batch->rep)); -} - -char* leveldb_get( - leveldb_t* db, - const leveldb_readoptions_t* options, - const char* key, size_t keylen, - size_t* vallen, - char** errptr) { - char* result = NULL; - std::string tmp; - Status s = db->rep->Get(options->rep, Slice(key, keylen), &tmp); - if (s.ok()) { - *vallen = tmp.size(); - result = CopyString(tmp); - } else { - *vallen = 0; - if (!s.IsNotFound()) { - SaveError(errptr, s); - } - } - return result; -} - -leveldb_iterator_t* leveldb_create_iterator( - leveldb_t* db, - const leveldb_readoptions_t* options) { - leveldb_iterator_t* result = new leveldb_iterator_t; - result->rep = db->rep->NewIterator(options->rep); - return result; -} - -const leveldb_snapshot_t* leveldb_create_snapshot( - leveldb_t* db) { - leveldb_snapshot_t* result = new leveldb_snapshot_t; - result->rep = db->rep->GetSnapshot(); - return result; -} - -void leveldb_release_snapshot( - leveldb_t* db, - const leveldb_snapshot_t* snapshot) { - db->rep->ReleaseSnapshot(snapshot->rep); - delete snapshot; -} - -char* leveldb_property_value( - leveldb_t* db, - const char* propname) { - std::string tmp; - if (db->rep->GetProperty(Slice(propname), &tmp)) { - // We use strdup() since we expect human readable output. - return strdup(tmp.c_str()); - } else { - return NULL; - } -} - -void leveldb_approximate_sizes( - leveldb_t* db, - int num_ranges, - const char* const* range_start_key, const size_t* range_start_key_len, - const char* const* range_limit_key, const size_t* range_limit_key_len, - uint64_t* sizes) { - Range* ranges = new Range[num_ranges]; - for (int i = 0; i < num_ranges; i++) { - ranges[i].start = Slice(range_start_key[i], range_start_key_len[i]); - ranges[i].limit = Slice(range_limit_key[i], range_limit_key_len[i]); - } - db->rep->GetApproximateSizes(ranges, num_ranges, sizes); - delete[] ranges; -} - -void leveldb_compact_range( - leveldb_t* db, - const char* start_key, size_t start_key_len, - const char* limit_key, size_t limit_key_len) { - Slice a, b; - db->rep->CompactRange( - // Pass NULL Slice if corresponding "const char*" is NULL - (start_key ? (a = Slice(start_key, start_key_len), &a) : NULL), - (limit_key ? (b = Slice(limit_key, limit_key_len), &b) : NULL)); -} - -void leveldb_destroy_db( - const leveldb_options_t* options, - const char* name, - char** errptr) { - SaveError(errptr, DestroyDB(name, options->rep)); -} - -void leveldb_repair_db( - const leveldb_options_t* options, - const char* name, - char** errptr) { - SaveError(errptr, RepairDB(name, options->rep)); -} - -void leveldb_iter_destroy(leveldb_iterator_t* iter) { - delete iter->rep; - delete iter; -} - -unsigned char leveldb_iter_valid(const leveldb_iterator_t* iter) { - return iter->rep->Valid(); -} - -void leveldb_iter_seek_to_first(leveldb_iterator_t* iter) { - iter->rep->SeekToFirst(); -} - -void leveldb_iter_seek_to_last(leveldb_iterator_t* iter) { - iter->rep->SeekToLast(); -} - -void leveldb_iter_seek(leveldb_iterator_t* iter, const char* k, size_t klen) { - iter->rep->Seek(Slice(k, klen)); -} - -void leveldb_iter_next(leveldb_iterator_t* iter) { - iter->rep->Next(); -} - -void leveldb_iter_prev(leveldb_iterator_t* iter) { - iter->rep->Prev(); -} - -const char* leveldb_iter_key(const leveldb_iterator_t* iter, size_t* klen) { - Slice s = iter->rep->key(); - *klen = s.size(); - return s.data(); -} - -const char* leveldb_iter_value(const leveldb_iterator_t* iter, size_t* vlen) { - Slice s = iter->rep->value(); - *vlen = s.size(); - return s.data(); -} - -void leveldb_iter_get_error(const leveldb_iterator_t* iter, char** errptr) { - SaveError(errptr, iter->rep->status()); -} - -leveldb_writebatch_t* leveldb_writebatch_create() { - return new leveldb_writebatch_t; -} - -void leveldb_writebatch_destroy(leveldb_writebatch_t* b) { - delete b; -} - -void leveldb_writebatch_clear(leveldb_writebatch_t* b) { - b->rep.Clear(); -} - -void leveldb_writebatch_put( - leveldb_writebatch_t* b, - const char* key, size_t klen, - const char* val, size_t vlen) { - b->rep.Put(Slice(key, klen), Slice(val, vlen)); -} - -void leveldb_writebatch_delete( - leveldb_writebatch_t* b, - const char* key, size_t klen) { - b->rep.Delete(Slice(key, klen)); -} - -void leveldb_writebatch_iterate( - leveldb_writebatch_t* b, - void* state, - void (*put)(void*, const char* k, size_t klen, const char* v, size_t vlen), - void (*deleted)(void*, const char* k, size_t klen)) { - class H : public WriteBatch::Handler { - public: - void* state_; - void (*put_)(void*, const char* k, size_t klen, const char* v, size_t vlen); - void (*deleted_)(void*, const char* k, size_t klen); - virtual void Put(const Slice& key, const Slice& value) { - (*put_)(state_, key.data(), key.size(), value.data(), value.size()); - } - virtual void Delete(const Slice& key) { - (*deleted_)(state_, key.data(), key.size()); - } - }; - H handler; - handler.state_ = state; - handler.put_ = put; - handler.deleted_ = deleted; - b->rep.Iterate(&handler); -} - -leveldb_options_t* leveldb_options_create() { - return new leveldb_options_t; -} - -void leveldb_options_destroy(leveldb_options_t* options) { - delete options; -} - -void leveldb_options_set_comparator( - leveldb_options_t* opt, - leveldb_comparator_t* cmp) { - opt->rep.comparator = cmp; -} - -void leveldb_options_set_filter_policy( - leveldb_options_t* opt, - leveldb_filterpolicy_t* policy) { - opt->rep.filter_policy = policy; -} - -void leveldb_options_set_create_if_missing( - leveldb_options_t* opt, unsigned char v) { - opt->rep.create_if_missing = v; -} - -void leveldb_options_set_error_if_exists( - leveldb_options_t* opt, unsigned char v) { - opt->rep.error_if_exists = v; -} - -void leveldb_options_set_paranoid_checks( - leveldb_options_t* opt, unsigned char v) { - opt->rep.paranoid_checks = v; -} - -void leveldb_options_set_env(leveldb_options_t* opt, leveldb_env_t* env) { - opt->rep.env = (env ? env->rep : NULL); -} - -void leveldb_options_set_info_log(leveldb_options_t* opt, leveldb_logger_t* l) { - opt->rep.info_log = (l ? l->rep : NULL); -} - -void leveldb_options_set_write_buffer_size(leveldb_options_t* opt, size_t s) { - opt->rep.write_buffer_size = s; -} - -void leveldb_options_set_max_open_files(leveldb_options_t* opt, int n) { - opt->rep.max_open_files = n; -} - -void leveldb_options_set_cache(leveldb_options_t* opt, leveldb_cache_t* c) { - opt->rep.block_cache = c->rep; -} - -void leveldb_options_set_block_size(leveldb_options_t* opt, size_t s) { - opt->rep.block_size = s; -} - -void leveldb_options_set_block_restart_interval(leveldb_options_t* opt, int n) { - opt->rep.block_restart_interval = n; -} - -void leveldb_options_set_compression(leveldb_options_t* opt, int t) { - opt->rep.compression = static_cast(t); -} - -leveldb_comparator_t* leveldb_comparator_create( - void* state, - void (*destructor)(void*), - int (*compare)( - void*, - const char* a, size_t alen, - const char* b, size_t blen), - const char* (*name)(void*)) { - leveldb_comparator_t* result = new leveldb_comparator_t; - result->state_ = state; - result->destructor_ = destructor; - result->compare_ = compare; - result->name_ = name; - return result; -} - -void leveldb_comparator_destroy(leveldb_comparator_t* cmp) { - delete cmp; -} - -leveldb_filterpolicy_t* leveldb_filterpolicy_create( - void* state, - void (*destructor)(void*), - char* (*create_filter)( - void*, - const char* const* key_array, const size_t* key_length_array, - int num_keys, - size_t* filter_length), - unsigned char (*key_may_match)( - void*, - const char* key, size_t length, - const char* filter, size_t filter_length), - const char* (*name)(void*)) { - leveldb_filterpolicy_t* result = new leveldb_filterpolicy_t; - result->state_ = state; - result->destructor_ = destructor; - result->create_ = create_filter; - result->key_match_ = key_may_match; - result->name_ = name; - return result; -} - -void leveldb_filterpolicy_destroy(leveldb_filterpolicy_t* filter) { - delete filter; -} - -leveldb_filterpolicy_t* leveldb_filterpolicy_create_bloom(int bits_per_key) { - // Make a leveldb_filterpolicy_t, but override all of its methods so - // they delegate to a NewBloomFilterPolicy() instead of user - // supplied C functions. - struct Wrapper : public leveldb_filterpolicy_t { - const FilterPolicy* rep_; - ~Wrapper() { delete rep_; } - const char* Name() const { return rep_->Name(); } - void CreateFilter(const Slice* keys, int n, std::string* dst) const { - return rep_->CreateFilter(keys, n, dst); - } - bool KeyMayMatch(const Slice& key, const Slice& filter) const { - return rep_->KeyMayMatch(key, filter); - } - static void DoNothing(void*) { } - }; - Wrapper* wrapper = new Wrapper; - wrapper->rep_ = NewBloomFilterPolicy(bits_per_key); - wrapper->state_ = NULL; - wrapper->destructor_ = &Wrapper::DoNothing; - return wrapper; -} - -leveldb_readoptions_t* leveldb_readoptions_create() { - return new leveldb_readoptions_t; -} - -void leveldb_readoptions_destroy(leveldb_readoptions_t* opt) { - delete opt; -} - -void leveldb_readoptions_set_verify_checksums( - leveldb_readoptions_t* opt, - unsigned char v) { - opt->rep.verify_checksums = v; -} - -void leveldb_readoptions_set_fill_cache( - leveldb_readoptions_t* opt, unsigned char v) { - opt->rep.fill_cache = v; -} - -void leveldb_readoptions_set_snapshot( - leveldb_readoptions_t* opt, - const leveldb_snapshot_t* snap) { - opt->rep.snapshot = (snap ? snap->rep : NULL); -} - -leveldb_writeoptions_t* leveldb_writeoptions_create() { - return new leveldb_writeoptions_t; -} - -void leveldb_writeoptions_destroy(leveldb_writeoptions_t* opt) { - delete opt; -} - -void leveldb_writeoptions_set_sync( - leveldb_writeoptions_t* opt, unsigned char v) { - opt->rep.sync = v; -} - -leveldb_cache_t* leveldb_cache_create_lru(size_t capacity) { - leveldb_cache_t* c = new leveldb_cache_t; - c->rep = NewLRUCache(capacity); - return c; -} - -void leveldb_cache_destroy(leveldb_cache_t* cache) { - delete cache->rep; - delete cache; -} - -leveldb_env_t* leveldb_create_default_env() { - leveldb_env_t* result = new leveldb_env_t; - result->rep = Env::Default(); - result->is_default = true; - return result; -} - -void leveldb_env_destroy(leveldb_env_t* env) { - if (!env->is_default) delete env->rep; - delete env; -} - -} // end extern "C" diff --git a/leveldb/db/c_test.c b/leveldb/db/c_test.c deleted file mode 100644 index 9792447..0000000 --- a/leveldb/db/c_test.c +++ /dev/null @@ -1,381 +0,0 @@ -/* Copyright (c) 2011 The LevelDB Authors. All rights reserved. - Use of this source code is governed by a BSD-style license that can be - found in the LICENSE file. See the AUTHORS file for names of contributors. */ - -#include "leveldb/c.h" - -#include -#include -#include -#include -#include -#include - -const char* phase = ""; -static char dbname[200]; - -static void StartPhase(const char* name) { - fprintf(stderr, "=== Test %s\n", name); - phase = name; -} - -static const char* GetTempDir(void) { - const char* ret = getenv("TEST_TMPDIR"); - if (ret == NULL || ret[0] == '\0') - ret = "/tmp"; - return ret; -} - -#define CheckNoError(err) \ - if ((err) != NULL) { \ - fprintf(stderr, "%s:%d: %s: %s\n", __FILE__, __LINE__, phase, (err)); \ - abort(); \ - } - -#define CheckCondition(cond) \ - if (!(cond)) { \ - fprintf(stderr, "%s:%d: %s: %s\n", __FILE__, __LINE__, phase, #cond); \ - abort(); \ - } - -static void CheckEqual(const char* expected, const char* v, size_t n) { - if (expected == NULL && v == NULL) { - // ok - } else if (expected != NULL && v != NULL && n == strlen(expected) && - memcmp(expected, v, n) == 0) { - // ok - return; - } else { - fprintf(stderr, "%s: expected '%s', got '%s'\n", - phase, - (expected ? expected : "(null)"), - (v ? v : "(null")); - abort(); - } -} - -static void Free(char** ptr) { - if (*ptr) { - free(*ptr); - *ptr = NULL; - } -} - -static void CheckGet( - leveldb_t* db, - const leveldb_readoptions_t* options, - const char* key, - const char* expected) { - char* err = NULL; - size_t val_len; - char* val; - val = leveldb_get(db, options, key, strlen(key), &val_len, &err); - CheckNoError(err); - CheckEqual(expected, val, val_len); - Free(&val); -} - -static void CheckIter(leveldb_iterator_t* iter, - const char* key, const char* val) { - size_t len; - const char* str; - str = leveldb_iter_key(iter, &len); - CheckEqual(key, str, len); - str = leveldb_iter_value(iter, &len); - CheckEqual(val, str, len); -} - -// Callback from leveldb_writebatch_iterate() -static void CheckPut(void* ptr, - const char* k, size_t klen, - const char* v, size_t vlen) { - int* state = (int*) ptr; - CheckCondition(*state < 2); - switch (*state) { - case 0: - CheckEqual("bar", k, klen); - CheckEqual("b", v, vlen); - break; - case 1: - CheckEqual("box", k, klen); - CheckEqual("c", v, vlen); - break; - } - (*state)++; -} - -// Callback from leveldb_writebatch_iterate() -static void CheckDel(void* ptr, const char* k, size_t klen) { - int* state = (int*) ptr; - CheckCondition(*state == 2); - CheckEqual("bar", k, klen); - (*state)++; -} - -static void CmpDestroy(void* arg) { } - -static int CmpCompare(void* arg, const char* a, size_t alen, - const char* b, size_t blen) { - int n = (alen < blen) ? alen : blen; - int r = memcmp(a, b, n); - if (r == 0) { - if (alen < blen) r = -1; - else if (alen > blen) r = +1; - } - return r; -} - -static const char* CmpName(void* arg) { - return "foo"; -} - -// Custom filter policy -static unsigned char fake_filter_result = 1; -static void FilterDestroy(void* arg) { } -static const char* FilterName(void* arg) { - return "TestFilter"; -} -static char* FilterCreate( - void* arg, - const char* const* key_array, const size_t* key_length_array, - int num_keys, - size_t* filter_length) { - *filter_length = 4; - char* result = malloc(4); - memcpy(result, "fake", 4); - return result; -} -unsigned char FilterKeyMatch( - void* arg, - const char* key, size_t length, - const char* filter, size_t filter_length) { - CheckCondition(filter_length == 4); - CheckCondition(memcmp(filter, "fake", 4) == 0); - return fake_filter_result; -} - -int main(int argc, char** argv) { - leveldb_t* db; - leveldb_comparator_t* cmp; - leveldb_cache_t* cache; - leveldb_env_t* env; - leveldb_options_t* options; - leveldb_readoptions_t* roptions; - leveldb_writeoptions_t* woptions; - char* err = NULL; - int run = -1; - - snprintf(dbname, sizeof(dbname), - "%s/leveldb_c_test-%d", - GetTempDir(), - ((int) geteuid())); - - StartPhase("create_objects"); - cmp = leveldb_comparator_create(NULL, CmpDestroy, CmpCompare, CmpName); - env = leveldb_create_default_env(); - cache = leveldb_cache_create_lru(100000); - - options = leveldb_options_create(); - leveldb_options_set_comparator(options, cmp); - leveldb_options_set_error_if_exists(options, 1); - leveldb_options_set_cache(options, cache); - leveldb_options_set_env(options, env); - leveldb_options_set_info_log(options, NULL); - leveldb_options_set_write_buffer_size(options, 100000); - leveldb_options_set_paranoid_checks(options, 1); - leveldb_options_set_max_open_files(options, 10); - leveldb_options_set_block_size(options, 1024); - leveldb_options_set_block_restart_interval(options, 8); - leveldb_options_set_compression(options, leveldb_no_compression); - - roptions = leveldb_readoptions_create(); - leveldb_readoptions_set_verify_checksums(roptions, 1); - leveldb_readoptions_set_fill_cache(roptions, 0); - - woptions = leveldb_writeoptions_create(); - leveldb_writeoptions_set_sync(woptions, 1); - - StartPhase("destroy"); - leveldb_destroy_db(options, dbname, &err); - Free(&err); - - StartPhase("open_error"); - db = leveldb_open(options, dbname, &err); - CheckCondition(err != NULL); - Free(&err); - - StartPhase("open"); - leveldb_options_set_create_if_missing(options, 1); - db = leveldb_open(options, dbname, &err); - CheckNoError(err); - CheckGet(db, roptions, "foo", NULL); - - StartPhase("put"); - leveldb_put(db, woptions, "foo", 3, "hello", 5, &err); - CheckNoError(err); - CheckGet(db, roptions, "foo", "hello"); - - StartPhase("compactall"); - leveldb_compact_range(db, NULL, 0, NULL, 0); - CheckGet(db, roptions, "foo", "hello"); - - StartPhase("compactrange"); - leveldb_compact_range(db, "a", 1, "z", 1); - CheckGet(db, roptions, "foo", "hello"); - - StartPhase("writebatch"); - { - leveldb_writebatch_t* wb = leveldb_writebatch_create(); - leveldb_writebatch_put(wb, "foo", 3, "a", 1); - leveldb_writebatch_clear(wb); - leveldb_writebatch_put(wb, "bar", 3, "b", 1); - leveldb_writebatch_put(wb, "box", 3, "c", 1); - leveldb_writebatch_delete(wb, "bar", 3); - leveldb_write(db, woptions, wb, &err); - CheckNoError(err); - CheckGet(db, roptions, "foo", "hello"); - CheckGet(db, roptions, "bar", NULL); - CheckGet(db, roptions, "box", "c"); - int pos = 0; - leveldb_writebatch_iterate(wb, &pos, CheckPut, CheckDel); - CheckCondition(pos == 3); - leveldb_writebatch_destroy(wb); - } - - StartPhase("iter"); - { - leveldb_iterator_t* iter = leveldb_create_iterator(db, roptions); - CheckCondition(!leveldb_iter_valid(iter)); - leveldb_iter_seek_to_first(iter); - CheckCondition(leveldb_iter_valid(iter)); - CheckIter(iter, "box", "c"); - leveldb_iter_next(iter); - CheckIter(iter, "foo", "hello"); - leveldb_iter_prev(iter); - CheckIter(iter, "box", "c"); - leveldb_iter_prev(iter); - CheckCondition(!leveldb_iter_valid(iter)); - leveldb_iter_seek_to_last(iter); - CheckIter(iter, "foo", "hello"); - leveldb_iter_seek(iter, "b", 1); - CheckIter(iter, "box", "c"); - leveldb_iter_get_error(iter, &err); - CheckNoError(err); - leveldb_iter_destroy(iter); - } - - StartPhase("approximate_sizes"); - { - int i; - int n = 20000; - char keybuf[100]; - char valbuf[100]; - uint64_t sizes[2]; - const char* start[2] = { "a", "k00000000000000010000" }; - size_t start_len[2] = { 1, 21 }; - const char* limit[2] = { "k00000000000000010000", "z" }; - size_t limit_len[2] = { 21, 1 }; - leveldb_writeoptions_set_sync(woptions, 0); - for (i = 0; i < n; i++) { - snprintf(keybuf, sizeof(keybuf), "k%020d", i); - snprintf(valbuf, sizeof(valbuf), "v%020d", i); - leveldb_put(db, woptions, keybuf, strlen(keybuf), valbuf, strlen(valbuf), - &err); - CheckNoError(err); - } - leveldb_approximate_sizes(db, 2, start, start_len, limit, limit_len, sizes); - CheckCondition(sizes[0] > 0); - CheckCondition(sizes[1] > 0); - } - - StartPhase("property"); - { - char* prop = leveldb_property_value(db, "nosuchprop"); - CheckCondition(prop == NULL); - prop = leveldb_property_value(db, "leveldb.stats"); - CheckCondition(prop != NULL); - Free(&prop); - } - - StartPhase("snapshot"); - { - const leveldb_snapshot_t* snap; - snap = leveldb_create_snapshot(db); - leveldb_delete(db, woptions, "foo", 3, &err); - CheckNoError(err); - leveldb_readoptions_set_snapshot(roptions, snap); - CheckGet(db, roptions, "foo", "hello"); - leveldb_readoptions_set_snapshot(roptions, NULL); - CheckGet(db, roptions, "foo", NULL); - leveldb_release_snapshot(db, snap); - } - - StartPhase("repair"); - { - leveldb_close(db); - leveldb_options_set_create_if_missing(options, 0); - leveldb_options_set_error_if_exists(options, 0); - leveldb_repair_db(options, dbname, &err); - CheckNoError(err); - db = leveldb_open(options, dbname, &err); - CheckNoError(err); - CheckGet(db, roptions, "foo", NULL); - CheckGet(db, roptions, "bar", NULL); - CheckGet(db, roptions, "box", "c"); - leveldb_options_set_create_if_missing(options, 1); - leveldb_options_set_error_if_exists(options, 1); - } - - StartPhase("filter"); - for (run = 0; run < 2; run++) { - // First run uses custom filter, second run uses bloom filter - CheckNoError(err); - leveldb_filterpolicy_t* policy; - if (run == 0) { - policy = leveldb_filterpolicy_create( - NULL, FilterDestroy, FilterCreate, FilterKeyMatch, FilterName); - } else { - policy = leveldb_filterpolicy_create_bloom(10); - } - - // Create new database - leveldb_close(db); - leveldb_destroy_db(options, dbname, &err); - leveldb_options_set_filter_policy(options, policy); - db = leveldb_open(options, dbname, &err); - CheckNoError(err); - leveldb_put(db, woptions, "foo", 3, "foovalue", 8, &err); - CheckNoError(err); - leveldb_put(db, woptions, "bar", 3, "barvalue", 8, &err); - CheckNoError(err); - leveldb_compact_range(db, NULL, 0, NULL, 0); - - fake_filter_result = 1; - CheckGet(db, roptions, "foo", "foovalue"); - CheckGet(db, roptions, "bar", "barvalue"); - if (phase == 0) { - // Must not find value when custom filter returns false - fake_filter_result = 0; - CheckGet(db, roptions, "foo", NULL); - CheckGet(db, roptions, "bar", NULL); - fake_filter_result = 1; - - CheckGet(db, roptions, "foo", "foovalue"); - CheckGet(db, roptions, "bar", "barvalue"); - } - leveldb_options_set_filter_policy(options, NULL); - leveldb_filterpolicy_destroy(policy); - } - - StartPhase("cleanup"); - leveldb_close(db); - leveldb_options_destroy(options); - leveldb_readoptions_destroy(roptions); - leveldb_writeoptions_destroy(woptions); - leveldb_cache_destroy(cache); - leveldb_comparator_destroy(cmp); - leveldb_env_destroy(env); - - fprintf(stderr, "PASS\n"); - return 0; -} diff --git a/leveldb/db/corruption_test.cc b/leveldb/db/corruption_test.cc deleted file mode 100644 index 31b2d5f..0000000 --- a/leveldb/db/corruption_test.cc +++ /dev/null @@ -1,359 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "leveldb/db.h" - -#include -#include -#include -#include -#include "leveldb/cache.h" -#include "leveldb/env.h" -#include "leveldb/table.h" -#include "leveldb/write_batch.h" -#include "db/db_impl.h" -#include "db/filename.h" -#include "db/log_format.h" -#include "db/version_set.h" -#include "util/logging.h" -#include "util/testharness.h" -#include "util/testutil.h" - -namespace leveldb { - -static const int kValueSize = 1000; - -class CorruptionTest { - public: - test::ErrorEnv env_; - std::string dbname_; - Cache* tiny_cache_; - Options options_; - DB* db_; - - CorruptionTest() { - tiny_cache_ = NewLRUCache(100); - options_.env = &env_; - dbname_ = test::TmpDir() + "/db_test"; - DestroyDB(dbname_, options_); - - db_ = NULL; - options_.create_if_missing = true; - Reopen(); - options_.create_if_missing = false; - } - - ~CorruptionTest() { - delete db_; - DestroyDB(dbname_, Options()); - delete tiny_cache_; - } - - Status TryReopen(Options* options = NULL) { - delete db_; - db_ = NULL; - Options opt = (options ? *options : options_); - opt.env = &env_; - opt.block_cache = tiny_cache_; - return DB::Open(opt, dbname_, &db_); - } - - void Reopen(Options* options = NULL) { - ASSERT_OK(TryReopen(options)); - } - - void RepairDB() { - delete db_; - db_ = NULL; - ASSERT_OK(::leveldb::RepairDB(dbname_, options_)); - } - - void Build(int n) { - std::string key_space, value_space; - WriteBatch batch; - for (int i = 0; i < n; i++) { - //if ((i % 100) == 0) fprintf(stderr, "@ %d of %d\n", i, n); - Slice key = Key(i, &key_space); - batch.Clear(); - batch.Put(key, Value(i, &value_space)); - ASSERT_OK(db_->Write(WriteOptions(), &batch)); - } - } - - void Check(int min_expected, int max_expected) { - int next_expected = 0; - int missed = 0; - int bad_keys = 0; - int bad_values = 0; - int correct = 0; - std::string value_space; - Iterator* iter = db_->NewIterator(ReadOptions()); - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - uint64_t key; - Slice in(iter->key()); - if (!ConsumeDecimalNumber(&in, &key) || - !in.empty() || - key < next_expected) { - bad_keys++; - continue; - } - missed += (key - next_expected); - next_expected = key + 1; - if (iter->value() != Value(key, &value_space)) { - bad_values++; - } else { - correct++; - } - } - delete iter; - - fprintf(stderr, - "expected=%d..%d; got=%d; bad_keys=%d; bad_values=%d; missed=%d\n", - min_expected, max_expected, correct, bad_keys, bad_values, missed); - ASSERT_LE(min_expected, correct); - ASSERT_GE(max_expected, correct); - } - - void Corrupt(FileType filetype, int offset, int bytes_to_corrupt) { - // Pick file to corrupt - std::vector filenames; - ASSERT_OK(env_.GetChildren(dbname_, &filenames)); - uint64_t number; - FileType type; - std::string fname; - int picked_number = -1; - for (int i = 0; i < filenames.size(); i++) { - if (ParseFileName(filenames[i], &number, &type) && - type == filetype && - int(number) > picked_number) { // Pick latest file - fname = dbname_ + "/" + filenames[i]; - picked_number = number; - } - } - ASSERT_TRUE(!fname.empty()) << filetype; - - struct stat sbuf; - if (stat(fname.c_str(), &sbuf) != 0) { - const char* msg = strerror(errno); - ASSERT_TRUE(false) << fname << ": " << msg; - } - - if (offset < 0) { - // Relative to end of file; make it absolute - if (-offset > sbuf.st_size) { - offset = 0; - } else { - offset = sbuf.st_size + offset; - } - } - if (offset > sbuf.st_size) { - offset = sbuf.st_size; - } - if (offset + bytes_to_corrupt > sbuf.st_size) { - bytes_to_corrupt = sbuf.st_size - offset; - } - - // Do it - std::string contents; - Status s = ReadFileToString(Env::Default(), fname, &contents); - ASSERT_TRUE(s.ok()) << s.ToString(); - for (int i = 0; i < bytes_to_corrupt; i++) { - contents[i + offset] ^= 0x80; - } - s = WriteStringToFile(Env::Default(), contents, fname); - ASSERT_TRUE(s.ok()) << s.ToString(); - } - - int Property(const std::string& name) { - std::string property; - int result; - if (db_->GetProperty(name, &property) && - sscanf(property.c_str(), "%d", &result) == 1) { - return result; - } else { - return -1; - } - } - - // Return the ith key - Slice Key(int i, std::string* storage) { - char buf[100]; - snprintf(buf, sizeof(buf), "%016d", i); - storage->assign(buf, strlen(buf)); - return Slice(*storage); - } - - // Return the value to associate with the specified key - Slice Value(int k, std::string* storage) { - Random r(k); - return test::RandomString(&r, kValueSize, storage); - } -}; - -TEST(CorruptionTest, Recovery) { - Build(100); - Check(100, 100); - Corrupt(kLogFile, 19, 1); // WriteBatch tag for first record - Corrupt(kLogFile, log::kBlockSize + 1000, 1); // Somewhere in second block - Reopen(); - - // The 64 records in the first two log blocks are completely lost. - Check(36, 36); -} - -TEST(CorruptionTest, RecoverWriteError) { - env_.writable_file_error_ = true; - Status s = TryReopen(); - ASSERT_TRUE(!s.ok()); -} - -TEST(CorruptionTest, NewFileErrorDuringWrite) { - // Do enough writing to force minor compaction - env_.writable_file_error_ = true; - const int num = 3 + (Options().write_buffer_size / kValueSize); - std::string value_storage; - Status s; - for (int i = 0; s.ok() && i < num; i++) { - WriteBatch batch; - batch.Put("a", Value(100, &value_storage)); - s = db_->Write(WriteOptions(), &batch); - } - ASSERT_TRUE(!s.ok()); - ASSERT_GE(env_.num_writable_file_errors_, 1); - env_.writable_file_error_ = false; - Reopen(); -} - -TEST(CorruptionTest, TableFile) { - Build(100); - DBImpl* dbi = reinterpret_cast(db_); - dbi->TEST_CompactMemTable(); - dbi->TEST_CompactRange(0, NULL, NULL); - dbi->TEST_CompactRange(1, NULL, NULL); - - Corrupt(kTableFile, 100, 1); - Check(99, 99); -} - -TEST(CorruptionTest, TableFileIndexData) { - Build(10000); // Enough to build multiple Tables - DBImpl* dbi = reinterpret_cast(db_); - dbi->TEST_CompactMemTable(); - - Corrupt(kTableFile, -2000, 500); - Reopen(); - Check(5000, 9999); -} - -TEST(CorruptionTest, MissingDescriptor) { - Build(1000); - RepairDB(); - Reopen(); - Check(1000, 1000); -} - -TEST(CorruptionTest, SequenceNumberRecovery) { - ASSERT_OK(db_->Put(WriteOptions(), "foo", "v1")); - ASSERT_OK(db_->Put(WriteOptions(), "foo", "v2")); - ASSERT_OK(db_->Put(WriteOptions(), "foo", "v3")); - ASSERT_OK(db_->Put(WriteOptions(), "foo", "v4")); - ASSERT_OK(db_->Put(WriteOptions(), "foo", "v5")); - RepairDB(); - Reopen(); - std::string v; - ASSERT_OK(db_->Get(ReadOptions(), "foo", &v)); - ASSERT_EQ("v5", v); - // Write something. If sequence number was not recovered properly, - // it will be hidden by an earlier write. - ASSERT_OK(db_->Put(WriteOptions(), "foo", "v6")); - ASSERT_OK(db_->Get(ReadOptions(), "foo", &v)); - ASSERT_EQ("v6", v); - Reopen(); - ASSERT_OK(db_->Get(ReadOptions(), "foo", &v)); - ASSERT_EQ("v6", v); -} - -TEST(CorruptionTest, CorruptedDescriptor) { - ASSERT_OK(db_->Put(WriteOptions(), "foo", "hello")); - DBImpl* dbi = reinterpret_cast(db_); - dbi->TEST_CompactMemTable(); - dbi->TEST_CompactRange(0, NULL, NULL); - - Corrupt(kDescriptorFile, 0, 1000); - Status s = TryReopen(); - ASSERT_TRUE(!s.ok()); - - RepairDB(); - Reopen(); - std::string v; - ASSERT_OK(db_->Get(ReadOptions(), "foo", &v)); - ASSERT_EQ("hello", v); -} - -TEST(CorruptionTest, CompactionInputError) { - Build(10); - DBImpl* dbi = reinterpret_cast(db_); - dbi->TEST_CompactMemTable(); - const int last = config::kMaxMemCompactLevel; - ASSERT_EQ(1, Property("leveldb.num-files-at-level" + NumberToString(last))); - - Corrupt(kTableFile, 100, 1); - Check(9, 9); - - // Force compactions by writing lots of values - Build(10000); - Check(10000, 10000); -} - -TEST(CorruptionTest, CompactionInputErrorParanoid) { - Options options; - options.paranoid_checks = true; - options.write_buffer_size = 1048576; - Reopen(&options); - DBImpl* dbi = reinterpret_cast(db_); - - // Fill levels >= 1 so memtable compaction outputs to level 1 - for (int level = 1; level < config::kNumLevels; level++) { - dbi->Put(WriteOptions(), "", "begin"); - dbi->Put(WriteOptions(), "~", "end"); - dbi->TEST_CompactMemTable(); - } - - Build(10); - dbi->TEST_CompactMemTable(); - ASSERT_EQ(1, Property("leveldb.num-files-at-level0")); - - Corrupt(kTableFile, 100, 1); - Check(9, 9); - - // Write must eventually fail because of corrupted table - Status s; - std::string tmp1, tmp2; - for (int i = 0; i < 10000 && s.ok(); i++) { - s = db_->Put(WriteOptions(), Key(i, &tmp1), Value(i, &tmp2)); - } - ASSERT_TRUE(!s.ok()) << "write did not fail in corrupted paranoid db"; -} - -TEST(CorruptionTest, UnrelatedKeys) { - Build(10); - DBImpl* dbi = reinterpret_cast(db_); - dbi->TEST_CompactMemTable(); - Corrupt(kTableFile, 100, 1); - - std::string tmp1, tmp2; - ASSERT_OK(db_->Put(WriteOptions(), Key(1000, &tmp1), Value(1000, &tmp2))); - std::string v; - ASSERT_OK(db_->Get(ReadOptions(), Key(1000, &tmp1), &v)); - ASSERT_EQ(Value(1000, &tmp2).ToString(), v); - dbi->TEST_CompactMemTable(); - ASSERT_OK(db_->Get(ReadOptions(), Key(1000, &tmp1), &v)); - ASSERT_EQ(Value(1000, &tmp2).ToString(), v); -} - -} // namespace leveldb - -int main(int argc, char** argv) { - return leveldb::test::RunAllTests(); -} diff --git a/leveldb/db/db_bench.cc b/leveldb/db/db_bench.cc deleted file mode 100644 index 21d3e25..0000000 --- a/leveldb/db/db_bench.cc +++ /dev/null @@ -1,978 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include -#include -#include -#include "db/db_impl.h" -#include "db/version_set.h" -#include "leveldb/cache.h" -#include "leveldb/db.h" -#include "leveldb/env.h" -#include "leveldb/write_batch.h" -#include "port/port.h" -#include "util/crc32c.h" -#include "util/histogram.h" -#include "util/mutexlock.h" -#include "util/random.h" -#include "util/testutil.h" - -// Comma-separated list of operations to run in the specified order -// Actual benchmarks: -// fillseq -- write N values in sequential key order in async mode -// fillrandom -- write N values in random key order in async mode -// overwrite -- overwrite N values in random key order in async mode -// fillsync -- write N/100 values in random key order in sync mode -// fill100K -- write N/1000 100K values in random order in async mode -// deleteseq -- delete N keys in sequential order -// deleterandom -- delete N keys in random order -// readseq -- read N times sequentially -// readreverse -- read N times in reverse order -// readrandom -- read N times in random order -// readmissing -- read N missing keys in random order -// readhot -- read N times in random order from 1% section of DB -// seekrandom -- N random seeks -// crc32c -- repeated crc32c of 4K of data -// acquireload -- load N*1000 times -// Meta operations: -// compact -- Compact the entire DB -// stats -- Print DB stats -// sstables -- Print sstable info -// heapprofile -- Dump a heap profile (if supported by this port) -static const char* FLAGS_benchmarks = - "fillseq," - "fillsync," - "fillrandom," - "overwrite," - "readrandom," - "readrandom," // Extra run to allow previous compactions to quiesce - "readseq," - "readreverse," - "compact," - "readrandom," - "readseq," - "readreverse," - "fill100K," - "crc32c," - "snappycomp," - "snappyuncomp," - "acquireload," - ; - -// Number of key/values to place in database -static int FLAGS_num = 1000000; - -// Number of read operations to do. If negative, do FLAGS_num reads. -static int FLAGS_reads = -1; - -// Number of concurrent threads to run. -static int FLAGS_threads = 1; - -// Size of each value -static int FLAGS_value_size = 100; - -// Arrange to generate values that shrink to this fraction of -// their original size after compression -static double FLAGS_compression_ratio = 0.5; - -// Print histogram of operation timings -static bool FLAGS_histogram = false; - -// Number of bytes to buffer in memtable before compacting -// (initialized to default value by "main") -static int FLAGS_write_buffer_size = 0; - -// Number of bytes to use as a cache of uncompressed data. -// Negative means use default settings. -static int FLAGS_cache_size = -1; - -// Maximum number of files to keep open at the same time (use default if == 0) -static int FLAGS_open_files = 0; - -// Bloom filter bits per key. -// Negative means use default settings. -static int FLAGS_bloom_bits = -1; - -// If true, do not destroy the existing database. If you set this -// flag and also specify a benchmark that wants a fresh database, that -// benchmark will fail. -static bool FLAGS_use_existing_db = false; - -// Use the db with the following name. -static const char* FLAGS_db = NULL; - -namespace leveldb { - -namespace { - -// Helper for quickly generating random data. -class RandomGenerator { - private: - std::string data_; - int pos_; - - public: - RandomGenerator() { - // We use a limited amount of data over and over again and ensure - // that it is larger than the compression window (32KB), and also - // large enough to serve all typical value sizes we want to write. - Random rnd(301); - std::string piece; - while (data_.size() < 1048576) { - // Add a short fragment that is as compressible as specified - // by FLAGS_compression_ratio. - test::CompressibleString(&rnd, FLAGS_compression_ratio, 100, &piece); - data_.append(piece); - } - pos_ = 0; - } - - Slice Generate(int len) { - if (pos_ + len > data_.size()) { - pos_ = 0; - assert(len < data_.size()); - } - pos_ += len; - return Slice(data_.data() + pos_ - len, len); - } -}; - -static Slice TrimSpace(Slice s) { - int start = 0; - while (start < s.size() && isspace(s[start])) { - start++; - } - int limit = s.size(); - while (limit > start && isspace(s[limit-1])) { - limit--; - } - return Slice(s.data() + start, limit - start); -} - -static void AppendWithSpace(std::string* str, Slice msg) { - if (msg.empty()) return; - if (!str->empty()) { - str->push_back(' '); - } - str->append(msg.data(), msg.size()); -} - -class Stats { - private: - double start_; - double finish_; - double seconds_; - int done_; - int next_report_; - int64_t bytes_; - double last_op_finish_; - Histogram hist_; - std::string message_; - - public: - Stats() { Start(); } - - void Start() { - next_report_ = 100; - last_op_finish_ = start_; - hist_.Clear(); - done_ = 0; - bytes_ = 0; - seconds_ = 0; - start_ = Env::Default()->NowMicros(); - finish_ = start_; - message_.clear(); - } - - void Merge(const Stats& other) { - hist_.Merge(other.hist_); - done_ += other.done_; - bytes_ += other.bytes_; - seconds_ += other.seconds_; - if (other.start_ < start_) start_ = other.start_; - if (other.finish_ > finish_) finish_ = other.finish_; - - // Just keep the messages from one thread - if (message_.empty()) message_ = other.message_; - } - - void Stop() { - finish_ = Env::Default()->NowMicros(); - seconds_ = (finish_ - start_) * 1e-6; - } - - void AddMessage(Slice msg) { - AppendWithSpace(&message_, msg); - } - - void FinishedSingleOp() { - if (FLAGS_histogram) { - double now = Env::Default()->NowMicros(); - double micros = now - last_op_finish_; - hist_.Add(micros); - if (micros > 20000) { - fprintf(stderr, "long op: %.1f micros%30s\r", micros, ""); - fflush(stderr); - } - last_op_finish_ = now; - } - - done_++; - if (done_ >= next_report_) { - if (next_report_ < 1000) next_report_ += 100; - else if (next_report_ < 5000) next_report_ += 500; - else if (next_report_ < 10000) next_report_ += 1000; - else if (next_report_ < 50000) next_report_ += 5000; - else if (next_report_ < 100000) next_report_ += 10000; - else if (next_report_ < 500000) next_report_ += 50000; - else next_report_ += 100000; - fprintf(stderr, "... finished %d ops%30s\r", done_, ""); - fflush(stderr); - } - } - - void AddBytes(int64_t n) { - bytes_ += n; - } - - void Report(const Slice& name) { - // Pretend at least one op was done in case we are running a benchmark - // that does not call FinishedSingleOp(). - if (done_ < 1) done_ = 1; - - std::string extra; - if (bytes_ > 0) { - // Rate is computed on actual elapsed time, not the sum of per-thread - // elapsed times. - double elapsed = (finish_ - start_) * 1e-6; - char rate[100]; - snprintf(rate, sizeof(rate), "%6.1f MB/s", - (bytes_ / 1048576.0) / elapsed); - extra = rate; - } - AppendWithSpace(&extra, message_); - - fprintf(stdout, "%-12s : %11.3f micros/op;%s%s\n", - name.ToString().c_str(), - seconds_ * 1e6 / done_, - (extra.empty() ? "" : " "), - extra.c_str()); - if (FLAGS_histogram) { - fprintf(stdout, "Microseconds per op:\n%s\n", hist_.ToString().c_str()); - } - fflush(stdout); - } -}; - -// State shared by all concurrent executions of the same benchmark. -struct SharedState { - port::Mutex mu; - port::CondVar cv; - int total; - - // Each thread goes through the following states: - // (1) initializing - // (2) waiting for others to be initialized - // (3) running - // (4) done - - int num_initialized; - int num_done; - bool start; - - SharedState() : cv(&mu) { } -}; - -// Per-thread state for concurrent executions of the same benchmark. -struct ThreadState { - int tid; // 0..n-1 when running in n threads - Random rand; // Has different seeds for different threads - Stats stats; - SharedState* shared; - - ThreadState(int index) - : tid(index), - rand(1000 + index) { - } -}; - -} // namespace - -class Benchmark { - private: - Cache* cache_; - const FilterPolicy* filter_policy_; - DB* db_; - int num_; - int value_size_; - int entries_per_batch_; - WriteOptions write_options_; - int reads_; - int heap_counter_; - - void PrintHeader() { - const int kKeySize = 16; - PrintEnvironment(); - fprintf(stdout, "Keys: %d bytes each\n", kKeySize); - fprintf(stdout, "Values: %d bytes each (%d bytes after compression)\n", - FLAGS_value_size, - static_cast(FLAGS_value_size * FLAGS_compression_ratio + 0.5)); - fprintf(stdout, "Entries: %d\n", num_); - fprintf(stdout, "RawSize: %.1f MB (estimated)\n", - ((static_cast(kKeySize + FLAGS_value_size) * num_) - / 1048576.0)); - fprintf(stdout, "FileSize: %.1f MB (estimated)\n", - (((kKeySize + FLAGS_value_size * FLAGS_compression_ratio) * num_) - / 1048576.0)); - PrintWarnings(); - fprintf(stdout, "------------------------------------------------\n"); - } - - void PrintWarnings() { -#if defined(__GNUC__) && !defined(__OPTIMIZE__) - fprintf(stdout, - "WARNING: Optimization is disabled: benchmarks unnecessarily slow\n" - ); -#endif -#ifndef NDEBUG - fprintf(stdout, - "WARNING: Assertions are enabled; benchmarks unnecessarily slow\n"); -#endif - - // See if snappy is working by attempting to compress a compressible string - const char text[] = "yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy"; - std::string compressed; - if (!port::Snappy_Compress(text, sizeof(text), &compressed)) { - fprintf(stdout, "WARNING: Snappy compression is not enabled\n"); - } else if (compressed.size() >= sizeof(text)) { - fprintf(stdout, "WARNING: Snappy compression is not effective\n"); - } - } - - void PrintEnvironment() { - fprintf(stderr, "LevelDB: version %d.%d\n", - kMajorVersion, kMinorVersion); - -#if defined(__linux) - time_t now = time(NULL); - fprintf(stderr, "Date: %s", ctime(&now)); // ctime() adds newline - - FILE* cpuinfo = fopen("/proc/cpuinfo", "r"); - if (cpuinfo != NULL) { - char line[1000]; - int num_cpus = 0; - std::string cpu_type; - std::string cache_size; - while (fgets(line, sizeof(line), cpuinfo) != NULL) { - const char* sep = strchr(line, ':'); - if (sep == NULL) { - continue; - } - Slice key = TrimSpace(Slice(line, sep - 1 - line)); - Slice val = TrimSpace(Slice(sep + 1)); - if (key == "model name") { - ++num_cpus; - cpu_type = val.ToString(); - } else if (key == "cache size") { - cache_size = val.ToString(); - } - } - fclose(cpuinfo); - fprintf(stderr, "CPU: %d * %s\n", num_cpus, cpu_type.c_str()); - fprintf(stderr, "CPUCache: %s\n", cache_size.c_str()); - } -#endif - } - - public: - Benchmark() - : cache_(FLAGS_cache_size >= 0 ? NewLRUCache(FLAGS_cache_size) : NULL), - filter_policy_(FLAGS_bloom_bits >= 0 - ? NewBloomFilterPolicy(FLAGS_bloom_bits) - : NULL), - db_(NULL), - num_(FLAGS_num), - value_size_(FLAGS_value_size), - entries_per_batch_(1), - reads_(FLAGS_reads < 0 ? FLAGS_num : FLAGS_reads), - heap_counter_(0) { - std::vector files; - Env::Default()->GetChildren(FLAGS_db, &files); - for (int i = 0; i < files.size(); i++) { - if (Slice(files[i]).starts_with("heap-")) { - Env::Default()->DeleteFile(std::string(FLAGS_db) + "/" + files[i]); - } - } - if (!FLAGS_use_existing_db) { - DestroyDB(FLAGS_db, Options()); - } - } - - ~Benchmark() { - delete db_; - delete cache_; - delete filter_policy_; - } - - void Run() { - PrintHeader(); - Open(); - - const char* benchmarks = FLAGS_benchmarks; - while (benchmarks != NULL) { - const char* sep = strchr(benchmarks, ','); - Slice name; - if (sep == NULL) { - name = benchmarks; - benchmarks = NULL; - } else { - name = Slice(benchmarks, sep - benchmarks); - benchmarks = sep + 1; - } - - // Reset parameters that may be overriddden bwlow - num_ = FLAGS_num; - reads_ = (FLAGS_reads < 0 ? FLAGS_num : FLAGS_reads); - value_size_ = FLAGS_value_size; - entries_per_batch_ = 1; - write_options_ = WriteOptions(); - - void (Benchmark::*method)(ThreadState*) = NULL; - bool fresh_db = false; - int num_threads = FLAGS_threads; - - if (name == Slice("fillseq")) { - fresh_db = true; - method = &Benchmark::WriteSeq; - } else if (name == Slice("fillbatch")) { - fresh_db = true; - entries_per_batch_ = 1000; - method = &Benchmark::WriteSeq; - } else if (name == Slice("fillrandom")) { - fresh_db = true; - method = &Benchmark::WriteRandom; - } else if (name == Slice("overwrite")) { - fresh_db = false; - method = &Benchmark::WriteRandom; - } else if (name == Slice("fillsync")) { - fresh_db = true; - num_ /= 1000; - write_options_.sync = true; - method = &Benchmark::WriteRandom; - } else if (name == Slice("fill100K")) { - fresh_db = true; - num_ /= 1000; - value_size_ = 100 * 1000; - method = &Benchmark::WriteRandom; - } else if (name == Slice("readseq")) { - method = &Benchmark::ReadSequential; - } else if (name == Slice("readreverse")) { - method = &Benchmark::ReadReverse; - } else if (name == Slice("readrandom")) { - method = &Benchmark::ReadRandom; - } else if (name == Slice("readmissing")) { - method = &Benchmark::ReadMissing; - } else if (name == Slice("seekrandom")) { - method = &Benchmark::SeekRandom; - } else if (name == Slice("readhot")) { - method = &Benchmark::ReadHot; - } else if (name == Slice("readrandomsmall")) { - reads_ /= 1000; - method = &Benchmark::ReadRandom; - } else if (name == Slice("deleteseq")) { - method = &Benchmark::DeleteSeq; - } else if (name == Slice("deleterandom")) { - method = &Benchmark::DeleteRandom; - } else if (name == Slice("readwhilewriting")) { - num_threads++; // Add extra thread for writing - method = &Benchmark::ReadWhileWriting; - } else if (name == Slice("compact")) { - method = &Benchmark::Compact; - } else if (name == Slice("crc32c")) { - method = &Benchmark::Crc32c; - } else if (name == Slice("acquireload")) { - method = &Benchmark::AcquireLoad; - } else if (name == Slice("snappycomp")) { - method = &Benchmark::SnappyCompress; - } else if (name == Slice("snappyuncomp")) { - method = &Benchmark::SnappyUncompress; - } else if (name == Slice("heapprofile")) { - HeapProfile(); - } else if (name == Slice("stats")) { - PrintStats("leveldb.stats"); - } else if (name == Slice("sstables")) { - PrintStats("leveldb.sstables"); - } else { - if (name != Slice()) { // No error message for empty name - fprintf(stderr, "unknown benchmark '%s'\n", name.ToString().c_str()); - } - } - - if (fresh_db) { - if (FLAGS_use_existing_db) { - fprintf(stdout, "%-12s : skipped (--use_existing_db is true)\n", - name.ToString().c_str()); - method = NULL; - } else { - delete db_; - db_ = NULL; - DestroyDB(FLAGS_db, Options()); - Open(); - } - } - - if (method != NULL) { - RunBenchmark(num_threads, name, method); - } - } - } - - private: - struct ThreadArg { - Benchmark* bm; - SharedState* shared; - ThreadState* thread; - void (Benchmark::*method)(ThreadState*); - }; - - static void ThreadBody(void* v) { - ThreadArg* arg = reinterpret_cast(v); - SharedState* shared = arg->shared; - ThreadState* thread = arg->thread; - { - MutexLock l(&shared->mu); - shared->num_initialized++; - if (shared->num_initialized >= shared->total) { - shared->cv.SignalAll(); - } - while (!shared->start) { - shared->cv.Wait(); - } - } - - thread->stats.Start(); - (arg->bm->*(arg->method))(thread); - thread->stats.Stop(); - - { - MutexLock l(&shared->mu); - shared->num_done++; - if (shared->num_done >= shared->total) { - shared->cv.SignalAll(); - } - } - } - - void RunBenchmark(int n, Slice name, - void (Benchmark::*method)(ThreadState*)) { - SharedState shared; - shared.total = n; - shared.num_initialized = 0; - shared.num_done = 0; - shared.start = false; - - ThreadArg* arg = new ThreadArg[n]; - for (int i = 0; i < n; i++) { - arg[i].bm = this; - arg[i].method = method; - arg[i].shared = &shared; - arg[i].thread = new ThreadState(i); - arg[i].thread->shared = &shared; - Env::Default()->StartThread(ThreadBody, &arg[i]); - } - - shared.mu.Lock(); - while (shared.num_initialized < n) { - shared.cv.Wait(); - } - - shared.start = true; - shared.cv.SignalAll(); - while (shared.num_done < n) { - shared.cv.Wait(); - } - shared.mu.Unlock(); - - for (int i = 1; i < n; i++) { - arg[0].thread->stats.Merge(arg[i].thread->stats); - } - arg[0].thread->stats.Report(name); - - for (int i = 0; i < n; i++) { - delete arg[i].thread; - } - delete[] arg; - } - - void Crc32c(ThreadState* thread) { - // Checksum about 500MB of data total - const int size = 4096; - const char* label = "(4K per op)"; - std::string data(size, 'x'); - int64_t bytes = 0; - uint32_t crc = 0; - while (bytes < 500 * 1048576) { - crc = crc32c::Value(data.data(), size); - thread->stats.FinishedSingleOp(); - bytes += size; - } - // Print so result is not dead - fprintf(stderr, "... crc=0x%x\r", static_cast(crc)); - - thread->stats.AddBytes(bytes); - thread->stats.AddMessage(label); - } - - void AcquireLoad(ThreadState* thread) { - int dummy; - port::AtomicPointer ap(&dummy); - int count = 0; - void *ptr = NULL; - thread->stats.AddMessage("(each op is 1000 loads)"); - while (count < 100000) { - for (int i = 0; i < 1000; i++) { - ptr = ap.Acquire_Load(); - } - count++; - thread->stats.FinishedSingleOp(); - } - if (ptr == NULL) exit(1); // Disable unused variable warning. - } - - void SnappyCompress(ThreadState* thread) { - RandomGenerator gen; - Slice input = gen.Generate(Options().block_size); - int64_t bytes = 0; - int64_t produced = 0; - bool ok = true; - std::string compressed; - while (ok && bytes < 1024 * 1048576) { // Compress 1G - ok = port::Snappy_Compress(input.data(), input.size(), &compressed); - produced += compressed.size(); - bytes += input.size(); - thread->stats.FinishedSingleOp(); - } - - if (!ok) { - thread->stats.AddMessage("(snappy failure)"); - } else { - char buf[100]; - snprintf(buf, sizeof(buf), "(output: %.1f%%)", - (produced * 100.0) / bytes); - thread->stats.AddMessage(buf); - thread->stats.AddBytes(bytes); - } - } - - void SnappyUncompress(ThreadState* thread) { - RandomGenerator gen; - Slice input = gen.Generate(Options().block_size); - std::string compressed; - bool ok = port::Snappy_Compress(input.data(), input.size(), &compressed); - int64_t bytes = 0; - char* uncompressed = new char[input.size()]; - while (ok && bytes < 1024 * 1048576) { // Compress 1G - ok = port::Snappy_Uncompress(compressed.data(), compressed.size(), - uncompressed); - bytes += input.size(); - thread->stats.FinishedSingleOp(); - } - delete[] uncompressed; - - if (!ok) { - thread->stats.AddMessage("(snappy failure)"); - } else { - thread->stats.AddBytes(bytes); - } - } - - void Open() { - assert(db_ == NULL); - Options options; - options.create_if_missing = !FLAGS_use_existing_db; - options.block_cache = cache_; - options.write_buffer_size = FLAGS_write_buffer_size; - options.filter_policy = filter_policy_; - Status s = DB::Open(options, FLAGS_db, &db_); - if (!s.ok()) { - fprintf(stderr, "open error: %s\n", s.ToString().c_str()); - exit(1); - } - } - - void WriteSeq(ThreadState* thread) { - DoWrite(thread, true); - } - - void WriteRandom(ThreadState* thread) { - DoWrite(thread, false); - } - - void DoWrite(ThreadState* thread, bool seq) { - if (num_ != FLAGS_num) { - char msg[100]; - snprintf(msg, sizeof(msg), "(%d ops)", num_); - thread->stats.AddMessage(msg); - } - - RandomGenerator gen; - WriteBatch batch; - Status s; - int64_t bytes = 0; - for (int i = 0; i < num_; i += entries_per_batch_) { - batch.Clear(); - for (int j = 0; j < entries_per_batch_; j++) { - const int k = seq ? i+j : (thread->rand.Next() % FLAGS_num); - char key[100]; - snprintf(key, sizeof(key), "%016d", k); - batch.Put(key, gen.Generate(value_size_)); - bytes += value_size_ + strlen(key); - thread->stats.FinishedSingleOp(); - } - s = db_->Write(write_options_, &batch); - if (!s.ok()) { - fprintf(stderr, "put error: %s\n", s.ToString().c_str()); - exit(1); - } - } - thread->stats.AddBytes(bytes); - } - - void ReadSequential(ThreadState* thread) { - Iterator* iter = db_->NewIterator(ReadOptions()); - int i = 0; - int64_t bytes = 0; - for (iter->SeekToFirst(); i < reads_ && iter->Valid(); iter->Next()) { - bytes += iter->key().size() + iter->value().size(); - thread->stats.FinishedSingleOp(); - ++i; - } - delete iter; - thread->stats.AddBytes(bytes); - } - - void ReadReverse(ThreadState* thread) { - Iterator* iter = db_->NewIterator(ReadOptions()); - int i = 0; - int64_t bytes = 0; - for (iter->SeekToLast(); i < reads_ && iter->Valid(); iter->Prev()) { - bytes += iter->key().size() + iter->value().size(); - thread->stats.FinishedSingleOp(); - ++i; - } - delete iter; - thread->stats.AddBytes(bytes); - } - - void ReadRandom(ThreadState* thread) { - ReadOptions options; - std::string value; - int found = 0; - for (int i = 0; i < reads_; i++) { - char key[100]; - const int k = thread->rand.Next() % FLAGS_num; - snprintf(key, sizeof(key), "%016d", k); - if (db_->Get(options, key, &value).ok()) { - found++; - } - thread->stats.FinishedSingleOp(); - } - char msg[100]; - snprintf(msg, sizeof(msg), "(%d of %d found)", found, num_); - thread->stats.AddMessage(msg); - } - - void ReadMissing(ThreadState* thread) { - ReadOptions options; - std::string value; - for (int i = 0; i < reads_; i++) { - char key[100]; - const int k = thread->rand.Next() % FLAGS_num; - snprintf(key, sizeof(key), "%016d.", k); - db_->Get(options, key, &value); - thread->stats.FinishedSingleOp(); - } - } - - void ReadHot(ThreadState* thread) { - ReadOptions options; - std::string value; - const int range = (FLAGS_num + 99) / 100; - for (int i = 0; i < reads_; i++) { - char key[100]; - const int k = thread->rand.Next() % range; - snprintf(key, sizeof(key), "%016d", k); - db_->Get(options, key, &value); - thread->stats.FinishedSingleOp(); - } - } - - void SeekRandom(ThreadState* thread) { - ReadOptions options; - std::string value; - int found = 0; - for (int i = 0; i < reads_; i++) { - Iterator* iter = db_->NewIterator(options); - char key[100]; - const int k = thread->rand.Next() % FLAGS_num; - snprintf(key, sizeof(key), "%016d", k); - iter->Seek(key); - if (iter->Valid() && iter->key() == key) found++; - delete iter; - thread->stats.FinishedSingleOp(); - } - char msg[100]; - snprintf(msg, sizeof(msg), "(%d of %d found)", found, num_); - thread->stats.AddMessage(msg); - } - - void DoDelete(ThreadState* thread, bool seq) { - RandomGenerator gen; - WriteBatch batch; - Status s; - for (int i = 0; i < num_; i += entries_per_batch_) { - batch.Clear(); - for (int j = 0; j < entries_per_batch_; j++) { - const int k = seq ? i+j : (thread->rand.Next() % FLAGS_num); - char key[100]; - snprintf(key, sizeof(key), "%016d", k); - batch.Delete(key); - thread->stats.FinishedSingleOp(); - } - s = db_->Write(write_options_, &batch); - if (!s.ok()) { - fprintf(stderr, "del error: %s\n", s.ToString().c_str()); - exit(1); - } - } - } - - void DeleteSeq(ThreadState* thread) { - DoDelete(thread, true); - } - - void DeleteRandom(ThreadState* thread) { - DoDelete(thread, false); - } - - void ReadWhileWriting(ThreadState* thread) { - if (thread->tid > 0) { - ReadRandom(thread); - } else { - // Special thread that keeps writing until other threads are done. - RandomGenerator gen; - while (true) { - { - MutexLock l(&thread->shared->mu); - if (thread->shared->num_done + 1 >= thread->shared->num_initialized) { - // Other threads have finished - break; - } - } - - const int k = thread->rand.Next() % FLAGS_num; - char key[100]; - snprintf(key, sizeof(key), "%016d", k); - Status s = db_->Put(write_options_, key, gen.Generate(value_size_)); - if (!s.ok()) { - fprintf(stderr, "put error: %s\n", s.ToString().c_str()); - exit(1); - } - } - - // Do not count any of the preceding work/delay in stats. - thread->stats.Start(); - } - } - - void Compact(ThreadState* thread) { - db_->CompactRange(NULL, NULL); - } - - void PrintStats(const char* key) { - std::string stats; - if (!db_->GetProperty(key, &stats)) { - stats = "(failed)"; - } - fprintf(stdout, "\n%s\n", stats.c_str()); - } - - static void WriteToFile(void* arg, const char* buf, int n) { - reinterpret_cast(arg)->Append(Slice(buf, n)); - } - - void HeapProfile() { - char fname[100]; - snprintf(fname, sizeof(fname), "%s/heap-%04d", FLAGS_db, ++heap_counter_); - WritableFile* file; - Status s = Env::Default()->NewWritableFile(fname, &file); - if (!s.ok()) { - fprintf(stderr, "%s\n", s.ToString().c_str()); - return; - } - bool ok = port::GetHeapProfile(WriteToFile, file); - delete file; - if (!ok) { - fprintf(stderr, "heap profiling not supported\n"); - Env::Default()->DeleteFile(fname); - } - } -}; - -} // namespace leveldb - -int main(int argc, char** argv) { - FLAGS_write_buffer_size = leveldb::Options().write_buffer_size; - FLAGS_open_files = leveldb::Options().max_open_files; - std::string default_db_path; - - for (int i = 1; i < argc; i++) { - double d; - int n; - char junk; - if (leveldb::Slice(argv[i]).starts_with("--benchmarks=")) { - FLAGS_benchmarks = argv[i] + strlen("--benchmarks="); - } else if (sscanf(argv[i], "--compression_ratio=%lf%c", &d, &junk) == 1) { - FLAGS_compression_ratio = d; - } else if (sscanf(argv[i], "--histogram=%d%c", &n, &junk) == 1 && - (n == 0 || n == 1)) { - FLAGS_histogram = n; - } else if (sscanf(argv[i], "--use_existing_db=%d%c", &n, &junk) == 1 && - (n == 0 || n == 1)) { - FLAGS_use_existing_db = n; - } else if (sscanf(argv[i], "--num=%d%c", &n, &junk) == 1) { - FLAGS_num = n; - } else if (sscanf(argv[i], "--reads=%d%c", &n, &junk) == 1) { - FLAGS_reads = n; - } else if (sscanf(argv[i], "--threads=%d%c", &n, &junk) == 1) { - FLAGS_threads = n; - } else if (sscanf(argv[i], "--value_size=%d%c", &n, &junk) == 1) { - FLAGS_value_size = n; - } else if (sscanf(argv[i], "--write_buffer_size=%d%c", &n, &junk) == 1) { - FLAGS_write_buffer_size = n; - } else if (sscanf(argv[i], "--cache_size=%d%c", &n, &junk) == 1) { - FLAGS_cache_size = n; - } else if (sscanf(argv[i], "--bloom_bits=%d%c", &n, &junk) == 1) { - FLAGS_bloom_bits = n; - } else if (sscanf(argv[i], "--open_files=%d%c", &n, &junk) == 1) { - FLAGS_open_files = n; - } else if (strncmp(argv[i], "--db=", 5) == 0) { - FLAGS_db = argv[i] + 5; - } else { - fprintf(stderr, "Invalid flag '%s'\n", argv[i]); - exit(1); - } - } - - // Choose a location for the test database if none given with --db= - if (FLAGS_db == NULL) { - leveldb::Env::Default()->GetTestDirectory(&default_db_path); - default_db_path += "/dbbench"; - FLAGS_db = default_db_path.c_str(); - } - - leveldb::Benchmark benchmark; - benchmark.Run(); - return 0; -} diff --git a/leveldb/db/db_impl.cc b/leveldb/db/db_impl.cc deleted file mode 100644 index 90c1c81..0000000 --- a/leveldb/db/db_impl.cc +++ /dev/null @@ -1,1463 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "db/db_impl.h" - -#include -#include -#include -#include -#include -#include -#include "db/builder.h" -#include "db/db_iter.h" -#include "db/dbformat.h" -#include "db/filename.h" -#include "db/log_reader.h" -#include "db/log_writer.h" -#include "db/memtable.h" -#include "db/table_cache.h" -#include "db/version_set.h" -#include "db/write_batch_internal.h" -#include "leveldb/db.h" -#include "leveldb/env.h" -#include "leveldb/status.h" -#include "leveldb/table.h" -#include "leveldb/table_builder.h" -#include "port/port.h" -#include "table/block.h" -#include "table/merger.h" -#include "table/two_level_iterator.h" -#include "util/coding.h" -#include "util/logging.h" -#include "util/mutexlock.h" - -namespace leveldb { - -// Information kept for every waiting writer -struct DBImpl::Writer { - Status status; - WriteBatch* batch; - bool sync; - bool done; - port::CondVar cv; - - explicit Writer(port::Mutex* mu) : cv(mu) { } -}; - -struct DBImpl::CompactionState { - Compaction* const compaction; - - // Sequence numbers < smallest_snapshot are not significant since we - // will never have to service a snapshot below smallest_snapshot. - // Therefore if we have seen a sequence number S <= smallest_snapshot, - // we can drop all entries for the same key with sequence numbers < S. - SequenceNumber smallest_snapshot; - - // Files produced by compaction - struct Output { - uint64_t number; - uint64_t file_size; - InternalKey smallest, largest; - }; - std::vector outputs; - - // State kept for output being generated - WritableFile* outfile; - TableBuilder* builder; - - uint64_t total_bytes; - - Output* current_output() { return &outputs[outputs.size()-1]; } - - explicit CompactionState(Compaction* c) - : compaction(c), - outfile(NULL), - builder(NULL), - total_bytes(0) { - } -}; - -// Fix user-supplied options to be reasonable -template -static void ClipToRange(T* ptr, V minvalue, V maxvalue) { - if (static_cast(*ptr) > maxvalue) *ptr = maxvalue; - if (static_cast(*ptr) < minvalue) *ptr = minvalue; -} -Options SanitizeOptions(const std::string& dbname, - const InternalKeyComparator* icmp, - const InternalFilterPolicy* ipolicy, - const Options& src) { - Options result = src; - result.comparator = icmp; - result.filter_policy = (src.filter_policy != NULL) ? ipolicy : NULL; - ClipToRange(&result.max_open_files, 20, 50000); - ClipToRange(&result.write_buffer_size, 64<<10, 1<<30); - ClipToRange(&result.block_size, 1<<10, 4<<20); - if (result.info_log == NULL) { - // Open a log file in the same directory as the db - src.env->CreateDir(dbname); // In case it does not exist - src.env->RenameFile(InfoLogFileName(dbname), OldInfoLogFileName(dbname)); - Status s = src.env->NewLogger(InfoLogFileName(dbname), &result.info_log); - if (!s.ok()) { - // No place suitable for logging - result.info_log = NULL; - } - } - if (result.block_cache == NULL) { - result.block_cache = NewLRUCache(8 << 20); - } - return result; -} - -DBImpl::DBImpl(const Options& options, const std::string& dbname) - : env_(options.env), - internal_comparator_(options.comparator), - internal_filter_policy_(options.filter_policy), - options_(SanitizeOptions( - dbname, &internal_comparator_, &internal_filter_policy_, options)), - owns_info_log_(options_.info_log != options.info_log), - owns_cache_(options_.block_cache != options.block_cache), - dbname_(dbname), - db_lock_(NULL), - shutting_down_(NULL), - bg_cv_(&mutex_), - mem_(new MemTable(internal_comparator_)), - imm_(NULL), - logfile_(NULL), - logfile_number_(0), - log_(NULL), - tmp_batch_(new WriteBatch), - bg_compaction_scheduled_(false), - manual_compaction_(NULL) { - mem_->Ref(); - has_imm_.Release_Store(NULL); - - // Reserve ten files or so for other uses and give the rest to TableCache. - const int table_cache_size = options.max_open_files - 10; - table_cache_ = new TableCache(dbname_, &options_, table_cache_size); - - versions_ = new VersionSet(dbname_, &options_, table_cache_, - &internal_comparator_); -} - -DBImpl::~DBImpl() { - // Wait for background work to finish - mutex_.Lock(); - shutting_down_.Release_Store(this); // Any non-NULL value is ok - while (bg_compaction_scheduled_) { - bg_cv_.Wait(); - } - mutex_.Unlock(); - - if (db_lock_ != NULL) { - env_->UnlockFile(db_lock_); - } - - delete versions_; - if (mem_ != NULL) mem_->Unref(); - if (imm_ != NULL) imm_->Unref(); - delete tmp_batch_; - delete log_; - delete logfile_; - delete table_cache_; - - if (owns_info_log_) { - delete options_.info_log; - } - if (owns_cache_) { - delete options_.block_cache; - } -} - -Status DBImpl::NewDB() { - VersionEdit new_db; - new_db.SetComparatorName(user_comparator()->Name()); - new_db.SetLogNumber(0); - new_db.SetNextFile(2); - new_db.SetLastSequence(0); - - const std::string manifest = DescriptorFileName(dbname_, 1); - WritableFile* file; - Status s = env_->NewWritableFile(manifest, &file); - if (!s.ok()) { - return s; - } - { - log::Writer log(file); - std::string record; - new_db.EncodeTo(&record); - s = log.AddRecord(record); - if (s.ok()) { - s = file->Close(); - } - } - delete file; - if (s.ok()) { - // Make "CURRENT" file that points to the new manifest file. - s = SetCurrentFile(env_, dbname_, 1); - } else { - env_->DeleteFile(manifest); - } - return s; -} - -void DBImpl::MaybeIgnoreError(Status* s) const { - if (s->ok() || options_.paranoid_checks) { - // No change needed - } else { - Log(options_.info_log, "Ignoring error %s", s->ToString().c_str()); - *s = Status::OK(); - } -} - -void DBImpl::DeleteObsoleteFiles() { - // Make a set of all of the live files - std::set live = pending_outputs_; - versions_->AddLiveFiles(&live); - - std::vector filenames; - env_->GetChildren(dbname_, &filenames); // Ignoring errors on purpose - uint64_t number; - FileType type; - for (size_t i = 0; i < filenames.size(); i++) { - if (ParseFileName(filenames[i], &number, &type)) { - bool keep = true; - switch (type) { - case kLogFile: - keep = ((number >= versions_->LogNumber()) || - (number == versions_->PrevLogNumber())); - break; - case kDescriptorFile: - // Keep my manifest file, and any newer incarnations' - // (in case there is a race that allows other incarnations) - keep = (number >= versions_->ManifestFileNumber()); - break; - case kTableFile: - keep = (live.find(number) != live.end()); - break; - case kTempFile: - // Any temp files that are currently being written to must - // be recorded in pending_outputs_, which is inserted into "live" - keep = (live.find(number) != live.end()); - break; - case kCurrentFile: - case kDBLockFile: - case kInfoLogFile: - keep = true; - break; - } - - if (!keep) { - if (type == kTableFile) { - table_cache_->Evict(number); - } - Log(options_.info_log, "Delete type=%d #%lld\n", - int(type), - static_cast(number)); - env_->DeleteFile(dbname_ + "/" + filenames[i]); - } - } - } -} - -Status DBImpl::Recover(VersionEdit* edit) { - mutex_.AssertHeld(); - - // Ignore error from CreateDir since the creation of the DB is - // committed only when the descriptor is created, and this directory - // may already exist from a previous failed creation attempt. - env_->CreateDir(dbname_); - assert(db_lock_ == NULL); - Status s = env_->LockFile(LockFileName(dbname_), &db_lock_); - if (!s.ok()) { - return s; - } - - if (!env_->FileExists(CurrentFileName(dbname_))) { - if (options_.create_if_missing) { - s = NewDB(); - if (!s.ok()) { - return s; - } - } else { - return Status::InvalidArgument( - dbname_, "does not exist (create_if_missing is false)"); - } - } else { - if (options_.error_if_exists) { - return Status::InvalidArgument( - dbname_, "exists (error_if_exists is true)"); - } - } - - s = versions_->Recover(); - if (s.ok()) { - SequenceNumber max_sequence(0); - - // Recover from all newer log files than the ones named in the - // descriptor (new log files may have been added by the previous - // incarnation without registering them in the descriptor). - // - // Note that PrevLogNumber() is no longer used, but we pay - // attention to it in case we are recovering a database - // produced by an older version of leveldb. - const uint64_t min_log = versions_->LogNumber(); - const uint64_t prev_log = versions_->PrevLogNumber(); - std::vector filenames; - s = env_->GetChildren(dbname_, &filenames); - if (!s.ok()) { - return s; - } - uint64_t number; - FileType type; - std::vector logs; - for (size_t i = 0; i < filenames.size(); i++) { - if (ParseFileName(filenames[i], &number, &type) - && type == kLogFile - && ((number >= min_log) || (number == prev_log))) { - logs.push_back(number); - } - } - - // Recover in the order in which the logs were generated - std::sort(logs.begin(), logs.end()); - for (size_t i = 0; i < logs.size(); i++) { - s = RecoverLogFile(logs[i], edit, &max_sequence); - - // The previous incarnation may not have written any MANIFEST - // records after allocating this log number. So we manually - // update the file number allocation counter in VersionSet. - versions_->MarkFileNumberUsed(logs[i]); - } - - if (s.ok()) { - if (versions_->LastSequence() < max_sequence) { - versions_->SetLastSequence(max_sequence); - } - } - } - - return s; -} - -Status DBImpl::RecoverLogFile(uint64_t log_number, - VersionEdit* edit, - SequenceNumber* max_sequence) { - struct LogReporter : public log::Reader::Reporter { - Env* env; - Logger* info_log; - const char* fname; - Status* status; // NULL if options_.paranoid_checks==false - virtual void Corruption(size_t bytes, const Status& s) { - Log(info_log, "%s%s: dropping %d bytes; %s", - (this->status == NULL ? "(ignoring error) " : ""), - fname, static_cast(bytes), s.ToString().c_str()); - if (this->status != NULL && this->status->ok()) *this->status = s; - } - }; - - mutex_.AssertHeld(); - - // Open the log file - std::string fname = LogFileName(dbname_, log_number); - SequentialFile* file; - Status status = env_->NewSequentialFile(fname, &file); - if (!status.ok()) { - MaybeIgnoreError(&status); - return status; - } - - // Create the log reader. - LogReporter reporter; - reporter.env = env_; - reporter.info_log = options_.info_log; - reporter.fname = fname.c_str(); - reporter.status = (options_.paranoid_checks ? &status : NULL); - // We intentially make log::Reader do checksumming even if - // paranoid_checks==false so that corruptions cause entire commits - // to be skipped instead of propagating bad information (like overly - // large sequence numbers). - log::Reader reader(file, &reporter, true/*checksum*/, - 0/*initial_offset*/); - Log(options_.info_log, "Recovering log #%llu", - (unsigned long long) log_number); - - // Read all the records and add to a memtable - std::string scratch; - Slice record; - WriteBatch batch; - MemTable* mem = NULL; - while (reader.ReadRecord(&record, &scratch) && - status.ok()) { - if (record.size() < 12) { - reporter.Corruption( - record.size(), Status::Corruption("log record too small")); - continue; - } - WriteBatchInternal::SetContents(&batch, record); - - if (mem == NULL) { - mem = new MemTable(internal_comparator_); - mem->Ref(); - } - status = WriteBatchInternal::InsertInto(&batch, mem); - MaybeIgnoreError(&status); - if (!status.ok()) { - break; - } - const SequenceNumber last_seq = - WriteBatchInternal::Sequence(&batch) + - WriteBatchInternal::Count(&batch) - 1; - if (last_seq > *max_sequence) { - *max_sequence = last_seq; - } - - if (mem->ApproximateMemoryUsage() > options_.write_buffer_size) { - status = WriteLevel0Table(mem, edit, NULL); - if (!status.ok()) { - // Reflect errors immediately so that conditions like full - // file-systems cause the DB::Open() to fail. - break; - } - mem->Unref(); - mem = NULL; - } - } - - if (status.ok() && mem != NULL) { - status = WriteLevel0Table(mem, edit, NULL); - // Reflect errors immediately so that conditions like full - // file-systems cause the DB::Open() to fail. - } - - if (mem != NULL) mem->Unref(); - delete file; - return status; -} - -Status DBImpl::WriteLevel0Table(MemTable* mem, VersionEdit* edit, - Version* base) { - mutex_.AssertHeld(); - const uint64_t start_micros = env_->NowMicros(); - FileMetaData meta; - meta.number = versions_->NewFileNumber(); - pending_outputs_.insert(meta.number); - Iterator* iter = mem->NewIterator(); - Log(options_.info_log, "Level-0 table #%llu: started", - (unsigned long long) meta.number); - - Status s; - { - mutex_.Unlock(); - s = BuildTable(dbname_, env_, options_, table_cache_, iter, &meta); - mutex_.Lock(); - } - - Log(options_.info_log, "Level-0 table #%llu: %lld bytes %s", - (unsigned long long) meta.number, - (unsigned long long) meta.file_size, - s.ToString().c_str()); - delete iter; - pending_outputs_.erase(meta.number); - - - // Note that if file_size is zero, the file has been deleted and - // should not be added to the manifest. - int level = 0; - if (s.ok() && meta.file_size > 0) { - const Slice min_user_key = meta.smallest.user_key(); - const Slice max_user_key = meta.largest.user_key(); - if (base != NULL) { - level = base->PickLevelForMemTableOutput(min_user_key, max_user_key); - } - edit->AddFile(level, meta.number, meta.file_size, - meta.smallest, meta.largest); - } - - CompactionStats stats; - stats.micros = env_->NowMicros() - start_micros; - stats.bytes_written = meta.file_size; - stats_[level].Add(stats); - return s; -} - -Status DBImpl::CompactMemTable() { - mutex_.AssertHeld(); - assert(imm_ != NULL); - - // Save the contents of the memtable as a new Table - VersionEdit edit; - Version* base = versions_->current(); - base->Ref(); - Status s = WriteLevel0Table(imm_, &edit, base); - base->Unref(); - - if (s.ok() && shutting_down_.Acquire_Load()) { - s = Status::IOError("Deleting DB during memtable compaction"); - } - - // Replace immutable memtable with the generated Table - if (s.ok()) { - edit.SetPrevLogNumber(0); - edit.SetLogNumber(logfile_number_); // Earlier logs no longer needed - s = versions_->LogAndApply(&edit, &mutex_); - } - - if (s.ok()) { - // Commit to the new state - imm_->Unref(); - imm_ = NULL; - has_imm_.Release_Store(NULL); - DeleteObsoleteFiles(); - } - - return s; -} - -void DBImpl::CompactRange(const Slice* begin, const Slice* end) { - int max_level_with_files = 1; - { - MutexLock l(&mutex_); - Version* base = versions_->current(); - for (int level = 1; level < config::kNumLevels; level++) { - if (base->OverlapInLevel(level, begin, end)) { - max_level_with_files = level; - } - } - } - TEST_CompactMemTable(); // TODO(sanjay): Skip if memtable does not overlap - for (int level = 0; level < max_level_with_files; level++) { - TEST_CompactRange(level, begin, end); - } -} - -void DBImpl::TEST_CompactRange(int level, const Slice* begin,const Slice* end) { - assert(level >= 0); - assert(level + 1 < config::kNumLevels); - - InternalKey begin_storage, end_storage; - - ManualCompaction manual; - manual.level = level; - manual.done = false; - if (begin == NULL) { - manual.begin = NULL; - } else { - begin_storage = InternalKey(*begin, kMaxSequenceNumber, kValueTypeForSeek); - manual.begin = &begin_storage; - } - if (end == NULL) { - manual.end = NULL; - } else { - end_storage = InternalKey(*end, 0, static_cast(0)); - manual.end = &end_storage; - } - - MutexLock l(&mutex_); - while (!manual.done) { - while (manual_compaction_ != NULL) { - bg_cv_.Wait(); - } - manual_compaction_ = &manual; - MaybeScheduleCompaction(); - while (manual_compaction_ == &manual) { - bg_cv_.Wait(); - } - } -} - -Status DBImpl::TEST_CompactMemTable() { - // NULL batch means just wait for earlier writes to be done - Status s = Write(WriteOptions(), NULL); - if (s.ok()) { - // Wait until the compaction completes - MutexLock l(&mutex_); - while (imm_ != NULL && bg_error_.ok()) { - bg_cv_.Wait(); - } - if (imm_ != NULL) { - s = bg_error_; - } - } - return s; -} - -void DBImpl::MaybeScheduleCompaction() { - mutex_.AssertHeld(); - if (bg_compaction_scheduled_) { - // Already scheduled - } else if (shutting_down_.Acquire_Load()) { - // DB is being deleted; no more background compactions - } else if (imm_ == NULL && - manual_compaction_ == NULL && - !versions_->NeedsCompaction()) { - // No work to be done - } else { - bg_compaction_scheduled_ = true; - env_->Schedule(&DBImpl::BGWork, this); - } -} - -void DBImpl::BGWork(void* db) { - reinterpret_cast(db)->BackgroundCall(); -} - -void DBImpl::BackgroundCall() { - MutexLock l(&mutex_); - assert(bg_compaction_scheduled_); - if (!shutting_down_.Acquire_Load()) { - Status s = BackgroundCompaction(); - if (!s.ok()) { - // Wait a little bit before retrying background compaction in - // case this is an environmental problem and we do not want to - // chew up resources for failed compactions for the duration of - // the problem. - bg_cv_.SignalAll(); // In case a waiter can proceed despite the error - Log(options_.info_log, "Waiting after background compaction error: %s", - s.ToString().c_str()); - mutex_.Unlock(); - env_->SleepForMicroseconds(1000000); - mutex_.Lock(); - } - } - - bg_compaction_scheduled_ = false; - - // Previous compaction may have produced too many files in a level, - // so reschedule another compaction if needed. - MaybeScheduleCompaction(); - bg_cv_.SignalAll(); -} - -Status DBImpl::BackgroundCompaction() { - mutex_.AssertHeld(); - - if (imm_ != NULL) { - return CompactMemTable(); - } - - Compaction* c; - bool is_manual = (manual_compaction_ != NULL); - InternalKey manual_end; - if (is_manual) { - ManualCompaction* m = manual_compaction_; - c = versions_->CompactRange(m->level, m->begin, m->end); - m->done = (c == NULL); - if (c != NULL) { - manual_end = c->input(0, c->num_input_files(0) - 1)->largest; - } - Log(options_.info_log, - "Manual compaction at level-%d from %s .. %s; will stop at %s\n", - m->level, - (m->begin ? m->begin->DebugString().c_str() : "(begin)"), - (m->end ? m->end->DebugString().c_str() : "(end)"), - (m->done ? "(end)" : manual_end.DebugString().c_str())); - } else { - c = versions_->PickCompaction(); - } - - Status status; - if (c == NULL) { - // Nothing to do - } else if (!is_manual && c->IsTrivialMove()) { - // Move file to next level - assert(c->num_input_files(0) == 1); - FileMetaData* f = c->input(0, 0); - c->edit()->DeleteFile(c->level(), f->number); - c->edit()->AddFile(c->level() + 1, f->number, f->file_size, - f->smallest, f->largest); - status = versions_->LogAndApply(c->edit(), &mutex_); - VersionSet::LevelSummaryStorage tmp; - Log(options_.info_log, "Moved #%lld to level-%d %lld bytes %s: %s\n", - static_cast(f->number), - c->level() + 1, - static_cast(f->file_size), - status.ToString().c_str(), - versions_->LevelSummary(&tmp)); - } else { - CompactionState* compact = new CompactionState(c); - status = DoCompactionWork(compact); - CleanupCompaction(compact); - c->ReleaseInputs(); - DeleteObsoleteFiles(); - } - delete c; - - if (status.ok()) { - // Done - } else if (shutting_down_.Acquire_Load()) { - // Ignore compaction errors found during shutting down - } else { - Log(options_.info_log, - "Compaction error: %s", status.ToString().c_str()); - if (options_.paranoid_checks && bg_error_.ok()) { - bg_error_ = status; - } - } - - if (is_manual) { - ManualCompaction* m = manual_compaction_; - if (!status.ok()) { - m->done = true; - } - if (!m->done) { - // We only compacted part of the requested range. Update *m - // to the range that is left to be compacted. - m->tmp_storage = manual_end; - m->begin = &m->tmp_storage; - } - manual_compaction_ = NULL; - } - return status; -} - -void DBImpl::CleanupCompaction(CompactionState* compact) { - mutex_.AssertHeld(); - if (compact->builder != NULL) { - // May happen if we get a shutdown call in the middle of compaction - compact->builder->Abandon(); - delete compact->builder; - } else { - assert(compact->outfile == NULL); - } - delete compact->outfile; - for (size_t i = 0; i < compact->outputs.size(); i++) { - const CompactionState::Output& out = compact->outputs[i]; - pending_outputs_.erase(out.number); - } - delete compact; -} - -Status DBImpl::OpenCompactionOutputFile(CompactionState* compact) { - assert(compact != NULL); - assert(compact->builder == NULL); - uint64_t file_number; - { - mutex_.Lock(); - file_number = versions_->NewFileNumber(); - pending_outputs_.insert(file_number); - CompactionState::Output out; - out.number = file_number; - out.smallest.Clear(); - out.largest.Clear(); - compact->outputs.push_back(out); - mutex_.Unlock(); - } - - // Make the output file - std::string fname = TableFileName(dbname_, file_number); - Status s = env_->NewWritableFile(fname, &compact->outfile); - if (s.ok()) { - compact->builder = new TableBuilder(options_, compact->outfile); - } - return s; -} - -Status DBImpl::FinishCompactionOutputFile(CompactionState* compact, - Iterator* input) { - assert(compact != NULL); - assert(compact->outfile != NULL); - assert(compact->builder != NULL); - - const uint64_t output_number = compact->current_output()->number; - assert(output_number != 0); - - // Check for iterator errors - Status s = input->status(); - const uint64_t current_entries = compact->builder->NumEntries(); - if (s.ok()) { - s = compact->builder->Finish(); - } else { - compact->builder->Abandon(); - } - const uint64_t current_bytes = compact->builder->FileSize(); - compact->current_output()->file_size = current_bytes; - compact->total_bytes += current_bytes; - delete compact->builder; - compact->builder = NULL; - - // Finish and check for file errors - if (s.ok()) { - s = compact->outfile->Sync(); - } - if (s.ok()) { - s = compact->outfile->Close(); - } - delete compact->outfile; - compact->outfile = NULL; - - if (s.ok() && current_entries > 0) { - // Verify that the table is usable - Iterator* iter = table_cache_->NewIterator(ReadOptions(), - output_number, - current_bytes); - s = iter->status(); - delete iter; - if (s.ok()) { - Log(options_.info_log, - "Generated table #%llu: %lld keys, %lld bytes", - (unsigned long long) output_number, - (unsigned long long) current_entries, - (unsigned long long) current_bytes); - } - } - return s; -} - - -Status DBImpl::InstallCompactionResults(CompactionState* compact) { - mutex_.AssertHeld(); - Log(options_.info_log, "Compacted %d@%d + %d@%d files => %lld bytes", - compact->compaction->num_input_files(0), - compact->compaction->level(), - compact->compaction->num_input_files(1), - compact->compaction->level() + 1, - static_cast(compact->total_bytes)); - - // Add compaction outputs - compact->compaction->AddInputDeletions(compact->compaction->edit()); - const int level = compact->compaction->level(); - for (size_t i = 0; i < compact->outputs.size(); i++) { - const CompactionState::Output& out = compact->outputs[i]; - compact->compaction->edit()->AddFile( - level + 1, - out.number, out.file_size, out.smallest, out.largest); - } - return versions_->LogAndApply(compact->compaction->edit(), &mutex_); -} - -Status DBImpl::DoCompactionWork(CompactionState* compact) { - const uint64_t start_micros = env_->NowMicros(); - int64_t imm_micros = 0; // Micros spent doing imm_ compactions - - Log(options_.info_log, "Compacting %d@%d + %d@%d files", - compact->compaction->num_input_files(0), - compact->compaction->level(), - compact->compaction->num_input_files(1), - compact->compaction->level() + 1); - - assert(versions_->NumLevelFiles(compact->compaction->level()) > 0); - assert(compact->builder == NULL); - assert(compact->outfile == NULL); - if (snapshots_.empty()) { - compact->smallest_snapshot = versions_->LastSequence(); - } else { - compact->smallest_snapshot = snapshots_.oldest()->number_; - } - - // Release mutex while we're actually doing the compaction work - mutex_.Unlock(); - - Iterator* input = versions_->MakeInputIterator(compact->compaction); - input->SeekToFirst(); - Status status; - ParsedInternalKey ikey; - std::string current_user_key; - bool has_current_user_key = false; - SequenceNumber last_sequence_for_key = kMaxSequenceNumber; - for (; input->Valid() && !shutting_down_.Acquire_Load(); ) { - // Prioritize immutable compaction work - if (has_imm_.NoBarrier_Load() != NULL) { - const uint64_t imm_start = env_->NowMicros(); - mutex_.Lock(); - if (imm_ != NULL) { - CompactMemTable(); - bg_cv_.SignalAll(); // Wakeup MakeRoomForWrite() if necessary - } - mutex_.Unlock(); - imm_micros += (env_->NowMicros() - imm_start); - } - - Slice key = input->key(); - if (compact->compaction->ShouldStopBefore(key) && - compact->builder != NULL) { - status = FinishCompactionOutputFile(compact, input); - if (!status.ok()) { - break; - } - } - - // Handle key/value, add to state, etc. - bool drop = false; - if (!ParseInternalKey(key, &ikey)) { - // Do not hide error keys - current_user_key.clear(); - has_current_user_key = false; - last_sequence_for_key = kMaxSequenceNumber; - } else { - if (!has_current_user_key || - user_comparator()->Compare(ikey.user_key, - Slice(current_user_key)) != 0) { - // First occurrence of this user key - current_user_key.assign(ikey.user_key.data(), ikey.user_key.size()); - has_current_user_key = true; - last_sequence_for_key = kMaxSequenceNumber; - } - - if (last_sequence_for_key <= compact->smallest_snapshot) { - // Hidden by an newer entry for same user key - drop = true; // (A) - } else if (ikey.type == kTypeDeletion && - ikey.sequence <= compact->smallest_snapshot && - compact->compaction->IsBaseLevelForKey(ikey.user_key)) { - // For this user key: - // (1) there is no data in higher levels - // (2) data in lower levels will have larger sequence numbers - // (3) data in layers that are being compacted here and have - // smaller sequence numbers will be dropped in the next - // few iterations of this loop (by rule (A) above). - // Therefore this deletion marker is obsolete and can be dropped. - drop = true; - } - - last_sequence_for_key = ikey.sequence; - } -#if 0 - Log(options_.info_log, - " Compact: %s, seq %d, type: %d %d, drop: %d, is_base: %d, " - "%d smallest_snapshot: %d", - ikey.user_key.ToString().c_str(), - (int)ikey.sequence, ikey.type, kTypeValue, drop, - compact->compaction->IsBaseLevelForKey(ikey.user_key), - (int)last_sequence_for_key, (int)compact->smallest_snapshot); -#endif - - if (!drop) { - // Open output file if necessary - if (compact->builder == NULL) { - status = OpenCompactionOutputFile(compact); - if (!status.ok()) { - break; - } - } - if (compact->builder->NumEntries() == 0) { - compact->current_output()->smallest.DecodeFrom(key); - } - compact->current_output()->largest.DecodeFrom(key); - compact->builder->Add(key, input->value()); - - // Close output file if it is big enough - if (compact->builder->FileSize() >= - compact->compaction->MaxOutputFileSize()) { - status = FinishCompactionOutputFile(compact, input); - if (!status.ok()) { - break; - } - } - } - - input->Next(); - } - - if (status.ok() && shutting_down_.Acquire_Load()) { - status = Status::IOError("Deleting DB during compaction"); - } - if (status.ok() && compact->builder != NULL) { - status = FinishCompactionOutputFile(compact, input); - } - if (status.ok()) { - status = input->status(); - } - delete input; - input = NULL; - - CompactionStats stats; - stats.micros = env_->NowMicros() - start_micros - imm_micros; - for (int which = 0; which < 2; which++) { - for (int i = 0; i < compact->compaction->num_input_files(which); i++) { - stats.bytes_read += compact->compaction->input(which, i)->file_size; - } - } - for (size_t i = 0; i < compact->outputs.size(); i++) { - stats.bytes_written += compact->outputs[i].file_size; - } - - mutex_.Lock(); - stats_[compact->compaction->level() + 1].Add(stats); - - if (status.ok()) { - status = InstallCompactionResults(compact); - } - VersionSet::LevelSummaryStorage tmp; - Log(options_.info_log, - "compacted to: %s", versions_->LevelSummary(&tmp)); - return status; -} - -namespace { -struct IterState { - port::Mutex* mu; - Version* version; - MemTable* mem; - MemTable* imm; -}; - -static void CleanupIteratorState(void* arg1, void* arg2) { - IterState* state = reinterpret_cast(arg1); - state->mu->Lock(); - state->mem->Unref(); - if (state->imm != NULL) state->imm->Unref(); - state->version->Unref(); - state->mu->Unlock(); - delete state; -} -} // namespace - -Iterator* DBImpl::NewInternalIterator(const ReadOptions& options, - SequenceNumber* latest_snapshot) { - IterState* cleanup = new IterState; - mutex_.Lock(); - *latest_snapshot = versions_->LastSequence(); - - // Collect together all needed child iterators - std::vector list; - list.push_back(mem_->NewIterator()); - mem_->Ref(); - if (imm_ != NULL) { - list.push_back(imm_->NewIterator()); - imm_->Ref(); - } - versions_->current()->AddIterators(options, &list); - Iterator* internal_iter = - NewMergingIterator(&internal_comparator_, &list[0], list.size()); - versions_->current()->Ref(); - - cleanup->mu = &mutex_; - cleanup->mem = mem_; - cleanup->imm = imm_; - cleanup->version = versions_->current(); - internal_iter->RegisterCleanup(CleanupIteratorState, cleanup, NULL); - - mutex_.Unlock(); - return internal_iter; -} - -Iterator* DBImpl::TEST_NewInternalIterator() { - SequenceNumber ignored; - return NewInternalIterator(ReadOptions(), &ignored); -} - -int64_t DBImpl::TEST_MaxNextLevelOverlappingBytes() { - MutexLock l(&mutex_); - return versions_->MaxNextLevelOverlappingBytes(); -} - -Status DBImpl::Get(const ReadOptions& options, - const Slice& key, - std::string* value) { - Status s; - MutexLock l(&mutex_); - SequenceNumber snapshot; - if (options.snapshot != NULL) { - snapshot = reinterpret_cast(options.snapshot)->number_; - } else { - snapshot = versions_->LastSequence(); - } - - MemTable* mem = mem_; - MemTable* imm = imm_; - Version* current = versions_->current(); - mem->Ref(); - if (imm != NULL) imm->Ref(); - current->Ref(); - - bool have_stat_update = false; - Version::GetStats stats; - - // Unlock while reading from files and memtables - { - mutex_.Unlock(); - // First look in the memtable, then in the immutable memtable (if any). - LookupKey lkey(key, snapshot); - if (mem->Get(lkey, value, &s)) { - // Done - } else if (imm != NULL && imm->Get(lkey, value, &s)) { - // Done - } else { - s = current->Get(options, lkey, value, &stats); - have_stat_update = true; - } - mutex_.Lock(); - } - - if (have_stat_update && current->UpdateStats(stats)) { - MaybeScheduleCompaction(); - } - mem->Unref(); - if (imm != NULL) imm->Unref(); - current->Unref(); - return s; -} - -Iterator* DBImpl::NewIterator(const ReadOptions& options) { - SequenceNumber latest_snapshot; - Iterator* internal_iter = NewInternalIterator(options, &latest_snapshot); - return NewDBIterator( - &dbname_, env_, user_comparator(), internal_iter, - (options.snapshot != NULL - ? reinterpret_cast(options.snapshot)->number_ - : latest_snapshot)); -} - -const Snapshot* DBImpl::GetSnapshot() { - MutexLock l(&mutex_); - return snapshots_.New(versions_->LastSequence()); -} - -void DBImpl::ReleaseSnapshot(const Snapshot* s) { - MutexLock l(&mutex_); - snapshots_.Delete(reinterpret_cast(s)); -} - -// Convenience methods -Status DBImpl::Put(const WriteOptions& o, const Slice& key, const Slice& val) { - return DB::Put(o, key, val); -} - -Status DBImpl::Delete(const WriteOptions& options, const Slice& key) { - return DB::Delete(options, key); -} - -Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) { - Writer w(&mutex_); - w.batch = my_batch; - w.sync = options.sync; - w.done = false; - - MutexLock l(&mutex_); - writers_.push_back(&w); - while (!w.done && &w != writers_.front()) { - w.cv.Wait(); - } - if (w.done) { - return w.status; - } - - // May temporarily unlock and wait. - Status status = MakeRoomForWrite(my_batch == NULL); - uint64_t last_sequence = versions_->LastSequence(); - Writer* last_writer = &w; - if (status.ok() && my_batch != NULL) { // NULL batch is for compactions - WriteBatch* updates = BuildBatchGroup(&last_writer); - WriteBatchInternal::SetSequence(updates, last_sequence + 1); - last_sequence += WriteBatchInternal::Count(updates); - - // Add to log and apply to memtable. We can release the lock - // during this phase since &w is currently responsible for logging - // and protects against concurrent loggers and concurrent writes - // into mem_. - { - mutex_.Unlock(); - status = log_->AddRecord(WriteBatchInternal::Contents(updates)); - if (status.ok() && options.sync) { - status = logfile_->Sync(); - } - if (status.ok()) { - status = WriteBatchInternal::InsertInto(updates, mem_); - } - mutex_.Lock(); - } - if (updates == tmp_batch_) tmp_batch_->Clear(); - - versions_->SetLastSequence(last_sequence); - } - - while (true) { - Writer* ready = writers_.front(); - writers_.pop_front(); - if (ready != &w) { - ready->status = status; - ready->done = true; - ready->cv.Signal(); - } - if (ready == last_writer) break; - } - - // Notify new head of write queue - if (!writers_.empty()) { - writers_.front()->cv.Signal(); - } - - return status; -} - -// REQUIRES: Writer list must be non-empty -// REQUIRES: First writer must have a non-NULL batch -WriteBatch* DBImpl::BuildBatchGroup(Writer** last_writer) { - assert(!writers_.empty()); - Writer* first = writers_.front(); - WriteBatch* result = first->batch; - assert(result != NULL); - - size_t size = WriteBatchInternal::ByteSize(first->batch); - - // Allow the group to grow up to a maximum size, but if the - // original write is small, limit the growth so we do not slow - // down the small write too much. - size_t max_size = 1 << 20; - if (size <= (128<<10)) { - max_size = size + (128<<10); - } - - *last_writer = first; - std::deque::iterator iter = writers_.begin(); - ++iter; // Advance past "first" - for (; iter != writers_.end(); ++iter) { - Writer* w = *iter; - if (w->sync && !first->sync) { - // Do not include a sync write into a batch handled by a non-sync write. - break; - } - - if (w->batch != NULL) { - size += WriteBatchInternal::ByteSize(w->batch); - if (size > max_size) { - // Do not make batch too big - break; - } - - // Append to *reuslt - if (result == first->batch) { - // Switch to temporary batch instead of disturbing caller's batch - result = tmp_batch_; - assert(WriteBatchInternal::Count(result) == 0); - WriteBatchInternal::Append(result, first->batch); - } - WriteBatchInternal::Append(result, w->batch); - } - *last_writer = w; - } - return result; -} - -// REQUIRES: mutex_ is held -// REQUIRES: this thread is currently at the front of the writer queue -Status DBImpl::MakeRoomForWrite(bool force) { - mutex_.AssertHeld(); - assert(!writers_.empty()); - bool allow_delay = !force; - Status s; - while (true) { - if (!bg_error_.ok()) { - // Yield previous error - s = bg_error_; - break; - } else if ( - allow_delay && - versions_->NumLevelFiles(0) >= config::kL0_SlowdownWritesTrigger) { - // We are getting close to hitting a hard limit on the number of - // L0 files. Rather than delaying a single write by several - // seconds when we hit the hard limit, start delaying each - // individual write by 1ms to reduce latency variance. Also, - // this delay hands over some CPU to the compaction thread in - // case it is sharing the same core as the writer. - mutex_.Unlock(); - env_->SleepForMicroseconds(1000); - allow_delay = false; // Do not delay a single write more than once - mutex_.Lock(); - } else if (!force && - (mem_->ApproximateMemoryUsage() <= options_.write_buffer_size)) { - // There is room in current memtable - break; - } else if (imm_ != NULL) { - // We have filled up the current memtable, but the previous - // one is still being compacted, so we wait. - bg_cv_.Wait(); - } else if (versions_->NumLevelFiles(0) >= config::kL0_StopWritesTrigger) { - // There are too many level-0 files. - Log(options_.info_log, "waiting...\n"); - bg_cv_.Wait(); - } else { - // Attempt to switch to a new memtable and trigger compaction of old - assert(versions_->PrevLogNumber() == 0); - uint64_t new_log_number = versions_->NewFileNumber(); - WritableFile* lfile = NULL; - s = env_->NewWritableFile(LogFileName(dbname_, new_log_number), &lfile); - if (!s.ok()) { - // Avoid chewing through file number space in a tight loop. - versions_->ReuseFileNumber(new_log_number); - break; - } - delete log_; - delete logfile_; - logfile_ = lfile; - logfile_number_ = new_log_number; - log_ = new log::Writer(lfile); - imm_ = mem_; - has_imm_.Release_Store(imm_); - mem_ = new MemTable(internal_comparator_); - mem_->Ref(); - force = false; // Do not force another compaction if have room - MaybeScheduleCompaction(); - } - } - return s; -} - -bool DBImpl::GetProperty(const Slice& property, std::string* value) { - value->clear(); - - MutexLock l(&mutex_); - Slice in = property; - Slice prefix("leveldb."); - if (!in.starts_with(prefix)) return false; - in.remove_prefix(prefix.size()); - - if (in.starts_with("num-files-at-level")) { - in.remove_prefix(strlen("num-files-at-level")); - uint64_t level; - bool ok = ConsumeDecimalNumber(&in, &level) && in.empty(); - if (!ok || level >= config::kNumLevels) { - return false; - } else { - char buf[100]; - snprintf(buf, sizeof(buf), "%d", - versions_->NumLevelFiles(static_cast(level))); - *value = buf; - return true; - } - } else if (in == "stats") { - char buf[200]; - snprintf(buf, sizeof(buf), - " Compactions\n" - "Level Files Size(MB) Time(sec) Read(MB) Write(MB)\n" - "--------------------------------------------------\n" - ); - value->append(buf); - for (int level = 0; level < config::kNumLevels; level++) { - int files = versions_->NumLevelFiles(level); - if (stats_[level].micros > 0 || files > 0) { - snprintf( - buf, sizeof(buf), - "%3d %8d %8.0f %9.0f %8.0f %9.0f\n", - level, - files, - versions_->NumLevelBytes(level) / 1048576.0, - stats_[level].micros / 1e6, - stats_[level].bytes_read / 1048576.0, - stats_[level].bytes_written / 1048576.0); - value->append(buf); - } - } - return true; - } else if (in == "sstables") { - *value = versions_->current()->DebugString(); - return true; - } - - return false; -} - -void DBImpl::GetApproximateSizes( - const Range* range, int n, - uint64_t* sizes) { - // TODO(opt): better implementation - Version* v; - { - MutexLock l(&mutex_); - versions_->current()->Ref(); - v = versions_->current(); - } - - for (int i = 0; i < n; i++) { - // Convert user_key into a corresponding internal key. - InternalKey k1(range[i].start, kMaxSequenceNumber, kValueTypeForSeek); - InternalKey k2(range[i].limit, kMaxSequenceNumber, kValueTypeForSeek); - uint64_t start = versions_->ApproximateOffsetOf(v, k1); - uint64_t limit = versions_->ApproximateOffsetOf(v, k2); - sizes[i] = (limit >= start ? limit - start : 0); - } - - { - MutexLock l(&mutex_); - v->Unref(); - } -} - -// Default implementations of convenience methods that subclasses of DB -// can call if they wish -Status DB::Put(const WriteOptions& opt, const Slice& key, const Slice& value) { - WriteBatch batch; - batch.Put(key, value); - return Write(opt, &batch); -} - -Status DB::Delete(const WriteOptions& opt, const Slice& key) { - WriteBatch batch; - batch.Delete(key); - return Write(opt, &batch); -} - -DB::~DB() { } - -Status DB::Open(const Options& options, const std::string& dbname, - DB** dbptr) { - *dbptr = NULL; - - DBImpl* impl = new DBImpl(options, dbname); - impl->mutex_.Lock(); - VersionEdit edit; - Status s = impl->Recover(&edit); // Handles create_if_missing, error_if_exists - if (s.ok()) { - uint64_t new_log_number = impl->versions_->NewFileNumber(); - WritableFile* lfile; - s = options.env->NewWritableFile(LogFileName(dbname, new_log_number), - &lfile); - if (s.ok()) { - edit.SetLogNumber(new_log_number); - impl->logfile_ = lfile; - impl->logfile_number_ = new_log_number; - impl->log_ = new log::Writer(lfile); - s = impl->versions_->LogAndApply(&edit, &impl->mutex_); - } - if (s.ok()) { - impl->DeleteObsoleteFiles(); - impl->MaybeScheduleCompaction(); - } - } - impl->mutex_.Unlock(); - if (s.ok()) { - *dbptr = impl; - } else { - delete impl; - } - return s; -} - -Snapshot::~Snapshot() { -} - -Status DestroyDB(const std::string& dbname, const Options& options) { - Env* env = options.env; - std::vector filenames; - // Ignore error in case directory does not exist - env->GetChildren(dbname, &filenames); - if (filenames.empty()) { - return Status::OK(); - } - - FileLock* lock; - const std::string lockname = LockFileName(dbname); - Status result = env->LockFile(lockname, &lock); - if (result.ok()) { - uint64_t number; - FileType type; - for (size_t i = 0; i < filenames.size(); i++) { - if (ParseFileName(filenames[i], &number, &type) && - type != kDBLockFile) { // Lock file will be deleted at end - Status del = env->DeleteFile(dbname + "/" + filenames[i]); - if (result.ok() && !del.ok()) { - result = del; - } - } - } - env->UnlockFile(lock); // Ignore error since state is already gone - env->DeleteFile(lockname); - env->DeleteDir(dbname); // Ignore error in case dir contains other files - } - return result; -} - -} // namespace leveldb diff --git a/leveldb/db/db_impl.h b/leveldb/db/db_impl.h deleted file mode 100644 index 8d2bb34..0000000 --- a/leveldb/db/db_impl.h +++ /dev/null @@ -1,194 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#ifndef STORAGE_LEVELDB_DB_DB_IMPL_H_ -#define STORAGE_LEVELDB_DB_DB_IMPL_H_ - -#include -#include -#include "db/dbformat.h" -#include "db/log_writer.h" -#include "db/snapshot.h" -#include "leveldb/db.h" -#include "leveldb/env.h" -#include "port/port.h" - -namespace leveldb { - -class MemTable; -class TableCache; -class Version; -class VersionEdit; -class VersionSet; - -class DBImpl : public DB { - public: - DBImpl(const Options& options, const std::string& dbname); - virtual ~DBImpl(); - - // Implementations of the DB interface - virtual Status Put(const WriteOptions&, const Slice& key, const Slice& value); - virtual Status Delete(const WriteOptions&, const Slice& key); - virtual Status Write(const WriteOptions& options, WriteBatch* updates); - virtual Status Get(const ReadOptions& options, - const Slice& key, - std::string* value); - virtual Iterator* NewIterator(const ReadOptions&); - virtual const Snapshot* GetSnapshot(); - virtual void ReleaseSnapshot(const Snapshot* snapshot); - virtual bool GetProperty(const Slice& property, std::string* value); - virtual void GetApproximateSizes(const Range* range, int n, uint64_t* sizes); - virtual void CompactRange(const Slice* begin, const Slice* end); - - // Extra methods (for testing) that are not in the public DB interface - - // Compact any files in the named level that overlap [*begin,*end] - void TEST_CompactRange(int level, const Slice* begin, const Slice* end); - - // Force current memtable contents to be compacted. - Status TEST_CompactMemTable(); - - // Return an internal iterator over the current state of the database. - // The keys of this iterator are internal keys (see format.h). - // The returned iterator should be deleted when no longer needed. - Iterator* TEST_NewInternalIterator(); - - // Return the maximum overlapping data (in bytes) at next level for any - // file at a level >= 1. - int64_t TEST_MaxNextLevelOverlappingBytes(); - - private: - friend class DB; - struct CompactionState; - struct Writer; - - Iterator* NewInternalIterator(const ReadOptions&, - SequenceNumber* latest_snapshot); - - Status NewDB(); - - // Recover the descriptor from persistent storage. May do a significant - // amount of work to recover recently logged updates. Any changes to - // be made to the descriptor are added to *edit. - Status Recover(VersionEdit* edit); - - void MaybeIgnoreError(Status* s) const; - - // Delete any unneeded files and stale in-memory entries. - void DeleteObsoleteFiles(); - - // Compact the in-memory write buffer to disk. Switches to a new - // log-file/memtable and writes a new descriptor iff successful. - Status CompactMemTable(); - - Status RecoverLogFile(uint64_t log_number, - VersionEdit* edit, - SequenceNumber* max_sequence); - - Status WriteLevel0Table(MemTable* mem, VersionEdit* edit, Version* base); - - Status MakeRoomForWrite(bool force /* compact even if there is room? */); - WriteBatch* BuildBatchGroup(Writer** last_writer); - - void MaybeScheduleCompaction(); - static void BGWork(void* db); - void BackgroundCall(); - Status BackgroundCompaction(); - void CleanupCompaction(CompactionState* compact); - Status DoCompactionWork(CompactionState* compact); - - Status OpenCompactionOutputFile(CompactionState* compact); - Status FinishCompactionOutputFile(CompactionState* compact, Iterator* input); - Status InstallCompactionResults(CompactionState* compact); - - // Constant after construction - Env* const env_; - const InternalKeyComparator internal_comparator_; - const InternalFilterPolicy internal_filter_policy_; - const Options options_; // options_.comparator == &internal_comparator_ - bool owns_info_log_; - bool owns_cache_; - const std::string dbname_; - - // table_cache_ provides its own synchronization - TableCache* table_cache_; - - // Lock over the persistent DB state. Non-NULL iff successfully acquired. - FileLock* db_lock_; - - // State below is protected by mutex_ - port::Mutex mutex_; - port::AtomicPointer shutting_down_; - port::CondVar bg_cv_; // Signalled when background work finishes - MemTable* mem_; - MemTable* imm_; // Memtable being compacted - port::AtomicPointer has_imm_; // So bg thread can detect non-NULL imm_ - WritableFile* logfile_; - uint64_t logfile_number_; - log::Writer* log_; - - // Queue of writers. - std::deque writers_; - WriteBatch* tmp_batch_; - - SnapshotList snapshots_; - - // Set of table files to protect from deletion because they are - // part of ongoing compactions. - std::set pending_outputs_; - - // Has a background compaction been scheduled or is running? - bool bg_compaction_scheduled_; - - // Information for a manual compaction - struct ManualCompaction { - int level; - bool done; - const InternalKey* begin; // NULL means beginning of key range - const InternalKey* end; // NULL means end of key range - InternalKey tmp_storage; // Used to keep track of compaction progress - }; - ManualCompaction* manual_compaction_; - - VersionSet* versions_; - - // Have we encountered a background error in paranoid mode? - Status bg_error_; - - // Per level compaction stats. stats_[level] stores the stats for - // compactions that produced data for the specified "level". - struct CompactionStats { - int64_t micros; - int64_t bytes_read; - int64_t bytes_written; - - CompactionStats() : micros(0), bytes_read(0), bytes_written(0) { } - - void Add(const CompactionStats& c) { - this->micros += c.micros; - this->bytes_read += c.bytes_read; - this->bytes_written += c.bytes_written; - } - }; - CompactionStats stats_[config::kNumLevels]; - - // No copying allowed - DBImpl(const DBImpl&); - void operator=(const DBImpl&); - - const Comparator* user_comparator() const { - return internal_comparator_.user_comparator(); - } -}; - -// Sanitize db options. The caller should delete result.info_log if -// it is not equal to src.info_log. -extern Options SanitizeOptions(const std::string& db, - const InternalKeyComparator* icmp, - const InternalFilterPolicy* ipolicy, - const Options& src); - -} // namespace leveldb - -#endif // STORAGE_LEVELDB_DB_DB_IMPL_H_ diff --git a/leveldb/db/db_iter.cc b/leveldb/db/db_iter.cc deleted file mode 100644 index 87dca2d..0000000 --- a/leveldb/db/db_iter.cc +++ /dev/null @@ -1,299 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "db/db_iter.h" - -#include "db/filename.h" -#include "db/dbformat.h" -#include "leveldb/env.h" -#include "leveldb/iterator.h" -#include "port/port.h" -#include "util/logging.h" -#include "util/mutexlock.h" - -namespace leveldb { - -#if 0 -static void DumpInternalIter(Iterator* iter) { - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - ParsedInternalKey k; - if (!ParseInternalKey(iter->key(), &k)) { - fprintf(stderr, "Corrupt '%s'\n", EscapeString(iter->key()).c_str()); - } else { - fprintf(stderr, "@ '%s'\n", k.DebugString().c_str()); - } - } -} -#endif - -namespace { - -// Memtables and sstables that make the DB representation contain -// (userkey,seq,type) => uservalue entries. DBIter -// combines multiple entries for the same userkey found in the DB -// representation into a single entry while accounting for sequence -// numbers, deletion markers, overwrites, etc. -class DBIter: public Iterator { - public: - // Which direction is the iterator currently moving? - // (1) When moving forward, the internal iterator is positioned at - // the exact entry that yields this->key(), this->value() - // (2) When moving backwards, the internal iterator is positioned - // just before all entries whose user key == this->key(). - enum Direction { - kForward, - kReverse - }; - - DBIter(const std::string* dbname, Env* env, - const Comparator* cmp, Iterator* iter, SequenceNumber s) - : dbname_(dbname), - env_(env), - user_comparator_(cmp), - iter_(iter), - sequence_(s), - direction_(kForward), - valid_(false) { - } - virtual ~DBIter() { - delete iter_; - } - virtual bool Valid() const { return valid_; } - virtual Slice key() const { - assert(valid_); - return (direction_ == kForward) ? ExtractUserKey(iter_->key()) : saved_key_; - } - virtual Slice value() const { - assert(valid_); - return (direction_ == kForward) ? iter_->value() : saved_value_; - } - virtual Status status() const { - if (status_.ok()) { - return iter_->status(); - } else { - return status_; - } - } - - virtual void Next(); - virtual void Prev(); - virtual void Seek(const Slice& target); - virtual void SeekToFirst(); - virtual void SeekToLast(); - - private: - void FindNextUserEntry(bool skipping, std::string* skip); - void FindPrevUserEntry(); - bool ParseKey(ParsedInternalKey* key); - - inline void SaveKey(const Slice& k, std::string* dst) { - dst->assign(k.data(), k.size()); - } - - inline void ClearSavedValue() { - if (saved_value_.capacity() > 1048576) { - std::string empty; - swap(empty, saved_value_); - } else { - saved_value_.clear(); - } - } - - const std::string* const dbname_; - Env* const env_; - const Comparator* const user_comparator_; - Iterator* const iter_; - SequenceNumber const sequence_; - - Status status_; - std::string saved_key_; // == current key when direction_==kReverse - std::string saved_value_; // == current raw value when direction_==kReverse - Direction direction_; - bool valid_; - - // No copying allowed - DBIter(const DBIter&); - void operator=(const DBIter&); -}; - -inline bool DBIter::ParseKey(ParsedInternalKey* ikey) { - if (!ParseInternalKey(iter_->key(), ikey)) { - status_ = Status::Corruption("corrupted internal key in DBIter"); - return false; - } else { - return true; - } -} - -void DBIter::Next() { - assert(valid_); - - if (direction_ == kReverse) { // Switch directions? - direction_ = kForward; - // iter_ is pointing just before the entries for this->key(), - // so advance into the range of entries for this->key() and then - // use the normal skipping code below. - if (!iter_->Valid()) { - iter_->SeekToFirst(); - } else { - iter_->Next(); - } - if (!iter_->Valid()) { - valid_ = false; - saved_key_.clear(); - return; - } - } - - // Temporarily use saved_key_ as storage for key to skip. - std::string* skip = &saved_key_; - SaveKey(ExtractUserKey(iter_->key()), skip); - FindNextUserEntry(true, skip); -} - -void DBIter::FindNextUserEntry(bool skipping, std::string* skip) { - // Loop until we hit an acceptable entry to yield - assert(iter_->Valid()); - assert(direction_ == kForward); - do { - ParsedInternalKey ikey; - if (ParseKey(&ikey) && ikey.sequence <= sequence_) { - switch (ikey.type) { - case kTypeDeletion: - // Arrange to skip all upcoming entries for this key since - // they are hidden by this deletion. - SaveKey(ikey.user_key, skip); - skipping = true; - break; - case kTypeValue: - if (skipping && - user_comparator_->Compare(ikey.user_key, *skip) <= 0) { - // Entry hidden - } else { - valid_ = true; - saved_key_.clear(); - return; - } - break; - } - } - iter_->Next(); - } while (iter_->Valid()); - saved_key_.clear(); - valid_ = false; -} - -void DBIter::Prev() { - assert(valid_); - - if (direction_ == kForward) { // Switch directions? - // iter_ is pointing at the current entry. Scan backwards until - // the key changes so we can use the normal reverse scanning code. - assert(iter_->Valid()); // Otherwise valid_ would have been false - SaveKey(ExtractUserKey(iter_->key()), &saved_key_); - while (true) { - iter_->Prev(); - if (!iter_->Valid()) { - valid_ = false; - saved_key_.clear(); - ClearSavedValue(); - return; - } - if (user_comparator_->Compare(ExtractUserKey(iter_->key()), - saved_key_) < 0) { - break; - } - } - direction_ = kReverse; - } - - FindPrevUserEntry(); -} - -void DBIter::FindPrevUserEntry() { - assert(direction_ == kReverse); - - ValueType value_type = kTypeDeletion; - if (iter_->Valid()) { - do { - ParsedInternalKey ikey; - if (ParseKey(&ikey) && ikey.sequence <= sequence_) { - if ((value_type != kTypeDeletion) && - user_comparator_->Compare(ikey.user_key, saved_key_) < 0) { - // We encountered a non-deleted value in entries for previous keys, - break; - } - value_type = ikey.type; - if (value_type == kTypeDeletion) { - saved_key_.clear(); - ClearSavedValue(); - } else { - Slice raw_value = iter_->value(); - if (saved_value_.capacity() > raw_value.size() + 1048576) { - std::string empty; - swap(empty, saved_value_); - } - SaveKey(ExtractUserKey(iter_->key()), &saved_key_); - saved_value_.assign(raw_value.data(), raw_value.size()); - } - } - iter_->Prev(); - } while (iter_->Valid()); - } - - if (value_type == kTypeDeletion) { - // End - valid_ = false; - saved_key_.clear(); - ClearSavedValue(); - direction_ = kForward; - } else { - valid_ = true; - } -} - -void DBIter::Seek(const Slice& target) { - direction_ = kForward; - ClearSavedValue(); - saved_key_.clear(); - AppendInternalKey( - &saved_key_, ParsedInternalKey(target, sequence_, kValueTypeForSeek)); - iter_->Seek(saved_key_); - if (iter_->Valid()) { - FindNextUserEntry(false, &saved_key_ /* temporary storage */); - } else { - valid_ = false; - } -} - -void DBIter::SeekToFirst() { - direction_ = kForward; - ClearSavedValue(); - iter_->SeekToFirst(); - if (iter_->Valid()) { - FindNextUserEntry(false, &saved_key_ /* temporary storage */); - } else { - valid_ = false; - } -} - -void DBIter::SeekToLast() { - direction_ = kReverse; - ClearSavedValue(); - iter_->SeekToLast(); - FindPrevUserEntry(); -} - -} // anonymous namespace - -Iterator* NewDBIterator( - const std::string* dbname, - Env* env, - const Comparator* user_key_comparator, - Iterator* internal_iter, - const SequenceNumber& sequence) { - return new DBIter(dbname, env, user_key_comparator, internal_iter, sequence); -} - -} // namespace leveldb diff --git a/leveldb/db/db_iter.h b/leveldb/db/db_iter.h deleted file mode 100644 index d9e1b17..0000000 --- a/leveldb/db/db_iter.h +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#ifndef STORAGE_LEVELDB_DB_DB_ITER_H_ -#define STORAGE_LEVELDB_DB_DB_ITER_H_ - -#include -#include "leveldb/db.h" -#include "db/dbformat.h" - -namespace leveldb { - -// Return a new iterator that converts internal keys (yielded by -// "*internal_iter") that were live at the specified "sequence" number -// into appropriate user keys. -extern Iterator* NewDBIterator( - const std::string* dbname, - Env* env, - const Comparator* user_key_comparator, - Iterator* internal_iter, - const SequenceNumber& sequence); - -} // namespace leveldb - -#endif // STORAGE_LEVELDB_DB_DB_ITER_H_ diff --git a/leveldb/db/db_test.cc b/leveldb/db/db_test.cc deleted file mode 100644 index 3744d0e..0000000 --- a/leveldb/db/db_test.cc +++ /dev/null @@ -1,1946 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "leveldb/db.h" -#include "leveldb/filter_policy.h" -#include "db/db_impl.h" -#include "db/filename.h" -#include "db/version_set.h" -#include "db/write_batch_internal.h" -#include "leveldb/cache.h" -#include "leveldb/env.h" -#include "leveldb/table.h" -#include "util/hash.h" -#include "util/logging.h" -#include "util/mutexlock.h" -#include "util/testharness.h" -#include "util/testutil.h" - -namespace leveldb { - -static std::string RandomString(Random* rnd, int len) { - std::string r; - test::RandomString(rnd, len, &r); - return r; -} - -namespace { -class AtomicCounter { - private: - port::Mutex mu_; - int count_; - public: - AtomicCounter() : count_(0) { } - void Increment() { - MutexLock l(&mu_); - count_++; - } - int Read() { - MutexLock l(&mu_); - return count_; - } - void Reset() { - MutexLock l(&mu_); - count_ = 0; - } -}; -} - -// Special Env used to delay background operations -class SpecialEnv : public EnvWrapper { - public: - // sstable Sync() calls are blocked while this pointer is non-NULL. - port::AtomicPointer delay_sstable_sync_; - - // Simulate no-space errors while this pointer is non-NULL. - port::AtomicPointer no_space_; - - // Simulate non-writable file system while this pointer is non-NULL - port::AtomicPointer non_writable_; - - bool count_random_reads_; - AtomicCounter random_read_counter_; - - AtomicCounter sleep_counter_; - - explicit SpecialEnv(Env* base) : EnvWrapper(base) { - delay_sstable_sync_.Release_Store(NULL); - no_space_.Release_Store(NULL); - non_writable_.Release_Store(NULL); - count_random_reads_ = false; - } - - Status NewWritableFile(const std::string& f, WritableFile** r) { - class SSTableFile : public WritableFile { - private: - SpecialEnv* env_; - WritableFile* base_; - - public: - SSTableFile(SpecialEnv* env, WritableFile* base) - : env_(env), - base_(base) { - } - ~SSTableFile() { delete base_; } - Status Append(const Slice& data) { - if (env_->no_space_.Acquire_Load() != NULL) { - // Drop writes on the floor - return Status::OK(); - } else { - return base_->Append(data); - } - } - Status Close() { return base_->Close(); } - Status Flush() { return base_->Flush(); } - Status Sync() { - while (env_->delay_sstable_sync_.Acquire_Load() != NULL) { - env_->SleepForMicroseconds(100000); - } - return base_->Sync(); - } - }; - - if (non_writable_.Acquire_Load() != NULL) { - return Status::IOError("simulated write error"); - } - - Status s = target()->NewWritableFile(f, r); - if (s.ok()) { - if (strstr(f.c_str(), ".sst") != NULL) { - *r = new SSTableFile(this, *r); - } - } - return s; - } - - Status NewRandomAccessFile(const std::string& f, RandomAccessFile** r) { - class CountingFile : public RandomAccessFile { - private: - RandomAccessFile* target_; - AtomicCounter* counter_; - public: - CountingFile(RandomAccessFile* target, AtomicCounter* counter) - : target_(target), counter_(counter) { - } - virtual ~CountingFile() { delete target_; } - virtual Status Read(uint64_t offset, size_t n, Slice* result, - char* scratch) const { - counter_->Increment(); - return target_->Read(offset, n, result, scratch); - } - }; - - Status s = target()->NewRandomAccessFile(f, r); - if (s.ok() && count_random_reads_) { - *r = new CountingFile(*r, &random_read_counter_); - } - return s; - } - - virtual void SleepForMicroseconds(int micros) { - sleep_counter_.Increment(); - target()->SleepForMicroseconds(micros); - } -}; - -class DBTest { - private: - const FilterPolicy* filter_policy_; - - // Sequence of option configurations to try - enum OptionConfig { - kDefault, - kFilter, - kUncompressed, - kEnd - }; - int option_config_; - - public: - std::string dbname_; - SpecialEnv* env_; - DB* db_; - - Options last_options_; - - DBTest() : option_config_(kDefault), - env_(new SpecialEnv(Env::Default())) { - filter_policy_ = NewBloomFilterPolicy(10); - dbname_ = test::TmpDir() + "/db_test"; - DestroyDB(dbname_, Options()); - db_ = NULL; - Reopen(); - } - - ~DBTest() { - delete db_; - DestroyDB(dbname_, Options()); - delete env_; - delete filter_policy_; - } - - // Switch to a fresh database with the next option configuration to - // test. Return false if there are no more configurations to test. - bool ChangeOptions() { - option_config_++; - if (option_config_ >= kEnd) { - return false; - } else { - DestroyAndReopen(); - return true; - } - } - - // Return the current option configuration. - Options CurrentOptions() { - Options options; - switch (option_config_) { - case kFilter: - options.filter_policy = filter_policy_; - break; - case kUncompressed: - options.compression = kNoCompression; - break; - default: - break; - } - return options; - } - - DBImpl* dbfull() { - return reinterpret_cast(db_); - } - - void Reopen(Options* options = NULL) { - ASSERT_OK(TryReopen(options)); - } - - void Close() { - delete db_; - db_ = NULL; - } - - void DestroyAndReopen(Options* options = NULL) { - delete db_; - db_ = NULL; - DestroyDB(dbname_, Options()); - ASSERT_OK(TryReopen(options)); - } - - Status TryReopen(Options* options) { - delete db_; - db_ = NULL; - Options opts; - if (options != NULL) { - opts = *options; - } else { - opts = CurrentOptions(); - opts.create_if_missing = true; - } - last_options_ = opts; - - return DB::Open(opts, dbname_, &db_); - } - - Status Put(const std::string& k, const std::string& v) { - return db_->Put(WriteOptions(), k, v); - } - - Status Delete(const std::string& k) { - return db_->Delete(WriteOptions(), k); - } - - std::string Get(const std::string& k, const Snapshot* snapshot = NULL) { - ReadOptions options; - options.snapshot = snapshot; - std::string result; - Status s = db_->Get(options, k, &result); - if (s.IsNotFound()) { - result = "NOT_FOUND"; - } else if (!s.ok()) { - result = s.ToString(); - } - return result; - } - - // Return a string that contains all key,value pairs in order, - // formatted like "(k1->v1)(k2->v2)". - std::string Contents() { - std::vector forward; - std::string result; - Iterator* iter = db_->NewIterator(ReadOptions()); - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - std::string s = IterStatus(iter); - result.push_back('('); - result.append(s); - result.push_back(')'); - forward.push_back(s); - } - - // Check reverse iteration results are the reverse of forward results - int matched = 0; - for (iter->SeekToLast(); iter->Valid(); iter->Prev()) { - ASSERT_LT(matched, forward.size()); - ASSERT_EQ(IterStatus(iter), forward[forward.size() - matched - 1]); - matched++; - } - ASSERT_EQ(matched, forward.size()); - - delete iter; - return result; - } - - std::string AllEntriesFor(const Slice& user_key) { - Iterator* iter = dbfull()->TEST_NewInternalIterator(); - InternalKey target(user_key, kMaxSequenceNumber, kTypeValue); - iter->Seek(target.Encode()); - std::string result; - if (!iter->status().ok()) { - result = iter->status().ToString(); - } else { - result = "[ "; - bool first = true; - while (iter->Valid()) { - ParsedInternalKey ikey; - if (!ParseInternalKey(iter->key(), &ikey)) { - result += "CORRUPTED"; - } else { - if (last_options_.comparator->Compare(ikey.user_key, user_key) != 0) { - break; - } - if (!first) { - result += ", "; - } - first = false; - switch (ikey.type) { - case kTypeValue: - result += iter->value().ToString(); - break; - case kTypeDeletion: - result += "DEL"; - break; - } - } - iter->Next(); - } - if (!first) { - result += " "; - } - result += "]"; - } - delete iter; - return result; - } - - int NumTableFilesAtLevel(int level) { - std::string property; - ASSERT_TRUE( - db_->GetProperty("leveldb.num-files-at-level" + NumberToString(level), - &property)); - return atoi(property.c_str()); - } - - int TotalTableFiles() { - int result = 0; - for (int level = 0; level < config::kNumLevels; level++) { - result += NumTableFilesAtLevel(level); - } - return result; - } - - // Return spread of files per level - std::string FilesPerLevel() { - std::string result; - int last_non_zero_offset = 0; - for (int level = 0; level < config::kNumLevels; level++) { - int f = NumTableFilesAtLevel(level); - char buf[100]; - snprintf(buf, sizeof(buf), "%s%d", (level ? "," : ""), f); - result += buf; - if (f > 0) { - last_non_zero_offset = result.size(); - } - } - result.resize(last_non_zero_offset); - return result; - } - - int CountFiles() { - std::vector files; - env_->GetChildren(dbname_, &files); - return static_cast(files.size()); - } - - uint64_t Size(const Slice& start, const Slice& limit) { - Range r(start, limit); - uint64_t size; - db_->GetApproximateSizes(&r, 1, &size); - return size; - } - - void Compact(const Slice& start, const Slice& limit) { - db_->CompactRange(&start, &limit); - } - - // Do n memtable compactions, each of which produces an sstable - // covering the range [small,large]. - void MakeTables(int n, const std::string& small, const std::string& large) { - for (int i = 0; i < n; i++) { - Put(small, "begin"); - Put(large, "end"); - dbfull()->TEST_CompactMemTable(); - } - } - - // Prevent pushing of new sstables into deeper levels by adding - // tables that cover a specified range to all levels. - void FillLevels(const std::string& smallest, const std::string& largest) { - MakeTables(config::kNumLevels, smallest, largest); - } - - void DumpFileCounts(const char* label) { - fprintf(stderr, "---\n%s:\n", label); - fprintf(stderr, "maxoverlap: %lld\n", - static_cast( - dbfull()->TEST_MaxNextLevelOverlappingBytes())); - for (int level = 0; level < config::kNumLevels; level++) { - int num = NumTableFilesAtLevel(level); - if (num > 0) { - fprintf(stderr, " level %3d : %d files\n", level, num); - } - } - } - - std::string DumpSSTableList() { - std::string property; - db_->GetProperty("leveldb.sstables", &property); - return property; - } - - std::string IterStatus(Iterator* iter) { - std::string result; - if (iter->Valid()) { - result = iter->key().ToString() + "->" + iter->value().ToString(); - } else { - result = "(invalid)"; - } - return result; - } -}; - -TEST(DBTest, Empty) { - do { - ASSERT_TRUE(db_ != NULL); - ASSERT_EQ("NOT_FOUND", Get("foo")); - } while (ChangeOptions()); -} - -TEST(DBTest, ReadWrite) { - do { - ASSERT_OK(Put("foo", "v1")); - ASSERT_EQ("v1", Get("foo")); - ASSERT_OK(Put("bar", "v2")); - ASSERT_OK(Put("foo", "v3")); - ASSERT_EQ("v3", Get("foo")); - ASSERT_EQ("v2", Get("bar")); - } while (ChangeOptions()); -} - -TEST(DBTest, PutDeleteGet) { - do { - ASSERT_OK(db_->Put(WriteOptions(), "foo", "v1")); - ASSERT_EQ("v1", Get("foo")); - ASSERT_OK(db_->Put(WriteOptions(), "foo", "v2")); - ASSERT_EQ("v2", Get("foo")); - ASSERT_OK(db_->Delete(WriteOptions(), "foo")); - ASSERT_EQ("NOT_FOUND", Get("foo")); - } while (ChangeOptions()); -} - -TEST(DBTest, GetFromImmutableLayer) { - do { - Options options = CurrentOptions(); - options.env = env_; - options.write_buffer_size = 100000; // Small write buffer - Reopen(&options); - - ASSERT_OK(Put("foo", "v1")); - ASSERT_EQ("v1", Get("foo")); - - env_->delay_sstable_sync_.Release_Store(env_); // Block sync calls - Put("k1", std::string(100000, 'x')); // Fill memtable - Put("k2", std::string(100000, 'y')); // Trigger compaction - ASSERT_EQ("v1", Get("foo")); - env_->delay_sstable_sync_.Release_Store(NULL); // Release sync calls - } while (ChangeOptions()); -} - -TEST(DBTest, GetFromVersions) { - do { - ASSERT_OK(Put("foo", "v1")); - dbfull()->TEST_CompactMemTable(); - ASSERT_EQ("v1", Get("foo")); - } while (ChangeOptions()); -} - -TEST(DBTest, GetSnapshot) { - do { - // Try with both a short key and a long key - for (int i = 0; i < 2; i++) { - std::string key = (i == 0) ? std::string("foo") : std::string(200, 'x'); - ASSERT_OK(Put(key, "v1")); - const Snapshot* s1 = db_->GetSnapshot(); - ASSERT_OK(Put(key, "v2")); - ASSERT_EQ("v2", Get(key)); - ASSERT_EQ("v1", Get(key, s1)); - dbfull()->TEST_CompactMemTable(); - ASSERT_EQ("v2", Get(key)); - ASSERT_EQ("v1", Get(key, s1)); - db_->ReleaseSnapshot(s1); - } - } while (ChangeOptions()); -} - -TEST(DBTest, GetLevel0Ordering) { - do { - // Check that we process level-0 files in correct order. The code - // below generates two level-0 files where the earlier one comes - // before the later one in the level-0 file list since the earlier - // one has a smaller "smallest" key. - ASSERT_OK(Put("bar", "b")); - ASSERT_OK(Put("foo", "v1")); - dbfull()->TEST_CompactMemTable(); - ASSERT_OK(Put("foo", "v2")); - dbfull()->TEST_CompactMemTable(); - ASSERT_EQ("v2", Get("foo")); - } while (ChangeOptions()); -} - -TEST(DBTest, GetOrderedByLevels) { - do { - ASSERT_OK(Put("foo", "v1")); - Compact("a", "z"); - ASSERT_EQ("v1", Get("foo")); - ASSERT_OK(Put("foo", "v2")); - ASSERT_EQ("v2", Get("foo")); - dbfull()->TEST_CompactMemTable(); - ASSERT_EQ("v2", Get("foo")); - } while (ChangeOptions()); -} - -TEST(DBTest, GetPicksCorrectFile) { - do { - // Arrange to have multiple files in a non-level-0 level. - ASSERT_OK(Put("a", "va")); - Compact("a", "b"); - ASSERT_OK(Put("x", "vx")); - Compact("x", "y"); - ASSERT_OK(Put("f", "vf")); - Compact("f", "g"); - ASSERT_EQ("va", Get("a")); - ASSERT_EQ("vf", Get("f")); - ASSERT_EQ("vx", Get("x")); - } while (ChangeOptions()); -} - -TEST(DBTest, GetEncountersEmptyLevel) { - do { - // Arrange for the following to happen: - // * sstable A in level 0 - // * nothing in level 1 - // * sstable B in level 2 - // Then do enough Get() calls to arrange for an automatic compaction - // of sstable A. A bug would cause the compaction to be marked as - // occuring at level 1 (instead of the correct level 0). - - // Step 1: First place sstables in levels 0 and 2 - int compaction_count = 0; - while (NumTableFilesAtLevel(0) == 0 || - NumTableFilesAtLevel(2) == 0) { - ASSERT_LE(compaction_count, 100) << "could not fill levels 0 and 2"; - compaction_count++; - Put("a", "begin"); - Put("z", "end"); - dbfull()->TEST_CompactMemTable(); - } - - // Step 2: clear level 1 if necessary. - dbfull()->TEST_CompactRange(1, NULL, NULL); - ASSERT_EQ(NumTableFilesAtLevel(0), 1); - ASSERT_EQ(NumTableFilesAtLevel(1), 0); - ASSERT_EQ(NumTableFilesAtLevel(2), 1); - - // Step 3: read a bunch of times - for (int i = 0; i < 1000; i++) { - ASSERT_EQ("NOT_FOUND", Get("missing")); - } - - // Step 4: Wait for compaction to finish - env_->SleepForMicroseconds(1000000); - - ASSERT_EQ(NumTableFilesAtLevel(0), 0); - } while (ChangeOptions()); -} - -TEST(DBTest, IterEmpty) { - Iterator* iter = db_->NewIterator(ReadOptions()); - - iter->SeekToFirst(); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - - iter->SeekToLast(); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - - iter->Seek("foo"); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - - delete iter; -} - -TEST(DBTest, IterSingle) { - ASSERT_OK(Put("a", "va")); - Iterator* iter = db_->NewIterator(ReadOptions()); - - iter->SeekToFirst(); - ASSERT_EQ(IterStatus(iter), "a->va"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - iter->SeekToFirst(); - ASSERT_EQ(IterStatus(iter), "a->va"); - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - - iter->SeekToLast(); - ASSERT_EQ(IterStatus(iter), "a->va"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - iter->SeekToLast(); - ASSERT_EQ(IterStatus(iter), "a->va"); - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - - iter->Seek(""); - ASSERT_EQ(IterStatus(iter), "a->va"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - - iter->Seek("a"); - ASSERT_EQ(IterStatus(iter), "a->va"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - - iter->Seek("b"); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - - delete iter; -} - -TEST(DBTest, IterMulti) { - ASSERT_OK(Put("a", "va")); - ASSERT_OK(Put("b", "vb")); - ASSERT_OK(Put("c", "vc")); - Iterator* iter = db_->NewIterator(ReadOptions()); - - iter->SeekToFirst(); - ASSERT_EQ(IterStatus(iter), "a->va"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "b->vb"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "c->vc"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - iter->SeekToFirst(); - ASSERT_EQ(IterStatus(iter), "a->va"); - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - - iter->SeekToLast(); - ASSERT_EQ(IterStatus(iter), "c->vc"); - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "b->vb"); - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "a->va"); - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - iter->SeekToLast(); - ASSERT_EQ(IterStatus(iter), "c->vc"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - - iter->Seek(""); - ASSERT_EQ(IterStatus(iter), "a->va"); - iter->Seek("a"); - ASSERT_EQ(IterStatus(iter), "a->va"); - iter->Seek("ax"); - ASSERT_EQ(IterStatus(iter), "b->vb"); - iter->Seek("b"); - ASSERT_EQ(IterStatus(iter), "b->vb"); - iter->Seek("z"); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - - // Switch from reverse to forward - iter->SeekToLast(); - iter->Prev(); - iter->Prev(); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "b->vb"); - - // Switch from forward to reverse - iter->SeekToFirst(); - iter->Next(); - iter->Next(); - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "b->vb"); - - // Make sure iter stays at snapshot - ASSERT_OK(Put("a", "va2")); - ASSERT_OK(Put("a2", "va3")); - ASSERT_OK(Put("b", "vb2")); - ASSERT_OK(Put("c", "vc2")); - ASSERT_OK(Delete("b")); - iter->SeekToFirst(); - ASSERT_EQ(IterStatus(iter), "a->va"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "b->vb"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "c->vc"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - iter->SeekToLast(); - ASSERT_EQ(IterStatus(iter), "c->vc"); - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "b->vb"); - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "a->va"); - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - - delete iter; -} - -TEST(DBTest, IterSmallAndLargeMix) { - ASSERT_OK(Put("a", "va")); - ASSERT_OK(Put("b", std::string(100000, 'b'))); - ASSERT_OK(Put("c", "vc")); - ASSERT_OK(Put("d", std::string(100000, 'd'))); - ASSERT_OK(Put("e", std::string(100000, 'e'))); - - Iterator* iter = db_->NewIterator(ReadOptions()); - - iter->SeekToFirst(); - ASSERT_EQ(IterStatus(iter), "a->va"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "b->" + std::string(100000, 'b')); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "c->vc"); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "d->" + std::string(100000, 'd')); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "e->" + std::string(100000, 'e')); - iter->Next(); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - - iter->SeekToLast(); - ASSERT_EQ(IterStatus(iter), "e->" + std::string(100000, 'e')); - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "d->" + std::string(100000, 'd')); - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "c->vc"); - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "b->" + std::string(100000, 'b')); - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "a->va"); - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "(invalid)"); - - delete iter; -} - -TEST(DBTest, IterMultiWithDelete) { - do { - ASSERT_OK(Put("a", "va")); - ASSERT_OK(Put("b", "vb")); - ASSERT_OK(Put("c", "vc")); - ASSERT_OK(Delete("b")); - ASSERT_EQ("NOT_FOUND", Get("b")); - - Iterator* iter = db_->NewIterator(ReadOptions()); - iter->Seek("c"); - ASSERT_EQ(IterStatus(iter), "c->vc"); - iter->Prev(); - ASSERT_EQ(IterStatus(iter), "a->va"); - delete iter; - } while (ChangeOptions()); -} - -TEST(DBTest, Recover) { - do { - ASSERT_OK(Put("foo", "v1")); - ASSERT_OK(Put("baz", "v5")); - - Reopen(); - ASSERT_EQ("v1", Get("foo")); - - ASSERT_EQ("v1", Get("foo")); - ASSERT_EQ("v5", Get("baz")); - ASSERT_OK(Put("bar", "v2")); - ASSERT_OK(Put("foo", "v3")); - - Reopen(); - ASSERT_EQ("v3", Get("foo")); - ASSERT_OK(Put("foo", "v4")); - ASSERT_EQ("v4", Get("foo")); - ASSERT_EQ("v2", Get("bar")); - ASSERT_EQ("v5", Get("baz")); - } while (ChangeOptions()); -} - -TEST(DBTest, RecoveryWithEmptyLog) { - do { - ASSERT_OK(Put("foo", "v1")); - ASSERT_OK(Put("foo", "v2")); - Reopen(); - Reopen(); - ASSERT_OK(Put("foo", "v3")); - Reopen(); - ASSERT_EQ("v3", Get("foo")); - } while (ChangeOptions()); -} - -// Check that writes done during a memtable compaction are recovered -// if the database is shutdown during the memtable compaction. -TEST(DBTest, RecoverDuringMemtableCompaction) { - do { - Options options = CurrentOptions(); - options.env = env_; - options.write_buffer_size = 1000000; - Reopen(&options); - - // Trigger a long memtable compaction and reopen the database during it - ASSERT_OK(Put("foo", "v1")); // Goes to 1st log file - ASSERT_OK(Put("big1", std::string(10000000, 'x'))); // Fills memtable - ASSERT_OK(Put("big2", std::string(1000, 'y'))); // Triggers compaction - ASSERT_OK(Put("bar", "v2")); // Goes to new log file - - Reopen(&options); - ASSERT_EQ("v1", Get("foo")); - ASSERT_EQ("v2", Get("bar")); - ASSERT_EQ(std::string(10000000, 'x'), Get("big1")); - ASSERT_EQ(std::string(1000, 'y'), Get("big2")); - } while (ChangeOptions()); -} - -static std::string Key(int i) { - char buf[100]; - snprintf(buf, sizeof(buf), "key%06d", i); - return std::string(buf); -} - -TEST(DBTest, MinorCompactionsHappen) { - Options options = CurrentOptions(); - options.write_buffer_size = 10000; - Reopen(&options); - - const int N = 500; - - int starting_num_tables = TotalTableFiles(); - for (int i = 0; i < N; i++) { - ASSERT_OK(Put(Key(i), Key(i) + std::string(1000, 'v'))); - } - int ending_num_tables = TotalTableFiles(); - ASSERT_GT(ending_num_tables, starting_num_tables); - - for (int i = 0; i < N; i++) { - ASSERT_EQ(Key(i) + std::string(1000, 'v'), Get(Key(i))); - } - - Reopen(); - - for (int i = 0; i < N; i++) { - ASSERT_EQ(Key(i) + std::string(1000, 'v'), Get(Key(i))); - } -} - -TEST(DBTest, RecoverWithLargeLog) { - { - Options options = CurrentOptions(); - Reopen(&options); - ASSERT_OK(Put("big1", std::string(200000, '1'))); - ASSERT_OK(Put("big2", std::string(200000, '2'))); - ASSERT_OK(Put("small3", std::string(10, '3'))); - ASSERT_OK(Put("small4", std::string(10, '4'))); - ASSERT_EQ(NumTableFilesAtLevel(0), 0); - } - - // Make sure that if we re-open with a small write buffer size that - // we flush table files in the middle of a large log file. - Options options = CurrentOptions(); - options.write_buffer_size = 100000; - Reopen(&options); - ASSERT_EQ(NumTableFilesAtLevel(0), 3); - ASSERT_EQ(std::string(200000, '1'), Get("big1")); - ASSERT_EQ(std::string(200000, '2'), Get("big2")); - ASSERT_EQ(std::string(10, '3'), Get("small3")); - ASSERT_EQ(std::string(10, '4'), Get("small4")); - ASSERT_GT(NumTableFilesAtLevel(0), 1); -} - -TEST(DBTest, CompactionsGenerateMultipleFiles) { - Options options = CurrentOptions(); - options.write_buffer_size = 100000000; // Large write buffer - Reopen(&options); - - Random rnd(301); - - // Write 8MB (80 values, each 100K) - ASSERT_EQ(NumTableFilesAtLevel(0), 0); - std::vector values; - for (int i = 0; i < 80; i++) { - values.push_back(RandomString(&rnd, 100000)); - ASSERT_OK(Put(Key(i), values[i])); - } - - // Reopening moves updates to level-0 - Reopen(&options); - dbfull()->TEST_CompactRange(0, NULL, NULL); - - ASSERT_EQ(NumTableFilesAtLevel(0), 0); - ASSERT_GT(NumTableFilesAtLevel(1), 1); - for (int i = 0; i < 80; i++) { - ASSERT_EQ(Get(Key(i)), values[i]); - } -} - -TEST(DBTest, RepeatedWritesToSameKey) { - Options options = CurrentOptions(); - options.env = env_; - options.write_buffer_size = 100000; // Small write buffer - Reopen(&options); - - // We must have at most one file per level except for level-0, - // which may have up to kL0_StopWritesTrigger files. - const int kMaxFiles = config::kNumLevels + config::kL0_StopWritesTrigger; - - Random rnd(301); - std::string value = RandomString(&rnd, 2 * options.write_buffer_size); - for (int i = 0; i < 5 * kMaxFiles; i++) { - Put("key", value); - ASSERT_LE(TotalTableFiles(), kMaxFiles); - fprintf(stderr, "after %d: %d files\n", int(i+1), TotalTableFiles()); - } -} - -TEST(DBTest, SparseMerge) { - Options options = CurrentOptions(); - options.compression = kNoCompression; - Reopen(&options); - - FillLevels("A", "Z"); - - // Suppose there is: - // small amount of data with prefix A - // large amount of data with prefix B - // small amount of data with prefix C - // and that recent updates have made small changes to all three prefixes. - // Check that we do not do a compaction that merges all of B in one shot. - const std::string value(1000, 'x'); - Put("A", "va"); - // Write approximately 100MB of "B" values - for (int i = 0; i < 100000; i++) { - char key[100]; - snprintf(key, sizeof(key), "B%010d", i); - Put(key, value); - } - Put("C", "vc"); - dbfull()->TEST_CompactMemTable(); - dbfull()->TEST_CompactRange(0, NULL, NULL); - - // Make sparse update - Put("A", "va2"); - Put("B100", "bvalue2"); - Put("C", "vc2"); - dbfull()->TEST_CompactMemTable(); - - // Compactions should not cause us to create a situation where - // a file overlaps too much data at the next level. - ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(), 20*1048576); - dbfull()->TEST_CompactRange(0, NULL, NULL); - ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(), 20*1048576); - dbfull()->TEST_CompactRange(1, NULL, NULL); - ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(), 20*1048576); -} - -static bool Between(uint64_t val, uint64_t low, uint64_t high) { - bool result = (val >= low) && (val <= high); - if (!result) { - fprintf(stderr, "Value %llu is not in range [%llu, %llu]\n", - (unsigned long long)(val), - (unsigned long long)(low), - (unsigned long long)(high)); - } - return result; -} - -TEST(DBTest, ApproximateSizes) { - do { - Options options = CurrentOptions(); - options.write_buffer_size = 100000000; // Large write buffer - options.compression = kNoCompression; - DestroyAndReopen(); - - ASSERT_TRUE(Between(Size("", "xyz"), 0, 0)); - Reopen(&options); - ASSERT_TRUE(Between(Size("", "xyz"), 0, 0)); - - // Write 8MB (80 values, each 100K) - ASSERT_EQ(NumTableFilesAtLevel(0), 0); - const int N = 80; - static const int S1 = 100000; - static const int S2 = 105000; // Allow some expansion from metadata - Random rnd(301); - for (int i = 0; i < N; i++) { - ASSERT_OK(Put(Key(i), RandomString(&rnd, S1))); - } - - // 0 because GetApproximateSizes() does not account for memtable space - ASSERT_TRUE(Between(Size("", Key(50)), 0, 0)); - - // Check sizes across recovery by reopening a few times - for (int run = 0; run < 3; run++) { - Reopen(&options); - - for (int compact_start = 0; compact_start < N; compact_start += 10) { - for (int i = 0; i < N; i += 10) { - ASSERT_TRUE(Between(Size("", Key(i)), S1*i, S2*i)); - ASSERT_TRUE(Between(Size("", Key(i)+".suffix"), S1*(i+1), S2*(i+1))); - ASSERT_TRUE(Between(Size(Key(i), Key(i+10)), S1*10, S2*10)); - } - ASSERT_TRUE(Between(Size("", Key(50)), S1*50, S2*50)); - ASSERT_TRUE(Between(Size("", Key(50)+".suffix"), S1*50, S2*50)); - - std::string cstart_str = Key(compact_start); - std::string cend_str = Key(compact_start + 9); - Slice cstart = cstart_str; - Slice cend = cend_str; - dbfull()->TEST_CompactRange(0, &cstart, &cend); - } - - ASSERT_EQ(NumTableFilesAtLevel(0), 0); - ASSERT_GT(NumTableFilesAtLevel(1), 0); - } - } while (ChangeOptions()); -} - -TEST(DBTest, ApproximateSizes_MixOfSmallAndLarge) { - do { - Options options = CurrentOptions(); - options.compression = kNoCompression; - Reopen(); - - Random rnd(301); - std::string big1 = RandomString(&rnd, 100000); - ASSERT_OK(Put(Key(0), RandomString(&rnd, 10000))); - ASSERT_OK(Put(Key(1), RandomString(&rnd, 10000))); - ASSERT_OK(Put(Key(2), big1)); - ASSERT_OK(Put(Key(3), RandomString(&rnd, 10000))); - ASSERT_OK(Put(Key(4), big1)); - ASSERT_OK(Put(Key(5), RandomString(&rnd, 10000))); - ASSERT_OK(Put(Key(6), RandomString(&rnd, 300000))); - ASSERT_OK(Put(Key(7), RandomString(&rnd, 10000))); - - // Check sizes across recovery by reopening a few times - for (int run = 0; run < 3; run++) { - Reopen(&options); - - ASSERT_TRUE(Between(Size("", Key(0)), 0, 0)); - ASSERT_TRUE(Between(Size("", Key(1)), 10000, 11000)); - ASSERT_TRUE(Between(Size("", Key(2)), 20000, 21000)); - ASSERT_TRUE(Between(Size("", Key(3)), 120000, 121000)); - ASSERT_TRUE(Between(Size("", Key(4)), 130000, 131000)); - ASSERT_TRUE(Between(Size("", Key(5)), 230000, 231000)); - ASSERT_TRUE(Between(Size("", Key(6)), 240000, 241000)); - ASSERT_TRUE(Between(Size("", Key(7)), 540000, 541000)); - ASSERT_TRUE(Between(Size("", Key(8)), 550000, 560000)); - - ASSERT_TRUE(Between(Size(Key(3), Key(5)), 110000, 111000)); - - dbfull()->TEST_CompactRange(0, NULL, NULL); - } - } while (ChangeOptions()); -} - -TEST(DBTest, IteratorPinsRef) { - Put("foo", "hello"); - - // Get iterator that will yield the current contents of the DB. - Iterator* iter = db_->NewIterator(ReadOptions()); - - // Write to force compactions - Put("foo", "newvalue1"); - for (int i = 0; i < 100; i++) { - ASSERT_OK(Put(Key(i), Key(i) + std::string(100000, 'v'))); // 100K values - } - Put("foo", "newvalue2"); - - iter->SeekToFirst(); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ("foo", iter->key().ToString()); - ASSERT_EQ("hello", iter->value().ToString()); - iter->Next(); - ASSERT_TRUE(!iter->Valid()); - delete iter; -} - -TEST(DBTest, Snapshot) { - do { - Put("foo", "v1"); - const Snapshot* s1 = db_->GetSnapshot(); - Put("foo", "v2"); - const Snapshot* s2 = db_->GetSnapshot(); - Put("foo", "v3"); - const Snapshot* s3 = db_->GetSnapshot(); - - Put("foo", "v4"); - ASSERT_EQ("v1", Get("foo", s1)); - ASSERT_EQ("v2", Get("foo", s2)); - ASSERT_EQ("v3", Get("foo", s3)); - ASSERT_EQ("v4", Get("foo")); - - db_->ReleaseSnapshot(s3); - ASSERT_EQ("v1", Get("foo", s1)); - ASSERT_EQ("v2", Get("foo", s2)); - ASSERT_EQ("v4", Get("foo")); - - db_->ReleaseSnapshot(s1); - ASSERT_EQ("v2", Get("foo", s2)); - ASSERT_EQ("v4", Get("foo")); - - db_->ReleaseSnapshot(s2); - ASSERT_EQ("v4", Get("foo")); - } while (ChangeOptions()); -} - -TEST(DBTest, HiddenValuesAreRemoved) { - do { - Random rnd(301); - FillLevels("a", "z"); - - std::string big = RandomString(&rnd, 50000); - Put("foo", big); - Put("pastfoo", "v"); - const Snapshot* snapshot = db_->GetSnapshot(); - Put("foo", "tiny"); - Put("pastfoo2", "v2"); // Advance sequence number one more - - ASSERT_OK(dbfull()->TEST_CompactMemTable()); - ASSERT_GT(NumTableFilesAtLevel(0), 0); - - ASSERT_EQ(big, Get("foo", snapshot)); - ASSERT_TRUE(Between(Size("", "pastfoo"), 50000, 60000)); - db_->ReleaseSnapshot(snapshot); - ASSERT_EQ(AllEntriesFor("foo"), "[ tiny, " + big + " ]"); - Slice x("x"); - dbfull()->TEST_CompactRange(0, NULL, &x); - ASSERT_EQ(AllEntriesFor("foo"), "[ tiny ]"); - ASSERT_EQ(NumTableFilesAtLevel(0), 0); - ASSERT_GE(NumTableFilesAtLevel(1), 1); - dbfull()->TEST_CompactRange(1, NULL, &x); - ASSERT_EQ(AllEntriesFor("foo"), "[ tiny ]"); - - ASSERT_TRUE(Between(Size("", "pastfoo"), 0, 1000)); - } while (ChangeOptions()); -} - -TEST(DBTest, DeletionMarkers1) { - Put("foo", "v1"); - ASSERT_OK(dbfull()->TEST_CompactMemTable()); - const int last = config::kMaxMemCompactLevel; - ASSERT_EQ(NumTableFilesAtLevel(last), 1); // foo => v1 is now in last level - - // Place a table at level last-1 to prevent merging with preceding mutation - Put("a", "begin"); - Put("z", "end"); - dbfull()->TEST_CompactMemTable(); - ASSERT_EQ(NumTableFilesAtLevel(last), 1); - ASSERT_EQ(NumTableFilesAtLevel(last-1), 1); - - Delete("foo"); - Put("foo", "v2"); - ASSERT_EQ(AllEntriesFor("foo"), "[ v2, DEL, v1 ]"); - ASSERT_OK(dbfull()->TEST_CompactMemTable()); // Moves to level last-2 - ASSERT_EQ(AllEntriesFor("foo"), "[ v2, DEL, v1 ]"); - Slice z("z"); - dbfull()->TEST_CompactRange(last-2, NULL, &z); - // DEL eliminated, but v1 remains because we aren't compacting that level - // (DEL can be eliminated because v2 hides v1). - ASSERT_EQ(AllEntriesFor("foo"), "[ v2, v1 ]"); - dbfull()->TEST_CompactRange(last-1, NULL, NULL); - // Merging last-1 w/ last, so we are the base level for "foo", so - // DEL is removed. (as is v1). - ASSERT_EQ(AllEntriesFor("foo"), "[ v2 ]"); -} - -TEST(DBTest, DeletionMarkers2) { - Put("foo", "v1"); - ASSERT_OK(dbfull()->TEST_CompactMemTable()); - const int last = config::kMaxMemCompactLevel; - ASSERT_EQ(NumTableFilesAtLevel(last), 1); // foo => v1 is now in last level - - // Place a table at level last-1 to prevent merging with preceding mutation - Put("a", "begin"); - Put("z", "end"); - dbfull()->TEST_CompactMemTable(); - ASSERT_EQ(NumTableFilesAtLevel(last), 1); - ASSERT_EQ(NumTableFilesAtLevel(last-1), 1); - - Delete("foo"); - ASSERT_EQ(AllEntriesFor("foo"), "[ DEL, v1 ]"); - ASSERT_OK(dbfull()->TEST_CompactMemTable()); // Moves to level last-2 - ASSERT_EQ(AllEntriesFor("foo"), "[ DEL, v1 ]"); - dbfull()->TEST_CompactRange(last-2, NULL, NULL); - // DEL kept: "last" file overlaps - ASSERT_EQ(AllEntriesFor("foo"), "[ DEL, v1 ]"); - dbfull()->TEST_CompactRange(last-1, NULL, NULL); - // Merging last-1 w/ last, so we are the base level for "foo", so - // DEL is removed. (as is v1). - ASSERT_EQ(AllEntriesFor("foo"), "[ ]"); -} - -TEST(DBTest, OverlapInLevel0) { - do { - ASSERT_EQ(config::kMaxMemCompactLevel, 2) << "Fix test to match config"; - - // Fill levels 1 and 2 to disable the pushing of new memtables to levels > 0. - ASSERT_OK(Put("100", "v100")); - ASSERT_OK(Put("999", "v999")); - dbfull()->TEST_CompactMemTable(); - ASSERT_OK(Delete("100")); - ASSERT_OK(Delete("999")); - dbfull()->TEST_CompactMemTable(); - ASSERT_EQ("0,1,1", FilesPerLevel()); - - // Make files spanning the following ranges in level-0: - // files[0] 200 .. 900 - // files[1] 300 .. 500 - // Note that files are sorted by smallest key. - ASSERT_OK(Put("300", "v300")); - ASSERT_OK(Put("500", "v500")); - dbfull()->TEST_CompactMemTable(); - ASSERT_OK(Put("200", "v200")); - ASSERT_OK(Put("600", "v600")); - ASSERT_OK(Put("900", "v900")); - dbfull()->TEST_CompactMemTable(); - ASSERT_EQ("2,1,1", FilesPerLevel()); - - // Compact away the placeholder files we created initially - dbfull()->TEST_CompactRange(1, NULL, NULL); - dbfull()->TEST_CompactRange(2, NULL, NULL); - ASSERT_EQ("2", FilesPerLevel()); - - // Do a memtable compaction. Before bug-fix, the compaction would - // not detect the overlap with level-0 files and would incorrectly place - // the deletion in a deeper level. - ASSERT_OK(Delete("600")); - dbfull()->TEST_CompactMemTable(); - ASSERT_EQ("3", FilesPerLevel()); - ASSERT_EQ("NOT_FOUND", Get("600")); - } while (ChangeOptions()); -} - -TEST(DBTest, L0_CompactionBug_Issue44_a) { - Reopen(); - ASSERT_OK(Put("b", "v")); - Reopen(); - ASSERT_OK(Delete("b")); - ASSERT_OK(Delete("a")); - Reopen(); - ASSERT_OK(Delete("a")); - Reopen(); - ASSERT_OK(Put("a", "v")); - Reopen(); - Reopen(); - ASSERT_EQ("(a->v)", Contents()); - env_->SleepForMicroseconds(1000000); // Wait for compaction to finish - ASSERT_EQ("(a->v)", Contents()); -} - -TEST(DBTest, L0_CompactionBug_Issue44_b) { - Reopen(); - Put("",""); - Reopen(); - Delete("e"); - Put("",""); - Reopen(); - Put("c", "cv"); - Reopen(); - Put("",""); - Reopen(); - Put("",""); - env_->SleepForMicroseconds(1000000); // Wait for compaction to finish - Reopen(); - Put("d","dv"); - Reopen(); - Put("",""); - Reopen(); - Delete("d"); - Delete("b"); - Reopen(); - ASSERT_EQ("(->)(c->cv)", Contents()); - env_->SleepForMicroseconds(1000000); // Wait for compaction to finish - ASSERT_EQ("(->)(c->cv)", Contents()); -} - -TEST(DBTest, ComparatorCheck) { - class NewComparator : public Comparator { - public: - virtual const char* Name() const { return "leveldb.NewComparator"; } - virtual int Compare(const Slice& a, const Slice& b) const { - return BytewiseComparator()->Compare(a, b); - } - virtual void FindShortestSeparator(std::string* s, const Slice& l) const { - BytewiseComparator()->FindShortestSeparator(s, l); - } - virtual void FindShortSuccessor(std::string* key) const { - BytewiseComparator()->FindShortSuccessor(key); - } - }; - NewComparator cmp; - Options new_options = CurrentOptions(); - new_options.comparator = &cmp; - Status s = TryReopen(&new_options); - ASSERT_TRUE(!s.ok()); - ASSERT_TRUE(s.ToString().find("comparator") != std::string::npos) - << s.ToString(); -} - -TEST(DBTest, CustomComparator) { - class NumberComparator : public Comparator { - public: - virtual const char* Name() const { return "test.NumberComparator"; } - virtual int Compare(const Slice& a, const Slice& b) const { - return ToNumber(a) - ToNumber(b); - } - virtual void FindShortestSeparator(std::string* s, const Slice& l) const { - ToNumber(*s); // Check format - ToNumber(l); // Check format - } - virtual void FindShortSuccessor(std::string* key) const { - ToNumber(*key); // Check format - } - private: - static int ToNumber(const Slice& x) { - // Check that there are no extra characters. - ASSERT_TRUE(x.size() >= 2 && x[0] == '[' && x[x.size()-1] == ']') - << EscapeString(x); - int val; - char ignored; - ASSERT_TRUE(sscanf(x.ToString().c_str(), "[%i]%c", &val, &ignored) == 1) - << EscapeString(x); - return val; - } - }; - NumberComparator cmp; - Options new_options = CurrentOptions(); - new_options.create_if_missing = true; - new_options.comparator = &cmp; - new_options.filter_policy = NULL; // Cannot use bloom filters - new_options.write_buffer_size = 1000; // Compact more often - DestroyAndReopen(&new_options); - ASSERT_OK(Put("[10]", "ten")); - ASSERT_OK(Put("[0x14]", "twenty")); - for (int i = 0; i < 2; i++) { - ASSERT_EQ("ten", Get("[10]")); - ASSERT_EQ("ten", Get("[0xa]")); - ASSERT_EQ("twenty", Get("[20]")); - ASSERT_EQ("twenty", Get("[0x14]")); - ASSERT_EQ("NOT_FOUND", Get("[15]")); - ASSERT_EQ("NOT_FOUND", Get("[0xf]")); - Compact("[0]", "[9999]"); - } - - for (int run = 0; run < 2; run++) { - for (int i = 0; i < 1000; i++) { - char buf[100]; - snprintf(buf, sizeof(buf), "[%d]", i*10); - ASSERT_OK(Put(buf, buf)); - } - Compact("[0]", "[1000000]"); - } -} - -TEST(DBTest, ManualCompaction) { - ASSERT_EQ(config::kMaxMemCompactLevel, 2) - << "Need to update this test to match kMaxMemCompactLevel"; - - MakeTables(3, "p", "q"); - ASSERT_EQ("1,1,1", FilesPerLevel()); - - // Compaction range falls before files - Compact("", "c"); - ASSERT_EQ("1,1,1", FilesPerLevel()); - - // Compaction range falls after files - Compact("r", "z"); - ASSERT_EQ("1,1,1", FilesPerLevel()); - - // Compaction range overlaps files - Compact("p1", "p9"); - ASSERT_EQ("0,0,1", FilesPerLevel()); - - // Populate a different range - MakeTables(3, "c", "e"); - ASSERT_EQ("1,1,2", FilesPerLevel()); - - // Compact just the new range - Compact("b", "f"); - ASSERT_EQ("0,0,2", FilesPerLevel()); - - // Compact all - MakeTables(1, "a", "z"); - ASSERT_EQ("0,1,2", FilesPerLevel()); - db_->CompactRange(NULL, NULL); - ASSERT_EQ("0,0,1", FilesPerLevel()); -} - -TEST(DBTest, DBOpen_Options) { - std::string dbname = test::TmpDir() + "/db_options_test"; - DestroyDB(dbname, Options()); - - // Does not exist, and create_if_missing == false: error - DB* db = NULL; - Options opts; - opts.create_if_missing = false; - Status s = DB::Open(opts, dbname, &db); - ASSERT_TRUE(strstr(s.ToString().c_str(), "does not exist") != NULL); - ASSERT_TRUE(db == NULL); - - // Does not exist, and create_if_missing == true: OK - opts.create_if_missing = true; - s = DB::Open(opts, dbname, &db); - ASSERT_OK(s); - ASSERT_TRUE(db != NULL); - - delete db; - db = NULL; - - // Does exist, and error_if_exists == true: error - opts.create_if_missing = false; - opts.error_if_exists = true; - s = DB::Open(opts, dbname, &db); - ASSERT_TRUE(strstr(s.ToString().c_str(), "exists") != NULL); - ASSERT_TRUE(db == NULL); - - // Does exist, and error_if_exists == false: OK - opts.create_if_missing = true; - opts.error_if_exists = false; - s = DB::Open(opts, dbname, &db); - ASSERT_OK(s); - ASSERT_TRUE(db != NULL); - - delete db; - db = NULL; -} - -// Check that number of files does not grow when we are out of space -TEST(DBTest, NoSpace) { - Options options = CurrentOptions(); - options.env = env_; - Reopen(&options); - - ASSERT_OK(Put("foo", "v1")); - ASSERT_EQ("v1", Get("foo")); - Compact("a", "z"); - const int num_files = CountFiles(); - env_->no_space_.Release_Store(env_); // Force out-of-space errors - env_->sleep_counter_.Reset(); - for (int i = 0; i < 5; i++) { - for (int level = 0; level < config::kNumLevels-1; level++) { - dbfull()->TEST_CompactRange(level, NULL, NULL); - } - } - env_->no_space_.Release_Store(NULL); - ASSERT_LT(CountFiles(), num_files + 3); - - // Check that compaction attempts slept after errors - ASSERT_GE(env_->sleep_counter_.Read(), 5); -} - -TEST(DBTest, NonWritableFileSystem) { - Options options = CurrentOptions(); - options.write_buffer_size = 1000; - options.env = env_; - Reopen(&options); - ASSERT_OK(Put("foo", "v1")); - env_->non_writable_.Release_Store(env_); // Force errors for new files - std::string big(100000, 'x'); - int errors = 0; - for (int i = 0; i < 20; i++) { - fprintf(stderr, "iter %d; errors %d\n", i, errors); - if (!Put("foo", big).ok()) { - errors++; - env_->SleepForMicroseconds(100000); - } - } - ASSERT_GT(errors, 0); - env_->non_writable_.Release_Store(NULL); -} - -TEST(DBTest, FilesDeletedAfterCompaction) { - ASSERT_OK(Put("foo", "v2")); - Compact("a", "z"); - const int num_files = CountFiles(); - for (int i = 0; i < 10; i++) { - ASSERT_OK(Put("foo", "v2")); - Compact("a", "z"); - } - ASSERT_EQ(CountFiles(), num_files); -} - -TEST(DBTest, BloomFilter) { - env_->count_random_reads_ = true; - Options options = CurrentOptions(); - options.env = env_; - options.block_cache = NewLRUCache(0); // Prevent cache hits - options.filter_policy = NewBloomFilterPolicy(10); - Reopen(&options); - - // Populate multiple layers - const int N = 10000; - for (int i = 0; i < N; i++) { - ASSERT_OK(Put(Key(i), Key(i))); - } - Compact("a", "z"); - for (int i = 0; i < N; i += 100) { - ASSERT_OK(Put(Key(i), Key(i))); - } - dbfull()->TEST_CompactMemTable(); - - // Prevent auto compactions triggered by seeks - env_->delay_sstable_sync_.Release_Store(env_); - - // Lookup present keys. Should rarely read from small sstable. - env_->random_read_counter_.Reset(); - for (int i = 0; i < N; i++) { - ASSERT_EQ(Key(i), Get(Key(i))); - } - int reads = env_->random_read_counter_.Read(); - fprintf(stderr, "%d present => %d reads\n", N, reads); - ASSERT_GE(reads, N); - ASSERT_LE(reads, N + 2*N/100); - - // Lookup present keys. Should rarely read from either sstable. - env_->random_read_counter_.Reset(); - for (int i = 0; i < N; i++) { - ASSERT_EQ("NOT_FOUND", Get(Key(i) + ".missing")); - } - reads = env_->random_read_counter_.Read(); - fprintf(stderr, "%d missing => %d reads\n", N, reads); - ASSERT_LE(reads, 3*N/100); - - env_->delay_sstable_sync_.Release_Store(NULL); - Close(); - delete options.block_cache; - delete options.filter_policy; -} - -// Multi-threaded test: -namespace { - -static const int kNumThreads = 4; -static const int kTestSeconds = 10; -static const int kNumKeys = 1000; - -struct MTState { - DBTest* test; - port::AtomicPointer stop; - port::AtomicPointer counter[kNumThreads]; - port::AtomicPointer thread_done[kNumThreads]; -}; - -struct MTThread { - MTState* state; - int id; -}; - -static void MTThreadBody(void* arg) { - MTThread* t = reinterpret_cast(arg); - int id = t->id; - DB* db = t->state->test->db_; - uintptr_t counter = 0; - fprintf(stderr, "... starting thread %d\n", id); - Random rnd(1000 + id); - std::string value; - char valbuf[1500]; - while (t->state->stop.Acquire_Load() == NULL) { - t->state->counter[id].Release_Store(reinterpret_cast(counter)); - - int key = rnd.Uniform(kNumKeys); - char keybuf[20]; - snprintf(keybuf, sizeof(keybuf), "%016d", key); - - if (rnd.OneIn(2)) { - // Write values of the form . - // We add some padding for force compactions. - snprintf(valbuf, sizeof(valbuf), "%d.%d.%-1000d", - key, id, static_cast(counter)); - ASSERT_OK(db->Put(WriteOptions(), Slice(keybuf), Slice(valbuf))); - } else { - // Read a value and verify that it matches the pattern written above. - Status s = db->Get(ReadOptions(), Slice(keybuf), &value); - if (s.IsNotFound()) { - // Key has not yet been written - } else { - // Check that the writer thread counter is >= the counter in the value - ASSERT_OK(s); - int k, w, c; - ASSERT_EQ(3, sscanf(value.c_str(), "%d.%d.%d", &k, &w, &c)) << value; - ASSERT_EQ(k, key); - ASSERT_GE(w, 0); - ASSERT_LT(w, kNumThreads); - ASSERT_LE(c, reinterpret_cast( - t->state->counter[w].Acquire_Load())); - } - } - counter++; - } - t->state->thread_done[id].Release_Store(t); - fprintf(stderr, "... stopping thread %d after %d ops\n", id, int(counter)); -} - -} // namespace - -TEST(DBTest, MultiThreaded) { - do { - // Initialize state - MTState mt; - mt.test = this; - mt.stop.Release_Store(0); - for (int id = 0; id < kNumThreads; id++) { - mt.counter[id].Release_Store(0); - mt.thread_done[id].Release_Store(0); - } - - // Start threads - MTThread thread[kNumThreads]; - for (int id = 0; id < kNumThreads; id++) { - thread[id].state = &mt; - thread[id].id = id; - env_->StartThread(MTThreadBody, &thread[id]); - } - - // Let them run for a while - env_->SleepForMicroseconds(kTestSeconds * 1000000); - - // Stop the threads and wait for them to finish - mt.stop.Release_Store(&mt); - for (int id = 0; id < kNumThreads; id++) { - while (mt.thread_done[id].Acquire_Load() == NULL) { - env_->SleepForMicroseconds(100000); - } - } - } while (ChangeOptions()); -} - -namespace { -typedef std::map KVMap; -} - -class ModelDB: public DB { - public: - class ModelSnapshot : public Snapshot { - public: - KVMap map_; - }; - - explicit ModelDB(const Options& options): options_(options) { } - ~ModelDB() { } - virtual Status Put(const WriteOptions& o, const Slice& k, const Slice& v) { - return DB::Put(o, k, v); - } - virtual Status Delete(const WriteOptions& o, const Slice& key) { - return DB::Delete(o, key); - } - virtual Status Get(const ReadOptions& options, - const Slice& key, std::string* value) { - assert(false); // Not implemented - return Status::NotFound(key); - } - virtual Iterator* NewIterator(const ReadOptions& options) { - if (options.snapshot == NULL) { - KVMap* saved = new KVMap; - *saved = map_; - return new ModelIter(saved, true); - } else { - const KVMap* snapshot_state = - &(reinterpret_cast(options.snapshot)->map_); - return new ModelIter(snapshot_state, false); - } - } - virtual const Snapshot* GetSnapshot() { - ModelSnapshot* snapshot = new ModelSnapshot; - snapshot->map_ = map_; - return snapshot; - } - - virtual void ReleaseSnapshot(const Snapshot* snapshot) { - delete reinterpret_cast(snapshot); - } - virtual Status Write(const WriteOptions& options, WriteBatch* batch) { - class Handler : public WriteBatch::Handler { - public: - KVMap* map_; - virtual void Put(const Slice& key, const Slice& value) { - (*map_)[key.ToString()] = value.ToString(); - } - virtual void Delete(const Slice& key) { - map_->erase(key.ToString()); - } - }; - Handler handler; - handler.map_ = &map_; - return batch->Iterate(&handler); - } - - virtual bool GetProperty(const Slice& property, std::string* value) { - return false; - } - virtual void GetApproximateSizes(const Range* r, int n, uint64_t* sizes) { - for (int i = 0; i < n; i++) { - sizes[i] = 0; - } - } - virtual void CompactRange(const Slice* start, const Slice* end) { - } - - private: - class ModelIter: public Iterator { - public: - ModelIter(const KVMap* map, bool owned) - : map_(map), owned_(owned), iter_(map_->end()) { - } - ~ModelIter() { - if (owned_) delete map_; - } - virtual bool Valid() const { return iter_ != map_->end(); } - virtual void SeekToFirst() { iter_ = map_->begin(); } - virtual void SeekToLast() { - if (map_->empty()) { - iter_ = map_->end(); - } else { - iter_ = map_->find(map_->rbegin()->first); - } - } - virtual void Seek(const Slice& k) { - iter_ = map_->lower_bound(k.ToString()); - } - virtual void Next() { ++iter_; } - virtual void Prev() { --iter_; } - virtual Slice key() const { return iter_->first; } - virtual Slice value() const { return iter_->second; } - virtual Status status() const { return Status::OK(); } - private: - const KVMap* const map_; - const bool owned_; // Do we own map_ - KVMap::const_iterator iter_; - }; - const Options options_; - KVMap map_; -}; - -static std::string RandomKey(Random* rnd) { - int len = (rnd->OneIn(3) - ? 1 // Short sometimes to encourage collisions - : (rnd->OneIn(100) ? rnd->Skewed(10) : rnd->Uniform(10))); - return test::RandomKey(rnd, len); -} - -static bool CompareIterators(int step, - DB* model, - DB* db, - const Snapshot* model_snap, - const Snapshot* db_snap) { - ReadOptions options; - options.snapshot = model_snap; - Iterator* miter = model->NewIterator(options); - options.snapshot = db_snap; - Iterator* dbiter = db->NewIterator(options); - bool ok = true; - int count = 0; - for (miter->SeekToFirst(), dbiter->SeekToFirst(); - ok && miter->Valid() && dbiter->Valid(); - miter->Next(), dbiter->Next()) { - count++; - if (miter->key().compare(dbiter->key()) != 0) { - fprintf(stderr, "step %d: Key mismatch: '%s' vs. '%s'\n", - step, - EscapeString(miter->key()).c_str(), - EscapeString(dbiter->key()).c_str()); - ok = false; - break; - } - - if (miter->value().compare(dbiter->value()) != 0) { - fprintf(stderr, "step %d: Value mismatch for key '%s': '%s' vs. '%s'\n", - step, - EscapeString(miter->key()).c_str(), - EscapeString(miter->value()).c_str(), - EscapeString(miter->value()).c_str()); - ok = false; - } - } - - if (ok) { - if (miter->Valid() != dbiter->Valid()) { - fprintf(stderr, "step %d: Mismatch at end of iterators: %d vs. %d\n", - step, miter->Valid(), dbiter->Valid()); - ok = false; - } - } - fprintf(stderr, "%d entries compared: ok=%d\n", count, ok); - delete miter; - delete dbiter; - return ok; -} - -TEST(DBTest, Randomized) { - Random rnd(test::RandomSeed()); - do { - ModelDB model(CurrentOptions()); - const int N = 10000; - const Snapshot* model_snap = NULL; - const Snapshot* db_snap = NULL; - std::string k, v; - for (int step = 0; step < N; step++) { - if (step % 100 == 0) { - fprintf(stderr, "Step %d of %d\n", step, N); - } - // TODO(sanjay): Test Get() works - int p = rnd.Uniform(100); - if (p < 45) { // Put - k = RandomKey(&rnd); - v = RandomString(&rnd, - rnd.OneIn(20) - ? 100 + rnd.Uniform(100) - : rnd.Uniform(8)); - ASSERT_OK(model.Put(WriteOptions(), k, v)); - ASSERT_OK(db_->Put(WriteOptions(), k, v)); - - } else if (p < 90) { // Delete - k = RandomKey(&rnd); - ASSERT_OK(model.Delete(WriteOptions(), k)); - ASSERT_OK(db_->Delete(WriteOptions(), k)); - - - } else { // Multi-element batch - WriteBatch b; - const int num = rnd.Uniform(8); - for (int i = 0; i < num; i++) { - if (i == 0 || !rnd.OneIn(10)) { - k = RandomKey(&rnd); - } else { - // Periodically re-use the same key from the previous iter, so - // we have multiple entries in the write batch for the same key - } - if (rnd.OneIn(2)) { - v = RandomString(&rnd, rnd.Uniform(10)); - b.Put(k, v); - } else { - b.Delete(k); - } - } - ASSERT_OK(model.Write(WriteOptions(), &b)); - ASSERT_OK(db_->Write(WriteOptions(), &b)); - } - - if ((step % 100) == 0) { - ASSERT_TRUE(CompareIterators(step, &model, db_, NULL, NULL)); - ASSERT_TRUE(CompareIterators(step, &model, db_, model_snap, db_snap)); - // Save a snapshot from each DB this time that we'll use next - // time we compare things, to make sure the current state is - // preserved with the snapshot - if (model_snap != NULL) model.ReleaseSnapshot(model_snap); - if (db_snap != NULL) db_->ReleaseSnapshot(db_snap); - - Reopen(); - ASSERT_TRUE(CompareIterators(step, &model, db_, NULL, NULL)); - - model_snap = model.GetSnapshot(); - db_snap = db_->GetSnapshot(); - } - } - if (model_snap != NULL) model.ReleaseSnapshot(model_snap); - if (db_snap != NULL) db_->ReleaseSnapshot(db_snap); - } while (ChangeOptions()); -} - -std::string MakeKey(unsigned int num) { - char buf[30]; - snprintf(buf, sizeof(buf), "%016u", num); - return std::string(buf); -} - -void BM_LogAndApply(int iters, int num_base_files) { - std::string dbname = test::TmpDir() + "/leveldb_test_benchmark"; - DestroyDB(dbname, Options()); - - DB* db = NULL; - Options opts; - opts.create_if_missing = true; - Status s = DB::Open(opts, dbname, &db); - ASSERT_OK(s); - ASSERT_TRUE(db != NULL); - - delete db; - db = NULL; - - Env* env = Env::Default(); - - port::Mutex mu; - MutexLock l(&mu); - - InternalKeyComparator cmp(BytewiseComparator()); - Options options; - VersionSet vset(dbname, &options, NULL, &cmp); - ASSERT_OK(vset.Recover()); - VersionEdit vbase; - uint64_t fnum = 1; - for (int i = 0; i < num_base_files; i++) { - InternalKey start(MakeKey(2*fnum), 1, kTypeValue); - InternalKey limit(MakeKey(2*fnum+1), 1, kTypeDeletion); - vbase.AddFile(2, fnum++, 1 /* file size */, start, limit); - } - ASSERT_OK(vset.LogAndApply(&vbase, &mu)); - - uint64_t start_micros = env->NowMicros(); - - for (int i = 0; i < iters; i++) { - VersionEdit vedit; - vedit.DeleteFile(2, fnum); - InternalKey start(MakeKey(2*fnum), 1, kTypeValue); - InternalKey limit(MakeKey(2*fnum+1), 1, kTypeDeletion); - vedit.AddFile(2, fnum++, 1 /* file size */, start, limit); - vset.LogAndApply(&vedit, &mu); - } - uint64_t stop_micros = env->NowMicros(); - unsigned int us = stop_micros - start_micros; - char buf[16]; - snprintf(buf, sizeof(buf), "%d", num_base_files); - fprintf(stderr, - "BM_LogAndApply/%-6s %8d iters : %9u us (%7.0f us / iter)\n", - buf, iters, us, ((float)us) / iters); -} - -} // namespace leveldb - -int main(int argc, char** argv) { - if (argc > 1 && std::string(argv[1]) == "--benchmark") { - leveldb::BM_LogAndApply(1000, 1); - leveldb::BM_LogAndApply(1000, 100); - leveldb::BM_LogAndApply(1000, 10000); - leveldb::BM_LogAndApply(100, 100000); - return 0; - } - - return leveldb::test::RunAllTests(); -} diff --git a/leveldb/db/dbformat.cc b/leveldb/db/dbformat.cc deleted file mode 100644 index 28e11b3..0000000 --- a/leveldb/db/dbformat.cc +++ /dev/null @@ -1,140 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include -#include "db/dbformat.h" -#include "port/port.h" -#include "util/coding.h" - -namespace leveldb { - -static uint64_t PackSequenceAndType(uint64_t seq, ValueType t) { - assert(seq <= kMaxSequenceNumber); - assert(t <= kValueTypeForSeek); - return (seq << 8) | t; -} - -void AppendInternalKey(std::string* result, const ParsedInternalKey& key) { - result->append(key.user_key.data(), key.user_key.size()); - PutFixed64(result, PackSequenceAndType(key.sequence, key.type)); -} - -std::string ParsedInternalKey::DebugString() const { - char buf[50]; - snprintf(buf, sizeof(buf), "' @ %llu : %d", - (unsigned long long) sequence, - int(type)); - std::string result = "'"; - result += user_key.ToString(); - result += buf; - return result; -} - -std::string InternalKey::DebugString() const { - std::string result; - ParsedInternalKey parsed; - if (ParseInternalKey(rep_, &parsed)) { - result = parsed.DebugString(); - } else { - result = "(bad)"; - result.append(EscapeString(rep_)); - } - return result; -} - -const char* InternalKeyComparator::Name() const { - return "leveldb.InternalKeyComparator"; -} - -int InternalKeyComparator::Compare(const Slice& akey, const Slice& bkey) const { - // Order by: - // increasing user key (according to user-supplied comparator) - // decreasing sequence number - // decreasing type (though sequence# should be enough to disambiguate) - int r = user_comparator_->Compare(ExtractUserKey(akey), ExtractUserKey(bkey)); - if (r == 0) { - const uint64_t anum = DecodeFixed64(akey.data() + akey.size() - 8); - const uint64_t bnum = DecodeFixed64(bkey.data() + bkey.size() - 8); - if (anum > bnum) { - r = -1; - } else if (anum < bnum) { - r = +1; - } - } - return r; -} - -void InternalKeyComparator::FindShortestSeparator( - std::string* start, - const Slice& limit) const { - // Attempt to shorten the user portion of the key - Slice user_start = ExtractUserKey(*start); - Slice user_limit = ExtractUserKey(limit); - std::string tmp(user_start.data(), user_start.size()); - user_comparator_->FindShortestSeparator(&tmp, user_limit); - if (tmp.size() < user_start.size() && - user_comparator_->Compare(user_start, tmp) < 0) { - // User key has become shorter physically, but larger logically. - // Tack on the earliest possible number to the shortened user key. - PutFixed64(&tmp, PackSequenceAndType(kMaxSequenceNumber,kValueTypeForSeek)); - assert(this->Compare(*start, tmp) < 0); - assert(this->Compare(tmp, limit) < 0); - start->swap(tmp); - } -} - -void InternalKeyComparator::FindShortSuccessor(std::string* key) const { - Slice user_key = ExtractUserKey(*key); - std::string tmp(user_key.data(), user_key.size()); - user_comparator_->FindShortSuccessor(&tmp); - if (tmp.size() < user_key.size() && - user_comparator_->Compare(user_key, tmp) < 0) { - // User key has become shorter physically, but larger logically. - // Tack on the earliest possible number to the shortened user key. - PutFixed64(&tmp, PackSequenceAndType(kMaxSequenceNumber,kValueTypeForSeek)); - assert(this->Compare(*key, tmp) < 0); - key->swap(tmp); - } -} - -const char* InternalFilterPolicy::Name() const { - return user_policy_->Name(); -} - -void InternalFilterPolicy::CreateFilter(const Slice* keys, int n, - std::string* dst) const { - // We rely on the fact that the code in table.cc does not mind us - // adjusting keys[]. - Slice* mkey = const_cast(keys); - for (int i = 0; i < n; i++) { - mkey[i] = ExtractUserKey(keys[i]); - // TODO(sanjay): Suppress dups? - } - user_policy_->CreateFilter(keys, n, dst); -} - -bool InternalFilterPolicy::KeyMayMatch(const Slice& key, const Slice& f) const { - return user_policy_->KeyMayMatch(ExtractUserKey(key), f); -} - -LookupKey::LookupKey(const Slice& user_key, SequenceNumber s) { - size_t usize = user_key.size(); - size_t needed = usize + 13; // A conservative estimate - char* dst; - if (needed <= sizeof(space_)) { - dst = space_; - } else { - dst = new char[needed]; - } - start_ = dst; - dst = EncodeVarint32(dst, usize + 8); - kstart_ = dst; - memcpy(dst, user_key.data(), usize); - dst += usize; - EncodeFixed64(dst, PackSequenceAndType(s, kValueTypeForSeek)); - dst += 8; - end_ = dst; -} - -} // namespace leveldb diff --git a/leveldb/db/dbformat.h b/leveldb/db/dbformat.h deleted file mode 100644 index f7f64da..0000000 --- a/leveldb/db/dbformat.h +++ /dev/null @@ -1,227 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#ifndef STORAGE_LEVELDB_DB_FORMAT_H_ -#define STORAGE_LEVELDB_DB_FORMAT_H_ - -#include -#include "leveldb/comparator.h" -#include "leveldb/db.h" -#include "leveldb/filter_policy.h" -#include "leveldb/slice.h" -#include "leveldb/table_builder.h" -#include "util/coding.h" -#include "util/logging.h" - -namespace leveldb { - -// Grouping of constants. We may want to make some of these -// parameters set via options. -namespace config { -static const int kNumLevels = 7; - -// Level-0 compaction is started when we hit this many files. -static const int kL0_CompactionTrigger = 4; - -// Soft limit on number of level-0 files. We slow down writes at this point. -static const int kL0_SlowdownWritesTrigger = 8; - -// Maximum number of level-0 files. We stop writes at this point. -static const int kL0_StopWritesTrigger = 12; - -// Maximum level to which a new compacted memtable is pushed if it -// does not create overlap. We try to push to level 2 to avoid the -// relatively expensive level 0=>1 compactions and to avoid some -// expensive manifest file operations. We do not push all the way to -// the largest level since that can generate a lot of wasted disk -// space if the same key space is being repeatedly overwritten. -static const int kMaxMemCompactLevel = 2; - -} // namespace config - -class InternalKey; - -// Value types encoded as the last component of internal keys. -// DO NOT CHANGE THESE ENUM VALUES: they are embedded in the on-disk -// data structures. -enum ValueType { - kTypeDeletion = 0x0, - kTypeValue = 0x1 -}; -// kValueTypeForSeek defines the ValueType that should be passed when -// constructing a ParsedInternalKey object for seeking to a particular -// sequence number (since we sort sequence numbers in decreasing order -// and the value type is embedded as the low 8 bits in the sequence -// number in internal keys, we need to use the highest-numbered -// ValueType, not the lowest). -static const ValueType kValueTypeForSeek = kTypeValue; - -typedef uint64_t SequenceNumber; - -// We leave eight bits empty at the bottom so a type and sequence# -// can be packed together into 64-bits. -static const SequenceNumber kMaxSequenceNumber = - ((0x1ull << 56) - 1); - -struct ParsedInternalKey { - Slice user_key; - SequenceNumber sequence; - ValueType type; - - ParsedInternalKey() { } // Intentionally left uninitialized (for speed) - ParsedInternalKey(const Slice& u, const SequenceNumber& seq, ValueType t) - : user_key(u), sequence(seq), type(t) { } - std::string DebugString() const; -}; - -// Return the length of the encoding of "key". -inline size_t InternalKeyEncodingLength(const ParsedInternalKey& key) { - return key.user_key.size() + 8; -} - -// Append the serialization of "key" to *result. -extern void AppendInternalKey(std::string* result, - const ParsedInternalKey& key); - -// Attempt to parse an internal key from "internal_key". On success, -// stores the parsed data in "*result", and returns true. -// -// On error, returns false, leaves "*result" in an undefined state. -extern bool ParseInternalKey(const Slice& internal_key, - ParsedInternalKey* result); - -// Returns the user key portion of an internal key. -inline Slice ExtractUserKey(const Slice& internal_key) { - assert(internal_key.size() >= 8); - return Slice(internal_key.data(), internal_key.size() - 8); -} - -inline ValueType ExtractValueType(const Slice& internal_key) { - assert(internal_key.size() >= 8); - const size_t n = internal_key.size(); - uint64_t num = DecodeFixed64(internal_key.data() + n - 8); - unsigned char c = num & 0xff; - return static_cast(c); -} - -// A comparator for internal keys that uses a specified comparator for -// the user key portion and breaks ties by decreasing sequence number. -class InternalKeyComparator : public Comparator { - private: - const Comparator* user_comparator_; - public: - explicit InternalKeyComparator(const Comparator* c) : user_comparator_(c) { } - virtual const char* Name() const; - virtual int Compare(const Slice& a, const Slice& b) const; - virtual void FindShortestSeparator( - std::string* start, - const Slice& limit) const; - virtual void FindShortSuccessor(std::string* key) const; - - const Comparator* user_comparator() const { return user_comparator_; } - - int Compare(const InternalKey& a, const InternalKey& b) const; -}; - -// Filter policy wrapper that converts from internal keys to user keys -class InternalFilterPolicy : public FilterPolicy { - private: - const FilterPolicy* const user_policy_; - public: - explicit InternalFilterPolicy(const FilterPolicy* p) : user_policy_(p) { } - virtual const char* Name() const; - virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const; - virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const; -}; - -// Modules in this directory should keep internal keys wrapped inside -// the following class instead of plain strings so that we do not -// incorrectly use string comparisons instead of an InternalKeyComparator. -class InternalKey { - private: - std::string rep_; - public: - InternalKey() { } // Leave rep_ as empty to indicate it is invalid - InternalKey(const Slice& user_key, SequenceNumber s, ValueType t) { - AppendInternalKey(&rep_, ParsedInternalKey(user_key, s, t)); - } - - void DecodeFrom(const Slice& s) { rep_.assign(s.data(), s.size()); } - Slice Encode() const { - assert(!rep_.empty()); - return rep_; - } - - Slice user_key() const { return ExtractUserKey(rep_); } - - void SetFrom(const ParsedInternalKey& p) { - rep_.clear(); - AppendInternalKey(&rep_, p); - } - - void Clear() { rep_.clear(); } - - std::string DebugString() const; -}; - -inline int InternalKeyComparator::Compare( - const InternalKey& a, const InternalKey& b) const { - return Compare(a.Encode(), b.Encode()); -} - -inline bool ParseInternalKey(const Slice& internal_key, - ParsedInternalKey* result) { - const size_t n = internal_key.size(); - if (n < 8) return false; - uint64_t num = DecodeFixed64(internal_key.data() + n - 8); - unsigned char c = num & 0xff; - result->sequence = num >> 8; - result->type = static_cast(c); - result->user_key = Slice(internal_key.data(), n - 8); - return (c <= static_cast(kTypeValue)); -} - -// A helper class useful for DBImpl::Get() -class LookupKey { - public: - // Initialize *this for looking up user_key at a snapshot with - // the specified sequence number. - LookupKey(const Slice& user_key, SequenceNumber sequence); - - ~LookupKey(); - - // Return a key suitable for lookup in a MemTable. - Slice memtable_key() const { return Slice(start_, end_ - start_); } - - // Return an internal key (suitable for passing to an internal iterator) - Slice internal_key() const { return Slice(kstart_, end_ - kstart_); } - - // Return the user key - Slice user_key() const { return Slice(kstart_, end_ - kstart_ - 8); } - - private: - // We construct a char array of the form: - // klength varint32 <-- start_ - // userkey char[klength] <-- kstart_ - // tag uint64 - // <-- end_ - // The array is a suitable MemTable key. - // The suffix starting with "userkey" can be used as an InternalKey. - const char* start_; - const char* kstart_; - const char* end_; - char space_[200]; // Avoid allocation for short keys - - // No copying allowed - LookupKey(const LookupKey&); - void operator=(const LookupKey&); -}; - -inline LookupKey::~LookupKey() { - if (start_ != space_) delete[] start_; -} - -} // namespace leveldb - -#endif // STORAGE_LEVELDB_DB_FORMAT_H_ diff --git a/leveldb/db/dbformat_test.cc b/leveldb/db/dbformat_test.cc deleted file mode 100644 index 5d82f5d..0000000 --- a/leveldb/db/dbformat_test.cc +++ /dev/null @@ -1,112 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "db/dbformat.h" -#include "util/logging.h" -#include "util/testharness.h" - -namespace leveldb { - -static std::string IKey(const std::string& user_key, - uint64_t seq, - ValueType vt) { - std::string encoded; - AppendInternalKey(&encoded, ParsedInternalKey(user_key, seq, vt)); - return encoded; -} - -static std::string Shorten(const std::string& s, const std::string& l) { - std::string result = s; - InternalKeyComparator(BytewiseComparator()).FindShortestSeparator(&result, l); - return result; -} - -static std::string ShortSuccessor(const std::string& s) { - std::string result = s; - InternalKeyComparator(BytewiseComparator()).FindShortSuccessor(&result); - return result; -} - -static void TestKey(const std::string& key, - uint64_t seq, - ValueType vt) { - std::string encoded = IKey(key, seq, vt); - - Slice in(encoded); - ParsedInternalKey decoded("", 0, kTypeValue); - - ASSERT_TRUE(ParseInternalKey(in, &decoded)); - ASSERT_EQ(key, decoded.user_key.ToString()); - ASSERT_EQ(seq, decoded.sequence); - ASSERT_EQ(vt, decoded.type); - - ASSERT_TRUE(!ParseInternalKey(Slice("bar"), &decoded)); -} - -class FormatTest { }; - -TEST(FormatTest, InternalKey_EncodeDecode) { - const char* keys[] = { "", "k", "hello", "longggggggggggggggggggggg" }; - const uint64_t seq[] = { - 1, 2, 3, - (1ull << 8) - 1, 1ull << 8, (1ull << 8) + 1, - (1ull << 16) - 1, 1ull << 16, (1ull << 16) + 1, - (1ull << 32) - 1, 1ull << 32, (1ull << 32) + 1 - }; - for (int k = 0; k < sizeof(keys) / sizeof(keys[0]); k++) { - for (int s = 0; s < sizeof(seq) / sizeof(seq[0]); s++) { - TestKey(keys[k], seq[s], kTypeValue); - TestKey("hello", 1, kTypeDeletion); - } - } -} - -TEST(FormatTest, InternalKeyShortSeparator) { - // When user keys are same - ASSERT_EQ(IKey("foo", 100, kTypeValue), - Shorten(IKey("foo", 100, kTypeValue), - IKey("foo", 99, kTypeValue))); - ASSERT_EQ(IKey("foo", 100, kTypeValue), - Shorten(IKey("foo", 100, kTypeValue), - IKey("foo", 101, kTypeValue))); - ASSERT_EQ(IKey("foo", 100, kTypeValue), - Shorten(IKey("foo", 100, kTypeValue), - IKey("foo", 100, kTypeValue))); - ASSERT_EQ(IKey("foo", 100, kTypeValue), - Shorten(IKey("foo", 100, kTypeValue), - IKey("foo", 100, kTypeDeletion))); - - // When user keys are misordered - ASSERT_EQ(IKey("foo", 100, kTypeValue), - Shorten(IKey("foo", 100, kTypeValue), - IKey("bar", 99, kTypeValue))); - - // When user keys are different, but correctly ordered - ASSERT_EQ(IKey("g", kMaxSequenceNumber, kValueTypeForSeek), - Shorten(IKey("foo", 100, kTypeValue), - IKey("hello", 200, kTypeValue))); - - // When start user key is prefix of limit user key - ASSERT_EQ(IKey("foo", 100, kTypeValue), - Shorten(IKey("foo", 100, kTypeValue), - IKey("foobar", 200, kTypeValue))); - - // When limit user key is prefix of start user key - ASSERT_EQ(IKey("foobar", 100, kTypeValue), - Shorten(IKey("foobar", 100, kTypeValue), - IKey("foo", 200, kTypeValue))); -} - -TEST(FormatTest, InternalKeyShortestSuccessor) { - ASSERT_EQ(IKey("g", kMaxSequenceNumber, kValueTypeForSeek), - ShortSuccessor(IKey("foo", 100, kTypeValue))); - ASSERT_EQ(IKey("\xff\xff", 100, kTypeValue), - ShortSuccessor(IKey("\xff\xff", 100, kTypeValue))); -} - -} // namespace leveldb - -int main(int argc, char** argv) { - return leveldb::test::RunAllTests(); -} diff --git a/leveldb/db/filename.cc b/leveldb/db/filename.cc deleted file mode 100644 index 3c4d49f..0000000 --- a/leveldb/db/filename.cc +++ /dev/null @@ -1,139 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include -#include -#include "db/filename.h" -#include "db/dbformat.h" -#include "leveldb/env.h" -#include "util/logging.h" - -namespace leveldb { - -// A utility routine: write "data" to the named file and Sync() it. -extern Status WriteStringToFileSync(Env* env, const Slice& data, - const std::string& fname); - -static std::string MakeFileName(const std::string& name, uint64_t number, - const char* suffix) { - char buf[100]; - snprintf(buf, sizeof(buf), "/%06llu.%s", - static_cast(number), - suffix); - return name + buf; -} - -std::string LogFileName(const std::string& name, uint64_t number) { - assert(number > 0); - return MakeFileName(name, number, "log"); -} - -std::string TableFileName(const std::string& name, uint64_t number) { - assert(number > 0); - return MakeFileName(name, number, "sst"); -} - -std::string DescriptorFileName(const std::string& dbname, uint64_t number) { - assert(number > 0); - char buf[100]; - snprintf(buf, sizeof(buf), "/MANIFEST-%06llu", - static_cast(number)); - return dbname + buf; -} - -std::string CurrentFileName(const std::string& dbname) { - return dbname + "/CURRENT"; -} - -std::string LockFileName(const std::string& dbname) { - return dbname + "/LOCK"; -} - -std::string TempFileName(const std::string& dbname, uint64_t number) { - assert(number > 0); - return MakeFileName(dbname, number, "dbtmp"); -} - -std::string InfoLogFileName(const std::string& dbname) { - return dbname + "/LOG"; -} - -// Return the name of the old info log file for "dbname". -std::string OldInfoLogFileName(const std::string& dbname) { - return dbname + "/LOG.old"; -} - - -// Owned filenames have the form: -// dbname/CURRENT -// dbname/LOCK -// dbname/LOG -// dbname/LOG.old -// dbname/MANIFEST-[0-9]+ -// dbname/[0-9]+.(log|sst) -bool ParseFileName(const std::string& fname, - uint64_t* number, - FileType* type) { - Slice rest(fname); - if (rest == "CURRENT") { - *number = 0; - *type = kCurrentFile; - } else if (rest == "LOCK") { - *number = 0; - *type = kDBLockFile; - } else if (rest == "LOG" || rest == "LOG.old") { - *number = 0; - *type = kInfoLogFile; - } else if (rest.starts_with("MANIFEST-")) { - rest.remove_prefix(strlen("MANIFEST-")); - uint64_t num; - if (!ConsumeDecimalNumber(&rest, &num)) { - return false; - } - if (!rest.empty()) { - return false; - } - *type = kDescriptorFile; - *number = num; - } else { - // Avoid strtoull() to keep filename format independent of the - // current locale - uint64_t num; - if (!ConsumeDecimalNumber(&rest, &num)) { - return false; - } - Slice suffix = rest; - if (suffix == Slice(".log")) { - *type = kLogFile; - } else if (suffix == Slice(".sst")) { - *type = kTableFile; - } else if (suffix == Slice(".dbtmp")) { - *type = kTempFile; - } else { - return false; - } - *number = num; - } - return true; -} - -Status SetCurrentFile(Env* env, const std::string& dbname, - uint64_t descriptor_number) { - // Remove leading "dbname/" and add newline to manifest file name - std::string manifest = DescriptorFileName(dbname, descriptor_number); - Slice contents = manifest; - assert(contents.starts_with(dbname + "/")); - contents.remove_prefix(dbname.size() + 1); - std::string tmp = TempFileName(dbname, descriptor_number); - Status s = WriteStringToFileSync(env, contents.ToString() + "\n", tmp); - if (s.ok()) { - s = env->RenameFile(tmp, CurrentFileName(dbname)); - } - if (!s.ok()) { - env->DeleteFile(tmp); - } - return s; -} - -} // namespace leveldb diff --git a/leveldb/db/filename.h b/leveldb/db/filename.h deleted file mode 100644 index d5d09b1..0000000 --- a/leveldb/db/filename.h +++ /dev/null @@ -1,80 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. -// -// File names used by DB code - -#ifndef STORAGE_LEVELDB_DB_FILENAME_H_ -#define STORAGE_LEVELDB_DB_FILENAME_H_ - -#include -#include -#include "leveldb/slice.h" -#include "leveldb/status.h" -#include "port/port.h" - -namespace leveldb { - -class Env; - -enum FileType { - kLogFile, - kDBLockFile, - kTableFile, - kDescriptorFile, - kCurrentFile, - kTempFile, - kInfoLogFile // Either the current one, or an old one -}; - -// Return the name of the log file with the specified number -// in the db named by "dbname". The result will be prefixed with -// "dbname". -extern std::string LogFileName(const std::string& dbname, uint64_t number); - -// Return the name of the sstable with the specified number -// in the db named by "dbname". The result will be prefixed with -// "dbname". -extern std::string TableFileName(const std::string& dbname, uint64_t number); - -// Return the name of the descriptor file for the db named by -// "dbname" and the specified incarnation number. The result will be -// prefixed with "dbname". -extern std::string DescriptorFileName(const std::string& dbname, - uint64_t number); - -// Return the name of the current file. This file contains the name -// of the current manifest file. The result will be prefixed with -// "dbname". -extern std::string CurrentFileName(const std::string& dbname); - -// Return the name of the lock file for the db named by -// "dbname". The result will be prefixed with "dbname". -extern std::string LockFileName(const std::string& dbname); - -// Return the name of a temporary file owned by the db named "dbname". -// The result will be prefixed with "dbname". -extern std::string TempFileName(const std::string& dbname, uint64_t number); - -// Return the name of the info log file for "dbname". -extern std::string InfoLogFileName(const std::string& dbname); - -// Return the name of the old info log file for "dbname". -extern std::string OldInfoLogFileName(const std::string& dbname); - -// If filename is a leveldb file, store the type of the file in *type. -// The number encoded in the filename is stored in *number. If the -// filename was successfully parsed, returns true. Else return false. -extern bool ParseFileName(const std::string& filename, - uint64_t* number, - FileType* type); - -// Make the CURRENT file point to the descriptor file with the -// specified number. -extern Status SetCurrentFile(Env* env, const std::string& dbname, - uint64_t descriptor_number); - - -} // namespace leveldb - -#endif // STORAGE_LEVELDB_DB_FILENAME_H_ diff --git a/leveldb/db/filename_test.cc b/leveldb/db/filename_test.cc deleted file mode 100644 index 47353d6..0000000 --- a/leveldb/db/filename_test.cc +++ /dev/null @@ -1,122 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "db/filename.h" - -#include "db/dbformat.h" -#include "port/port.h" -#include "util/logging.h" -#include "util/testharness.h" - -namespace leveldb { - -class FileNameTest { }; - -TEST(FileNameTest, Parse) { - Slice db; - FileType type; - uint64_t number; - - // Successful parses - static struct { - const char* fname; - uint64_t number; - FileType type; - } cases[] = { - { "100.log", 100, kLogFile }, - { "0.log", 0, kLogFile }, - { "0.sst", 0, kTableFile }, - { "CURRENT", 0, kCurrentFile }, - { "LOCK", 0, kDBLockFile }, - { "MANIFEST-2", 2, kDescriptorFile }, - { "MANIFEST-7", 7, kDescriptorFile }, - { "LOG", 0, kInfoLogFile }, - { "LOG.old", 0, kInfoLogFile }, - { "18446744073709551615.log", 18446744073709551615ull, kLogFile }, - }; - for (int i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) { - std::string f = cases[i].fname; - ASSERT_TRUE(ParseFileName(f, &number, &type)) << f; - ASSERT_EQ(cases[i].type, type) << f; - ASSERT_EQ(cases[i].number, number) << f; - } - - // Errors - static const char* errors[] = { - "", - "foo", - "foo-dx-100.log", - ".log", - "", - "manifest", - "CURREN", - "CURRENTX", - "MANIFES", - "MANIFEST", - "MANIFEST-", - "XMANIFEST-3", - "MANIFEST-3x", - "LOC", - "LOCKx", - "LO", - "LOGx", - "18446744073709551616.log", - "184467440737095516150.log", - "100", - "100.", - "100.lop" - }; - for (int i = 0; i < sizeof(errors) / sizeof(errors[0]); i++) { - std::string f = errors[i]; - ASSERT_TRUE(!ParseFileName(f, &number, &type)) << f; - }; -} - -TEST(FileNameTest, Construction) { - uint64_t number; - FileType type; - std::string fname; - - fname = CurrentFileName("foo"); - ASSERT_EQ("foo/", std::string(fname.data(), 4)); - ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &type)); - ASSERT_EQ(0, number); - ASSERT_EQ(kCurrentFile, type); - - fname = LockFileName("foo"); - ASSERT_EQ("foo/", std::string(fname.data(), 4)); - ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &type)); - ASSERT_EQ(0, number); - ASSERT_EQ(kDBLockFile, type); - - fname = LogFileName("foo", 192); - ASSERT_EQ("foo/", std::string(fname.data(), 4)); - ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &type)); - ASSERT_EQ(192, number); - ASSERT_EQ(kLogFile, type); - - fname = TableFileName("bar", 200); - ASSERT_EQ("bar/", std::string(fname.data(), 4)); - ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &type)); - ASSERT_EQ(200, number); - ASSERT_EQ(kTableFile, type); - - fname = DescriptorFileName("bar", 100); - ASSERT_EQ("bar/", std::string(fname.data(), 4)); - ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &type)); - ASSERT_EQ(100, number); - ASSERT_EQ(kDescriptorFile, type); - - fname = TempFileName("tmp", 999); - ASSERT_EQ("tmp/", std::string(fname.data(), 4)); - ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &type)); - ASSERT_EQ(999, number); - ASSERT_EQ(kTempFile, type); -} - -} // namespace leveldb - -int main(int argc, char** argv) { - return leveldb::test::RunAllTests(); -} diff --git a/leveldb/db/log_format.h b/leveldb/db/log_format.h deleted file mode 100644 index 2690cb9..0000000 --- a/leveldb/db/log_format.h +++ /dev/null @@ -1,35 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. -// -// Log format information shared by reader and writer. -// See ../doc/log_format.txt for more detail. - -#ifndef STORAGE_LEVELDB_DB_LOG_FORMAT_H_ -#define STORAGE_LEVELDB_DB_LOG_FORMAT_H_ - -namespace leveldb { -namespace log { - -enum RecordType { - // Zero is reserved for preallocated files - kZeroType = 0, - - kFullType = 1, - - // For fragments - kFirstType = 2, - kMiddleType = 3, - kLastType = 4 -}; -static const int kMaxRecordType = kLastType; - -static const int kBlockSize = 32768; - -// Header is checksum (4 bytes), type (1 byte), length (2 bytes). -static const int kHeaderSize = 4 + 1 + 2; - -} // namespace log -} // namespace leveldb - -#endif // STORAGE_LEVELDB_DB_LOG_FORMAT_H_ diff --git a/leveldb/db/log_reader.cc b/leveldb/db/log_reader.cc deleted file mode 100644 index b35f115..0000000 --- a/leveldb/db/log_reader.cc +++ /dev/null @@ -1,259 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "db/log_reader.h" - -#include -#include "leveldb/env.h" -#include "util/coding.h" -#include "util/crc32c.h" - -namespace leveldb { -namespace log { - -Reader::Reporter::~Reporter() { -} - -Reader::Reader(SequentialFile* file, Reporter* reporter, bool checksum, - uint64_t initial_offset) - : file_(file), - reporter_(reporter), - checksum_(checksum), - backing_store_(new char[kBlockSize]), - buffer_(), - eof_(false), - last_record_offset_(0), - end_of_buffer_offset_(0), - initial_offset_(initial_offset) { -} - -Reader::~Reader() { - delete[] backing_store_; -} - -bool Reader::SkipToInitialBlock() { - size_t offset_in_block = initial_offset_ % kBlockSize; - uint64_t block_start_location = initial_offset_ - offset_in_block; - - // Don't search a block if we'd be in the trailer - if (offset_in_block > kBlockSize - 6) { - offset_in_block = 0; - block_start_location += kBlockSize; - } - - end_of_buffer_offset_ = block_start_location; - - // Skip to start of first block that can contain the initial record - if (block_start_location > 0) { - Status skip_status = file_->Skip(block_start_location); - if (!skip_status.ok()) { - ReportDrop(block_start_location, skip_status); - return false; - } - } - - return true; -} - -bool Reader::ReadRecord(Slice* record, std::string* scratch) { - if (last_record_offset_ < initial_offset_) { - if (!SkipToInitialBlock()) { - return false; - } - } - - scratch->clear(); - record->clear(); - bool in_fragmented_record = false; - // Record offset of the logical record that we're reading - // 0 is a dummy value to make compilers happy - uint64_t prospective_record_offset = 0; - - Slice fragment; - while (true) { - uint64_t physical_record_offset = end_of_buffer_offset_ - buffer_.size(); - const unsigned int record_type = ReadPhysicalRecord(&fragment); - switch (record_type) { - case kFullType: - if (in_fragmented_record) { - // Handle bug in earlier versions of log::Writer where - // it could emit an empty kFirstType record at the tail end - // of a block followed by a kFullType or kFirstType record - // at the beginning of the next block. - if (scratch->empty()) { - in_fragmented_record = false; - } else { - ReportCorruption(scratch->size(), "partial record without end(1)"); - } - } - prospective_record_offset = physical_record_offset; - scratch->clear(); - *record = fragment; - last_record_offset_ = prospective_record_offset; - return true; - - case kFirstType: - if (in_fragmented_record) { - // Handle bug in earlier versions of log::Writer where - // it could emit an empty kFirstType record at the tail end - // of a block followed by a kFullType or kFirstType record - // at the beginning of the next block. - if (scratch->empty()) { - in_fragmented_record = false; - } else { - ReportCorruption(scratch->size(), "partial record without end(2)"); - } - } - prospective_record_offset = physical_record_offset; - scratch->assign(fragment.data(), fragment.size()); - in_fragmented_record = true; - break; - - case kMiddleType: - if (!in_fragmented_record) { - ReportCorruption(fragment.size(), - "missing start of fragmented record(1)"); - } else { - scratch->append(fragment.data(), fragment.size()); - } - break; - - case kLastType: - if (!in_fragmented_record) { - ReportCorruption(fragment.size(), - "missing start of fragmented record(2)"); - } else { - scratch->append(fragment.data(), fragment.size()); - *record = Slice(*scratch); - last_record_offset_ = prospective_record_offset; - return true; - } - break; - - case kEof: - if (in_fragmented_record) { - ReportCorruption(scratch->size(), "partial record without end(3)"); - scratch->clear(); - } - return false; - - case kBadRecord: - if (in_fragmented_record) { - ReportCorruption(scratch->size(), "error in middle of record"); - in_fragmented_record = false; - scratch->clear(); - } - break; - - default: { - char buf[40]; - snprintf(buf, sizeof(buf), "unknown record type %u", record_type); - ReportCorruption( - (fragment.size() + (in_fragmented_record ? scratch->size() : 0)), - buf); - in_fragmented_record = false; - scratch->clear(); - break; - } - } - } - return false; -} - -uint64_t Reader::LastRecordOffset() { - return last_record_offset_; -} - -void Reader::ReportCorruption(size_t bytes, const char* reason) { - ReportDrop(bytes, Status::Corruption(reason)); -} - -void Reader::ReportDrop(size_t bytes, const Status& reason) { - if (reporter_ != NULL && - end_of_buffer_offset_ - buffer_.size() - bytes >= initial_offset_) { - reporter_->Corruption(bytes, reason); - } -} - -unsigned int Reader::ReadPhysicalRecord(Slice* result) { - while (true) { - if (buffer_.size() < kHeaderSize) { - if (!eof_) { - // Last read was a full read, so this is a trailer to skip - buffer_.clear(); - Status status = file_->Read(kBlockSize, &buffer_, backing_store_); - end_of_buffer_offset_ += buffer_.size(); - if (!status.ok()) { - buffer_.clear(); - ReportDrop(kBlockSize, status); - eof_ = true; - return kEof; - } else if (buffer_.size() < kBlockSize) { - eof_ = true; - } - continue; - } else if (buffer_.size() == 0) { - // End of file - return kEof; - } else { - size_t drop_size = buffer_.size(); - buffer_.clear(); - ReportCorruption(drop_size, "truncated record at end of file"); - return kEof; - } - } - - // Parse the header - const char* header = buffer_.data(); - const uint32_t a = static_cast(header[4]) & 0xff; - const uint32_t b = static_cast(header[5]) & 0xff; - const unsigned int type = header[6]; - const uint32_t length = a | (b << 8); - if (kHeaderSize + length > buffer_.size()) { - size_t drop_size = buffer_.size(); - buffer_.clear(); - ReportCorruption(drop_size, "bad record length"); - return kBadRecord; - } - - if (type == kZeroType && length == 0) { - // Skip zero length record without reporting any drops since - // such records are produced by the mmap based writing code in - // env_posix.cc that preallocates file regions. - buffer_.clear(); - return kBadRecord; - } - - // Check crc - if (checksum_) { - uint32_t expected_crc = crc32c::Unmask(DecodeFixed32(header)); - uint32_t actual_crc = crc32c::Value(header + 6, 1 + length); - if (actual_crc != expected_crc) { - // Drop the rest of the buffer since "length" itself may have - // been corrupted and if we trust it, we could find some - // fragment of a real log record that just happens to look - // like a valid log record. - size_t drop_size = buffer_.size(); - buffer_.clear(); - ReportCorruption(drop_size, "checksum mismatch"); - return kBadRecord; - } - } - - buffer_.remove_prefix(kHeaderSize + length); - - // Skip physical record that started before initial_offset_ - if (end_of_buffer_offset_ - buffer_.size() - kHeaderSize - length < - initial_offset_) { - result->clear(); - return kBadRecord; - } - - *result = Slice(header + kHeaderSize, length); - return type; - } -} - -} // namespace log -} // namespace leveldb diff --git a/leveldb/db/log_reader.h b/leveldb/db/log_reader.h deleted file mode 100644 index 82d4bee..0000000 --- a/leveldb/db/log_reader.h +++ /dev/null @@ -1,108 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#ifndef STORAGE_LEVELDB_DB_LOG_READER_H_ -#define STORAGE_LEVELDB_DB_LOG_READER_H_ - -#include - -#include "db/log_format.h" -#include "leveldb/slice.h" -#include "leveldb/status.h" - -namespace leveldb { - -class SequentialFile; - -namespace log { - -class Reader { - public: - // Interface for reporting errors. - class Reporter { - public: - virtual ~Reporter(); - - // Some corruption was detected. "size" is the approximate number - // of bytes dropped due to the corruption. - virtual void Corruption(size_t bytes, const Status& status) = 0; - }; - - // Create a reader that will return log records from "*file". - // "*file" must remain live while this Reader is in use. - // - // If "reporter" is non-NULL, it is notified whenever some data is - // dropped due to a detected corruption. "*reporter" must remain - // live while this Reader is in use. - // - // If "checksum" is true, verify checksums if available. - // - // The Reader will start reading at the first record located at physical - // position >= initial_offset within the file. - Reader(SequentialFile* file, Reporter* reporter, bool checksum, - uint64_t initial_offset); - - ~Reader(); - - // Read the next record into *record. Returns true if read - // successfully, false if we hit end of the input. May use - // "*scratch" as temporary storage. The contents filled in *record - // will only be valid until the next mutating operation on this - // reader or the next mutation to *scratch. - bool ReadRecord(Slice* record, std::string* scratch); - - // Returns the physical offset of the last record returned by ReadRecord. - // - // Undefined before the first call to ReadRecord. - uint64_t LastRecordOffset(); - - private: - SequentialFile* const file_; - Reporter* const reporter_; - bool const checksum_; - char* const backing_store_; - Slice buffer_; - bool eof_; // Last Read() indicated EOF by returning < kBlockSize - - // Offset of the last record returned by ReadRecord. - uint64_t last_record_offset_; - // Offset of the first location past the end of buffer_. - uint64_t end_of_buffer_offset_; - - // Offset at which to start looking for the first record to return - uint64_t const initial_offset_; - - // Extend record types with the following special values - enum { - kEof = kMaxRecordType + 1, - // Returned whenever we find an invalid physical record. - // Currently there are three situations in which this happens: - // * The record has an invalid CRC (ReadPhysicalRecord reports a drop) - // * The record is a 0-length record (No drop is reported) - // * The record is below constructor's initial_offset (No drop is reported) - kBadRecord = kMaxRecordType + 2 - }; - - // Skips all blocks that are completely before "initial_offset_". - // - // Returns true on success. Handles reporting. - bool SkipToInitialBlock(); - - // Return type, or one of the preceding special values - unsigned int ReadPhysicalRecord(Slice* result); - - // Reports dropped bytes to the reporter. - // buffer_ must be updated to remove the dropped bytes prior to invocation. - void ReportCorruption(size_t bytes, const char* reason); - void ReportDrop(size_t bytes, const Status& reason); - - // No copying allowed - Reader(const Reader&); - void operator=(const Reader&); -}; - -} // namespace log -} // namespace leveldb - -#endif // STORAGE_LEVELDB_DB_LOG_READER_H_ diff --git a/leveldb/db/log_test.cc b/leveldb/db/log_test.cc deleted file mode 100644 index 4c5cf87..0000000 --- a/leveldb/db/log_test.cc +++ /dev/null @@ -1,500 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "db/log_reader.h" -#include "db/log_writer.h" -#include "leveldb/env.h" -#include "util/coding.h" -#include "util/crc32c.h" -#include "util/random.h" -#include "util/testharness.h" - -namespace leveldb { -namespace log { - -// Construct a string of the specified length made out of the supplied -// partial string. -static std::string BigString(const std::string& partial_string, size_t n) { - std::string result; - while (result.size() < n) { - result.append(partial_string); - } - result.resize(n); - return result; -} - -// Construct a string from a number -static std::string NumberString(int n) { - char buf[50]; - snprintf(buf, sizeof(buf), "%d.", n); - return std::string(buf); -} - -// Return a skewed potentially long string -static std::string RandomSkewedString(int i, Random* rnd) { - return BigString(NumberString(i), rnd->Skewed(17)); -} - -class LogTest { - private: - class StringDest : public WritableFile { - public: - std::string contents_; - - virtual Status Close() { return Status::OK(); } - virtual Status Flush() { return Status::OK(); } - virtual Status Sync() { return Status::OK(); } - virtual Status Append(const Slice& slice) { - contents_.append(slice.data(), slice.size()); - return Status::OK(); - } - }; - - class StringSource : public SequentialFile { - public: - Slice contents_; - bool force_error_; - bool returned_partial_; - StringSource() : force_error_(false), returned_partial_(false) { } - - virtual Status Read(size_t n, Slice* result, char* scratch) { - ASSERT_TRUE(!returned_partial_) << "must not Read() after eof/error"; - - if (force_error_) { - force_error_ = false; - returned_partial_ = true; - return Status::Corruption("read error"); - } - - if (contents_.size() < n) { - n = contents_.size(); - returned_partial_ = true; - } - *result = Slice(contents_.data(), n); - contents_.remove_prefix(n); - return Status::OK(); - } - - virtual Status Skip(uint64_t n) { - if (n > contents_.size()) { - contents_.clear(); - return Status::NotFound("in-memory file skipepd past end"); - } - - contents_.remove_prefix(n); - - return Status::OK(); - } - }; - - class ReportCollector : public Reader::Reporter { - public: - size_t dropped_bytes_; - std::string message_; - - ReportCollector() : dropped_bytes_(0) { } - virtual void Corruption(size_t bytes, const Status& status) { - dropped_bytes_ += bytes; - message_.append(status.ToString()); - } - }; - - StringDest dest_; - StringSource source_; - ReportCollector report_; - bool reading_; - Writer writer_; - Reader reader_; - - // Record metadata for testing initial offset functionality - static size_t initial_offset_record_sizes_[]; - static uint64_t initial_offset_last_record_offsets_[]; - - public: - LogTest() : reading_(false), - writer_(&dest_), - reader_(&source_, &report_, true/*checksum*/, - 0/*initial_offset*/) { - } - - void Write(const std::string& msg) { - ASSERT_TRUE(!reading_) << "Write() after starting to read"; - writer_.AddRecord(Slice(msg)); - } - - size_t WrittenBytes() const { - return dest_.contents_.size(); - } - - std::string Read() { - if (!reading_) { - reading_ = true; - source_.contents_ = Slice(dest_.contents_); - } - std::string scratch; - Slice record; - if (reader_.ReadRecord(&record, &scratch)) { - return record.ToString(); - } else { - return "EOF"; - } - } - - void IncrementByte(int offset, int delta) { - dest_.contents_[offset] += delta; - } - - void SetByte(int offset, char new_byte) { - dest_.contents_[offset] = new_byte; - } - - void ShrinkSize(int bytes) { - dest_.contents_.resize(dest_.contents_.size() - bytes); - } - - void FixChecksum(int header_offset, int len) { - // Compute crc of type/len/data - uint32_t crc = crc32c::Value(&dest_.contents_[header_offset+6], 1 + len); - crc = crc32c::Mask(crc); - EncodeFixed32(&dest_.contents_[header_offset], crc); - } - - void ForceError() { - source_.force_error_ = true; - } - - size_t DroppedBytes() const { - return report_.dropped_bytes_; - } - - std::string ReportMessage() const { - return report_.message_; - } - - // Returns OK iff recorded error message contains "msg" - std::string MatchError(const std::string& msg) const { - if (report_.message_.find(msg) == std::string::npos) { - return report_.message_; - } else { - return "OK"; - } - } - - void WriteInitialOffsetLog() { - for (int i = 0; i < 4; i++) { - std::string record(initial_offset_record_sizes_[i], - static_cast('a' + i)); - Write(record); - } - } - - void CheckOffsetPastEndReturnsNoRecords(uint64_t offset_past_end) { - WriteInitialOffsetLog(); - reading_ = true; - source_.contents_ = Slice(dest_.contents_); - Reader* offset_reader = new Reader(&source_, &report_, true/*checksum*/, - WrittenBytes() + offset_past_end); - Slice record; - std::string scratch; - ASSERT_TRUE(!offset_reader->ReadRecord(&record, &scratch)); - delete offset_reader; - } - - void CheckInitialOffsetRecord(uint64_t initial_offset, - int expected_record_offset) { - WriteInitialOffsetLog(); - reading_ = true; - source_.contents_ = Slice(dest_.contents_); - Reader* offset_reader = new Reader(&source_, &report_, true/*checksum*/, - initial_offset); - Slice record; - std::string scratch; - ASSERT_TRUE(offset_reader->ReadRecord(&record, &scratch)); - ASSERT_EQ(initial_offset_record_sizes_[expected_record_offset], - record.size()); - ASSERT_EQ(initial_offset_last_record_offsets_[expected_record_offset], - offset_reader->LastRecordOffset()); - ASSERT_EQ((char)('a' + expected_record_offset), record.data()[0]); - delete offset_reader; - } - -}; - -size_t LogTest::initial_offset_record_sizes_[] = - {10000, // Two sizable records in first block - 10000, - 2 * log::kBlockSize - 1000, // Span three blocks - 1}; - -uint64_t LogTest::initial_offset_last_record_offsets_[] = - {0, - kHeaderSize + 10000, - 2 * (kHeaderSize + 10000), - 2 * (kHeaderSize + 10000) + - (2 * log::kBlockSize - 1000) + 3 * kHeaderSize}; - - -TEST(LogTest, Empty) { - ASSERT_EQ("EOF", Read()); -} - -TEST(LogTest, ReadWrite) { - Write("foo"); - Write("bar"); - Write(""); - Write("xxxx"); - ASSERT_EQ("foo", Read()); - ASSERT_EQ("bar", Read()); - ASSERT_EQ("", Read()); - ASSERT_EQ("xxxx", Read()); - ASSERT_EQ("EOF", Read()); - ASSERT_EQ("EOF", Read()); // Make sure reads at eof work -} - -TEST(LogTest, ManyBlocks) { - for (int i = 0; i < 100000; i++) { - Write(NumberString(i)); - } - for (int i = 0; i < 100000; i++) { - ASSERT_EQ(NumberString(i), Read()); - } - ASSERT_EQ("EOF", Read()); -} - -TEST(LogTest, Fragmentation) { - Write("small"); - Write(BigString("medium", 50000)); - Write(BigString("large", 100000)); - ASSERT_EQ("small", Read()); - ASSERT_EQ(BigString("medium", 50000), Read()); - ASSERT_EQ(BigString("large", 100000), Read()); - ASSERT_EQ("EOF", Read()); -} - -TEST(LogTest, MarginalTrailer) { - // Make a trailer that is exactly the same length as an empty record. - const int n = kBlockSize - 2*kHeaderSize; - Write(BigString("foo", n)); - ASSERT_EQ(kBlockSize - kHeaderSize, WrittenBytes()); - Write(""); - Write("bar"); - ASSERT_EQ(BigString("foo", n), Read()); - ASSERT_EQ("", Read()); - ASSERT_EQ("bar", Read()); - ASSERT_EQ("EOF", Read()); -} - -TEST(LogTest, MarginalTrailer2) { - // Make a trailer that is exactly the same length as an empty record. - const int n = kBlockSize - 2*kHeaderSize; - Write(BigString("foo", n)); - ASSERT_EQ(kBlockSize - kHeaderSize, WrittenBytes()); - Write("bar"); - ASSERT_EQ(BigString("foo", n), Read()); - ASSERT_EQ("bar", Read()); - ASSERT_EQ("EOF", Read()); - ASSERT_EQ(0, DroppedBytes()); - ASSERT_EQ("", ReportMessage()); -} - -TEST(LogTest, ShortTrailer) { - const int n = kBlockSize - 2*kHeaderSize + 4; - Write(BigString("foo", n)); - ASSERT_EQ(kBlockSize - kHeaderSize + 4, WrittenBytes()); - Write(""); - Write("bar"); - ASSERT_EQ(BigString("foo", n), Read()); - ASSERT_EQ("", Read()); - ASSERT_EQ("bar", Read()); - ASSERT_EQ("EOF", Read()); -} - -TEST(LogTest, AlignedEof) { - const int n = kBlockSize - 2*kHeaderSize + 4; - Write(BigString("foo", n)); - ASSERT_EQ(kBlockSize - kHeaderSize + 4, WrittenBytes()); - ASSERT_EQ(BigString("foo", n), Read()); - ASSERT_EQ("EOF", Read()); -} - -TEST(LogTest, RandomRead) { - const int N = 500; - Random write_rnd(301); - for (int i = 0; i < N; i++) { - Write(RandomSkewedString(i, &write_rnd)); - } - Random read_rnd(301); - for (int i = 0; i < N; i++) { - ASSERT_EQ(RandomSkewedString(i, &read_rnd), Read()); - } - ASSERT_EQ("EOF", Read()); -} - -// Tests of all the error paths in log_reader.cc follow: - -TEST(LogTest, ReadError) { - Write("foo"); - ForceError(); - ASSERT_EQ("EOF", Read()); - ASSERT_EQ(kBlockSize, DroppedBytes()); - ASSERT_EQ("OK", MatchError("read error")); -} - -TEST(LogTest, BadRecordType) { - Write("foo"); - // Type is stored in header[6] - IncrementByte(6, 100); - FixChecksum(0, 3); - ASSERT_EQ("EOF", Read()); - ASSERT_EQ(3, DroppedBytes()); - ASSERT_EQ("OK", MatchError("unknown record type")); -} - -TEST(LogTest, TruncatedTrailingRecord) { - Write("foo"); - ShrinkSize(4); // Drop all payload as well as a header byte - ASSERT_EQ("EOF", Read()); - ASSERT_EQ(kHeaderSize - 1, DroppedBytes()); - ASSERT_EQ("OK", MatchError("truncated record at end of file")); -} - -TEST(LogTest, BadLength) { - Write("foo"); - ShrinkSize(1); - ASSERT_EQ("EOF", Read()); - ASSERT_EQ(kHeaderSize + 2, DroppedBytes()); - ASSERT_EQ("OK", MatchError("bad record length")); -} - -TEST(LogTest, ChecksumMismatch) { - Write("foo"); - IncrementByte(0, 10); - ASSERT_EQ("EOF", Read()); - ASSERT_EQ(10, DroppedBytes()); - ASSERT_EQ("OK", MatchError("checksum mismatch")); -} - -TEST(LogTest, UnexpectedMiddleType) { - Write("foo"); - SetByte(6, kMiddleType); - FixChecksum(0, 3); - ASSERT_EQ("EOF", Read()); - ASSERT_EQ(3, DroppedBytes()); - ASSERT_EQ("OK", MatchError("missing start")); -} - -TEST(LogTest, UnexpectedLastType) { - Write("foo"); - SetByte(6, kLastType); - FixChecksum(0, 3); - ASSERT_EQ("EOF", Read()); - ASSERT_EQ(3, DroppedBytes()); - ASSERT_EQ("OK", MatchError("missing start")); -} - -TEST(LogTest, UnexpectedFullType) { - Write("foo"); - Write("bar"); - SetByte(6, kFirstType); - FixChecksum(0, 3); - ASSERT_EQ("bar", Read()); - ASSERT_EQ("EOF", Read()); - ASSERT_EQ(3, DroppedBytes()); - ASSERT_EQ("OK", MatchError("partial record without end")); -} - -TEST(LogTest, UnexpectedFirstType) { - Write("foo"); - Write(BigString("bar", 100000)); - SetByte(6, kFirstType); - FixChecksum(0, 3); - ASSERT_EQ(BigString("bar", 100000), Read()); - ASSERT_EQ("EOF", Read()); - ASSERT_EQ(3, DroppedBytes()); - ASSERT_EQ("OK", MatchError("partial record without end")); -} - -TEST(LogTest, ErrorJoinsRecords) { - // Consider two fragmented records: - // first(R1) last(R1) first(R2) last(R2) - // where the middle two fragments disappear. We do not want - // first(R1),last(R2) to get joined and returned as a valid record. - - // Write records that span two blocks - Write(BigString("foo", kBlockSize)); - Write(BigString("bar", kBlockSize)); - Write("correct"); - - // Wipe the middle block - for (int offset = kBlockSize; offset < 2*kBlockSize; offset++) { - SetByte(offset, 'x'); - } - - ASSERT_EQ("correct", Read()); - ASSERT_EQ("EOF", Read()); - const int dropped = DroppedBytes(); - ASSERT_LE(dropped, 2*kBlockSize + 100); - ASSERT_GE(dropped, 2*kBlockSize); -} - -TEST(LogTest, ReadStart) { - CheckInitialOffsetRecord(0, 0); -} - -TEST(LogTest, ReadSecondOneOff) { - CheckInitialOffsetRecord(1, 1); -} - -TEST(LogTest, ReadSecondTenThousand) { - CheckInitialOffsetRecord(10000, 1); -} - -TEST(LogTest, ReadSecondStart) { - CheckInitialOffsetRecord(10007, 1); -} - -TEST(LogTest, ReadThirdOneOff) { - CheckInitialOffsetRecord(10008, 2); -} - -TEST(LogTest, ReadThirdStart) { - CheckInitialOffsetRecord(20014, 2); -} - -TEST(LogTest, ReadFourthOneOff) { - CheckInitialOffsetRecord(20015, 3); -} - -TEST(LogTest, ReadFourthFirstBlockTrailer) { - CheckInitialOffsetRecord(log::kBlockSize - 4, 3); -} - -TEST(LogTest, ReadFourthMiddleBlock) { - CheckInitialOffsetRecord(log::kBlockSize + 1, 3); -} - -TEST(LogTest, ReadFourthLastBlock) { - CheckInitialOffsetRecord(2 * log::kBlockSize + 1, 3); -} - -TEST(LogTest, ReadFourthStart) { - CheckInitialOffsetRecord( - 2 * (kHeaderSize + 1000) + (2 * log::kBlockSize - 1000) + 3 * kHeaderSize, - 3); -} - -TEST(LogTest, ReadEnd) { - CheckOffsetPastEndReturnsNoRecords(0); -} - -TEST(LogTest, ReadPastEnd) { - CheckOffsetPastEndReturnsNoRecords(5); -} - -} // namespace log -} // namespace leveldb - -int main(int argc, char** argv) { - return leveldb::test::RunAllTests(); -} diff --git a/leveldb/db/log_writer.cc b/leveldb/db/log_writer.cc deleted file mode 100644 index 2da99ac..0000000 --- a/leveldb/db/log_writer.cc +++ /dev/null @@ -1,103 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "db/log_writer.h" - -#include -#include "leveldb/env.h" -#include "util/coding.h" -#include "util/crc32c.h" - -namespace leveldb { -namespace log { - -Writer::Writer(WritableFile* dest) - : dest_(dest), - block_offset_(0) { - for (int i = 0; i <= kMaxRecordType; i++) { - char t = static_cast(i); - type_crc_[i] = crc32c::Value(&t, 1); - } -} - -Writer::~Writer() { -} - -Status Writer::AddRecord(const Slice& slice) { - const char* ptr = slice.data(); - size_t left = slice.size(); - - // Fragment the record if necessary and emit it. Note that if slice - // is empty, we still want to iterate once to emit a single - // zero-length record - Status s; - bool begin = true; - do { - const int leftover = kBlockSize - block_offset_; - assert(leftover >= 0); - if (leftover < kHeaderSize) { - // Switch to a new block - if (leftover > 0) { - // Fill the trailer (literal below relies on kHeaderSize being 7) - assert(kHeaderSize == 7); - dest_->Append(Slice("\x00\x00\x00\x00\x00\x00", leftover)); - } - block_offset_ = 0; - } - - // Invariant: we never leave < kHeaderSize bytes in a block. - assert(kBlockSize - block_offset_ - kHeaderSize >= 0); - - const size_t avail = kBlockSize - block_offset_ - kHeaderSize; - const size_t fragment_length = (left < avail) ? left : avail; - - RecordType type; - const bool end = (left == fragment_length); - if (begin && end) { - type = kFullType; - } else if (begin) { - type = kFirstType; - } else if (end) { - type = kLastType; - } else { - type = kMiddleType; - } - - s = EmitPhysicalRecord(type, ptr, fragment_length); - ptr += fragment_length; - left -= fragment_length; - begin = false; - } while (s.ok() && left > 0); - return s; -} - -Status Writer::EmitPhysicalRecord(RecordType t, const char* ptr, size_t n) { - assert(n <= 0xffff); // Must fit in two bytes - assert(block_offset_ + kHeaderSize + n <= kBlockSize); - - // Format the header - char buf[kHeaderSize]; - buf[4] = static_cast(n & 0xff); - buf[5] = static_cast(n >> 8); - buf[6] = static_cast(t); - - // Compute the crc of the record type and the payload. - uint32_t crc = crc32c::Extend(type_crc_[t], ptr, n); - crc = crc32c::Mask(crc); // Adjust for storage - EncodeFixed32(buf, crc); - - // Write the header and the payload - Status s = dest_->Append(Slice(buf, kHeaderSize)); - if (s.ok()) { - s = dest_->Append(Slice(ptr, n)); - if (s.ok()) { - s = dest_->Flush(); - } - } - block_offset_ += kHeaderSize + n; - return s; -} - -} // namespace log -} // namespace leveldb diff --git a/leveldb/db/log_writer.h b/leveldb/db/log_writer.h deleted file mode 100644 index a3a954d..0000000 --- a/leveldb/db/log_writer.h +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#ifndef STORAGE_LEVELDB_DB_LOG_WRITER_H_ -#define STORAGE_LEVELDB_DB_LOG_WRITER_H_ - -#include -#include "db/log_format.h" -#include "leveldb/slice.h" -#include "leveldb/status.h" - -namespace leveldb { - -class WritableFile; - -namespace log { - -class Writer { - public: - // Create a writer that will append data to "*dest". - // "*dest" must be initially empty. - // "*dest" must remain live while this Writer is in use. - explicit Writer(WritableFile* dest); - ~Writer(); - - Status AddRecord(const Slice& slice); - - private: - WritableFile* dest_; - int block_offset_; // Current offset in block - - // crc32c values for all supported record types. These are - // pre-computed to reduce the overhead of computing the crc of the - // record type stored in the header. - uint32_t type_crc_[kMaxRecordType + 1]; - - Status EmitPhysicalRecord(RecordType type, const char* ptr, size_t length); - - // No copying allowed - Writer(const Writer&); - void operator=(const Writer&); -}; - -} // namespace log -} // namespace leveldb - -#endif // STORAGE_LEVELDB_DB_LOG_WRITER_H_ diff --git a/leveldb/db/memtable.cc b/leveldb/db/memtable.cc deleted file mode 100644 index bfec0a7..0000000 --- a/leveldb/db/memtable.cc +++ /dev/null @@ -1,145 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "db/memtable.h" -#include "db/dbformat.h" -#include "leveldb/comparator.h" -#include "leveldb/env.h" -#include "leveldb/iterator.h" -#include "util/coding.h" - -namespace leveldb { - -static Slice GetLengthPrefixedSlice(const char* data) { - uint32_t len; - const char* p = data; - p = GetVarint32Ptr(p, p + 5, &len); // +5: we assume "p" is not corrupted - return Slice(p, len); -} - -MemTable::MemTable(const InternalKeyComparator& cmp) - : comparator_(cmp), - refs_(0), - table_(comparator_, &arena_) { -} - -MemTable::~MemTable() { - assert(refs_ == 0); -} - -size_t MemTable::ApproximateMemoryUsage() { return arena_.MemoryUsage(); } - -int MemTable::KeyComparator::operator()(const char* aptr, const char* bptr) - const { - // Internal keys are encoded as length-prefixed strings. - Slice a = GetLengthPrefixedSlice(aptr); - Slice b = GetLengthPrefixedSlice(bptr); - return comparator.Compare(a, b); -} - -// Encode a suitable internal key target for "target" and return it. -// Uses *scratch as scratch space, and the returned pointer will point -// into this scratch space. -static const char* EncodeKey(std::string* scratch, const Slice& target) { - scratch->clear(); - PutVarint32(scratch, target.size()); - scratch->append(target.data(), target.size()); - return scratch->data(); -} - -class MemTableIterator: public Iterator { - public: - explicit MemTableIterator(MemTable::Table* table) : iter_(table) { } - - virtual bool Valid() const { return iter_.Valid(); } - virtual void Seek(const Slice& k) { iter_.Seek(EncodeKey(&tmp_, k)); } - virtual void SeekToFirst() { iter_.SeekToFirst(); } - virtual void SeekToLast() { iter_.SeekToLast(); } - virtual void Next() { iter_.Next(); } - virtual void Prev() { iter_.Prev(); } - virtual Slice key() const { return GetLengthPrefixedSlice(iter_.key()); } - virtual Slice value() const { - Slice key_slice = GetLengthPrefixedSlice(iter_.key()); - return GetLengthPrefixedSlice(key_slice.data() + key_slice.size()); - } - - virtual Status status() const { return Status::OK(); } - - private: - MemTable::Table::Iterator iter_; - std::string tmp_; // For passing to EncodeKey - - // No copying allowed - MemTableIterator(const MemTableIterator&); - void operator=(const MemTableIterator&); -}; - -Iterator* MemTable::NewIterator() { - return new MemTableIterator(&table_); -} - -void MemTable::Add(SequenceNumber s, ValueType type, - const Slice& key, - const Slice& value) { - // Format of an entry is concatenation of: - // key_size : varint32 of internal_key.size() - // key bytes : char[internal_key.size()] - // value_size : varint32 of value.size() - // value bytes : char[value.size()] - size_t key_size = key.size(); - size_t val_size = value.size(); - size_t internal_key_size = key_size + 8; - const size_t encoded_len = - VarintLength(internal_key_size) + internal_key_size + - VarintLength(val_size) + val_size; - char* buf = arena_.Allocate(encoded_len); - char* p = EncodeVarint32(buf, internal_key_size); - memcpy(p, key.data(), key_size); - p += key_size; - EncodeFixed64(p, (s << 8) | type); - p += 8; - p = EncodeVarint32(p, val_size); - memcpy(p, value.data(), val_size); - assert((p + val_size) - buf == encoded_len); - table_.Insert(buf); -} - -bool MemTable::Get(const LookupKey& key, std::string* value, Status* s) { - Slice memkey = key.memtable_key(); - Table::Iterator iter(&table_); - iter.Seek(memkey.data()); - if (iter.Valid()) { - // entry format is: - // klength varint32 - // userkey char[klength] - // tag uint64 - // vlength varint32 - // value char[vlength] - // Check that it belongs to same user key. We do not check the - // sequence number since the Seek() call above should have skipped - // all entries with overly large sequence numbers. - const char* entry = iter.key(); - uint32_t key_length; - const char* key_ptr = GetVarint32Ptr(entry, entry+5, &key_length); - if (comparator_.comparator.user_comparator()->Compare( - Slice(key_ptr, key_length - 8), - key.user_key()) == 0) { - // Correct user key - const uint64_t tag = DecodeFixed64(key_ptr + key_length - 8); - switch (static_cast(tag & 0xff)) { - case kTypeValue: { - Slice v = GetLengthPrefixedSlice(key_ptr + key_length); - value->assign(v.data(), v.size()); - return true; - } - case kTypeDeletion: - *s = Status::NotFound(Slice()); - return true; - } - } - } - return false; -} - -} // namespace leveldb diff --git a/leveldb/db/memtable.h b/leveldb/db/memtable.h deleted file mode 100644 index 92e90bb..0000000 --- a/leveldb/db/memtable.h +++ /dev/null @@ -1,91 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#ifndef STORAGE_LEVELDB_DB_MEMTABLE_H_ -#define STORAGE_LEVELDB_DB_MEMTABLE_H_ - -#include -#include "leveldb/db.h" -#include "db/dbformat.h" -#include "db/skiplist.h" -#include "util/arena.h" - -namespace leveldb { - -class InternalKeyComparator; -class Mutex; -class MemTableIterator; - -class MemTable { - public: - // MemTables are reference counted. The initial reference count - // is zero and the caller must call Ref() at least once. - explicit MemTable(const InternalKeyComparator& comparator); - - // Increase reference count. - void Ref() { ++refs_; } - - // Drop reference count. Delete if no more references exist. - void Unref() { - --refs_; - assert(refs_ >= 0); - if (refs_ <= 0) { - delete this; - } - } - - // Returns an estimate of the number of bytes of data in use by this - // data structure. - // - // REQUIRES: external synchronization to prevent simultaneous - // operations on the same MemTable. - size_t ApproximateMemoryUsage(); - - // Return an iterator that yields the contents of the memtable. - // - // The caller must ensure that the underlying MemTable remains live - // while the returned iterator is live. The keys returned by this - // iterator are internal keys encoded by AppendInternalKey in the - // db/format.{h,cc} module. - Iterator* NewIterator(); - - // Add an entry into memtable that maps key to value at the - // specified sequence number and with the specified type. - // Typically value will be empty if type==kTypeDeletion. - void Add(SequenceNumber seq, ValueType type, - const Slice& key, - const Slice& value); - - // If memtable contains a value for key, store it in *value and return true. - // If memtable contains a deletion for key, store a NotFound() error - // in *status and return true. - // Else, return false. - bool Get(const LookupKey& key, std::string* value, Status* s); - - private: - ~MemTable(); // Private since only Unref() should be used to delete it - - struct KeyComparator { - const InternalKeyComparator comparator; - explicit KeyComparator(const InternalKeyComparator& c) : comparator(c) { } - int operator()(const char* a, const char* b) const; - }; - friend class MemTableIterator; - friend class MemTableBackwardIterator; - - typedef SkipList Table; - - KeyComparator comparator_; - int refs_; - Arena arena_; - Table table_; - - // No copying allowed - MemTable(const MemTable&); - void operator=(const MemTable&); -}; - -} // namespace leveldb - -#endif // STORAGE_LEVELDB_DB_MEMTABLE_H_ diff --git a/leveldb/db/repair.cc b/leveldb/db/repair.cc deleted file mode 100644 index 022d52f..0000000 --- a/leveldb/db/repair.cc +++ /dev/null @@ -1,389 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. -// -// We recover the contents of the descriptor from the other files we find. -// (1) Any log files are first converted to tables -// (2) We scan every table to compute -// (a) smallest/largest for the table -// (b) largest sequence number in the table -// (3) We generate descriptor contents: -// - log number is set to zero -// - next-file-number is set to 1 + largest file number we found -// - last-sequence-number is set to largest sequence# found across -// all tables (see 2c) -// - compaction pointers are cleared -// - every table file is added at level 0 -// -// Possible optimization 1: -// (a) Compute total size and use to pick appropriate max-level M -// (b) Sort tables by largest sequence# in the table -// (c) For each table: if it overlaps earlier table, place in level-0, -// else place in level-M. -// Possible optimization 2: -// Store per-table metadata (smallest, largest, largest-seq#, ...) -// in the table's meta section to speed up ScanTable. - -#include "db/builder.h" -#include "db/db_impl.h" -#include "db/dbformat.h" -#include "db/filename.h" -#include "db/log_reader.h" -#include "db/log_writer.h" -#include "db/memtable.h" -#include "db/table_cache.h" -#include "db/version_edit.h" -#include "db/write_batch_internal.h" -#include "leveldb/comparator.h" -#include "leveldb/db.h" -#include "leveldb/env.h" - -namespace leveldb { - -namespace { - -class Repairer { - public: - Repairer(const std::string& dbname, const Options& options) - : dbname_(dbname), - env_(options.env), - icmp_(options.comparator), - ipolicy_(options.filter_policy), - options_(SanitizeOptions(dbname, &icmp_, &ipolicy_, options)), - owns_info_log_(options_.info_log != options.info_log), - owns_cache_(options_.block_cache != options.block_cache), - next_file_number_(1) { - // TableCache can be small since we expect each table to be opened once. - table_cache_ = new TableCache(dbname_, &options_, 10); - } - - ~Repairer() { - delete table_cache_; - if (owns_info_log_) { - delete options_.info_log; - } - if (owns_cache_) { - delete options_.block_cache; - } - } - - Status Run() { - Status status = FindFiles(); - if (status.ok()) { - ConvertLogFilesToTables(); - ExtractMetaData(); - status = WriteDescriptor(); - } - if (status.ok()) { - unsigned long long bytes = 0; - for (size_t i = 0; i < tables_.size(); i++) { - bytes += tables_[i].meta.file_size; - } - Log(options_.info_log, - "**** Repaired leveldb %s; " - "recovered %d files; %llu bytes. " - "Some data may have been lost. " - "****", - dbname_.c_str(), - static_cast(tables_.size()), - bytes); - } - return status; - } - - private: - struct TableInfo { - FileMetaData meta; - SequenceNumber max_sequence; - }; - - std::string const dbname_; - Env* const env_; - InternalKeyComparator const icmp_; - InternalFilterPolicy const ipolicy_; - Options const options_; - bool owns_info_log_; - bool owns_cache_; - TableCache* table_cache_; - VersionEdit edit_; - - std::vector manifests_; - std::vector table_numbers_; - std::vector logs_; - std::vector tables_; - uint64_t next_file_number_; - - Status FindFiles() { - std::vector filenames; - Status status = env_->GetChildren(dbname_, &filenames); - if (!status.ok()) { - return status; - } - if (filenames.empty()) { - return Status::IOError(dbname_, "repair found no files"); - } - - uint64_t number; - FileType type; - for (size_t i = 0; i < filenames.size(); i++) { - if (ParseFileName(filenames[i], &number, &type)) { - if (type == kDescriptorFile) { - manifests_.push_back(filenames[i]); - } else { - if (number + 1 > next_file_number_) { - next_file_number_ = number + 1; - } - if (type == kLogFile) { - logs_.push_back(number); - } else if (type == kTableFile) { - table_numbers_.push_back(number); - } else { - // Ignore other files - } - } - } - } - return status; - } - - void ConvertLogFilesToTables() { - for (size_t i = 0; i < logs_.size(); i++) { - std::string logname = LogFileName(dbname_, logs_[i]); - Status status = ConvertLogToTable(logs_[i]); - if (!status.ok()) { - Log(options_.info_log, "Log #%llu: ignoring conversion error: %s", - (unsigned long long) logs_[i], - status.ToString().c_str()); - } - ArchiveFile(logname); - } - } - - Status ConvertLogToTable(uint64_t log) { - struct LogReporter : public log::Reader::Reporter { - Env* env; - Logger* info_log; - uint64_t lognum; - virtual void Corruption(size_t bytes, const Status& s) { - // We print error messages for corruption, but continue repairing. - Log(info_log, "Log #%llu: dropping %d bytes; %s", - (unsigned long long) lognum, - static_cast(bytes), - s.ToString().c_str()); - } - }; - - // Open the log file - std::string logname = LogFileName(dbname_, log); - SequentialFile* lfile; - Status status = env_->NewSequentialFile(logname, &lfile); - if (!status.ok()) { - return status; - } - - // Create the log reader. - LogReporter reporter; - reporter.env = env_; - reporter.info_log = options_.info_log; - reporter.lognum = log; - // We intentially make log::Reader do checksumming so that - // corruptions cause entire commits to be skipped instead of - // propagating bad information (like overly large sequence - // numbers). - log::Reader reader(lfile, &reporter, false/*do not checksum*/, - 0/*initial_offset*/); - - // Read all the records and add to a memtable - std::string scratch; - Slice record; - WriteBatch batch; - MemTable* mem = new MemTable(icmp_); - mem->Ref(); - int counter = 0; - while (reader.ReadRecord(&record, &scratch)) { - if (record.size() < 12) { - reporter.Corruption( - record.size(), Status::Corruption("log record too small")); - continue; - } - WriteBatchInternal::SetContents(&batch, record); - status = WriteBatchInternal::InsertInto(&batch, mem); - if (status.ok()) { - counter += WriteBatchInternal::Count(&batch); - } else { - Log(options_.info_log, "Log #%llu: ignoring %s", - (unsigned long long) log, - status.ToString().c_str()); - status = Status::OK(); // Keep going with rest of file - } - } - delete lfile; - - // Do not record a version edit for this conversion to a Table - // since ExtractMetaData() will also generate edits. - FileMetaData meta; - meta.number = next_file_number_++; - Iterator* iter = mem->NewIterator(); - status = BuildTable(dbname_, env_, options_, table_cache_, iter, &meta); - delete iter; - mem->Unref(); - mem = NULL; - if (status.ok()) { - if (meta.file_size > 0) { - table_numbers_.push_back(meta.number); - } - } - Log(options_.info_log, "Log #%llu: %d ops saved to Table #%llu %s", - (unsigned long long) log, - counter, - (unsigned long long) meta.number, - status.ToString().c_str()); - return status; - } - - void ExtractMetaData() { - std::vector kept; - for (size_t i = 0; i < table_numbers_.size(); i++) { - TableInfo t; - t.meta.number = table_numbers_[i]; - Status status = ScanTable(&t); - if (!status.ok()) { - std::string fname = TableFileName(dbname_, table_numbers_[i]); - Log(options_.info_log, "Table #%llu: ignoring %s", - (unsigned long long) table_numbers_[i], - status.ToString().c_str()); - ArchiveFile(fname); - } else { - tables_.push_back(t); - } - } - } - - Status ScanTable(TableInfo* t) { - std::string fname = TableFileName(dbname_, t->meta.number); - int counter = 0; - Status status = env_->GetFileSize(fname, &t->meta.file_size); - if (status.ok()) { - Iterator* iter = table_cache_->NewIterator( - ReadOptions(), t->meta.number, t->meta.file_size); - bool empty = true; - ParsedInternalKey parsed; - t->max_sequence = 0; - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - Slice key = iter->key(); - if (!ParseInternalKey(key, &parsed)) { - Log(options_.info_log, "Table #%llu: unparsable key %s", - (unsigned long long) t->meta.number, - EscapeString(key).c_str()); - continue; - } - - counter++; - if (empty) { - empty = false; - t->meta.smallest.DecodeFrom(key); - } - t->meta.largest.DecodeFrom(key); - if (parsed.sequence > t->max_sequence) { - t->max_sequence = parsed.sequence; - } - } - if (!iter->status().ok()) { - status = iter->status(); - } - delete iter; - } - Log(options_.info_log, "Table #%llu: %d entries %s", - (unsigned long long) t->meta.number, - counter, - status.ToString().c_str()); - return status; - } - - Status WriteDescriptor() { - std::string tmp = TempFileName(dbname_, 1); - WritableFile* file; - Status status = env_->NewWritableFile(tmp, &file); - if (!status.ok()) { - return status; - } - - SequenceNumber max_sequence = 0; - for (size_t i = 0; i < tables_.size(); i++) { - if (max_sequence < tables_[i].max_sequence) { - max_sequence = tables_[i].max_sequence; - } - } - - edit_.SetComparatorName(icmp_.user_comparator()->Name()); - edit_.SetLogNumber(0); - edit_.SetNextFile(next_file_number_); - edit_.SetLastSequence(max_sequence); - - for (size_t i = 0; i < tables_.size(); i++) { - // TODO(opt): separate out into multiple levels - const TableInfo& t = tables_[i]; - edit_.AddFile(0, t.meta.number, t.meta.file_size, - t.meta.smallest, t.meta.largest); - } - - //fprintf(stderr, "NewDescriptor:\n%s\n", edit_.DebugString().c_str()); - { - log::Writer log(file); - std::string record; - edit_.EncodeTo(&record); - status = log.AddRecord(record); - } - if (status.ok()) { - status = file->Close(); - } - delete file; - file = NULL; - - if (!status.ok()) { - env_->DeleteFile(tmp); - } else { - // Discard older manifests - for (size_t i = 0; i < manifests_.size(); i++) { - ArchiveFile(dbname_ + "/" + manifests_[i]); - } - - // Install new manifest - status = env_->RenameFile(tmp, DescriptorFileName(dbname_, 1)); - if (status.ok()) { - status = SetCurrentFile(env_, dbname_, 1); - } else { - env_->DeleteFile(tmp); - } - } - return status; - } - - void ArchiveFile(const std::string& fname) { - // Move into another directory. E.g., for - // dir/foo - // rename to - // dir/lost/foo - const char* slash = strrchr(fname.c_str(), '/'); - std::string new_dir; - if (slash != NULL) { - new_dir.assign(fname.data(), slash - fname.data()); - } - new_dir.append("/lost"); - env_->CreateDir(new_dir); // Ignore error - std::string new_file = new_dir; - new_file.append("/"); - new_file.append((slash == NULL) ? fname.c_str() : slash + 1); - Status s = env_->RenameFile(fname, new_file); - Log(options_.info_log, "Archiving %s: %s\n", - fname.c_str(), s.ToString().c_str()); - } -}; -} // namespace - -Status RepairDB(const std::string& dbname, const Options& options) { - Repairer repairer(dbname, options); - return repairer.Run(); -} - -} // namespace leveldb diff --git a/leveldb/db/skiplist.h b/leveldb/db/skiplist.h deleted file mode 100644 index af85be6..0000000 --- a/leveldb/db/skiplist.h +++ /dev/null @@ -1,379 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. -// -// Thread safety -// ------------- -// -// Writes require external synchronization, most likely a mutex. -// Reads require a guarantee that the SkipList will not be destroyed -// while the read is in progress. Apart from that, reads progress -// without any internal locking or synchronization. -// -// Invariants: -// -// (1) Allocated nodes are never deleted until the SkipList is -// destroyed. This is trivially guaranteed by the code since we -// never delete any skip list nodes. -// -// (2) The contents of a Node except for the next/prev pointers are -// immutable after the Node has been linked into the SkipList. -// Only Insert() modifies the list, and it is careful to initialize -// a node and use release-stores to publish the nodes in one or -// more lists. -// -// ... prev vs. next pointer ordering ... - -#include -#include -#include "port/port.h" -#include "util/arena.h" -#include "util/random.h" - -namespace leveldb { - -class Arena; - -template -class SkipList { - private: - struct Node; - - public: - // Create a new SkipList object that will use "cmp" for comparing keys, - // and will allocate memory using "*arena". Objects allocated in the arena - // must remain allocated for the lifetime of the skiplist object. - explicit SkipList(Comparator cmp, Arena* arena); - - // Insert key into the list. - // REQUIRES: nothing that compares equal to key is currently in the list. - void Insert(const Key& key); - - // Returns true iff an entry that compares equal to key is in the list. - bool Contains(const Key& key) const; - - // Iteration over the contents of a skip list - class Iterator { - public: - // Initialize an iterator over the specified list. - // The returned iterator is not valid. - explicit Iterator(const SkipList* list); - - // Returns true iff the iterator is positioned at a valid node. - bool Valid() const; - - // Returns the key at the current position. - // REQUIRES: Valid() - const Key& key() const; - - // Advances to the next position. - // REQUIRES: Valid() - void Next(); - - // Advances to the previous position. - // REQUIRES: Valid() - void Prev(); - - // Advance to the first entry with a key >= target - void Seek(const Key& target); - - // Position at the first entry in list. - // Final state of iterator is Valid() iff list is not empty. - void SeekToFirst(); - - // Position at the last entry in list. - // Final state of iterator is Valid() iff list is not empty. - void SeekToLast(); - - private: - const SkipList* list_; - Node* node_; - // Intentionally copyable - }; - - private: - enum { kMaxHeight = 12 }; - - // Immutable after construction - Comparator const compare_; - Arena* const arena_; // Arena used for allocations of nodes - - Node* const head_; - - // Modified only by Insert(). Read racily by readers, but stale - // values are ok. - port::AtomicPointer max_height_; // Height of the entire list - - inline int GetMaxHeight() const { - return static_cast( - reinterpret_cast(max_height_.NoBarrier_Load())); - } - - // Read/written only by Insert(). - Random rnd_; - - Node* NewNode(const Key& key, int height); - int RandomHeight(); - bool Equal(const Key& a, const Key& b) const { return (compare_(a, b) == 0); } - - // Return true if key is greater than the data stored in "n" - bool KeyIsAfterNode(const Key& key, Node* n) const; - - // Return the earliest node that comes at or after key. - // Return NULL if there is no such node. - // - // If prev is non-NULL, fills prev[level] with pointer to previous - // node at "level" for every level in [0..max_height_-1]. - Node* FindGreaterOrEqual(const Key& key, Node** prev) const; - - // Return the latest node with a key < key. - // Return head_ if there is no such node. - Node* FindLessThan(const Key& key) const; - - // Return the last node in the list. - // Return head_ if list is empty. - Node* FindLast() const; - - // No copying allowed - SkipList(const SkipList&); - void operator=(const SkipList&); -}; - -// Implementation details follow -template -struct SkipList::Node { - explicit Node(const Key& k) : key(k) { } - - Key const key; - - // Accessors/mutators for links. Wrapped in methods so we can - // add the appropriate barriers as necessary. - Node* Next(int n) { - assert(n >= 0); - // Use an 'acquire load' so that we observe a fully initialized - // version of the returned Node. - return reinterpret_cast(next_[n].Acquire_Load()); - } - void SetNext(int n, Node* x) { - assert(n >= 0); - // Use a 'release store' so that anybody who reads through this - // pointer observes a fully initialized version of the inserted node. - next_[n].Release_Store(x); - } - - // No-barrier variants that can be safely used in a few locations. - Node* NoBarrier_Next(int n) { - assert(n >= 0); - return reinterpret_cast(next_[n].NoBarrier_Load()); - } - void NoBarrier_SetNext(int n, Node* x) { - assert(n >= 0); - next_[n].NoBarrier_Store(x); - } - - private: - // Array of length equal to the node height. next_[0] is lowest level link. - port::AtomicPointer next_[1]; -}; - -template -typename SkipList::Node* -SkipList::NewNode(const Key& key, int height) { - char* mem = arena_->AllocateAligned( - sizeof(Node) + sizeof(port::AtomicPointer) * (height - 1)); - return new (mem) Node(key); -} - -template -inline SkipList::Iterator::Iterator(const SkipList* list) { - list_ = list; - node_ = NULL; -} - -template -inline bool SkipList::Iterator::Valid() const { - return node_ != NULL; -} - -template -inline const Key& SkipList::Iterator::key() const { - assert(Valid()); - return node_->key; -} - -template -inline void SkipList::Iterator::Next() { - assert(Valid()); - node_ = node_->Next(0); -} - -template -inline void SkipList::Iterator::Prev() { - // Instead of using explicit "prev" links, we just search for the - // last node that falls before key. - assert(Valid()); - node_ = list_->FindLessThan(node_->key); - if (node_ == list_->head_) { - node_ = NULL; - } -} - -template -inline void SkipList::Iterator::Seek(const Key& target) { - node_ = list_->FindGreaterOrEqual(target, NULL); -} - -template -inline void SkipList::Iterator::SeekToFirst() { - node_ = list_->head_->Next(0); -} - -template -inline void SkipList::Iterator::SeekToLast() { - node_ = list_->FindLast(); - if (node_ == list_->head_) { - node_ = NULL; - } -} - -template -int SkipList::RandomHeight() { - // Increase height with probability 1 in kBranching - static const unsigned int kBranching = 4; - int height = 1; - while (height < kMaxHeight && ((rnd_.Next() % kBranching) == 0)) { - height++; - } - assert(height > 0); - assert(height <= kMaxHeight); - return height; -} - -template -bool SkipList::KeyIsAfterNode(const Key& key, Node* n) const { - // NULL n is considered infinite - return (n != NULL) && (compare_(n->key, key) < 0); -} - -template -typename SkipList::Node* SkipList::FindGreaterOrEqual(const Key& key, Node** prev) - const { - Node* x = head_; - int level = GetMaxHeight() - 1; - while (true) { - Node* next = x->Next(level); - if (KeyIsAfterNode(key, next)) { - // Keep searching in this list - x = next; - } else { - if (prev != NULL) prev[level] = x; - if (level == 0) { - return next; - } else { - // Switch to next list - level--; - } - } - } -} - -template -typename SkipList::Node* -SkipList::FindLessThan(const Key& key) const { - Node* x = head_; - int level = GetMaxHeight() - 1; - while (true) { - assert(x == head_ || compare_(x->key, key) < 0); - Node* next = x->Next(level); - if (next == NULL || compare_(next->key, key) >= 0) { - if (level == 0) { - return x; - } else { - // Switch to next list - level--; - } - } else { - x = next; - } - } -} - -template -typename SkipList::Node* SkipList::FindLast() - const { - Node* x = head_; - int level = GetMaxHeight() - 1; - while (true) { - Node* next = x->Next(level); - if (next == NULL) { - if (level == 0) { - return x; - } else { - // Switch to next list - level--; - } - } else { - x = next; - } - } -} - -template -SkipList::SkipList(Comparator cmp, Arena* arena) - : compare_(cmp), - arena_(arena), - head_(NewNode(0 /* any key will do */, kMaxHeight)), - max_height_(reinterpret_cast(1)), - rnd_(0xdeadbeef) { - for (int i = 0; i < kMaxHeight; i++) { - head_->SetNext(i, NULL); - } -} - -template -void SkipList::Insert(const Key& key) { - // TODO(opt): We can use a barrier-free variant of FindGreaterOrEqual() - // here since Insert() is externally synchronized. - Node* prev[kMaxHeight]; - Node* x = FindGreaterOrEqual(key, prev); - - // Our data structure does not allow duplicate insertion - assert(x == NULL || !Equal(key, x->key)); - - int height = RandomHeight(); - if (height > GetMaxHeight()) { - for (int i = GetMaxHeight(); i < height; i++) { - prev[i] = head_; - } - //fprintf(stderr, "Change height from %d to %d\n", max_height_, height); - - // It is ok to mutate max_height_ without any synchronization - // with concurrent readers. A concurrent reader that observes - // the new value of max_height_ will see either the old value of - // new level pointers from head_ (NULL), or a new value set in - // the loop below. In the former case the reader will - // immediately drop to the next level since NULL sorts after all - // keys. In the latter case the reader will use the new node. - max_height_.NoBarrier_Store(reinterpret_cast(height)); - } - - x = NewNode(key, height); - for (int i = 0; i < height; i++) { - // NoBarrier_SetNext() suffices since we will add a barrier when - // we publish a pointer to "x" in prev[i]. - x->NoBarrier_SetNext(i, prev[i]->NoBarrier_Next(i)); - prev[i]->SetNext(i, x); - } -} - -template -bool SkipList::Contains(const Key& key) const { - Node* x = FindGreaterOrEqual(key, NULL); - if (x != NULL && Equal(key, x->key)) { - return true; - } else { - return false; - } -} - -} // namespace leveldb diff --git a/leveldb/db/skiplist_test.cc b/leveldb/db/skiplist_test.cc deleted file mode 100644 index c78f4b4..0000000 --- a/leveldb/db/skiplist_test.cc +++ /dev/null @@ -1,378 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "db/skiplist.h" -#include -#include "leveldb/env.h" -#include "util/arena.h" -#include "util/hash.h" -#include "util/random.h" -#include "util/testharness.h" - -namespace leveldb { - -typedef uint64_t Key; - -struct Comparator { - int operator()(const Key& a, const Key& b) const { - if (a < b) { - return -1; - } else if (a > b) { - return +1; - } else { - return 0; - } - } -}; - -class SkipTest { }; - -TEST(SkipTest, Empty) { - Arena arena; - Comparator cmp; - SkipList list(cmp, &arena); - ASSERT_TRUE(!list.Contains(10)); - - SkipList::Iterator iter(&list); - ASSERT_TRUE(!iter.Valid()); - iter.SeekToFirst(); - ASSERT_TRUE(!iter.Valid()); - iter.Seek(100); - ASSERT_TRUE(!iter.Valid()); - iter.SeekToLast(); - ASSERT_TRUE(!iter.Valid()); -} - -TEST(SkipTest, InsertAndLookup) { - const int N = 2000; - const int R = 5000; - Random rnd(1000); - std::set keys; - Arena arena; - Comparator cmp; - SkipList list(cmp, &arena); - for (int i = 0; i < N; i++) { - Key key = rnd.Next() % R; - if (keys.insert(key).second) { - list.Insert(key); - } - } - - for (int i = 0; i < R; i++) { - if (list.Contains(i)) { - ASSERT_EQ(keys.count(i), 1); - } else { - ASSERT_EQ(keys.count(i), 0); - } - } - - // Simple iterator tests - { - SkipList::Iterator iter(&list); - ASSERT_TRUE(!iter.Valid()); - - iter.Seek(0); - ASSERT_TRUE(iter.Valid()); - ASSERT_EQ(*(keys.begin()), iter.key()); - - iter.SeekToFirst(); - ASSERT_TRUE(iter.Valid()); - ASSERT_EQ(*(keys.begin()), iter.key()); - - iter.SeekToLast(); - ASSERT_TRUE(iter.Valid()); - ASSERT_EQ(*(keys.rbegin()), iter.key()); - } - - // Forward iteration test - for (int i = 0; i < R; i++) { - SkipList::Iterator iter(&list); - iter.Seek(i); - - // Compare against model iterator - std::set::iterator model_iter = keys.lower_bound(i); - for (int j = 0; j < 3; j++) { - if (model_iter == keys.end()) { - ASSERT_TRUE(!iter.Valid()); - break; - } else { - ASSERT_TRUE(iter.Valid()); - ASSERT_EQ(*model_iter, iter.key()); - ++model_iter; - iter.Next(); - } - } - } - - // Backward iteration test - { - SkipList::Iterator iter(&list); - iter.SeekToLast(); - - // Compare against model iterator - for (std::set::reverse_iterator model_iter = keys.rbegin(); - model_iter != keys.rend(); - ++model_iter) { - ASSERT_TRUE(iter.Valid()); - ASSERT_EQ(*model_iter, iter.key()); - iter.Prev(); - } - ASSERT_TRUE(!iter.Valid()); - } -} - -// We want to make sure that with a single writer and multiple -// concurrent readers (with no synchronization other than when a -// reader's iterator is created), the reader always observes all the -// data that was present in the skip list when the iterator was -// constructor. Because insertions are happening concurrently, we may -// also observe new values that were inserted since the iterator was -// constructed, but we should never miss any values that were present -// at iterator construction time. -// -// We generate multi-part keys: -// -// where: -// key is in range [0..K-1] -// gen is a generation number for key -// hash is hash(key,gen) -// -// The insertion code picks a random key, sets gen to be 1 + the last -// generation number inserted for that key, and sets hash to Hash(key,gen). -// -// At the beginning of a read, we snapshot the last inserted -// generation number for each key. We then iterate, including random -// calls to Next() and Seek(). For every key we encounter, we -// check that it is either expected given the initial snapshot or has -// been concurrently added since the iterator started. -class ConcurrentTest { - private: - static const uint32_t K = 4; - - static uint64_t key(Key key) { return (key >> 40); } - static uint64_t gen(Key key) { return (key >> 8) & 0xffffffffu; } - static uint64_t hash(Key key) { return key & 0xff; } - - static uint64_t HashNumbers(uint64_t k, uint64_t g) { - uint64_t data[2] = { k, g }; - return Hash(reinterpret_cast(data), sizeof(data), 0); - } - - static Key MakeKey(uint64_t k, uint64_t g) { - assert(sizeof(Key) == sizeof(uint64_t)); - assert(k <= K); // We sometimes pass K to seek to the end of the skiplist - assert(g <= 0xffffffffu); - return ((k << 40) | (g << 8) | (HashNumbers(k, g) & 0xff)); - } - - static bool IsValidKey(Key k) { - return hash(k) == (HashNumbers(key(k), gen(k)) & 0xff); - } - - static Key RandomTarget(Random* rnd) { - switch (rnd->Next() % 10) { - case 0: - // Seek to beginning - return MakeKey(0, 0); - case 1: - // Seek to end - return MakeKey(K, 0); - default: - // Seek to middle - return MakeKey(rnd->Next() % K, 0); - } - } - - // Per-key generation - struct State { - port::AtomicPointer generation[K]; - void Set(int k, intptr_t v) { - generation[k].Release_Store(reinterpret_cast(v)); - } - intptr_t Get(int k) { - return reinterpret_cast(generation[k].Acquire_Load()); - } - - State() { - for (int k = 0; k < K; k++) { - Set(k, 0); - } - } - }; - - // Current state of the test - State current_; - - Arena arena_; - - // SkipList is not protected by mu_. We just use a single writer - // thread to modify it. - SkipList list_; - - public: - ConcurrentTest() : list_(Comparator(), &arena_) { } - - // REQUIRES: External synchronization - void WriteStep(Random* rnd) { - const uint32_t k = rnd->Next() % K; - const intptr_t g = current_.Get(k) + 1; - const Key key = MakeKey(k, g); - list_.Insert(key); - current_.Set(k, g); - } - - void ReadStep(Random* rnd) { - // Remember the initial committed state of the skiplist. - State initial_state; - for (int k = 0; k < K; k++) { - initial_state.Set(k, current_.Get(k)); - } - - Key pos = RandomTarget(rnd); - SkipList::Iterator iter(&list_); - iter.Seek(pos); - while (true) { - Key current; - if (!iter.Valid()) { - current = MakeKey(K, 0); - } else { - current = iter.key(); - ASSERT_TRUE(IsValidKey(current)) << current; - } - ASSERT_LE(pos, current) << "should not go backwards"; - - // Verify that everything in [pos,current) was not present in - // initial_state. - while (pos < current) { - ASSERT_LT(key(pos), K) << pos; - - // Note that generation 0 is never inserted, so it is ok if - // <*,0,*> is missing. - ASSERT_TRUE((gen(pos) == 0) || - (gen(pos) > initial_state.Get(key(pos))) - ) << "key: " << key(pos) - << "; gen: " << gen(pos) - << "; initgen: " - << initial_state.Get(key(pos)); - - // Advance to next key in the valid key space - if (key(pos) < key(current)) { - pos = MakeKey(key(pos) + 1, 0); - } else { - pos = MakeKey(key(pos), gen(pos) + 1); - } - } - - if (!iter.Valid()) { - break; - } - - if (rnd->Next() % 2) { - iter.Next(); - pos = MakeKey(key(pos), gen(pos) + 1); - } else { - Key new_target = RandomTarget(rnd); - if (new_target > pos) { - pos = new_target; - iter.Seek(new_target); - } - } - } - } -}; -const uint32_t ConcurrentTest::K; - -// Simple test that does single-threaded testing of the ConcurrentTest -// scaffolding. -TEST(SkipTest, ConcurrentWithoutThreads) { - ConcurrentTest test; - Random rnd(test::RandomSeed()); - for (int i = 0; i < 10000; i++) { - test.ReadStep(&rnd); - test.WriteStep(&rnd); - } -} - -class TestState { - public: - ConcurrentTest t_; - int seed_; - port::AtomicPointer quit_flag_; - - enum ReaderState { - STARTING, - RUNNING, - DONE - }; - - explicit TestState(int s) - : seed_(s), - quit_flag_(NULL), - state_(STARTING), - state_cv_(&mu_) {} - - void Wait(ReaderState s) { - mu_.Lock(); - while (state_ != s) { - state_cv_.Wait(); - } - mu_.Unlock(); - } - - void Change(ReaderState s) { - mu_.Lock(); - state_ = s; - state_cv_.Signal(); - mu_.Unlock(); - } - - private: - port::Mutex mu_; - ReaderState state_; - port::CondVar state_cv_; -}; - -static void ConcurrentReader(void* arg) { - TestState* state = reinterpret_cast(arg); - Random rnd(state->seed_); - int64_t reads = 0; - state->Change(TestState::RUNNING); - while (!state->quit_flag_.Acquire_Load()) { - state->t_.ReadStep(&rnd); - ++reads; - } - state->Change(TestState::DONE); -} - -static void RunConcurrent(int run) { - const int seed = test::RandomSeed() + (run * 100); - Random rnd(seed); - const int N = 1000; - const int kSize = 1000; - for (int i = 0; i < N; i++) { - if ((i % 100) == 0) { - fprintf(stderr, "Run %d of %d\n", i, N); - } - TestState state(seed + 1); - Env::Default()->Schedule(ConcurrentReader, &state); - state.Wait(TestState::RUNNING); - for (int i = 0; i < kSize; i++) { - state.t_.WriteStep(&rnd); - } - state.quit_flag_.Release_Store(&state); // Any non-NULL arg will do - state.Wait(TestState::DONE); - } -} - -TEST(SkipTest, Concurrent1) { RunConcurrent(1); } -TEST(SkipTest, Concurrent2) { RunConcurrent(2); } -TEST(SkipTest, Concurrent3) { RunConcurrent(3); } -TEST(SkipTest, Concurrent4) { RunConcurrent(4); } -TEST(SkipTest, Concurrent5) { RunConcurrent(5); } - -} // namespace leveldb - -int main(int argc, char** argv) { - return leveldb::test::RunAllTests(); -} diff --git a/leveldb/db/snapshot.h b/leveldb/db/snapshot.h deleted file mode 100644 index e7f8fd2..0000000 --- a/leveldb/db/snapshot.h +++ /dev/null @@ -1,66 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#ifndef STORAGE_LEVELDB_DB_SNAPSHOT_H_ -#define STORAGE_LEVELDB_DB_SNAPSHOT_H_ - -#include "leveldb/db.h" - -namespace leveldb { - -class SnapshotList; - -// Snapshots are kept in a doubly-linked list in the DB. -// Each SnapshotImpl corresponds to a particular sequence number. -class SnapshotImpl : public Snapshot { - public: - SequenceNumber number_; // const after creation - - private: - friend class SnapshotList; - - // SnapshotImpl is kept in a doubly-linked circular list - SnapshotImpl* prev_; - SnapshotImpl* next_; - - SnapshotList* list_; // just for sanity checks -}; - -class SnapshotList { - public: - SnapshotList() { - list_.prev_ = &list_; - list_.next_ = &list_; - } - - bool empty() const { return list_.next_ == &list_; } - SnapshotImpl* oldest() const { assert(!empty()); return list_.next_; } - SnapshotImpl* newest() const { assert(!empty()); return list_.prev_; } - - const SnapshotImpl* New(SequenceNumber seq) { - SnapshotImpl* s = new SnapshotImpl; - s->number_ = seq; - s->list_ = this; - s->next_ = &list_; - s->prev_ = list_.prev_; - s->prev_->next_ = s; - s->next_->prev_ = s; - return s; - } - - void Delete(const SnapshotImpl* s) { - assert(s->list_ == this); - s->prev_->next_ = s->next_; - s->next_->prev_ = s->prev_; - delete s; - } - - private: - // Dummy head of doubly-linked list of snapshots - SnapshotImpl list_; -}; - -} // namespace leveldb - -#endif // STORAGE_LEVELDB_DB_SNAPSHOT_H_ diff --git a/leveldb/db/table_cache.cc b/leveldb/db/table_cache.cc deleted file mode 100644 index 497db27..0000000 --- a/leveldb/db/table_cache.cc +++ /dev/null @@ -1,121 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "db/table_cache.h" - -#include "db/filename.h" -#include "leveldb/env.h" -#include "leveldb/table.h" -#include "util/coding.h" - -namespace leveldb { - -struct TableAndFile { - RandomAccessFile* file; - Table* table; -}; - -static void DeleteEntry(const Slice& key, void* value) { - TableAndFile* tf = reinterpret_cast(value); - delete tf->table; - delete tf->file; - delete tf; -} - -static void UnrefEntry(void* arg1, void* arg2) { - Cache* cache = reinterpret_cast(arg1); - Cache::Handle* h = reinterpret_cast(arg2); - cache->Release(h); -} - -TableCache::TableCache(const std::string& dbname, - const Options* options, - int entries) - : env_(options->env), - dbname_(dbname), - options_(options), - cache_(NewLRUCache(entries)) { -} - -TableCache::~TableCache() { - delete cache_; -} - -Status TableCache::FindTable(uint64_t file_number, uint64_t file_size, - Cache::Handle** handle) { - Status s; - char buf[sizeof(file_number)]; - EncodeFixed64(buf, file_number); - Slice key(buf, sizeof(buf)); - *handle = cache_->Lookup(key); - if (*handle == NULL) { - std::string fname = TableFileName(dbname_, file_number); - RandomAccessFile* file = NULL; - Table* table = NULL; - s = env_->NewRandomAccessFile(fname, &file); - if (s.ok()) { - s = Table::Open(*options_, file, file_size, &table); - } - - if (!s.ok()) { - assert(table == NULL); - delete file; - // We do not cache error results so that if the error is transient, - // or somebody repairs the file, we recover automatically. - } else { - TableAndFile* tf = new TableAndFile; - tf->file = file; - tf->table = table; - *handle = cache_->Insert(key, tf, 1, &DeleteEntry); - } - } - return s; -} - -Iterator* TableCache::NewIterator(const ReadOptions& options, - uint64_t file_number, - uint64_t file_size, - Table** tableptr) { - if (tableptr != NULL) { - *tableptr = NULL; - } - - Cache::Handle* handle = NULL; - Status s = FindTable(file_number, file_size, &handle); - if (!s.ok()) { - return NewErrorIterator(s); - } - - Table* table = reinterpret_cast(cache_->Value(handle))->table; - Iterator* result = table->NewIterator(options); - result->RegisterCleanup(&UnrefEntry, cache_, handle); - if (tableptr != NULL) { - *tableptr = table; - } - return result; -} - -Status TableCache::Get(const ReadOptions& options, - uint64_t file_number, - uint64_t file_size, - const Slice& k, - void* arg, - void (*saver)(void*, const Slice&, const Slice&)) { - Cache::Handle* handle = NULL; - Status s = FindTable(file_number, file_size, &handle); - if (s.ok()) { - Table* t = reinterpret_cast(cache_->Value(handle))->table; - s = t->InternalGet(options, k, arg, saver); - cache_->Release(handle); - } - return s; -} - -void TableCache::Evict(uint64_t file_number) { - char buf[sizeof(file_number)]; - EncodeFixed64(buf, file_number); - cache_->Erase(Slice(buf, sizeof(buf))); -} - -} // namespace leveldb diff --git a/leveldb/db/table_cache.h b/leveldb/db/table_cache.h deleted file mode 100644 index 8cf4aaf..0000000 --- a/leveldb/db/table_cache.h +++ /dev/null @@ -1,61 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. -// -// Thread-safe (provides internal synchronization) - -#ifndef STORAGE_LEVELDB_DB_TABLE_CACHE_H_ -#define STORAGE_LEVELDB_DB_TABLE_CACHE_H_ - -#include -#include -#include "db/dbformat.h" -#include "leveldb/cache.h" -#include "leveldb/table.h" -#include "port/port.h" - -namespace leveldb { - -class Env; - -class TableCache { - public: - TableCache(const std::string& dbname, const Options* options, int entries); - ~TableCache(); - - // Return an iterator for the specified file number (the corresponding - // file length must be exactly "file_size" bytes). If "tableptr" is - // non-NULL, also sets "*tableptr" to point to the Table object - // underlying the returned iterator, or NULL if no Table object underlies - // the returned iterator. The returned "*tableptr" object is owned by - // the cache and should not be deleted, and is valid for as long as the - // returned iterator is live. - Iterator* NewIterator(const ReadOptions& options, - uint64_t file_number, - uint64_t file_size, - Table** tableptr = NULL); - - // If a seek to internal key "k" in specified file finds an entry, - // call (*handle_result)(arg, found_key, found_value). - Status Get(const ReadOptions& options, - uint64_t file_number, - uint64_t file_size, - const Slice& k, - void* arg, - void (*handle_result)(void*, const Slice&, const Slice&)); - - // Evict any entry for the specified file number - void Evict(uint64_t file_number); - - private: - Env* const env_; - const std::string dbname_; - const Options* options_; - Cache* cache_; - - Status FindTable(uint64_t file_number, uint64_t file_size, Cache::Handle**); -}; - -} // namespace leveldb - -#endif // STORAGE_LEVELDB_DB_TABLE_CACHE_H_ diff --git a/leveldb/db/version_edit.cc b/leveldb/db/version_edit.cc deleted file mode 100644 index f10a2d5..0000000 --- a/leveldb/db/version_edit.cc +++ /dev/null @@ -1,266 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "db/version_edit.h" - -#include "db/version_set.h" -#include "util/coding.h" - -namespace leveldb { - -// Tag numbers for serialized VersionEdit. These numbers are written to -// disk and should not be changed. -enum Tag { - kComparator = 1, - kLogNumber = 2, - kNextFileNumber = 3, - kLastSequence = 4, - kCompactPointer = 5, - kDeletedFile = 6, - kNewFile = 7, - // 8 was used for large value refs - kPrevLogNumber = 9 -}; - -void VersionEdit::Clear() { - comparator_.clear(); - log_number_ = 0; - prev_log_number_ = 0; - last_sequence_ = 0; - next_file_number_ = 0; - has_comparator_ = false; - has_log_number_ = false; - has_prev_log_number_ = false; - has_next_file_number_ = false; - has_last_sequence_ = false; - deleted_files_.clear(); - new_files_.clear(); -} - -void VersionEdit::EncodeTo(std::string* dst) const { - if (has_comparator_) { - PutVarint32(dst, kComparator); - PutLengthPrefixedSlice(dst, comparator_); - } - if (has_log_number_) { - PutVarint32(dst, kLogNumber); - PutVarint64(dst, log_number_); - } - if (has_prev_log_number_) { - PutVarint32(dst, kPrevLogNumber); - PutVarint64(dst, prev_log_number_); - } - if (has_next_file_number_) { - PutVarint32(dst, kNextFileNumber); - PutVarint64(dst, next_file_number_); - } - if (has_last_sequence_) { - PutVarint32(dst, kLastSequence); - PutVarint64(dst, last_sequence_); - } - - for (size_t i = 0; i < compact_pointers_.size(); i++) { - PutVarint32(dst, kCompactPointer); - PutVarint32(dst, compact_pointers_[i].first); // level - PutLengthPrefixedSlice(dst, compact_pointers_[i].second.Encode()); - } - - for (DeletedFileSet::const_iterator iter = deleted_files_.begin(); - iter != deleted_files_.end(); - ++iter) { - PutVarint32(dst, kDeletedFile); - PutVarint32(dst, iter->first); // level - PutVarint64(dst, iter->second); // file number - } - - for (size_t i = 0; i < new_files_.size(); i++) { - const FileMetaData& f = new_files_[i].second; - PutVarint32(dst, kNewFile); - PutVarint32(dst, new_files_[i].first); // level - PutVarint64(dst, f.number); - PutVarint64(dst, f.file_size); - PutLengthPrefixedSlice(dst, f.smallest.Encode()); - PutLengthPrefixedSlice(dst, f.largest.Encode()); - } -} - -static bool GetInternalKey(Slice* input, InternalKey* dst) { - Slice str; - if (GetLengthPrefixedSlice(input, &str)) { - dst->DecodeFrom(str); - return true; - } else { - return false; - } -} - -static bool GetLevel(Slice* input, int* level) { - uint32_t v; - if (GetVarint32(input, &v) && - v < config::kNumLevels) { - *level = v; - return true; - } else { - return false; - } -} - -Status VersionEdit::DecodeFrom(const Slice& src) { - Clear(); - Slice input = src; - const char* msg = NULL; - uint32_t tag; - - // Temporary storage for parsing - int level; - uint64_t number; - FileMetaData f; - Slice str; - InternalKey key; - - while (msg == NULL && GetVarint32(&input, &tag)) { - switch (tag) { - case kComparator: - if (GetLengthPrefixedSlice(&input, &str)) { - comparator_ = str.ToString(); - has_comparator_ = true; - } else { - msg = "comparator name"; - } - break; - - case kLogNumber: - if (GetVarint64(&input, &log_number_)) { - has_log_number_ = true; - } else { - msg = "log number"; - } - break; - - case kPrevLogNumber: - if (GetVarint64(&input, &prev_log_number_)) { - has_prev_log_number_ = true; - } else { - msg = "previous log number"; - } - break; - - case kNextFileNumber: - if (GetVarint64(&input, &next_file_number_)) { - has_next_file_number_ = true; - } else { - msg = "next file number"; - } - break; - - case kLastSequence: - if (GetVarint64(&input, &last_sequence_)) { - has_last_sequence_ = true; - } else { - msg = "last sequence number"; - } - break; - - case kCompactPointer: - if (GetLevel(&input, &level) && - GetInternalKey(&input, &key)) { - compact_pointers_.push_back(std::make_pair(level, key)); - } else { - msg = "compaction pointer"; - } - break; - - case kDeletedFile: - if (GetLevel(&input, &level) && - GetVarint64(&input, &number)) { - deleted_files_.insert(std::make_pair(level, number)); - } else { - msg = "deleted file"; - } - break; - - case kNewFile: - if (GetLevel(&input, &level) && - GetVarint64(&input, &f.number) && - GetVarint64(&input, &f.file_size) && - GetInternalKey(&input, &f.smallest) && - GetInternalKey(&input, &f.largest)) { - new_files_.push_back(std::make_pair(level, f)); - } else { - msg = "new-file entry"; - } - break; - - default: - msg = "unknown tag"; - break; - } - } - - if (msg == NULL && !input.empty()) { - msg = "invalid tag"; - } - - Status result; - if (msg != NULL) { - result = Status::Corruption("VersionEdit", msg); - } - return result; -} - -std::string VersionEdit::DebugString() const { - std::string r; - r.append("VersionEdit {"); - if (has_comparator_) { - r.append("\n Comparator: "); - r.append(comparator_); - } - if (has_log_number_) { - r.append("\n LogNumber: "); - AppendNumberTo(&r, log_number_); - } - if (has_prev_log_number_) { - r.append("\n PrevLogNumber: "); - AppendNumberTo(&r, prev_log_number_); - } - if (has_next_file_number_) { - r.append("\n NextFile: "); - AppendNumberTo(&r, next_file_number_); - } - if (has_last_sequence_) { - r.append("\n LastSeq: "); - AppendNumberTo(&r, last_sequence_); - } - for (size_t i = 0; i < compact_pointers_.size(); i++) { - r.append("\n CompactPointer: "); - AppendNumberTo(&r, compact_pointers_[i].first); - r.append(" "); - r.append(compact_pointers_[i].second.DebugString()); - } - for (DeletedFileSet::const_iterator iter = deleted_files_.begin(); - iter != deleted_files_.end(); - ++iter) { - r.append("\n DeleteFile: "); - AppendNumberTo(&r, iter->first); - r.append(" "); - AppendNumberTo(&r, iter->second); - } - for (size_t i = 0; i < new_files_.size(); i++) { - const FileMetaData& f = new_files_[i].second; - r.append("\n AddFile: "); - AppendNumberTo(&r, new_files_[i].first); - r.append(" "); - AppendNumberTo(&r, f.number); - r.append(" "); - AppendNumberTo(&r, f.file_size); - r.append(" "); - r.append(f.smallest.DebugString()); - r.append(" .. "); - r.append(f.largest.DebugString()); - } - r.append("\n}\n"); - return r; -} - -} // namespace leveldb diff --git a/leveldb/db/version_edit.h b/leveldb/db/version_edit.h deleted file mode 100644 index eaef77b..0000000 --- a/leveldb/db/version_edit.h +++ /dev/null @@ -1,107 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#ifndef STORAGE_LEVELDB_DB_VERSION_EDIT_H_ -#define STORAGE_LEVELDB_DB_VERSION_EDIT_H_ - -#include -#include -#include -#include "db/dbformat.h" - -namespace leveldb { - -class VersionSet; - -struct FileMetaData { - int refs; - int allowed_seeks; // Seeks allowed until compaction - uint64_t number; - uint64_t file_size; // File size in bytes - InternalKey smallest; // Smallest internal key served by table - InternalKey largest; // Largest internal key served by table - - FileMetaData() : refs(0), allowed_seeks(1 << 30), file_size(0) { } -}; - -class VersionEdit { - public: - VersionEdit() { Clear(); } - ~VersionEdit() { } - - void Clear(); - - void SetComparatorName(const Slice& name) { - has_comparator_ = true; - comparator_ = name.ToString(); - } - void SetLogNumber(uint64_t num) { - has_log_number_ = true; - log_number_ = num; - } - void SetPrevLogNumber(uint64_t num) { - has_prev_log_number_ = true; - prev_log_number_ = num; - } - void SetNextFile(uint64_t num) { - has_next_file_number_ = true; - next_file_number_ = num; - } - void SetLastSequence(SequenceNumber seq) { - has_last_sequence_ = true; - last_sequence_ = seq; - } - void SetCompactPointer(int level, const InternalKey& key) { - compact_pointers_.push_back(std::make_pair(level, key)); - } - - // Add the specified file at the specified number. - // REQUIRES: This version has not been saved (see VersionSet::SaveTo) - // REQUIRES: "smallest" and "largest" are smallest and largest keys in file - void AddFile(int level, uint64_t file, - uint64_t file_size, - const InternalKey& smallest, - const InternalKey& largest) { - FileMetaData f; - f.number = file; - f.file_size = file_size; - f.smallest = smallest; - f.largest = largest; - new_files_.push_back(std::make_pair(level, f)); - } - - // Delete the specified "file" from the specified "level". - void DeleteFile(int level, uint64_t file) { - deleted_files_.insert(std::make_pair(level, file)); - } - - void EncodeTo(std::string* dst) const; - Status DecodeFrom(const Slice& src); - - std::string DebugString() const; - - private: - friend class VersionSet; - - typedef std::set< std::pair > DeletedFileSet; - - std::string comparator_; - uint64_t log_number_; - uint64_t prev_log_number_; - uint64_t next_file_number_; - SequenceNumber last_sequence_; - bool has_comparator_; - bool has_log_number_; - bool has_prev_log_number_; - bool has_next_file_number_; - bool has_last_sequence_; - - std::vector< std::pair > compact_pointers_; - DeletedFileSet deleted_files_; - std::vector< std::pair > new_files_; -}; - -} // namespace leveldb - -#endif // STORAGE_LEVELDB_DB_VERSION_EDIT_H_ diff --git a/leveldb/db/version_edit_test.cc b/leveldb/db/version_edit_test.cc deleted file mode 100644 index 280310b..0000000 --- a/leveldb/db/version_edit_test.cc +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "db/version_edit.h" -#include "util/testharness.h" - -namespace leveldb { - -static void TestEncodeDecode(const VersionEdit& edit) { - std::string encoded, encoded2; - edit.EncodeTo(&encoded); - VersionEdit parsed; - Status s = parsed.DecodeFrom(encoded); - ASSERT_TRUE(s.ok()) << s.ToString(); - parsed.EncodeTo(&encoded2); - ASSERT_EQ(encoded, encoded2); -} - -class VersionEditTest { }; - -TEST(VersionEditTest, EncodeDecode) { - static const uint64_t kBig = 1ull << 50; - - VersionEdit edit; - for (int i = 0; i < 4; i++) { - TestEncodeDecode(edit); - edit.AddFile(3, kBig + 300 + i, kBig + 400 + i, - InternalKey("foo", kBig + 500 + i, kTypeValue), - InternalKey("zoo", kBig + 600 + i, kTypeDeletion)); - edit.DeleteFile(4, kBig + 700 + i); - edit.SetCompactPointer(i, InternalKey("x", kBig + 900 + i, kTypeValue)); - } - - edit.SetComparatorName("foo"); - edit.SetLogNumber(kBig + 100); - edit.SetNextFile(kBig + 200); - edit.SetLastSequence(kBig + 1000); - TestEncodeDecode(edit); -} - -} // namespace leveldb - -int main(int argc, char** argv) { - return leveldb::test::RunAllTests(); -} diff --git a/leveldb/db/version_set.cc b/leveldb/db/version_set.cc deleted file mode 100644 index cf976b4..0000000 --- a/leveldb/db/version_set.cc +++ /dev/null @@ -1,1402 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "db/version_set.h" - -#include -#include -#include "db/filename.h" -#include "db/log_reader.h" -#include "db/log_writer.h" -#include "db/memtable.h" -#include "db/table_cache.h" -#include "leveldb/env.h" -#include "leveldb/table_builder.h" -#include "table/merger.h" -#include "table/two_level_iterator.h" -#include "util/coding.h" -#include "util/logging.h" - -namespace leveldb { - -static const int kTargetFileSize = 2 * 1048576; - -// Maximum bytes of overlaps in grandparent (i.e., level+2) before we -// stop building a single file in a level->level+1 compaction. -static const int64_t kMaxGrandParentOverlapBytes = 10 * kTargetFileSize; - -// Maximum number of bytes in all compacted files. We avoid expanding -// the lower level file set of a compaction if it would make the -// total compaction cover more than this many bytes. -static const int64_t kExpandedCompactionByteSizeLimit = 25 * kTargetFileSize; - -static double MaxBytesForLevel(int level) { - // Note: the result for level zero is not really used since we set - // the level-0 compaction threshold based on number of files. - double result = 10 * 1048576.0; // Result for both level-0 and level-1 - while (level > 1) { - result *= 10; - level--; - } - return result; -} - -static uint64_t MaxFileSizeForLevel(int level) { - return kTargetFileSize; // We could vary per level to reduce number of files? -} - -static int64_t TotalFileSize(const std::vector& files) { - int64_t sum = 0; - for (size_t i = 0; i < files.size(); i++) { - sum += files[i]->file_size; - } - return sum; -} - -namespace { -std::string IntSetToString(const std::set& s) { - std::string result = "{"; - for (std::set::const_iterator it = s.begin(); - it != s.end(); - ++it) { - result += (result.size() > 1) ? "," : ""; - result += NumberToString(*it); - } - result += "}"; - return result; -} -} // namespace - -Version::~Version() { - assert(refs_ == 0); - - // Remove from linked list - prev_->next_ = next_; - next_->prev_ = prev_; - - // Drop references to files - for (int level = 0; level < config::kNumLevels; level++) { - for (size_t i = 0; i < files_[level].size(); i++) { - FileMetaData* f = files_[level][i]; - assert(f->refs > 0); - f->refs--; - if (f->refs <= 0) { - delete f; - } - } - } -} - -int FindFile(const InternalKeyComparator& icmp, - const std::vector& files, - const Slice& key) { - uint32_t left = 0; - uint32_t right = files.size(); - while (left < right) { - uint32_t mid = (left + right) / 2; - const FileMetaData* f = files[mid]; - if (icmp.InternalKeyComparator::Compare(f->largest.Encode(), key) < 0) { - // Key at "mid.largest" is < "target". Therefore all - // files at or before "mid" are uninteresting. - left = mid + 1; - } else { - // Key at "mid.largest" is >= "target". Therefore all files - // after "mid" are uninteresting. - right = mid; - } - } - return right; -} - -static bool AfterFile(const Comparator* ucmp, - const Slice* user_key, const FileMetaData* f) { - // NULL user_key occurs before all keys and is therefore never after *f - return (user_key != NULL && - ucmp->Compare(*user_key, f->largest.user_key()) > 0); -} - -static bool BeforeFile(const Comparator* ucmp, - const Slice* user_key, const FileMetaData* f) { - // NULL user_key occurs after all keys and is therefore never before *f - return (user_key != NULL && - ucmp->Compare(*user_key, f->smallest.user_key()) < 0); -} - -bool SomeFileOverlapsRange( - const InternalKeyComparator& icmp, - bool disjoint_sorted_files, - const std::vector& files, - const Slice* smallest_user_key, - const Slice* largest_user_key) { - const Comparator* ucmp = icmp.user_comparator(); - if (!disjoint_sorted_files) { - // Need to check against all files - for (size_t i = 0; i < files.size(); i++) { - const FileMetaData* f = files[i]; - if (AfterFile(ucmp, smallest_user_key, f) || - BeforeFile(ucmp, largest_user_key, f)) { - // No overlap - } else { - return true; // Overlap - } - } - return false; - } - - // Binary search over file list - uint32_t index = 0; - if (smallest_user_key != NULL) { - // Find the earliest possible internal key for smallest_user_key - InternalKey small(*smallest_user_key, kMaxSequenceNumber,kValueTypeForSeek); - index = FindFile(icmp, files, small.Encode()); - } - - if (index >= files.size()) { - // beginning of range is after all files, so no overlap. - return false; - } - - return !BeforeFile(ucmp, largest_user_key, files[index]); -} - -// An internal iterator. For a given version/level pair, yields -// information about the files in the level. For a given entry, key() -// is the largest key that occurs in the file, and value() is an -// 16-byte value containing the file number and file size, both -// encoded using EncodeFixed64. -class Version::LevelFileNumIterator : public Iterator { - public: - LevelFileNumIterator(const InternalKeyComparator& icmp, - const std::vector* flist) - : icmp_(icmp), - flist_(flist), - index_(flist->size()) { // Marks as invalid - } - virtual bool Valid() const { - return index_ < flist_->size(); - } - virtual void Seek(const Slice& target) { - index_ = FindFile(icmp_, *flist_, target); - } - virtual void SeekToFirst() { index_ = 0; } - virtual void SeekToLast() { - index_ = flist_->empty() ? 0 : flist_->size() - 1; - } - virtual void Next() { - assert(Valid()); - index_++; - } - virtual void Prev() { - assert(Valid()); - if (index_ == 0) { - index_ = flist_->size(); // Marks as invalid - } else { - index_--; - } - } - Slice key() const { - assert(Valid()); - return (*flist_)[index_]->largest.Encode(); - } - Slice value() const { - assert(Valid()); - EncodeFixed64(value_buf_, (*flist_)[index_]->number); - EncodeFixed64(value_buf_+8, (*flist_)[index_]->file_size); - return Slice(value_buf_, sizeof(value_buf_)); - } - virtual Status status() const { return Status::OK(); } - private: - const InternalKeyComparator icmp_; - const std::vector* const flist_; - uint32_t index_; - - // Backing store for value(). Holds the file number and size. - mutable char value_buf_[16]; -}; - -static Iterator* GetFileIterator(void* arg, - const ReadOptions& options, - const Slice& file_value) { - TableCache* cache = reinterpret_cast(arg); - if (file_value.size() != 16) { - return NewErrorIterator( - Status::Corruption("FileReader invoked with unexpected value")); - } else { - return cache->NewIterator(options, - DecodeFixed64(file_value.data()), - DecodeFixed64(file_value.data() + 8)); - } -} - -Iterator* Version::NewConcatenatingIterator(const ReadOptions& options, - int level) const { - return NewTwoLevelIterator( - new LevelFileNumIterator(vset_->icmp_, &files_[level]), - &GetFileIterator, vset_->table_cache_, options); -} - -void Version::AddIterators(const ReadOptions& options, - std::vector* iters) { - // Merge all level zero files together since they may overlap - for (size_t i = 0; i < files_[0].size(); i++) { - iters->push_back( - vset_->table_cache_->NewIterator( - options, files_[0][i]->number, files_[0][i]->file_size)); - } - - // For levels > 0, we can use a concatenating iterator that sequentially - // walks through the non-overlapping files in the level, opening them - // lazily. - for (int level = 1; level < config::kNumLevels; level++) { - if (!files_[level].empty()) { - iters->push_back(NewConcatenatingIterator(options, level)); - } - } -} - -// Callback from TableCache::Get() -namespace { -enum SaverState { - kNotFound, - kFound, - kDeleted, - kCorrupt, -}; -struct Saver { - SaverState state; - const Comparator* ucmp; - Slice user_key; - std::string* value; -}; -} -static void SaveValue(void* arg, const Slice& ikey, const Slice& v) { - Saver* s = reinterpret_cast(arg); - ParsedInternalKey parsed_key; - if (!ParseInternalKey(ikey, &parsed_key)) { - s->state = kCorrupt; - } else { - if (s->ucmp->Compare(parsed_key.user_key, s->user_key) == 0) { - s->state = (parsed_key.type == kTypeValue) ? kFound : kDeleted; - if (s->state == kFound) { - s->value->assign(v.data(), v.size()); - } - } - } -} - -static bool NewestFirst(FileMetaData* a, FileMetaData* b) { - return a->number > b->number; -} - -Status Version::Get(const ReadOptions& options, - const LookupKey& k, - std::string* value, - GetStats* stats) { - Slice ikey = k.internal_key(); - Slice user_key = k.user_key(); - const Comparator* ucmp = vset_->icmp_.user_comparator(); - Status s; - - stats->seek_file = NULL; - stats->seek_file_level = -1; - FileMetaData* last_file_read = NULL; - int last_file_read_level = -1; - - // We can search level-by-level since entries never hop across - // levels. Therefore we are guaranteed that if we find data - // in an smaller level, later levels are irrelevant. - std::vector tmp; - FileMetaData* tmp2; - for (int level = 0; level < config::kNumLevels; level++) { - size_t num_files = files_[level].size(); - if (num_files == 0) continue; - - // Get the list of files to search in this level - FileMetaData* const* files = &files_[level][0]; - if (level == 0) { - // Level-0 files may overlap each other. Find all files that - // overlap user_key and process them in order from newest to oldest. - tmp.reserve(num_files); - for (uint32_t i = 0; i < num_files; i++) { - FileMetaData* f = files[i]; - if (ucmp->Compare(user_key, f->smallest.user_key()) >= 0 && - ucmp->Compare(user_key, f->largest.user_key()) <= 0) { - tmp.push_back(f); - } - } - if (tmp.empty()) continue; - - std::sort(tmp.begin(), tmp.end(), NewestFirst); - files = &tmp[0]; - num_files = tmp.size(); - } else { - // Binary search to find earliest index whose largest key >= ikey. - uint32_t index = FindFile(vset_->icmp_, files_[level], ikey); - if (index >= num_files) { - files = NULL; - num_files = 0; - } else { - tmp2 = files[index]; - if (ucmp->Compare(user_key, tmp2->smallest.user_key()) < 0) { - // All of "tmp2" is past any data for user_key - files = NULL; - num_files = 0; - } else { - files = &tmp2; - num_files = 1; - } - } - } - - for (uint32_t i = 0; i < num_files; ++i) { - if (last_file_read != NULL && stats->seek_file == NULL) { - // We have had more than one seek for this read. Charge the 1st file. - stats->seek_file = last_file_read; - stats->seek_file_level = last_file_read_level; - } - - FileMetaData* f = files[i]; - last_file_read = f; - last_file_read_level = level; - - Saver saver; - saver.state = kNotFound; - saver.ucmp = ucmp; - saver.user_key = user_key; - saver.value = value; - s = vset_->table_cache_->Get(options, f->number, f->file_size, - ikey, &saver, SaveValue); - if (!s.ok()) { - return s; - } - switch (saver.state) { - case kNotFound: - break; // Keep searching in other files - case kFound: - return s; - case kDeleted: - s = Status::NotFound(Slice()); // Use empty error message for speed - return s; - case kCorrupt: - s = Status::Corruption("corrupted key for ", user_key); - return s; - } - } - } - - return Status::NotFound(Slice()); // Use an empty error message for speed -} - -bool Version::UpdateStats(const GetStats& stats) { - FileMetaData* f = stats.seek_file; - if (f != NULL) { - f->allowed_seeks--; - if (f->allowed_seeks <= 0 && file_to_compact_ == NULL) { - file_to_compact_ = f; - file_to_compact_level_ = stats.seek_file_level; - return true; - } - } - return false; -} - -void Version::Ref() { - ++refs_; -} - -void Version::Unref() { - assert(this != &vset_->dummy_versions_); - assert(refs_ >= 1); - --refs_; - if (refs_ == 0) { - delete this; - } -} - -bool Version::OverlapInLevel(int level, - const Slice* smallest_user_key, - const Slice* largest_user_key) { - return SomeFileOverlapsRange(vset_->icmp_, (level > 0), files_[level], - smallest_user_key, largest_user_key); -} - -int Version::PickLevelForMemTableOutput( - const Slice& smallest_user_key, - const Slice& largest_user_key) { - int level = 0; - if (!OverlapInLevel(0, &smallest_user_key, &largest_user_key)) { - // Push to next level if there is no overlap in next level, - // and the #bytes overlapping in the level after that are limited. - InternalKey start(smallest_user_key, kMaxSequenceNumber, kValueTypeForSeek); - InternalKey limit(largest_user_key, 0, static_cast(0)); - std::vector overlaps; - while (level < config::kMaxMemCompactLevel) { - if (OverlapInLevel(level + 1, &smallest_user_key, &largest_user_key)) { - break; - } - GetOverlappingInputs(level + 2, &start, &limit, &overlaps); - const int64_t sum = TotalFileSize(overlaps); - if (sum > kMaxGrandParentOverlapBytes) { - break; - } - level++; - } - } - return level; -} - -// Store in "*inputs" all files in "level" that overlap [begin,end] -void Version::GetOverlappingInputs( - int level, - const InternalKey* begin, - const InternalKey* end, - std::vector* inputs) { - inputs->clear(); - Slice user_begin, user_end; - if (begin != NULL) { - user_begin = begin->user_key(); - } - if (end != NULL) { - user_end = end->user_key(); - } - const Comparator* user_cmp = vset_->icmp_.user_comparator(); - for (size_t i = 0; i < files_[level].size(); ) { - FileMetaData* f = files_[level][i++]; - const Slice file_start = f->smallest.user_key(); - const Slice file_limit = f->largest.user_key(); - if (begin != NULL && user_cmp->Compare(file_limit, user_begin) < 0) { - // "f" is completely before specified range; skip it - } else if (end != NULL && user_cmp->Compare(file_start, user_end) > 0) { - // "f" is completely after specified range; skip it - } else { - inputs->push_back(f); - if (level == 0) { - // Level-0 files may overlap each other. So check if the newly - // added file has expanded the range. If so, restart search. - if (begin != NULL && user_cmp->Compare(file_start, user_begin) < 0) { - user_begin = file_start; - inputs->clear(); - i = 0; - } else if (end != NULL && user_cmp->Compare(file_limit, user_end) > 0) { - user_end = file_limit; - inputs->clear(); - i = 0; - } - } - } - } -} - -std::string Version::DebugString() const { - std::string r; - for (int level = 0; level < config::kNumLevels; level++) { - // E.g., - // --- level 1 --- - // 17:123['a' .. 'd'] - // 20:43['e' .. 'g'] - r.append("--- level "); - AppendNumberTo(&r, level); - r.append(" ---\n"); - const std::vector& files = files_[level]; - for (size_t i = 0; i < files.size(); i++) { - r.push_back(' '); - AppendNumberTo(&r, files[i]->number); - r.push_back(':'); - AppendNumberTo(&r, files[i]->file_size); - r.append("["); - r.append(files[i]->smallest.DebugString()); - r.append(" .. "); - r.append(files[i]->largest.DebugString()); - r.append("]\n"); - } - } - return r; -} - -// A helper class so we can efficiently apply a whole sequence -// of edits to a particular state without creating intermediate -// Versions that contain full copies of the intermediate state. -class VersionSet::Builder { - private: - // Helper to sort by v->files_[file_number].smallest - struct BySmallestKey { - const InternalKeyComparator* internal_comparator; - - bool operator()(FileMetaData* f1, FileMetaData* f2) const { - int r = internal_comparator->Compare(f1->smallest, f2->smallest); - if (r != 0) { - return (r < 0); - } else { - // Break ties by file number - return (f1->number < f2->number); - } - } - }; - - typedef std::set FileSet; - struct LevelState { - std::set deleted_files; - FileSet* added_files; - }; - - VersionSet* vset_; - Version* base_; - LevelState levels_[config::kNumLevels]; - - public: - // Initialize a builder with the files from *base and other info from *vset - Builder(VersionSet* vset, Version* base) - : vset_(vset), - base_(base) { - base_->Ref(); - BySmallestKey cmp; - cmp.internal_comparator = &vset_->icmp_; - for (int level = 0; level < config::kNumLevels; level++) { - levels_[level].added_files = new FileSet(cmp); - } - } - - ~Builder() { - for (int level = 0; level < config::kNumLevels; level++) { - const FileSet* added = levels_[level].added_files; - std::vector to_unref; - to_unref.reserve(added->size()); - for (FileSet::const_iterator it = added->begin(); - it != added->end(); ++it) { - to_unref.push_back(*it); - } - delete added; - for (uint32_t i = 0; i < to_unref.size(); i++) { - FileMetaData* f = to_unref[i]; - f->refs--; - if (f->refs <= 0) { - delete f; - } - } - } - base_->Unref(); - } - - // Apply all of the edits in *edit to the current state. - void Apply(VersionEdit* edit) { - // Update compaction pointers - for (size_t i = 0; i < edit->compact_pointers_.size(); i++) { - const int level = edit->compact_pointers_[i].first; - vset_->compact_pointer_[level] = - edit->compact_pointers_[i].second.Encode().ToString(); - } - - // Delete files - const VersionEdit::DeletedFileSet& del = edit->deleted_files_; - for (VersionEdit::DeletedFileSet::const_iterator iter = del.begin(); - iter != del.end(); - ++iter) { - const int level = iter->first; - const uint64_t number = iter->second; - levels_[level].deleted_files.insert(number); - } - - // Add new files - for (size_t i = 0; i < edit->new_files_.size(); i++) { - const int level = edit->new_files_[i].first; - FileMetaData* f = new FileMetaData(edit->new_files_[i].second); - f->refs = 1; - - // We arrange to automatically compact this file after - // a certain number of seeks. Let's assume: - // (1) One seek costs 10ms - // (2) Writing or reading 1MB costs 10ms (100MB/s) - // (3) A compaction of 1MB does 25MB of IO: - // 1MB read from this level - // 10-12MB read from next level (boundaries may be misaligned) - // 10-12MB written to next level - // This implies that 25 seeks cost the same as the compaction - // of 1MB of data. I.e., one seek costs approximately the - // same as the compaction of 40KB of data. We are a little - // conservative and allow approximately one seek for every 16KB - // of data before triggering a compaction. - f->allowed_seeks = (f->file_size / 16384); - if (f->allowed_seeks < 100) f->allowed_seeks = 100; - - levels_[level].deleted_files.erase(f->number); - levels_[level].added_files->insert(f); - } - } - - // Save the current state in *v. - void SaveTo(Version* v) { - BySmallestKey cmp; - cmp.internal_comparator = &vset_->icmp_; - for (int level = 0; level < config::kNumLevels; level++) { - // Merge the set of added files with the set of pre-existing files. - // Drop any deleted files. Store the result in *v. - const std::vector& base_files = base_->files_[level]; - std::vector::const_iterator base_iter = base_files.begin(); - std::vector::const_iterator base_end = base_files.end(); - const FileSet* added = levels_[level].added_files; - v->files_[level].reserve(base_files.size() + added->size()); - for (FileSet::const_iterator added_iter = added->begin(); - added_iter != added->end(); - ++added_iter) { - // Add all smaller files listed in base_ - for (std::vector::const_iterator bpos - = std::upper_bound(base_iter, base_end, *added_iter, cmp); - base_iter != bpos; - ++base_iter) { - MaybeAddFile(v, level, *base_iter); - } - - MaybeAddFile(v, level, *added_iter); - } - - // Add remaining base files - for (; base_iter != base_end; ++base_iter) { - MaybeAddFile(v, level, *base_iter); - } - -#ifndef NDEBUG - // Make sure there is no overlap in levels > 0 - if (level > 0) { - for (uint32_t i = 1; i < v->files_[level].size(); i++) { - const InternalKey& prev_end = v->files_[level][i-1]->largest; - const InternalKey& this_begin = v->files_[level][i]->smallest; - if (vset_->icmp_.Compare(prev_end, this_begin) >= 0) { - fprintf(stderr, "overlapping ranges in same level %s vs. %s\n", - prev_end.DebugString().c_str(), - this_begin.DebugString().c_str()); - abort(); - } - } - } -#endif - } - } - - void MaybeAddFile(Version* v, int level, FileMetaData* f) { - if (levels_[level].deleted_files.count(f->number) > 0) { - // File is deleted: do nothing - } else { - std::vector* files = &v->files_[level]; - if (level > 0 && !files->empty()) { - // Must not overlap - assert(vset_->icmp_.Compare((*files)[files->size()-1]->largest, - f->smallest) < 0); - } - f->refs++; - files->push_back(f); - } - } -}; - -VersionSet::VersionSet(const std::string& dbname, - const Options* options, - TableCache* table_cache, - const InternalKeyComparator* cmp) - : env_(options->env), - dbname_(dbname), - options_(options), - table_cache_(table_cache), - icmp_(*cmp), - next_file_number_(2), - manifest_file_number_(0), // Filled by Recover() - last_sequence_(0), - log_number_(0), - prev_log_number_(0), - descriptor_file_(NULL), - descriptor_log_(NULL), - dummy_versions_(this), - current_(NULL) { - AppendVersion(new Version(this)); -} - -VersionSet::~VersionSet() { - current_->Unref(); - assert(dummy_versions_.next_ == &dummy_versions_); // List must be empty - delete descriptor_log_; - delete descriptor_file_; -} - -void VersionSet::AppendVersion(Version* v) { - // Make "v" current - assert(v->refs_ == 0); - assert(v != current_); - if (current_ != NULL) { - current_->Unref(); - } - current_ = v; - v->Ref(); - - // Append to linked list - v->prev_ = dummy_versions_.prev_; - v->next_ = &dummy_versions_; - v->prev_->next_ = v; - v->next_->prev_ = v; -} - -Status VersionSet::LogAndApply(VersionEdit* edit, port::Mutex* mu) { - if (edit->has_log_number_) { - assert(edit->log_number_ >= log_number_); - assert(edit->log_number_ < next_file_number_); - } else { - edit->SetLogNumber(log_number_); - } - - if (!edit->has_prev_log_number_) { - edit->SetPrevLogNumber(prev_log_number_); - } - - edit->SetNextFile(next_file_number_); - edit->SetLastSequence(last_sequence_); - - Version* v = new Version(this); - { - Builder builder(this, current_); - builder.Apply(edit); - builder.SaveTo(v); - } - Finalize(v); - - // Initialize new descriptor log file if necessary by creating - // a temporary file that contains a snapshot of the current version. - std::string new_manifest_file; - Status s; - if (descriptor_log_ == NULL) { - // No reason to unlock *mu here since we only hit this path in the - // first call to LogAndApply (when opening the database). - assert(descriptor_file_ == NULL); - new_manifest_file = DescriptorFileName(dbname_, manifest_file_number_); - edit->SetNextFile(next_file_number_); - s = env_->NewWritableFile(new_manifest_file, &descriptor_file_); - if (s.ok()) { - descriptor_log_ = new log::Writer(descriptor_file_); - s = WriteSnapshot(descriptor_log_); - } - } - - // Unlock during expensive MANIFEST log write - { - mu->Unlock(); - - // Write new record to MANIFEST log - if (s.ok()) { - std::string record; - edit->EncodeTo(&record); - s = descriptor_log_->AddRecord(record); - if (s.ok()) { - s = descriptor_file_->Sync(); - } - } - - // If we just created a new descriptor file, install it by writing a - // new CURRENT file that points to it. - if (s.ok() && !new_manifest_file.empty()) { - s = SetCurrentFile(env_, dbname_, manifest_file_number_); - } - - mu->Lock(); - } - - // Install the new version - if (s.ok()) { - AppendVersion(v); - log_number_ = edit->log_number_; - prev_log_number_ = edit->prev_log_number_; - } else { - delete v; - if (!new_manifest_file.empty()) { - delete descriptor_log_; - delete descriptor_file_; - descriptor_log_ = NULL; - descriptor_file_ = NULL; - env_->DeleteFile(new_manifest_file); - } - } - - return s; -} - -Status VersionSet::Recover() { - struct LogReporter : public log::Reader::Reporter { - Status* status; - virtual void Corruption(size_t bytes, const Status& s) { - if (this->status->ok()) *this->status = s; - } - }; - - // Read "CURRENT" file, which contains a pointer to the current manifest file - std::string current; - Status s = ReadFileToString(env_, CurrentFileName(dbname_), ¤t); - if (!s.ok()) { - return s; - } - if (current.empty() || current[current.size()-1] != '\n') { - return Status::Corruption("CURRENT file does not end with newline"); - } - current.resize(current.size() - 1); - - std::string dscname = dbname_ + "/" + current; - SequentialFile* file; - s = env_->NewSequentialFile(dscname, &file); - if (!s.ok()) { - return s; - } - - bool have_log_number = false; - bool have_prev_log_number = false; - bool have_next_file = false; - bool have_last_sequence = false; - uint64_t next_file = 0; - uint64_t last_sequence = 0; - uint64_t log_number = 0; - uint64_t prev_log_number = 0; - Builder builder(this, current_); - - { - LogReporter reporter; - reporter.status = &s; - log::Reader reader(file, &reporter, true/*checksum*/, 0/*initial_offset*/); - Slice record; - std::string scratch; - while (reader.ReadRecord(&record, &scratch) && s.ok()) { - VersionEdit edit; - s = edit.DecodeFrom(record); - if (s.ok()) { - if (edit.has_comparator_ && - edit.comparator_ != icmp_.user_comparator()->Name()) { - s = Status::InvalidArgument( - edit.comparator_ + "does not match existing comparator ", - icmp_.user_comparator()->Name()); - } - } - - if (s.ok()) { - builder.Apply(&edit); - } - - if (edit.has_log_number_) { - log_number = edit.log_number_; - have_log_number = true; - } - - if (edit.has_prev_log_number_) { - prev_log_number = edit.prev_log_number_; - have_prev_log_number = true; - } - - if (edit.has_next_file_number_) { - next_file = edit.next_file_number_; - have_next_file = true; - } - - if (edit.has_last_sequence_) { - last_sequence = edit.last_sequence_; - have_last_sequence = true; - } - } - } - delete file; - file = NULL; - - if (s.ok()) { - if (!have_next_file) { - s = Status::Corruption("no meta-nextfile entry in descriptor"); - } else if (!have_log_number) { - s = Status::Corruption("no meta-lognumber entry in descriptor"); - } else if (!have_last_sequence) { - s = Status::Corruption("no last-sequence-number entry in descriptor"); - } - - if (!have_prev_log_number) { - prev_log_number = 0; - } - - MarkFileNumberUsed(prev_log_number); - MarkFileNumberUsed(log_number); - } - - if (s.ok()) { - Version* v = new Version(this); - builder.SaveTo(v); - // Install recovered version - Finalize(v); - AppendVersion(v); - manifest_file_number_ = next_file; - next_file_number_ = next_file + 1; - last_sequence_ = last_sequence; - log_number_ = log_number; - prev_log_number_ = prev_log_number; - } - - return s; -} - -void VersionSet::MarkFileNumberUsed(uint64_t number) { - if (next_file_number_ <= number) { - next_file_number_ = number + 1; - } -} - -void VersionSet::Finalize(Version* v) { - // Precomputed best level for next compaction - int best_level = -1; - double best_score = -1; - - for (int level = 0; level < config::kNumLevels-1; level++) { - double score; - if (level == 0) { - // We treat level-0 specially by bounding the number of files - // instead of number of bytes for two reasons: - // - // (1) With larger write-buffer sizes, it is nice not to do too - // many level-0 compactions. - // - // (2) The files in level-0 are merged on every read and - // therefore we wish to avoid too many files when the individual - // file size is small (perhaps because of a small write-buffer - // setting, or very high compression ratios, or lots of - // overwrites/deletions). - score = v->files_[level].size() / - static_cast(config::kL0_CompactionTrigger); - } else { - // Compute the ratio of current size to size limit. - const uint64_t level_bytes = TotalFileSize(v->files_[level]); - score = static_cast(level_bytes) / MaxBytesForLevel(level); - } - - if (score > best_score) { - best_level = level; - best_score = score; - } - } - - v->compaction_level_ = best_level; - v->compaction_score_ = best_score; -} - -Status VersionSet::WriteSnapshot(log::Writer* log) { - // TODO: Break up into multiple records to reduce memory usage on recovery? - - // Save metadata - VersionEdit edit; - edit.SetComparatorName(icmp_.user_comparator()->Name()); - - // Save compaction pointers - for (int level = 0; level < config::kNumLevels; level++) { - if (!compact_pointer_[level].empty()) { - InternalKey key; - key.DecodeFrom(compact_pointer_[level]); - edit.SetCompactPointer(level, key); - } - } - - // Save files - for (int level = 0; level < config::kNumLevels; level++) { - const std::vector& files = current_->files_[level]; - for (size_t i = 0; i < files.size(); i++) { - const FileMetaData* f = files[i]; - edit.AddFile(level, f->number, f->file_size, f->smallest, f->largest); - } - } - - std::string record; - edit.EncodeTo(&record); - return log->AddRecord(record); -} - -int VersionSet::NumLevelFiles(int level) const { - assert(level >= 0); - assert(level < config::kNumLevels); - return current_->files_[level].size(); -} - -const char* VersionSet::LevelSummary(LevelSummaryStorage* scratch) const { - // Update code if kNumLevels changes - assert(config::kNumLevels == 7); - snprintf(scratch->buffer, sizeof(scratch->buffer), - "files[ %d %d %d %d %d %d %d ]", - int(current_->files_[0].size()), - int(current_->files_[1].size()), - int(current_->files_[2].size()), - int(current_->files_[3].size()), - int(current_->files_[4].size()), - int(current_->files_[5].size()), - int(current_->files_[6].size())); - return scratch->buffer; -} - -uint64_t VersionSet::ApproximateOffsetOf(Version* v, const InternalKey& ikey) { - uint64_t result = 0; - for (int level = 0; level < config::kNumLevels; level++) { - const std::vector& files = v->files_[level]; - for (size_t i = 0; i < files.size(); i++) { - if (icmp_.Compare(files[i]->largest, ikey) <= 0) { - // Entire file is before "ikey", so just add the file size - result += files[i]->file_size; - } else if (icmp_.Compare(files[i]->smallest, ikey) > 0) { - // Entire file is after "ikey", so ignore - if (level > 0) { - // Files other than level 0 are sorted by meta->smallest, so - // no further files in this level will contain data for - // "ikey". - break; - } - } else { - // "ikey" falls in the range for this table. Add the - // approximate offset of "ikey" within the table. - Table* tableptr; - Iterator* iter = table_cache_->NewIterator( - ReadOptions(), files[i]->number, files[i]->file_size, &tableptr); - if (tableptr != NULL) { - result += tableptr->ApproximateOffsetOf(ikey.Encode()); - } - delete iter; - } - } - } - return result; -} - -void VersionSet::AddLiveFiles(std::set* live) { - for (Version* v = dummy_versions_.next_; - v != &dummy_versions_; - v = v->next_) { - for (int level = 0; level < config::kNumLevels; level++) { - const std::vector& files = v->files_[level]; - for (size_t i = 0; i < files.size(); i++) { - live->insert(files[i]->number); - } - } - } -} - -int64_t VersionSet::NumLevelBytes(int level) const { - assert(level >= 0); - assert(level < config::kNumLevels); - return TotalFileSize(current_->files_[level]); -} - -int64_t VersionSet::MaxNextLevelOverlappingBytes() { - int64_t result = 0; - std::vector overlaps; - for (int level = 1; level < config::kNumLevels - 1; level++) { - for (size_t i = 0; i < current_->files_[level].size(); i++) { - const FileMetaData* f = current_->files_[level][i]; - current_->GetOverlappingInputs(level+1, &f->smallest, &f->largest, - &overlaps); - const int64_t sum = TotalFileSize(overlaps); - if (sum > result) { - result = sum; - } - } - } - return result; -} - -// Stores the minimal range that covers all entries in inputs in -// *smallest, *largest. -// REQUIRES: inputs is not empty -void VersionSet::GetRange(const std::vector& inputs, - InternalKey* smallest, - InternalKey* largest) { - assert(!inputs.empty()); - smallest->Clear(); - largest->Clear(); - for (size_t i = 0; i < inputs.size(); i++) { - FileMetaData* f = inputs[i]; - if (i == 0) { - *smallest = f->smallest; - *largest = f->largest; - } else { - if (icmp_.Compare(f->smallest, *smallest) < 0) { - *smallest = f->smallest; - } - if (icmp_.Compare(f->largest, *largest) > 0) { - *largest = f->largest; - } - } - } -} - -// Stores the minimal range that covers all entries in inputs1 and inputs2 -// in *smallest, *largest. -// REQUIRES: inputs is not empty -void VersionSet::GetRange2(const std::vector& inputs1, - const std::vector& inputs2, - InternalKey* smallest, - InternalKey* largest) { - std::vector all = inputs1; - all.insert(all.end(), inputs2.begin(), inputs2.end()); - GetRange(all, smallest, largest); -} - -Iterator* VersionSet::MakeInputIterator(Compaction* c) { - ReadOptions options; - options.verify_checksums = options_->paranoid_checks; - options.fill_cache = false; - - // Level-0 files have to be merged together. For other levels, - // we will make a concatenating iterator per level. - // TODO(opt): use concatenating iterator for level-0 if there is no overlap - const int space = (c->level() == 0 ? c->inputs_[0].size() + 1 : 2); - Iterator** list = new Iterator*[space]; - int num = 0; - for (int which = 0; which < 2; which++) { - if (!c->inputs_[which].empty()) { - if (c->level() + which == 0) { - const std::vector& files = c->inputs_[which]; - for (size_t i = 0; i < files.size(); i++) { - list[num++] = table_cache_->NewIterator( - options, files[i]->number, files[i]->file_size); - } - } else { - // Create concatenating iterator for the files from this level - list[num++] = NewTwoLevelIterator( - new Version::LevelFileNumIterator(icmp_, &c->inputs_[which]), - &GetFileIterator, table_cache_, options); - } - } - } - assert(num <= space); - Iterator* result = NewMergingIterator(&icmp_, list, num); - delete[] list; - return result; -} - -Compaction* VersionSet::PickCompaction() { - Compaction* c; - int level; - - // We prefer compactions triggered by too much data in a level over - // the compactions triggered by seeks. - const bool size_compaction = (current_->compaction_score_ >= 1); - const bool seek_compaction = (current_->file_to_compact_ != NULL); - if (size_compaction) { - level = current_->compaction_level_; - assert(level >= 0); - assert(level+1 < config::kNumLevels); - c = new Compaction(level); - - // Pick the first file that comes after compact_pointer_[level] - for (size_t i = 0; i < current_->files_[level].size(); i++) { - FileMetaData* f = current_->files_[level][i]; - if (compact_pointer_[level].empty() || - icmp_.Compare(f->largest.Encode(), compact_pointer_[level]) > 0) { - c->inputs_[0].push_back(f); - break; - } - } - if (c->inputs_[0].empty()) { - // Wrap-around to the beginning of the key space - c->inputs_[0].push_back(current_->files_[level][0]); - } - } else if (seek_compaction) { - level = current_->file_to_compact_level_; - c = new Compaction(level); - c->inputs_[0].push_back(current_->file_to_compact_); - } else { - return NULL; - } - - c->input_version_ = current_; - c->input_version_->Ref(); - - // Files in level 0 may overlap each other, so pick up all overlapping ones - if (level == 0) { - InternalKey smallest, largest; - GetRange(c->inputs_[0], &smallest, &largest); - // Note that the next call will discard the file we placed in - // c->inputs_[0] earlier and replace it with an overlapping set - // which will include the picked file. - current_->GetOverlappingInputs(0, &smallest, &largest, &c->inputs_[0]); - assert(!c->inputs_[0].empty()); - } - - SetupOtherInputs(c); - - return c; -} - -void VersionSet::SetupOtherInputs(Compaction* c) { - const int level = c->level(); - InternalKey smallest, largest; - GetRange(c->inputs_[0], &smallest, &largest); - - current_->GetOverlappingInputs(level+1, &smallest, &largest, &c->inputs_[1]); - - // Get entire range covered by compaction - InternalKey all_start, all_limit; - GetRange2(c->inputs_[0], c->inputs_[1], &all_start, &all_limit); - - // See if we can grow the number of inputs in "level" without - // changing the number of "level+1" files we pick up. - if (!c->inputs_[1].empty()) { - std::vector expanded0; - current_->GetOverlappingInputs(level, &all_start, &all_limit, &expanded0); - const int64_t inputs0_size = TotalFileSize(c->inputs_[0]); - const int64_t inputs1_size = TotalFileSize(c->inputs_[1]); - const int64_t expanded0_size = TotalFileSize(expanded0); - if (expanded0.size() > c->inputs_[0].size() && - inputs1_size + expanded0_size < kExpandedCompactionByteSizeLimit) { - InternalKey new_start, new_limit; - GetRange(expanded0, &new_start, &new_limit); - std::vector expanded1; - current_->GetOverlappingInputs(level+1, &new_start, &new_limit, - &expanded1); - if (expanded1.size() == c->inputs_[1].size()) { - Log(options_->info_log, - "Expanding@%d %d+%d (%ld+%ld bytes) to %d+%d (%ld+%ld bytes)\n", - level, - int(c->inputs_[0].size()), - int(c->inputs_[1].size()), - long(inputs0_size), long(inputs1_size), - int(expanded0.size()), - int(expanded1.size()), - long(expanded0_size), long(inputs1_size)); - smallest = new_start; - largest = new_limit; - c->inputs_[0] = expanded0; - c->inputs_[1] = expanded1; - GetRange2(c->inputs_[0], c->inputs_[1], &all_start, &all_limit); - } - } - } - - // Compute the set of grandparent files that overlap this compaction - // (parent == level+1; grandparent == level+2) - if (level + 2 < config::kNumLevels) { - current_->GetOverlappingInputs(level + 2, &all_start, &all_limit, - &c->grandparents_); - } - - if (false) { - Log(options_->info_log, "Compacting %d '%s' .. '%s'", - level, - smallest.DebugString().c_str(), - largest.DebugString().c_str()); - } - - // Update the place where we will do the next compaction for this level. - // We update this immediately instead of waiting for the VersionEdit - // to be applied so that if the compaction fails, we will try a different - // key range next time. - compact_pointer_[level] = largest.Encode().ToString(); - c->edit_.SetCompactPointer(level, largest); -} - -Compaction* VersionSet::CompactRange( - int level, - const InternalKey* begin, - const InternalKey* end) { - std::vector inputs; - current_->GetOverlappingInputs(level, begin, end, &inputs); - if (inputs.empty()) { - return NULL; - } - - // Avoid compacting too much in one shot in case the range is large. - const uint64_t limit = MaxFileSizeForLevel(level); - uint64_t total = 0; - for (size_t i = 0; i < inputs.size(); i++) { - uint64_t s = inputs[i]->file_size; - total += s; - if (total >= limit) { - inputs.resize(i + 1); - break; - } - } - - Compaction* c = new Compaction(level); - c->input_version_ = current_; - c->input_version_->Ref(); - c->inputs_[0] = inputs; - SetupOtherInputs(c); - return c; -} - -Compaction::Compaction(int level) - : level_(level), - max_output_file_size_(MaxFileSizeForLevel(level)), - input_version_(NULL), - grandparent_index_(0), - seen_key_(false), - overlapped_bytes_(0) { - for (int i = 0; i < config::kNumLevels; i++) { - level_ptrs_[i] = 0; - } -} - -Compaction::~Compaction() { - if (input_version_ != NULL) { - input_version_->Unref(); - } -} - -bool Compaction::IsTrivialMove() const { - // Avoid a move if there is lots of overlapping grandparent data. - // Otherwise, the move could create a parent file that will require - // a very expensive merge later on. - return (num_input_files(0) == 1 && - num_input_files(1) == 0 && - TotalFileSize(grandparents_) <= kMaxGrandParentOverlapBytes); -} - -void Compaction::AddInputDeletions(VersionEdit* edit) { - for (int which = 0; which < 2; which++) { - for (size_t i = 0; i < inputs_[which].size(); i++) { - edit->DeleteFile(level_ + which, inputs_[which][i]->number); - } - } -} - -bool Compaction::IsBaseLevelForKey(const Slice& user_key) { - // Maybe use binary search to find right entry instead of linear search? - const Comparator* user_cmp = input_version_->vset_->icmp_.user_comparator(); - for (int lvl = level_ + 2; lvl < config::kNumLevels; lvl++) { - const std::vector& files = input_version_->files_[lvl]; - for (; level_ptrs_[lvl] < files.size(); ) { - FileMetaData* f = files[level_ptrs_[lvl]]; - if (user_cmp->Compare(user_key, f->largest.user_key()) <= 0) { - // We've advanced far enough - if (user_cmp->Compare(user_key, f->smallest.user_key()) >= 0) { - // Key falls in this file's range, so definitely not base level - return false; - } - break; - } - level_ptrs_[lvl]++; - } - } - return true; -} - -bool Compaction::ShouldStopBefore(const Slice& internal_key) { - // Scan to find earliest grandparent file that contains key. - const InternalKeyComparator* icmp = &input_version_->vset_->icmp_; - while (grandparent_index_ < grandparents_.size() && - icmp->Compare(internal_key, - grandparents_[grandparent_index_]->largest.Encode()) > 0) { - if (seen_key_) { - overlapped_bytes_ += grandparents_[grandparent_index_]->file_size; - } - grandparent_index_++; - } - seen_key_ = true; - - if (overlapped_bytes_ > kMaxGrandParentOverlapBytes) { - // Too much overlap for current output; start new output - overlapped_bytes_ = 0; - return true; - } else { - return false; - } -} - -void Compaction::ReleaseInputs() { - if (input_version_ != NULL) { - input_version_->Unref(); - input_version_ = NULL; - } -} - -} // namespace leveldb diff --git a/leveldb/db/version_set.h b/leveldb/db/version_set.h deleted file mode 100644 index 61c4c99..0000000 --- a/leveldb/db/version_set.h +++ /dev/null @@ -1,379 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. -// -// The representation of a DBImpl consists of a set of Versions. The -// newest version is called "current". Older versions may be kept -// around to provide a consistent view to live iterators. -// -// Each Version keeps track of a set of Table files per level. The -// entire set of versions is maintained in a VersionSet. -// -// Version,VersionSet are thread-compatible, but require external -// synchronization on all accesses. - -#ifndef STORAGE_LEVELDB_DB_VERSION_SET_H_ -#define STORAGE_LEVELDB_DB_VERSION_SET_H_ - -#include -#include -#include -#include "db/dbformat.h" -#include "db/version_edit.h" -#include "port/port.h" - -namespace leveldb { - -namespace log { class Writer; } - -class Compaction; -class Iterator; -class MemTable; -class TableBuilder; -class TableCache; -class Version; -class VersionSet; -class WritableFile; - -// Return the smallest index i such that files[i]->largest >= key. -// Return files.size() if there is no such file. -// REQUIRES: "files" contains a sorted list of non-overlapping files. -extern int FindFile(const InternalKeyComparator& icmp, - const std::vector& files, - const Slice& key); - -// Returns true iff some file in "files" overlaps the user key range -// [*smallest,*largest]. -// smallest==NULL represents a key smaller than all keys in the DB. -// largest==NULL represents a key largest than all keys in the DB. -// REQUIRES: If disjoint_sorted_files, files[] contains disjoint ranges -// in sorted order. -extern bool SomeFileOverlapsRange( - const InternalKeyComparator& icmp, - bool disjoint_sorted_files, - const std::vector& files, - const Slice* smallest_user_key, - const Slice* largest_user_key); - -class Version { - public: - // Append to *iters a sequence of iterators that will - // yield the contents of this Version when merged together. - // REQUIRES: This version has been saved (see VersionSet::SaveTo) - void AddIterators(const ReadOptions&, std::vector* iters); - - // Lookup the value for key. If found, store it in *val and - // return OK. Else return a non-OK status. Fills *stats. - // REQUIRES: lock is not held - struct GetStats { - FileMetaData* seek_file; - int seek_file_level; - }; - Status Get(const ReadOptions&, const LookupKey& key, std::string* val, - GetStats* stats); - - // Adds "stats" into the current state. Returns true if a new - // compaction may need to be triggered, false otherwise. - // REQUIRES: lock is held - bool UpdateStats(const GetStats& stats); - - // Reference count management (so Versions do not disappear out from - // under live iterators) - void Ref(); - void Unref(); - - void GetOverlappingInputs( - int level, - const InternalKey* begin, // NULL means before all keys - const InternalKey* end, // NULL means after all keys - std::vector* inputs); - - // Returns true iff some file in the specified level overlaps - // some part of [*smallest_user_key,*largest_user_key]. - // smallest_user_key==NULL represents a key smaller than all keys in the DB. - // largest_user_key==NULL represents a key largest than all keys in the DB. - bool OverlapInLevel(int level, - const Slice* smallest_user_key, - const Slice* largest_user_key); - - // Return the level at which we should place a new memtable compaction - // result that covers the range [smallest_user_key,largest_user_key]. - int PickLevelForMemTableOutput(const Slice& smallest_user_key, - const Slice& largest_user_key); - - int NumFiles(int level) const { return files_[level].size(); } - - // Return a human readable string that describes this version's contents. - std::string DebugString() const; - - private: - friend class Compaction; - friend class VersionSet; - - class LevelFileNumIterator; - Iterator* NewConcatenatingIterator(const ReadOptions&, int level) const; - - VersionSet* vset_; // VersionSet to which this Version belongs - Version* next_; // Next version in linked list - Version* prev_; // Previous version in linked list - int refs_; // Number of live refs to this version - - // List of files per level - std::vector files_[config::kNumLevels]; - - // Next file to compact based on seek stats. - FileMetaData* file_to_compact_; - int file_to_compact_level_; - - // Level that should be compacted next and its compaction score. - // Score < 1 means compaction is not strictly needed. These fields - // are initialized by Finalize(). - double compaction_score_; - int compaction_level_; - - explicit Version(VersionSet* vset) - : vset_(vset), next_(this), prev_(this), refs_(0), - file_to_compact_(NULL), - file_to_compact_level_(-1), - compaction_score_(-1), - compaction_level_(-1) { - } - - ~Version(); - - // No copying allowed - Version(const Version&); - void operator=(const Version&); -}; - -class VersionSet { - public: - VersionSet(const std::string& dbname, - const Options* options, - TableCache* table_cache, - const InternalKeyComparator*); - ~VersionSet(); - - // Apply *edit to the current version to form a new descriptor that - // is both saved to persistent state and installed as the new - // current version. Will release *mu while actually writing to the file. - // REQUIRES: *mu is held on entry. - // REQUIRES: no other thread concurrently calls LogAndApply() - Status LogAndApply(VersionEdit* edit, port::Mutex* mu); - - // Recover the last saved descriptor from persistent storage. - Status Recover(); - - // Return the current version. - Version* current() const { return current_; } - - // Return the current manifest file number - uint64_t ManifestFileNumber() const { return manifest_file_number_; } - - // Allocate and return a new file number - uint64_t NewFileNumber() { return next_file_number_++; } - - // Arrange to reuse "file_number" unless a newer file number has - // already been allocated. - // REQUIRES: "file_number" was returned by a call to NewFileNumber(). - void ReuseFileNumber(uint64_t file_number) { - if (next_file_number_ == file_number + 1) { - next_file_number_ = file_number; - } - } - - // Return the number of Table files at the specified level. - int NumLevelFiles(int level) const; - - // Return the combined file size of all files at the specified level. - int64_t NumLevelBytes(int level) const; - - // Return the last sequence number. - uint64_t LastSequence() const { return last_sequence_; } - - // Set the last sequence number to s. - void SetLastSequence(uint64_t s) { - assert(s >= last_sequence_); - last_sequence_ = s; - } - - // Mark the specified file number as used. - void MarkFileNumberUsed(uint64_t number); - - // Return the current log file number. - uint64_t LogNumber() const { return log_number_; } - - // Return the log file number for the log file that is currently - // being compacted, or zero if there is no such log file. - uint64_t PrevLogNumber() const { return prev_log_number_; } - - // Pick level and inputs for a new compaction. - // Returns NULL if there is no compaction to be done. - // Otherwise returns a pointer to a heap-allocated object that - // describes the compaction. Caller should delete the result. - Compaction* PickCompaction(); - - // Return a compaction object for compacting the range [begin,end] in - // the specified level. Returns NULL if there is nothing in that - // level that overlaps the specified range. Caller should delete - // the result. - Compaction* CompactRange( - int level, - const InternalKey* begin, - const InternalKey* end); - - // Return the maximum overlapping data (in bytes) at next level for any - // file at a level >= 1. - int64_t MaxNextLevelOverlappingBytes(); - - // Create an iterator that reads over the compaction inputs for "*c". - // The caller should delete the iterator when no longer needed. - Iterator* MakeInputIterator(Compaction* c); - - // Returns true iff some level needs a compaction. - bool NeedsCompaction() const { - Version* v = current_; - return (v->compaction_score_ >= 1) || (v->file_to_compact_ != NULL); - } - - // Add all files listed in any live version to *live. - // May also mutate some internal state. - void AddLiveFiles(std::set* live); - - // Return the approximate offset in the database of the data for - // "key" as of version "v". - uint64_t ApproximateOffsetOf(Version* v, const InternalKey& key); - - // Return a human-readable short (single-line) summary of the number - // of files per level. Uses *scratch as backing store. - struct LevelSummaryStorage { - char buffer[100]; - }; - const char* LevelSummary(LevelSummaryStorage* scratch) const; - - private: - class Builder; - - friend class Compaction; - friend class Version; - - void Finalize(Version* v); - - void GetRange(const std::vector& inputs, - InternalKey* smallest, - InternalKey* largest); - - void GetRange2(const std::vector& inputs1, - const std::vector& inputs2, - InternalKey* smallest, - InternalKey* largest); - - void SetupOtherInputs(Compaction* c); - - // Save current contents to *log - Status WriteSnapshot(log::Writer* log); - - void AppendVersion(Version* v); - - Env* const env_; - const std::string dbname_; - const Options* const options_; - TableCache* const table_cache_; - const InternalKeyComparator icmp_; - uint64_t next_file_number_; - uint64_t manifest_file_number_; - uint64_t last_sequence_; - uint64_t log_number_; - uint64_t prev_log_number_; // 0 or backing store for memtable being compacted - - // Opened lazily - WritableFile* descriptor_file_; - log::Writer* descriptor_log_; - Version dummy_versions_; // Head of circular doubly-linked list of versions. - Version* current_; // == dummy_versions_.prev_ - - // Per-level key at which the next compaction at that level should start. - // Either an empty string, or a valid InternalKey. - std::string compact_pointer_[config::kNumLevels]; - - // No copying allowed - VersionSet(const VersionSet&); - void operator=(const VersionSet&); -}; - -// A Compaction encapsulates information about a compaction. -class Compaction { - public: - ~Compaction(); - - // Return the level that is being compacted. Inputs from "level" - // and "level+1" will be merged to produce a set of "level+1" files. - int level() const { return level_; } - - // Return the object that holds the edits to the descriptor done - // by this compaction. - VersionEdit* edit() { return &edit_; } - - // "which" must be either 0 or 1 - int num_input_files(int which) const { return inputs_[which].size(); } - - // Return the ith input file at "level()+which" ("which" must be 0 or 1). - FileMetaData* input(int which, int i) const { return inputs_[which][i]; } - - // Maximum size of files to build during this compaction. - uint64_t MaxOutputFileSize() const { return max_output_file_size_; } - - // Is this a trivial compaction that can be implemented by just - // moving a single input file to the next level (no merging or splitting) - bool IsTrivialMove() const; - - // Add all inputs to this compaction as delete operations to *edit. - void AddInputDeletions(VersionEdit* edit); - - // Returns true if the information we have available guarantees that - // the compaction is producing data in "level+1" for which no data exists - // in levels greater than "level+1". - bool IsBaseLevelForKey(const Slice& user_key); - - // Returns true iff we should stop building the current output - // before processing "internal_key". - bool ShouldStopBefore(const Slice& internal_key); - - // Release the input version for the compaction, once the compaction - // is successful. - void ReleaseInputs(); - - private: - friend class Version; - friend class VersionSet; - - explicit Compaction(int level); - - int level_; - uint64_t max_output_file_size_; - Version* input_version_; - VersionEdit edit_; - - // Each compaction reads inputs from "level_" and "level_+1" - std::vector inputs_[2]; // The two sets of inputs - - // State used to check for number of of overlapping grandparent files - // (parent == level_ + 1, grandparent == level_ + 2) - std::vector grandparents_; - size_t grandparent_index_; // Index in grandparent_starts_ - bool seen_key_; // Some output key has been seen - int64_t overlapped_bytes_; // Bytes of overlap between current output - // and grandparent files - - // State for implementing IsBaseLevelForKey - - // level_ptrs_ holds indices into input_version_->levels_: our state - // is that we are positioned at one of the file ranges for each - // higher level than the ones involved in this compaction (i.e. for - // all L >= level_ + 2). - size_t level_ptrs_[config::kNumLevels]; -}; - -} // namespace leveldb - -#endif // STORAGE_LEVELDB_DB_VERSION_SET_H_ diff --git a/leveldb/db/version_set_test.cc b/leveldb/db/version_set_test.cc deleted file mode 100644 index 501e34d..0000000 --- a/leveldb/db/version_set_test.cc +++ /dev/null @@ -1,179 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "db/version_set.h" -#include "util/logging.h" -#include "util/testharness.h" -#include "util/testutil.h" - -namespace leveldb { - -class FindFileTest { - public: - std::vector files_; - bool disjoint_sorted_files_; - - FindFileTest() : disjoint_sorted_files_(true) { } - - ~FindFileTest() { - for (int i = 0; i < files_.size(); i++) { - delete files_[i]; - } - } - - void Add(const char* smallest, const char* largest, - SequenceNumber smallest_seq = 100, - SequenceNumber largest_seq = 100) { - FileMetaData* f = new FileMetaData; - f->number = files_.size() + 1; - f->smallest = InternalKey(smallest, smallest_seq, kTypeValue); - f->largest = InternalKey(largest, largest_seq, kTypeValue); - files_.push_back(f); - } - - int Find(const char* key) { - InternalKey target(key, 100, kTypeValue); - InternalKeyComparator cmp(BytewiseComparator()); - return FindFile(cmp, files_, target.Encode()); - } - - bool Overlaps(const char* smallest, const char* largest) { - InternalKeyComparator cmp(BytewiseComparator()); - Slice s(smallest != NULL ? smallest : ""); - Slice l(largest != NULL ? largest : ""); - return SomeFileOverlapsRange(cmp, disjoint_sorted_files_, files_, - (smallest != NULL ? &s : NULL), - (largest != NULL ? &l : NULL)); - } -}; - -TEST(FindFileTest, Empty) { - ASSERT_EQ(0, Find("foo")); - ASSERT_TRUE(! Overlaps("a", "z")); - ASSERT_TRUE(! Overlaps(NULL, "z")); - ASSERT_TRUE(! Overlaps("a", NULL)); - ASSERT_TRUE(! Overlaps(NULL, NULL)); -} - -TEST(FindFileTest, Single) { - Add("p", "q"); - ASSERT_EQ(0, Find("a")); - ASSERT_EQ(0, Find("p")); - ASSERT_EQ(0, Find("p1")); - ASSERT_EQ(0, Find("q")); - ASSERT_EQ(1, Find("q1")); - ASSERT_EQ(1, Find("z")); - - ASSERT_TRUE(! Overlaps("a", "b")); - ASSERT_TRUE(! Overlaps("z1", "z2")); - ASSERT_TRUE(Overlaps("a", "p")); - ASSERT_TRUE(Overlaps("a", "q")); - ASSERT_TRUE(Overlaps("a", "z")); - ASSERT_TRUE(Overlaps("p", "p1")); - ASSERT_TRUE(Overlaps("p", "q")); - ASSERT_TRUE(Overlaps("p", "z")); - ASSERT_TRUE(Overlaps("p1", "p2")); - ASSERT_TRUE(Overlaps("p1", "z")); - ASSERT_TRUE(Overlaps("q", "q")); - ASSERT_TRUE(Overlaps("q", "q1")); - - ASSERT_TRUE(! Overlaps(NULL, "j")); - ASSERT_TRUE(! Overlaps("r", NULL)); - ASSERT_TRUE(Overlaps(NULL, "p")); - ASSERT_TRUE(Overlaps(NULL, "p1")); - ASSERT_TRUE(Overlaps("q", NULL)); - ASSERT_TRUE(Overlaps(NULL, NULL)); -} - - -TEST(FindFileTest, Multiple) { - Add("150", "200"); - Add("200", "250"); - Add("300", "350"); - Add("400", "450"); - ASSERT_EQ(0, Find("100")); - ASSERT_EQ(0, Find("150")); - ASSERT_EQ(0, Find("151")); - ASSERT_EQ(0, Find("199")); - ASSERT_EQ(0, Find("200")); - ASSERT_EQ(1, Find("201")); - ASSERT_EQ(1, Find("249")); - ASSERT_EQ(1, Find("250")); - ASSERT_EQ(2, Find("251")); - ASSERT_EQ(2, Find("299")); - ASSERT_EQ(2, Find("300")); - ASSERT_EQ(2, Find("349")); - ASSERT_EQ(2, Find("350")); - ASSERT_EQ(3, Find("351")); - ASSERT_EQ(3, Find("400")); - ASSERT_EQ(3, Find("450")); - ASSERT_EQ(4, Find("451")); - - ASSERT_TRUE(! Overlaps("100", "149")); - ASSERT_TRUE(! Overlaps("251", "299")); - ASSERT_TRUE(! Overlaps("451", "500")); - ASSERT_TRUE(! Overlaps("351", "399")); - - ASSERT_TRUE(Overlaps("100", "150")); - ASSERT_TRUE(Overlaps("100", "200")); - ASSERT_TRUE(Overlaps("100", "300")); - ASSERT_TRUE(Overlaps("100", "400")); - ASSERT_TRUE(Overlaps("100", "500")); - ASSERT_TRUE(Overlaps("375", "400")); - ASSERT_TRUE(Overlaps("450", "450")); - ASSERT_TRUE(Overlaps("450", "500")); -} - -TEST(FindFileTest, MultipleNullBoundaries) { - Add("150", "200"); - Add("200", "250"); - Add("300", "350"); - Add("400", "450"); - ASSERT_TRUE(! Overlaps(NULL, "149")); - ASSERT_TRUE(! Overlaps("451", NULL)); - ASSERT_TRUE(Overlaps(NULL, NULL)); - ASSERT_TRUE(Overlaps(NULL, "150")); - ASSERT_TRUE(Overlaps(NULL, "199")); - ASSERT_TRUE(Overlaps(NULL, "200")); - ASSERT_TRUE(Overlaps(NULL, "201")); - ASSERT_TRUE(Overlaps(NULL, "400")); - ASSERT_TRUE(Overlaps(NULL, "800")); - ASSERT_TRUE(Overlaps("100", NULL)); - ASSERT_TRUE(Overlaps("200", NULL)); - ASSERT_TRUE(Overlaps("449", NULL)); - ASSERT_TRUE(Overlaps("450", NULL)); -} - -TEST(FindFileTest, OverlapSequenceChecks) { - Add("200", "200", 5000, 3000); - ASSERT_TRUE(! Overlaps("199", "199")); - ASSERT_TRUE(! Overlaps("201", "300")); - ASSERT_TRUE(Overlaps("200", "200")); - ASSERT_TRUE(Overlaps("190", "200")); - ASSERT_TRUE(Overlaps("200", "210")); -} - -TEST(FindFileTest, OverlappingFiles) { - Add("150", "600"); - Add("400", "500"); - disjoint_sorted_files_ = false; - ASSERT_TRUE(! Overlaps("100", "149")); - ASSERT_TRUE(! Overlaps("601", "700")); - ASSERT_TRUE(Overlaps("100", "150")); - ASSERT_TRUE(Overlaps("100", "200")); - ASSERT_TRUE(Overlaps("100", "300")); - ASSERT_TRUE(Overlaps("100", "400")); - ASSERT_TRUE(Overlaps("100", "500")); - ASSERT_TRUE(Overlaps("375", "400")); - ASSERT_TRUE(Overlaps("450", "450")); - ASSERT_TRUE(Overlaps("450", "500")); - ASSERT_TRUE(Overlaps("450", "700")); - ASSERT_TRUE(Overlaps("600", "700")); -} - -} // namespace leveldb - -int main(int argc, char** argv) { - return leveldb::test::RunAllTests(); -} diff --git a/leveldb/db/write_batch.cc b/leveldb/db/write_batch.cc deleted file mode 100644 index 33f4a42..0000000 --- a/leveldb/db/write_batch.cc +++ /dev/null @@ -1,147 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. -// -// WriteBatch::rep_ := -// sequence: fixed64 -// count: fixed32 -// data: record[count] -// record := -// kTypeValue varstring varstring | -// kTypeDeletion varstring -// varstring := -// len: varint32 -// data: uint8[len] - -#include "leveldb/write_batch.h" - -#include "leveldb/db.h" -#include "db/dbformat.h" -#include "db/memtable.h" -#include "db/write_batch_internal.h" -#include "util/coding.h" - -namespace leveldb { - -// WriteBatch header has an 8-byte sequence number followed by a 4-byte count. -static const size_t kHeader = 12; - -WriteBatch::WriteBatch() { - Clear(); -} - -WriteBatch::~WriteBatch() { } - -WriteBatch::Handler::~Handler() { } - -void WriteBatch::Clear() { - rep_.clear(); - rep_.resize(kHeader); -} - -Status WriteBatch::Iterate(Handler* handler) const { - Slice input(rep_); - if (input.size() < kHeader) { - return Status::Corruption("malformed WriteBatch (too small)"); - } - - input.remove_prefix(kHeader); - Slice key, value; - int found = 0; - while (!input.empty()) { - found++; - char tag = input[0]; - input.remove_prefix(1); - switch (tag) { - case kTypeValue: - if (GetLengthPrefixedSlice(&input, &key) && - GetLengthPrefixedSlice(&input, &value)) { - handler->Put(key, value); - } else { - return Status::Corruption("bad WriteBatch Put"); - } - break; - case kTypeDeletion: - if (GetLengthPrefixedSlice(&input, &key)) { - handler->Delete(key); - } else { - return Status::Corruption("bad WriteBatch Delete"); - } - break; - default: - return Status::Corruption("unknown WriteBatch tag"); - } - } - if (found != WriteBatchInternal::Count(this)) { - return Status::Corruption("WriteBatch has wrong count"); - } else { - return Status::OK(); - } -} - -int WriteBatchInternal::Count(const WriteBatch* b) { - return DecodeFixed32(b->rep_.data() + 8); -} - -void WriteBatchInternal::SetCount(WriteBatch* b, int n) { - EncodeFixed32(&b->rep_[8], n); -} - -SequenceNumber WriteBatchInternal::Sequence(const WriteBatch* b) { - return SequenceNumber(DecodeFixed64(b->rep_.data())); -} - -void WriteBatchInternal::SetSequence(WriteBatch* b, SequenceNumber seq) { - EncodeFixed64(&b->rep_[0], seq); -} - -void WriteBatch::Put(const Slice& key, const Slice& value) { - WriteBatchInternal::SetCount(this, WriteBatchInternal::Count(this) + 1); - rep_.push_back(static_cast(kTypeValue)); - PutLengthPrefixedSlice(&rep_, key); - PutLengthPrefixedSlice(&rep_, value); -} - -void WriteBatch::Delete(const Slice& key) { - WriteBatchInternal::SetCount(this, WriteBatchInternal::Count(this) + 1); - rep_.push_back(static_cast(kTypeDeletion)); - PutLengthPrefixedSlice(&rep_, key); -} - -namespace { -class MemTableInserter : public WriteBatch::Handler { - public: - SequenceNumber sequence_; - MemTable* mem_; - - virtual void Put(const Slice& key, const Slice& value) { - mem_->Add(sequence_, kTypeValue, key, value); - sequence_++; - } - virtual void Delete(const Slice& key) { - mem_->Add(sequence_, kTypeDeletion, key, Slice()); - sequence_++; - } -}; -} // namespace - -Status WriteBatchInternal::InsertInto(const WriteBatch* b, - MemTable* memtable) { - MemTableInserter inserter; - inserter.sequence_ = WriteBatchInternal::Sequence(b); - inserter.mem_ = memtable; - return b->Iterate(&inserter); -} - -void WriteBatchInternal::SetContents(WriteBatch* b, const Slice& contents) { - assert(contents.size() >= kHeader); - b->rep_.assign(contents.data(), contents.size()); -} - -void WriteBatchInternal::Append(WriteBatch* dst, const WriteBatch* src) { - SetCount(dst, Count(dst) + Count(src)); - assert(src->rep_.size() >= kHeader); - dst->rep_.append(src->rep_.data() + kHeader, src->rep_.size() - kHeader); -} - -} // namespace leveldb diff --git a/leveldb/db/write_batch_internal.h b/leveldb/db/write_batch_internal.h deleted file mode 100644 index 4423a7f..0000000 --- a/leveldb/db/write_batch_internal.h +++ /dev/null @@ -1,49 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#ifndef STORAGE_LEVELDB_DB_WRITE_BATCH_INTERNAL_H_ -#define STORAGE_LEVELDB_DB_WRITE_BATCH_INTERNAL_H_ - -#include "leveldb/write_batch.h" - -namespace leveldb { - -class MemTable; - -// WriteBatchInternal provides static methods for manipulating a -// WriteBatch that we don't want in the public WriteBatch interface. -class WriteBatchInternal { - public: - // Return the number of entries in the batch. - static int Count(const WriteBatch* batch); - - // Set the count for the number of entries in the batch. - static void SetCount(WriteBatch* batch, int n); - - // Return the seqeunce number for the start of this batch. - static SequenceNumber Sequence(const WriteBatch* batch); - - // Store the specified number as the seqeunce number for the start of - // this batch. - static void SetSequence(WriteBatch* batch, SequenceNumber seq); - - static Slice Contents(const WriteBatch* batch) { - return Slice(batch->rep_); - } - - static size_t ByteSize(const WriteBatch* batch) { - return batch->rep_.size(); - } - - static void SetContents(WriteBatch* batch, const Slice& contents); - - static Status InsertInto(const WriteBatch* batch, MemTable* memtable); - - static void Append(WriteBatch* dst, const WriteBatch* src); -}; - -} // namespace leveldb - - -#endif // STORAGE_LEVELDB_DB_WRITE_BATCH_INTERNAL_H_ diff --git a/leveldb/db/write_batch_test.cc b/leveldb/db/write_batch_test.cc deleted file mode 100644 index 9064e3d..0000000 --- a/leveldb/db/write_batch_test.cc +++ /dev/null @@ -1,120 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "leveldb/db.h" - -#include "db/memtable.h" -#include "db/write_batch_internal.h" -#include "leveldb/env.h" -#include "util/logging.h" -#include "util/testharness.h" - -namespace leveldb { - -static std::string PrintContents(WriteBatch* b) { - InternalKeyComparator cmp(BytewiseComparator()); - MemTable* mem = new MemTable(cmp); - mem->Ref(); - std::string state; - Status s = WriteBatchInternal::InsertInto(b, mem); - int count = 0; - Iterator* iter = mem->NewIterator(); - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - ParsedInternalKey ikey; - ASSERT_TRUE(ParseInternalKey(iter->key(), &ikey)); - switch (ikey.type) { - case kTypeValue: - state.append("Put("); - state.append(ikey.user_key.ToString()); - state.append(", "); - state.append(iter->value().ToString()); - state.append(")"); - count++; - break; - case kTypeDeletion: - state.append("Delete("); - state.append(ikey.user_key.ToString()); - state.append(")"); - count++; - break; - } - state.append("@"); - state.append(NumberToString(ikey.sequence)); - } - delete iter; - if (!s.ok()) { - state.append("ParseError()"); - } else if (count != WriteBatchInternal::Count(b)) { - state.append("CountMismatch()"); - } - mem->Unref(); - return state; -} - -class WriteBatchTest { }; - -TEST(WriteBatchTest, Empty) { - WriteBatch batch; - ASSERT_EQ("", PrintContents(&batch)); - ASSERT_EQ(0, WriteBatchInternal::Count(&batch)); -} - -TEST(WriteBatchTest, Multiple) { - WriteBatch batch; - batch.Put(Slice("foo"), Slice("bar")); - batch.Delete(Slice("box")); - batch.Put(Slice("baz"), Slice("boo")); - WriteBatchInternal::SetSequence(&batch, 100); - ASSERT_EQ(100, WriteBatchInternal::Sequence(&batch)); - ASSERT_EQ(3, WriteBatchInternal::Count(&batch)); - ASSERT_EQ("Put(baz, boo)@102" - "Delete(box)@101" - "Put(foo, bar)@100", - PrintContents(&batch)); -} - -TEST(WriteBatchTest, Corruption) { - WriteBatch batch; - batch.Put(Slice("foo"), Slice("bar")); - batch.Delete(Slice("box")); - WriteBatchInternal::SetSequence(&batch, 200); - Slice contents = WriteBatchInternal::Contents(&batch); - WriteBatchInternal::SetContents(&batch, - Slice(contents.data(),contents.size()-1)); - ASSERT_EQ("Put(foo, bar)@200" - "ParseError()", - PrintContents(&batch)); -} - -TEST(WriteBatchTest, Append) { - WriteBatch b1, b2; - WriteBatchInternal::SetSequence(&b1, 200); - WriteBatchInternal::SetSequence(&b2, 300); - WriteBatchInternal::Append(&b1, &b2); - ASSERT_EQ("", - PrintContents(&b1)); - b2.Put("a", "va"); - WriteBatchInternal::Append(&b1, &b2); - ASSERT_EQ("Put(a, va)@200", - PrintContents(&b1)); - b2.Clear(); - b2.Put("b", "vb"); - WriteBatchInternal::Append(&b1, &b2); - ASSERT_EQ("Put(a, va)@200" - "Put(b, vb)@201", - PrintContents(&b1)); - b2.Delete("foo"); - WriteBatchInternal::Append(&b1, &b2); - ASSERT_EQ("Put(a, va)@200" - "Put(b, vb)@202" - "Put(b, vb)@201" - "Delete(foo)@203", - PrintContents(&b1)); -} - -} // namespace leveldb - -int main(int argc, char** argv) { - return leveldb::test::RunAllTests(); -} diff --git a/leveldb/helpers/memenv/memenv.cc b/leveldb/helpers/memenv/memenv.cc deleted file mode 100644 index 2082083..0000000 --- a/leveldb/helpers/memenv/memenv.cc +++ /dev/null @@ -1,374 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "helpers/memenv/memenv.h" - -#include "leveldb/env.h" -#include "leveldb/status.h" -#include "port/port.h" -#include "util/mutexlock.h" -#include -#include -#include -#include - -namespace leveldb { - -namespace { - -class FileState { - public: - // FileStates are reference counted. The initial reference count is zero - // and the caller must call Ref() at least once. - FileState() : refs_(0), size_(0) {} - - // Increase the reference count. - void Ref() { - MutexLock lock(&refs_mutex_); - ++refs_; - } - - // Decrease the reference count. Delete if this is the last reference. - void Unref() { - bool do_delete = false; - - { - MutexLock lock(&refs_mutex_); - --refs_; - assert(refs_ >= 0); - if (refs_ <= 0) { - do_delete = true; - } - } - - if (do_delete) { - delete this; - } - } - - uint64_t Size() const { return size_; } - - Status Read(uint64_t offset, size_t n, Slice* result, char* scratch) const { - if (offset > size_) { - return Status::IOError("Offset greater than file size."); - } - const uint64_t available = size_ - offset; - if (n > available) { - n = available; - } - if (n == 0) { - *result = Slice(); - return Status::OK(); - } - - size_t block = offset / kBlockSize; - size_t block_offset = offset % kBlockSize; - - if (n <= kBlockSize - block_offset) { - // The requested bytes are all in the first block. - *result = Slice(blocks_[block] + block_offset, n); - return Status::OK(); - } - - size_t bytes_to_copy = n; - char* dst = scratch; - - while (bytes_to_copy > 0) { - size_t avail = kBlockSize - block_offset; - if (avail > bytes_to_copy) { - avail = bytes_to_copy; - } - memcpy(dst, blocks_[block] + block_offset, avail); - - bytes_to_copy -= avail; - dst += avail; - block++; - block_offset = 0; - } - - *result = Slice(scratch, n); - return Status::OK(); - } - - Status Append(const Slice& data) { - const char* src = data.data(); - size_t src_len = data.size(); - - while (src_len > 0) { - size_t avail; - size_t offset = size_ % kBlockSize; - - if (offset != 0) { - // There is some room in the last block. - avail = kBlockSize - offset; - } else { - // No room in the last block; push new one. - blocks_.push_back(new char[kBlockSize]); - avail = kBlockSize; - } - - if (avail > src_len) { - avail = src_len; - } - memcpy(blocks_.back() + offset, src, avail); - src_len -= avail; - src += avail; - size_ += avail; - } - - return Status::OK(); - } - - private: - // Private since only Unref() should be used to delete it. - ~FileState() { - for (std::vector::iterator i = blocks_.begin(); i != blocks_.end(); - ++i) { - delete [] *i; - } - } - - // No copying allowed. - FileState(const FileState&); - void operator=(const FileState&); - - port::Mutex refs_mutex_; - int refs_; // Protected by refs_mutex_; - - // The following fields are not protected by any mutex. They are only mutable - // while the file is being written, and concurrent access is not allowed - // to writable files. - std::vector blocks_; - uint64_t size_; - - enum { kBlockSize = 8 * 1024 }; -}; - -class SequentialFileImpl : public SequentialFile { - public: - explicit SequentialFileImpl(FileState* file) : file_(file), pos_(0) { - file_->Ref(); - } - - ~SequentialFileImpl() { - file_->Unref(); - } - - virtual Status Read(size_t n, Slice* result, char* scratch) { - Status s = file_->Read(pos_, n, result, scratch); - if (s.ok()) { - pos_ += result->size(); - } - return s; - } - - virtual Status Skip(uint64_t n) { - if (pos_ > file_->Size()) { - return Status::IOError("pos_ > file_->Size()"); - } - const size_t available = file_->Size() - pos_; - if (n > available) { - n = available; - } - pos_ += n; - return Status::OK(); - } - - private: - FileState* file_; - size_t pos_; -}; - -class RandomAccessFileImpl : public RandomAccessFile { - public: - explicit RandomAccessFileImpl(FileState* file) : file_(file) { - file_->Ref(); - } - - ~RandomAccessFileImpl() { - file_->Unref(); - } - - virtual Status Read(uint64_t offset, size_t n, Slice* result, - char* scratch) const { - return file_->Read(offset, n, result, scratch); - } - - private: - FileState* file_; -}; - -class WritableFileImpl : public WritableFile { - public: - WritableFileImpl(FileState* file) : file_(file) { - file_->Ref(); - } - - ~WritableFileImpl() { - file_->Unref(); - } - - virtual Status Append(const Slice& data) { - return file_->Append(data); - } - - virtual Status Close() { return Status::OK(); } - virtual Status Flush() { return Status::OK(); } - virtual Status Sync() { return Status::OK(); } - - private: - FileState* file_; -}; - -class InMemoryEnv : public EnvWrapper { - public: - explicit InMemoryEnv(Env* base_env) : EnvWrapper(base_env) { } - - virtual ~InMemoryEnv() { - for (FileSystem::iterator i = file_map_.begin(); i != file_map_.end(); ++i){ - i->second->Unref(); - } - } - - // Partial implementation of the Env interface. - virtual Status NewSequentialFile(const std::string& fname, - SequentialFile** result) { - MutexLock lock(&mutex_); - if (file_map_.find(fname) == file_map_.end()) { - *result = NULL; - return Status::IOError(fname, "File not found"); - } - - *result = new SequentialFileImpl(file_map_[fname]); - return Status::OK(); - } - - virtual Status NewRandomAccessFile(const std::string& fname, - RandomAccessFile** result) { - MutexLock lock(&mutex_); - if (file_map_.find(fname) == file_map_.end()) { - *result = NULL; - return Status::IOError(fname, "File not found"); - } - - *result = new RandomAccessFileImpl(file_map_[fname]); - return Status::OK(); - } - - virtual Status NewWritableFile(const std::string& fname, - WritableFile** result) { - MutexLock lock(&mutex_); - if (file_map_.find(fname) != file_map_.end()) { - DeleteFileInternal(fname); - } - - FileState* file = new FileState(); - file->Ref(); - file_map_[fname] = file; - - *result = new WritableFileImpl(file); - return Status::OK(); - } - - virtual bool FileExists(const std::string& fname) { - MutexLock lock(&mutex_); - return file_map_.find(fname) != file_map_.end(); - } - - virtual Status GetChildren(const std::string& dir, - std::vector* result) { - MutexLock lock(&mutex_); - result->clear(); - - for (FileSystem::iterator i = file_map_.begin(); i != file_map_.end(); ++i){ - const std::string& filename = i->first; - - if (filename.size() >= dir.size() + 1 && filename[dir.size()] == '/' && - Slice(filename).starts_with(Slice(dir))) { - result->push_back(filename.substr(dir.size() + 1)); - } - } - - return Status::OK(); - } - - void DeleteFileInternal(const std::string& fname) { - if (file_map_.find(fname) == file_map_.end()) { - return; - } - - file_map_[fname]->Unref(); - file_map_.erase(fname); - } - - virtual Status DeleteFile(const std::string& fname) { - MutexLock lock(&mutex_); - if (file_map_.find(fname) == file_map_.end()) { - return Status::IOError(fname, "File not found"); - } - - DeleteFileInternal(fname); - return Status::OK(); - } - - virtual Status CreateDir(const std::string& dirname) { - return Status::OK(); - } - - virtual Status DeleteDir(const std::string& dirname) { - return Status::OK(); - } - - virtual Status GetFileSize(const std::string& fname, uint64_t* file_size) { - MutexLock lock(&mutex_); - if (file_map_.find(fname) == file_map_.end()) { - return Status::IOError(fname, "File not found"); - } - - *file_size = file_map_[fname]->Size(); - return Status::OK(); - } - - virtual Status RenameFile(const std::string& src, - const std::string& target) { - MutexLock lock(&mutex_); - if (file_map_.find(src) == file_map_.end()) { - return Status::IOError(src, "File not found"); - } - - DeleteFileInternal(target); - file_map_[target] = file_map_[src]; - file_map_.erase(src); - return Status::OK(); - } - - virtual Status LockFile(const std::string& fname, FileLock** lock) { - *lock = new FileLock; - return Status::OK(); - } - - virtual Status UnlockFile(FileLock* lock) { - delete lock; - return Status::OK(); - } - - virtual Status GetTestDirectory(std::string* path) { - *path = "/test"; - return Status::OK(); - } - - private: - // Map from filenames to FileState objects, representing a simple file system. - typedef std::map FileSystem; - port::Mutex mutex_; - FileSystem file_map_; // Protected by mutex_. -}; - -} // namespace - -Env* NewMemEnv(Env* base_env) { - return new InMemoryEnv(base_env); -} - -} // namespace leveldb diff --git a/leveldb/helpers/memenv/memenv.h b/leveldb/helpers/memenv/memenv.h deleted file mode 100644 index 03b88de..0000000 --- a/leveldb/helpers/memenv/memenv.h +++ /dev/null @@ -1,20 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#ifndef STORAGE_LEVELDB_HELPERS_MEMENV_MEMENV_H_ -#define STORAGE_LEVELDB_HELPERS_MEMENV_MEMENV_H_ - -namespace leveldb { - -class Env; - -// Returns a new environment that stores its data in memory and delegates -// all non-file-storage tasks to base_env. The caller must delete the result -// when it is no longer needed. -// *base_env must remain live while the result is in use. -Env* NewMemEnv(Env* base_env); - -} // namespace leveldb - -#endif // STORAGE_LEVELDB_HELPERS_MEMENV_MEMENV_H_ diff --git a/leveldb/helpers/memenv/memenv_test.cc b/leveldb/helpers/memenv/memenv_test.cc deleted file mode 100644 index a44310f..0000000 --- a/leveldb/helpers/memenv/memenv_test.cc +++ /dev/null @@ -1,232 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "helpers/memenv/memenv.h" - -#include "db/db_impl.h" -#include "leveldb/db.h" -#include "leveldb/env.h" -#include "util/testharness.h" -#include -#include - -namespace leveldb { - -class MemEnvTest { - public: - Env* env_; - - MemEnvTest() - : env_(NewMemEnv(Env::Default())) { - } - ~MemEnvTest() { - delete env_; - } -}; - -TEST(MemEnvTest, Basics) { - uint64_t file_size; - WritableFile* writable_file; - std::vector children; - - ASSERT_OK(env_->CreateDir("/dir")); - - // Check that the directory is empty. - ASSERT_TRUE(!env_->FileExists("/dir/non_existent")); - ASSERT_TRUE(!env_->GetFileSize("/dir/non_existent", &file_size).ok()); - ASSERT_OK(env_->GetChildren("/dir", &children)); - ASSERT_EQ(0, children.size()); - - // Create a file. - ASSERT_OK(env_->NewWritableFile("/dir/f", &writable_file)); - delete writable_file; - - // Check that the file exists. - ASSERT_TRUE(env_->FileExists("/dir/f")); - ASSERT_OK(env_->GetFileSize("/dir/f", &file_size)); - ASSERT_EQ(0, file_size); - ASSERT_OK(env_->GetChildren("/dir", &children)); - ASSERT_EQ(1, children.size()); - ASSERT_EQ("f", children[0]); - - // Write to the file. - ASSERT_OK(env_->NewWritableFile("/dir/f", &writable_file)); - ASSERT_OK(writable_file->Append("abc")); - delete writable_file; - - // Check for expected size. - ASSERT_OK(env_->GetFileSize("/dir/f", &file_size)); - ASSERT_EQ(3, file_size); - - // Check that renaming works. - ASSERT_TRUE(!env_->RenameFile("/dir/non_existent", "/dir/g").ok()); - ASSERT_OK(env_->RenameFile("/dir/f", "/dir/g")); - ASSERT_TRUE(!env_->FileExists("/dir/f")); - ASSERT_TRUE(env_->FileExists("/dir/g")); - ASSERT_OK(env_->GetFileSize("/dir/g", &file_size)); - ASSERT_EQ(3, file_size); - - // Check that opening non-existent file fails. - SequentialFile* seq_file; - RandomAccessFile* rand_file; - ASSERT_TRUE(!env_->NewSequentialFile("/dir/non_existent", &seq_file).ok()); - ASSERT_TRUE(!seq_file); - ASSERT_TRUE(!env_->NewRandomAccessFile("/dir/non_existent", &rand_file).ok()); - ASSERT_TRUE(!rand_file); - - // Check that deleting works. - ASSERT_TRUE(!env_->DeleteFile("/dir/non_existent").ok()); - ASSERT_OK(env_->DeleteFile("/dir/g")); - ASSERT_TRUE(!env_->FileExists("/dir/g")); - ASSERT_OK(env_->GetChildren("/dir", &children)); - ASSERT_EQ(0, children.size()); - ASSERT_OK(env_->DeleteDir("/dir")); -} - -TEST(MemEnvTest, ReadWrite) { - WritableFile* writable_file; - SequentialFile* seq_file; - RandomAccessFile* rand_file; - Slice result; - char scratch[100]; - - ASSERT_OK(env_->CreateDir("/dir")); - - ASSERT_OK(env_->NewWritableFile("/dir/f", &writable_file)); - ASSERT_OK(writable_file->Append("hello ")); - ASSERT_OK(writable_file->Append("world")); - delete writable_file; - - // Read sequentially. - ASSERT_OK(env_->NewSequentialFile("/dir/f", &seq_file)); - ASSERT_OK(seq_file->Read(5, &result, scratch)); // Read "hello". - ASSERT_EQ(0, result.compare("hello")); - ASSERT_OK(seq_file->Skip(1)); - ASSERT_OK(seq_file->Read(1000, &result, scratch)); // Read "world". - ASSERT_EQ(0, result.compare("world")); - ASSERT_OK(seq_file->Read(1000, &result, scratch)); // Try reading past EOF. - ASSERT_EQ(0, result.size()); - ASSERT_OK(seq_file->Skip(100)); // Try to skip past end of file. - ASSERT_OK(seq_file->Read(1000, &result, scratch)); - ASSERT_EQ(0, result.size()); - delete seq_file; - - // Random reads. - ASSERT_OK(env_->NewRandomAccessFile("/dir/f", &rand_file)); - ASSERT_OK(rand_file->Read(6, 5, &result, scratch)); // Read "world". - ASSERT_EQ(0, result.compare("world")); - ASSERT_OK(rand_file->Read(0, 5, &result, scratch)); // Read "hello". - ASSERT_EQ(0, result.compare("hello")); - ASSERT_OK(rand_file->Read(10, 100, &result, scratch)); // Read "d". - ASSERT_EQ(0, result.compare("d")); - - // Too high offset. - ASSERT_TRUE(!rand_file->Read(1000, 5, &result, scratch).ok()); - delete rand_file; -} - -TEST(MemEnvTest, Locks) { - FileLock* lock; - - // These are no-ops, but we test they return success. - ASSERT_OK(env_->LockFile("some file", &lock)); - ASSERT_OK(env_->UnlockFile(lock)); -} - -TEST(MemEnvTest, Misc) { - std::string test_dir; - ASSERT_OK(env_->GetTestDirectory(&test_dir)); - ASSERT_TRUE(!test_dir.empty()); - - WritableFile* writable_file; - ASSERT_OK(env_->NewWritableFile("/a/b", &writable_file)); - - // These are no-ops, but we test they return success. - ASSERT_OK(writable_file->Sync()); - ASSERT_OK(writable_file->Flush()); - ASSERT_OK(writable_file->Close()); - delete writable_file; -} - -TEST(MemEnvTest, LargeWrite) { - const size_t kWriteSize = 300 * 1024; - char* scratch = new char[kWriteSize * 2]; - - std::string write_data; - for (size_t i = 0; i < kWriteSize; ++i) { - write_data.append(1, static_cast(i)); - } - - WritableFile* writable_file; - ASSERT_OK(env_->NewWritableFile("/dir/f", &writable_file)); - ASSERT_OK(writable_file->Append("foo")); - ASSERT_OK(writable_file->Append(write_data)); - delete writable_file; - - SequentialFile* seq_file; - Slice result; - ASSERT_OK(env_->NewSequentialFile("/dir/f", &seq_file)); - ASSERT_OK(seq_file->Read(3, &result, scratch)); // Read "foo". - ASSERT_EQ(0, result.compare("foo")); - - size_t read = 0; - std::string read_data; - while (read < kWriteSize) { - ASSERT_OK(seq_file->Read(kWriteSize - read, &result, scratch)); - read_data.append(result.data(), result.size()); - read += result.size(); - } - ASSERT_TRUE(write_data == read_data); - delete seq_file; - delete [] scratch; -} - -TEST(MemEnvTest, DBTest) { - Options options; - options.create_if_missing = true; - options.env = env_; - DB* db; - - const Slice keys[] = {Slice("aaa"), Slice("bbb"), Slice("ccc")}; - const Slice vals[] = {Slice("foo"), Slice("bar"), Slice("baz")}; - - ASSERT_OK(DB::Open(options, "/dir/db", &db)); - for (size_t i = 0; i < 3; ++i) { - ASSERT_OK(db->Put(WriteOptions(), keys[i], vals[i])); - } - - for (size_t i = 0; i < 3; ++i) { - std::string res; - ASSERT_OK(db->Get(ReadOptions(), keys[i], &res)); - ASSERT_TRUE(res == vals[i]); - } - - Iterator* iterator = db->NewIterator(ReadOptions()); - iterator->SeekToFirst(); - for (size_t i = 0; i < 3; ++i) { - ASSERT_TRUE(iterator->Valid()); - ASSERT_TRUE(keys[i] == iterator->key()); - ASSERT_TRUE(vals[i] == iterator->value()); - iterator->Next(); - } - ASSERT_TRUE(!iterator->Valid()); - delete iterator; - - DBImpl* dbi = reinterpret_cast(db); - ASSERT_OK(dbi->TEST_CompactMemTable()); - - for (size_t i = 0; i < 3; ++i) { - std::string res; - ASSERT_OK(db->Get(ReadOptions(), keys[i], &res)); - ASSERT_TRUE(res == vals[i]); - } - - delete db; -} - -} // namespace leveldb - -int main(int argc, char** argv) { - return leveldb::test::RunAllTests(); -} diff --git a/leveldb/include/leveldb/c.h b/leveldb/include/leveldb/c.h deleted file mode 100644 index 70e3cc6..0000000 --- a/leveldb/include/leveldb/c.h +++ /dev/null @@ -1,275 +0,0 @@ -/* Copyright (c) 2011 The LevelDB Authors. All rights reserved. - Use of this source code is governed by a BSD-style license that can be - found in the LICENSE file. See the AUTHORS file for names of contributors. - - C bindings for leveldb. May be useful as a stable ABI that can be - used by programs that keep leveldb in a shared library, or for - a JNI api. - - Does not support: - . getters for the option types - . custom comparators that implement key shortening - . capturing post-write-snapshot - . custom iter, db, env, cache implementations using just the C bindings - - Some conventions: - - (1) We expose just opaque struct pointers and functions to clients. - This allows us to change internal representations without having to - recompile clients. - - (2) For simplicity, there is no equivalent to the Slice type. Instead, - the caller has to pass the pointer and length as separate - arguments. - - (3) Errors are represented by a null-terminated c string. NULL - means no error. All operations that can raise an error are passed - a "char** errptr" as the last argument. One of the following must - be true on entry: - *errptr == NULL - *errptr points to a malloc()ed null-terminated error message - On success, a leveldb routine leaves *errptr unchanged. - On failure, leveldb frees the old value of *errptr and - set *errptr to a malloc()ed error message. - - (4) Bools have the type unsigned char (0 == false; rest == true) - - (5) All of the pointer arguments must be non-NULL. -*/ - -#ifndef STORAGE_LEVELDB_INCLUDE_C_H_ -#define STORAGE_LEVELDB_INCLUDE_C_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#include -#include -#include - -/* Exported types */ - -typedef struct leveldb_t leveldb_t; -typedef struct leveldb_cache_t leveldb_cache_t; -typedef struct leveldb_comparator_t leveldb_comparator_t; -typedef struct leveldb_env_t leveldb_env_t; -typedef struct leveldb_filelock_t leveldb_filelock_t; -typedef struct leveldb_filterpolicy_t leveldb_filterpolicy_t; -typedef struct leveldb_iterator_t leveldb_iterator_t; -typedef struct leveldb_logger_t leveldb_logger_t; -typedef struct leveldb_options_t leveldb_options_t; -typedef struct leveldb_randomfile_t leveldb_randomfile_t; -typedef struct leveldb_readoptions_t leveldb_readoptions_t; -typedef struct leveldb_seqfile_t leveldb_seqfile_t; -typedef struct leveldb_snapshot_t leveldb_snapshot_t; -typedef struct leveldb_writablefile_t leveldb_writablefile_t; -typedef struct leveldb_writebatch_t leveldb_writebatch_t; -typedef struct leveldb_writeoptions_t leveldb_writeoptions_t; - -/* DB operations */ - -extern leveldb_t* leveldb_open( - const leveldb_options_t* options, - const char* name, - char** errptr); - -extern void leveldb_close(leveldb_t* db); - -extern void leveldb_put( - leveldb_t* db, - const leveldb_writeoptions_t* options, - const char* key, size_t keylen, - const char* val, size_t vallen, - char** errptr); - -extern void leveldb_delete( - leveldb_t* db, - const leveldb_writeoptions_t* options, - const char* key, size_t keylen, - char** errptr); - -extern void leveldb_write( - leveldb_t* db, - const leveldb_writeoptions_t* options, - leveldb_writebatch_t* batch, - char** errptr); - -/* Returns NULL if not found. A malloc()ed array otherwise. - Stores the length of the array in *vallen. */ -extern char* leveldb_get( - leveldb_t* db, - const leveldb_readoptions_t* options, - const char* key, size_t keylen, - size_t* vallen, - char** errptr); - -extern leveldb_iterator_t* leveldb_create_iterator( - leveldb_t* db, - const leveldb_readoptions_t* options); - -extern const leveldb_snapshot_t* leveldb_create_snapshot( - leveldb_t* db); - -extern void leveldb_release_snapshot( - leveldb_t* db, - const leveldb_snapshot_t* snapshot); - -/* Returns NULL if property name is unknown. - Else returns a pointer to a malloc()-ed null-terminated value. */ -extern char* leveldb_property_value( - leveldb_t* db, - const char* propname); - -extern void leveldb_approximate_sizes( - leveldb_t* db, - int num_ranges, - const char* const* range_start_key, const size_t* range_start_key_len, - const char* const* range_limit_key, const size_t* range_limit_key_len, - uint64_t* sizes); - -extern void leveldb_compact_range( - leveldb_t* db, - const char* start_key, size_t start_key_len, - const char* limit_key, size_t limit_key_len); - -/* Management operations */ - -extern void leveldb_destroy_db( - const leveldb_options_t* options, - const char* name, - char** errptr); - -extern void leveldb_repair_db( - const leveldb_options_t* options, - const char* name, - char** errptr); - -/* Iterator */ - -extern void leveldb_iter_destroy(leveldb_iterator_t*); -extern unsigned char leveldb_iter_valid(const leveldb_iterator_t*); -extern void leveldb_iter_seek_to_first(leveldb_iterator_t*); -extern void leveldb_iter_seek_to_last(leveldb_iterator_t*); -extern void leveldb_iter_seek(leveldb_iterator_t*, const char* k, size_t klen); -extern void leveldb_iter_next(leveldb_iterator_t*); -extern void leveldb_iter_prev(leveldb_iterator_t*); -extern const char* leveldb_iter_key(const leveldb_iterator_t*, size_t* klen); -extern const char* leveldb_iter_value(const leveldb_iterator_t*, size_t* vlen); -extern void leveldb_iter_get_error(const leveldb_iterator_t*, char** errptr); - -/* Write batch */ - -extern leveldb_writebatch_t* leveldb_writebatch_create(); -extern void leveldb_writebatch_destroy(leveldb_writebatch_t*); -extern void leveldb_writebatch_clear(leveldb_writebatch_t*); -extern void leveldb_writebatch_put( - leveldb_writebatch_t*, - const char* key, size_t klen, - const char* val, size_t vlen); -extern void leveldb_writebatch_delete( - leveldb_writebatch_t*, - const char* key, size_t klen); -extern void leveldb_writebatch_iterate( - leveldb_writebatch_t*, - void* state, - void (*put)(void*, const char* k, size_t klen, const char* v, size_t vlen), - void (*deleted)(void*, const char* k, size_t klen)); - -/* Options */ - -extern leveldb_options_t* leveldb_options_create(); -extern void leveldb_options_destroy(leveldb_options_t*); -extern void leveldb_options_set_comparator( - leveldb_options_t*, - leveldb_comparator_t*); -extern void leveldb_options_set_filter_policy( - leveldb_options_t*, - leveldb_filterpolicy_t*); -extern void leveldb_options_set_create_if_missing( - leveldb_options_t*, unsigned char); -extern void leveldb_options_set_error_if_exists( - leveldb_options_t*, unsigned char); -extern void leveldb_options_set_paranoid_checks( - leveldb_options_t*, unsigned char); -extern void leveldb_options_set_env(leveldb_options_t*, leveldb_env_t*); -extern void leveldb_options_set_info_log(leveldb_options_t*, leveldb_logger_t*); -extern void leveldb_options_set_write_buffer_size(leveldb_options_t*, size_t); -extern void leveldb_options_set_max_open_files(leveldb_options_t*, int); -extern void leveldb_options_set_cache(leveldb_options_t*, leveldb_cache_t*); -extern void leveldb_options_set_block_size(leveldb_options_t*, size_t); -extern void leveldb_options_set_block_restart_interval(leveldb_options_t*, int); - -enum { - leveldb_no_compression = 0, - leveldb_snappy_compression = 1 -}; -extern void leveldb_options_set_compression(leveldb_options_t*, int); - -/* Comparator */ - -extern leveldb_comparator_t* leveldb_comparator_create( - void* state, - void (*destructor)(void*), - int (*compare)( - void*, - const char* a, size_t alen, - const char* b, size_t blen), - const char* (*name)(void*)); -extern void leveldb_comparator_destroy(leveldb_comparator_t*); - -/* Filter policy */ - -extern leveldb_filterpolicy_t* leveldb_filterpolicy_create( - void* state, - void (*destructor)(void*), - char* (*create_filter)( - void*, - const char* const* key_array, const size_t* key_length_array, - int num_keys, - size_t* filter_length), - unsigned char (*key_may_match)( - void*, - const char* key, size_t length, - const char* filter, size_t filter_length), - const char* (*name)(void*)); -extern void leveldb_filterpolicy_destroy(leveldb_filterpolicy_t*); - -extern leveldb_filterpolicy_t* leveldb_filterpolicy_create_bloom( - int bits_per_key); - -/* Read options */ - -extern leveldb_readoptions_t* leveldb_readoptions_create(); -extern void leveldb_readoptions_destroy(leveldb_readoptions_t*); -extern void leveldb_readoptions_set_verify_checksums( - leveldb_readoptions_t*, - unsigned char); -extern void leveldb_readoptions_set_fill_cache( - leveldb_readoptions_t*, unsigned char); -extern void leveldb_readoptions_set_snapshot( - leveldb_readoptions_t*, - const leveldb_snapshot_t*); - -/* Write options */ - -extern leveldb_writeoptions_t* leveldb_writeoptions_create(); -extern void leveldb_writeoptions_destroy(leveldb_writeoptions_t*); -extern void leveldb_writeoptions_set_sync( - leveldb_writeoptions_t*, unsigned char); - -/* Cache */ - -extern leveldb_cache_t* leveldb_cache_create_lru(size_t capacity); -extern void leveldb_cache_destroy(leveldb_cache_t* cache); - -/* Env */ - -extern leveldb_env_t* leveldb_create_default_env(); -extern void leveldb_env_destroy(leveldb_env_t*); - -#ifdef __cplusplus -} /* end extern "C" */ -#endif - -#endif /* STORAGE_LEVELDB_INCLUDE_C_H_ */ diff --git a/leveldb/include/leveldb/cache.h b/leveldb/include/leveldb/cache.h deleted file mode 100644 index 5e3b476..0000000 --- a/leveldb/include/leveldb/cache.h +++ /dev/null @@ -1,99 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. -// -// A Cache is an interface that maps keys to values. It has internal -// synchronization and may be safely accessed concurrently from -// multiple threads. It may automatically evict entries to make room -// for new entries. Values have a specified charge against the cache -// capacity. For example, a cache where the values are variable -// length strings, may use the length of the string as the charge for -// the string. -// -// A builtin cache implementation with a least-recently-used eviction -// policy is provided. Clients may use their own implementations if -// they want something more sophisticated (like scan-resistance, a -// custom eviction policy, variable cache sizing, etc.) - -#ifndef STORAGE_LEVELDB_INCLUDE_CACHE_H_ -#define STORAGE_LEVELDB_INCLUDE_CACHE_H_ - -#include -#include "leveldb/slice.h" - -namespace leveldb { - -class Cache; - -// Create a new cache with a fixed size capacity. This implementation -// of Cache uses a least-recently-used eviction policy. -extern Cache* NewLRUCache(size_t capacity); - -class Cache { - public: - Cache() { } - - // Destroys all existing entries by calling the "deleter" - // function that was passed to the constructor. - virtual ~Cache(); - - // Opaque handle to an entry stored in the cache. - struct Handle { }; - - // Insert a mapping from key->value into the cache and assign it - // the specified charge against the total cache capacity. - // - // Returns a handle that corresponds to the mapping. The caller - // must call this->Release(handle) when the returned mapping is no - // longer needed. - // - // When the inserted entry is no longer needed, the key and - // value will be passed to "deleter". - virtual Handle* Insert(const Slice& key, void* value, size_t charge, - void (*deleter)(const Slice& key, void* value)) = 0; - - // If the cache has no mapping for "key", returns NULL. - // - // Else return a handle that corresponds to the mapping. The caller - // must call this->Release(handle) when the returned mapping is no - // longer needed. - virtual Handle* Lookup(const Slice& key) = 0; - - // Release a mapping returned by a previous Lookup(). - // REQUIRES: handle must not have been released yet. - // REQUIRES: handle must have been returned by a method on *this. - virtual void Release(Handle* handle) = 0; - - // Return the value encapsulated in a handle returned by a - // successful Lookup(). - // REQUIRES: handle must not have been released yet. - // REQUIRES: handle must have been returned by a method on *this. - virtual void* Value(Handle* handle) = 0; - - // If the cache contains entry for key, erase it. Note that the - // underlying entry will be kept around until all existing handles - // to it have been released. - virtual void Erase(const Slice& key) = 0; - - // Return a new numeric id. May be used by multiple clients who are - // sharing the same cache to partition the key space. Typically the - // client will allocate a new id at startup and prepend the id to - // its cache keys. - virtual uint64_t NewId() = 0; - - private: - void LRU_Remove(Handle* e); - void LRU_Append(Handle* e); - void Unref(Handle* e); - - struct Rep; - Rep* rep_; - - // No copying allowed - Cache(const Cache&); - void operator=(const Cache&); -}; - -} // namespace leveldb - -#endif // STORAGE_LEVELDB_UTIL_CACHE_H_ diff --git a/leveldb/include/leveldb/comparator.h b/leveldb/include/leveldb/comparator.h deleted file mode 100644 index 556b984..0000000 --- a/leveldb/include/leveldb/comparator.h +++ /dev/null @@ -1,63 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#ifndef STORAGE_LEVELDB_INCLUDE_COMPARATOR_H_ -#define STORAGE_LEVELDB_INCLUDE_COMPARATOR_H_ - -#include - -namespace leveldb { - -class Slice; - -// A Comparator object provides a total order across slices that are -// used as keys in an sstable or a database. A Comparator implementation -// must be thread-safe since leveldb may invoke its methods concurrently -// from multiple threads. -class Comparator { - public: - virtual ~Comparator(); - - // Three-way comparison. Returns value: - // < 0 iff "a" < "b", - // == 0 iff "a" == "b", - // > 0 iff "a" > "b" - virtual int Compare(const Slice& a, const Slice& b) const = 0; - - // The name of the comparator. Used to check for comparator - // mismatches (i.e., a DB created with one comparator is - // accessed using a different comparator. - // - // The client of this package should switch to a new name whenever - // the comparator implementation changes in a way that will cause - // the relative ordering of any two keys to change. - // - // Names starting with "leveldb." are reserved and should not be used - // by any clients of this package. - virtual const char* Name() const = 0; - - // Advanced functions: these are used to reduce the space requirements - // for internal data structures like index blocks. - - // If *start < limit, changes *start to a short string in [start,limit). - // Simple comparator implementations may return with *start unchanged, - // i.e., an implementation of this method that does nothing is correct. - virtual void FindShortestSeparator( - std::string* start, - const Slice& limit) const = 0; - - // Changes *key to a short string >= *key. - // Simple comparator implementations may return with *key unchanged, - // i.e., an implementation of this method that does nothing is correct. - virtual void FindShortSuccessor(std::string* key) const = 0; -}; - -// Return a builtin comparator that uses lexicographic byte-wise -// ordering. The result remains the property of this module and -// must not be deleted. -extern const Comparator* BytewiseComparator(); - -} // namespace leveldb - -#endif // STORAGE_LEVELDB_INCLUDE_COMPARATOR_H_ diff --git a/leveldb/include/leveldb/db.h b/leveldb/include/leveldb/db.h deleted file mode 100644 index ed56b87..0000000 --- a/leveldb/include/leveldb/db.h +++ /dev/null @@ -1,161 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#ifndef STORAGE_LEVELDB_INCLUDE_DB_H_ -#define STORAGE_LEVELDB_INCLUDE_DB_H_ - -#include -#include -#include "leveldb/iterator.h" -#include "leveldb/options.h" - -namespace leveldb { - -// Update Makefile if you change these -static const int kMajorVersion = 1; -static const int kMinorVersion = 5; - -struct Options; -struct ReadOptions; -struct WriteOptions; -class WriteBatch; - -// Abstract handle to particular state of a DB. -// A Snapshot is an immutable object and can therefore be safely -// accessed from multiple threads without any external synchronization. -class Snapshot { - protected: - virtual ~Snapshot(); -}; - -// A range of keys -struct Range { - Slice start; // Included in the range - Slice limit; // Not included in the range - - Range() { } - Range(const Slice& s, const Slice& l) : start(s), limit(l) { } -}; - -// A DB is a persistent ordered map from keys to values. -// A DB is safe for concurrent access from multiple threads without -// any external synchronization. -class DB { - public: - // Open the database with the specified "name". - // Stores a pointer to a heap-allocated database in *dbptr and returns - // OK on success. - // Stores NULL in *dbptr and returns a non-OK status on error. - // Caller should delete *dbptr when it is no longer needed. - static Status Open(const Options& options, - const std::string& name, - DB** dbptr); - - DB() { } - virtual ~DB(); - - // Set the database entry for "key" to "value". Returns OK on success, - // and a non-OK status on error. - // Note: consider setting options.sync = true. - virtual Status Put(const WriteOptions& options, - const Slice& key, - const Slice& value) = 0; - - // Remove the database entry (if any) for "key". Returns OK on - // success, and a non-OK status on error. It is not an error if "key" - // did not exist in the database. - // Note: consider setting options.sync = true. - virtual Status Delete(const WriteOptions& options, const Slice& key) = 0; - - // Apply the specified updates to the database. - // Returns OK on success, non-OK on failure. - // Note: consider setting options.sync = true. - virtual Status Write(const WriteOptions& options, WriteBatch* updates) = 0; - - // If the database contains an entry for "key" store the - // corresponding value in *value and return OK. - // - // If there is no entry for "key" leave *value unchanged and return - // a status for which Status::IsNotFound() returns true. - // - // May return some other Status on an error. - virtual Status Get(const ReadOptions& options, - const Slice& key, std::string* value) = 0; - - // Return a heap-allocated iterator over the contents of the database. - // The result of NewIterator() is initially invalid (caller must - // call one of the Seek methods on the iterator before using it). - // - // Caller should delete the iterator when it is no longer needed. - // The returned iterator should be deleted before this db is deleted. - virtual Iterator* NewIterator(const ReadOptions& options) = 0; - - // Return a handle to the current DB state. Iterators created with - // this handle will all observe a stable snapshot of the current DB - // state. The caller must call ReleaseSnapshot(result) when the - // snapshot is no longer needed. - virtual const Snapshot* GetSnapshot() = 0; - - // Release a previously acquired snapshot. The caller must not - // use "snapshot" after this call. - virtual void ReleaseSnapshot(const Snapshot* snapshot) = 0; - - // DB implementations can export properties about their state - // via this method. If "property" is a valid property understood by this - // DB implementation, fills "*value" with its current value and returns - // true. Otherwise returns false. - // - // - // Valid property names include: - // - // "leveldb.num-files-at-level" - return the number of files at level , - // where is an ASCII representation of a level number (e.g. "0"). - // "leveldb.stats" - returns a multi-line string that describes statistics - // about the internal operation of the DB. - // "leveldb.sstables" - returns a multi-line string that describes all - // of the sstables that make up the db contents. - virtual bool GetProperty(const Slice& property, std::string* value) = 0; - - // For each i in [0,n-1], store in "sizes[i]", the approximate - // file system space used by keys in "[range[i].start .. range[i].limit)". - // - // Note that the returned sizes measure file system space usage, so - // if the user data compresses by a factor of ten, the returned - // sizes will be one-tenth the size of the corresponding user data size. - // - // The results may not include the sizes of recently written data. - virtual void GetApproximateSizes(const Range* range, int n, - uint64_t* sizes) = 0; - - // Compact the underlying storage for the key range [*begin,*end]. - // In particular, deleted and overwritten versions are discarded, - // and the data is rearranged to reduce the cost of operations - // needed to access the data. This operation should typically only - // be invoked by users who understand the underlying implementation. - // - // begin==NULL is treated as a key before all keys in the database. - // end==NULL is treated as a key after all keys in the database. - // Therefore the following call will compact the entire database: - // db->CompactRange(NULL, NULL); - virtual void CompactRange(const Slice* begin, const Slice* end) = 0; - - private: - // No copying allowed - DB(const DB&); - void operator=(const DB&); -}; - -// Destroy the contents of the specified database. -// Be very careful using this method. -Status DestroyDB(const std::string& name, const Options& options); - -// If a DB cannot be opened, you may attempt to call this method to -// resurrect as much of the contents of the database as possible. -// Some data may be lost, so be careful when calling this function -// on a database that contains important information. -Status RepairDB(const std::string& dbname, const Options& options); - -} // namespace leveldb - -#endif // STORAGE_LEVELDB_INCLUDE_DB_H_ diff --git a/leveldb/include/leveldb/env.h b/leveldb/include/leveldb/env.h deleted file mode 100644 index 2720667..0000000 --- a/leveldb/include/leveldb/env.h +++ /dev/null @@ -1,323 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. -// -// An Env is an interface used by the leveldb implementation to access -// operating system functionality like the filesystem etc. Callers -// may wish to provide a custom Env object when opening a database to -// get fine gain control; e.g., to rate limit file system operations. -// -// All Env implementations are safe for concurrent access from -// multiple threads without any external synchronization. - -#ifndef STORAGE_LEVELDB_INCLUDE_ENV_H_ -#define STORAGE_LEVELDB_INCLUDE_ENV_H_ - -#include -#include -#include -#include -#include "leveldb/status.h" - -namespace leveldb { - -class FileLock; -class Logger; -class RandomAccessFile; -class SequentialFile; -class Slice; -class WritableFile; - -class Env { - public: - Env() { } - virtual ~Env(); - - // Return a default environment suitable for the current operating - // system. Sophisticated users may wish to provide their own Env - // implementation instead of relying on this default environment. - // - // The result of Default() belongs to leveldb and must never be deleted. - static Env* Default(); - - // Create a brand new sequentially-readable file with the specified name. - // On success, stores a pointer to the new file in *result and returns OK. - // On failure stores NULL in *result and returns non-OK. If the file does - // not exist, returns a non-OK status. - // - // The returned file will only be accessed by one thread at a time. - virtual Status NewSequentialFile(const std::string& fname, - SequentialFile** result) = 0; - - // Create a brand new random access read-only file with the - // specified name. On success, stores a pointer to the new file in - // *result and returns OK. On failure stores NULL in *result and - // returns non-OK. If the file does not exist, returns a non-OK - // status. - // - // The returned file may be concurrently accessed by multiple threads. - virtual Status NewRandomAccessFile(const std::string& fname, - RandomAccessFile** result) = 0; - - // Create an object that writes to a new file with the specified - // name. Deletes any existing file with the same name and creates a - // new file. On success, stores a pointer to the new file in - // *result and returns OK. On failure stores NULL in *result and - // returns non-OK. - // - // The returned file will only be accessed by one thread at a time. - virtual Status NewWritableFile(const std::string& fname, - WritableFile** result) = 0; - - // Returns true iff the named file exists. - virtual bool FileExists(const std::string& fname) = 0; - - // Store in *result the names of the children of the specified directory. - // The names are relative to "dir". - // Original contents of *results are dropped. - virtual Status GetChildren(const std::string& dir, - std::vector* result) = 0; - - // Delete the named file. - virtual Status DeleteFile(const std::string& fname) = 0; - - // Create the specified directory. - virtual Status CreateDir(const std::string& dirname) = 0; - - // Delete the specified directory. - virtual Status DeleteDir(const std::string& dirname) = 0; - - // Store the size of fname in *file_size. - virtual Status GetFileSize(const std::string& fname, uint64_t* file_size) = 0; - - // Rename file src to target. - virtual Status RenameFile(const std::string& src, - const std::string& target) = 0; - - // Lock the specified file. Used to prevent concurrent access to - // the same db by multiple processes. On failure, stores NULL in - // *lock and returns non-OK. - // - // On success, stores a pointer to the object that represents the - // acquired lock in *lock and returns OK. The caller should call - // UnlockFile(*lock) to release the lock. If the process exits, - // the lock will be automatically released. - // - // If somebody else already holds the lock, finishes immediately - // with a failure. I.e., this call does not wait for existing locks - // to go away. - // - // May create the named file if it does not already exist. - virtual Status LockFile(const std::string& fname, FileLock** lock) = 0; - - // Release the lock acquired by a previous successful call to LockFile. - // REQUIRES: lock was returned by a successful LockFile() call - // REQUIRES: lock has not already been unlocked. - virtual Status UnlockFile(FileLock* lock) = 0; - - // Arrange to run "(*function)(arg)" once in a background thread. - // - // "function" may run in an unspecified thread. Multiple functions - // added to the same Env may run concurrently in different threads. - // I.e., the caller may not assume that background work items are - // serialized. - virtual void Schedule( - void (*function)(void* arg), - void* arg) = 0; - - // Start a new thread, invoking "function(arg)" within the new thread. - // When "function(arg)" returns, the thread will be destroyed. - virtual void StartThread(void (*function)(void* arg), void* arg) = 0; - - // *path is set to a temporary directory that can be used for testing. It may - // or many not have just been created. The directory may or may not differ - // between runs of the same process, but subsequent calls will return the - // same directory. - virtual Status GetTestDirectory(std::string* path) = 0; - - // Create and return a log file for storing informational messages. - virtual Status NewLogger(const std::string& fname, Logger** result) = 0; - - // Returns the number of micro-seconds since some fixed point in time. Only - // useful for computing deltas of time. - virtual uint64_t NowMicros() = 0; - - // Sleep/delay the thread for the perscribed number of micro-seconds. - virtual void SleepForMicroseconds(int micros) = 0; - - private: - // No copying allowed - Env(const Env&); - void operator=(const Env&); -}; - -// A file abstraction for reading sequentially through a file -class SequentialFile { - public: - SequentialFile() { } - virtual ~SequentialFile(); - - // Read up to "n" bytes from the file. "scratch[0..n-1]" may be - // written by this routine. Sets "*result" to the data that was - // read (including if fewer than "n" bytes were successfully read). - // May set "*result" to point at data in "scratch[0..n-1]", so - // "scratch[0..n-1]" must be live when "*result" is used. - // If an error was encountered, returns a non-OK status. - // - // REQUIRES: External synchronization - virtual Status Read(size_t n, Slice* result, char* scratch) = 0; - - // Skip "n" bytes from the file. This is guaranteed to be no - // slower that reading the same data, but may be faster. - // - // If end of file is reached, skipping will stop at the end of the - // file, and Skip will return OK. - // - // REQUIRES: External synchronization - virtual Status Skip(uint64_t n) = 0; -}; - -// A file abstraction for randomly reading the contents of a file. -class RandomAccessFile { - public: - RandomAccessFile() { } - virtual ~RandomAccessFile(); - - // Read up to "n" bytes from the file starting at "offset". - // "scratch[0..n-1]" may be written by this routine. Sets "*result" - // to the data that was read (including if fewer than "n" bytes were - // successfully read). May set "*result" to point at data in - // "scratch[0..n-1]", so "scratch[0..n-1]" must be live when - // "*result" is used. If an error was encountered, returns a non-OK - // status. - // - // Safe for concurrent use by multiple threads. - virtual Status Read(uint64_t offset, size_t n, Slice* result, - char* scratch) const = 0; -}; - -// A file abstraction for sequential writing. The implementation -// must provide buffering since callers may append small fragments -// at a time to the file. -class WritableFile { - public: - WritableFile() { } - virtual ~WritableFile(); - - virtual Status Append(const Slice& data) = 0; - virtual Status Close() = 0; - virtual Status Flush() = 0; - virtual Status Sync() = 0; - - private: - // No copying allowed - WritableFile(const WritableFile&); - void operator=(const WritableFile&); -}; - -// An interface for writing log messages. -class Logger { - public: - Logger() { } - virtual ~Logger(); - - // Write an entry to the log file with the specified format. - virtual void Logv(const char* format, va_list ap) = 0; - - private: - // No copying allowed - Logger(const Logger&); - void operator=(const Logger&); -}; - - -// Identifies a locked file. -class FileLock { - public: - FileLock() { } - virtual ~FileLock(); - private: - // No copying allowed - FileLock(const FileLock&); - void operator=(const FileLock&); -}; - -// Log the specified data to *info_log if info_log is non-NULL. -extern void Log(Logger* info_log, const char* format, ...) -# if defined(__GNUC__) || defined(__clang__) - __attribute__((__format__ (__printf__, 2, 3))) -# endif - ; - -// A utility routine: write "data" to the named file. -extern Status WriteStringToFile(Env* env, const Slice& data, - const std::string& fname); - -// A utility routine: read contents of named file into *data -extern Status ReadFileToString(Env* env, const std::string& fname, - std::string* data); - -// An implementation of Env that forwards all calls to another Env. -// May be useful to clients who wish to override just part of the -// functionality of another Env. -class EnvWrapper : public Env { - public: - // Initialize an EnvWrapper that delegates all calls to *t - explicit EnvWrapper(Env* t) : target_(t) { } - virtual ~EnvWrapper(); - - // Return the target to which this Env forwards all calls - Env* target() const { return target_; } - - // The following text is boilerplate that forwards all methods to target() - Status NewSequentialFile(const std::string& f, SequentialFile** r) { - return target_->NewSequentialFile(f, r); - } - Status NewRandomAccessFile(const std::string& f, RandomAccessFile** r) { - return target_->NewRandomAccessFile(f, r); - } - Status NewWritableFile(const std::string& f, WritableFile** r) { - return target_->NewWritableFile(f, r); - } - bool FileExists(const std::string& f) { return target_->FileExists(f); } - Status GetChildren(const std::string& dir, std::vector* r) { - return target_->GetChildren(dir, r); - } - Status DeleteFile(const std::string& f) { return target_->DeleteFile(f); } - Status CreateDir(const std::string& d) { return target_->CreateDir(d); } - Status DeleteDir(const std::string& d) { return target_->DeleteDir(d); } - Status GetFileSize(const std::string& f, uint64_t* s) { - return target_->GetFileSize(f, s); - } - Status RenameFile(const std::string& s, const std::string& t) { - return target_->RenameFile(s, t); - } - Status LockFile(const std::string& f, FileLock** l) { - return target_->LockFile(f, l); - } - Status UnlockFile(FileLock* l) { return target_->UnlockFile(l); } - void Schedule(void (*f)(void*), void* a) { - return target_->Schedule(f, a); - } - void StartThread(void (*f)(void*), void* a) { - return target_->StartThread(f, a); - } - virtual Status GetTestDirectory(std::string* path) { - return target_->GetTestDirectory(path); - } - virtual Status NewLogger(const std::string& fname, Logger** result) { - return target_->NewLogger(fname, result); - } - uint64_t NowMicros() { - return target_->NowMicros(); - } - void SleepForMicroseconds(int micros) { - target_->SleepForMicroseconds(micros); - } - private: - Env* target_; -}; - -} // namespace leveldb - -#endif // STORAGE_LEVELDB_INCLUDE_ENV_H_ diff --git a/leveldb/include/leveldb/filter_policy.h b/leveldb/include/leveldb/filter_policy.h deleted file mode 100644 index 1fba080..0000000 --- a/leveldb/include/leveldb/filter_policy.h +++ /dev/null @@ -1,70 +0,0 @@ -// Copyright (c) 2012 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. -// -// A database can be configured with a custom FilterPolicy object. -// This object is responsible for creating a small filter from a set -// of keys. These filters are stored in leveldb and are consulted -// automatically by leveldb to decide whether or not to read some -// information from disk. In many cases, a filter can cut down the -// number of disk seeks form a handful to a single disk seek per -// DB::Get() call. -// -// Most people will want to use the builtin bloom filter support (see -// NewBloomFilterPolicy() below). - -#ifndef STORAGE_LEVELDB_INCLUDE_FILTER_POLICY_H_ -#define STORAGE_LEVELDB_INCLUDE_FILTER_POLICY_H_ - -#include - -namespace leveldb { - -class Slice; - -class FilterPolicy { - public: - virtual ~FilterPolicy(); - - // Return the name of this policy. Note that if the filter encoding - // changes in an incompatible way, the name returned by this method - // must be changed. Otherwise, old incompatible filters may be - // passed to methods of this type. - virtual const char* Name() const = 0; - - // keys[0,n-1] contains a list of keys (potentially with duplicates) - // that are ordered according to the user supplied comparator. - // Append a filter that summarizes keys[0,n-1] to *dst. - // - // Warning: do not change the initial contents of *dst. Instead, - // append the newly constructed filter to *dst. - virtual void CreateFilter(const Slice* keys, int n, std::string* dst) - const = 0; - - // "filter" contains the data appended by a preceding call to - // CreateFilter() on this class. This method must return true if - // the key was in the list of keys passed to CreateFilter(). - // This method may return true or false if the key was not on the - // list, but it should aim to return false with a high probability. - virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const = 0; -}; - -// Return a new filter policy that uses a bloom filter with approximately -// the specified number of bits per key. A good value for bits_per_key -// is 10, which yields a filter with ~ 1% false positive rate. -// -// Callers must delete the result after any database that is using the -// result has been closed. -// -// Note: if you are using a custom comparator that ignores some parts -// of the keys being compared, you must not use NewBloomFilterPolicy() -// and must provide your own FilterPolicy that also ignores the -// corresponding parts of the keys. For example, if the comparator -// ignores trailing spaces, it would be incorrect to use a -// FilterPolicy (like NewBloomFilterPolicy) that does not ignore -// trailing spaces in keys. -extern const FilterPolicy* NewBloomFilterPolicy(int bits_per_key); - -} - -#endif // STORAGE_LEVELDB_INCLUDE_FILTER_POLICY_H_ diff --git a/leveldb/include/leveldb/iterator.h b/leveldb/include/leveldb/iterator.h deleted file mode 100644 index ad543eb..0000000 --- a/leveldb/include/leveldb/iterator.h +++ /dev/null @@ -1,100 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. -// -// An iterator yields a sequence of key/value pairs from a source. -// The following class defines the interface. Multiple implementations -// are provided by this library. In particular, iterators are provided -// to access the contents of a Table or a DB. -// -// Multiple threads can invoke const methods on an Iterator without -// external synchronization, but if any of the threads may call a -// non-const method, all threads accessing the same Iterator must use -// external synchronization. - -#ifndef STORAGE_LEVELDB_INCLUDE_ITERATOR_H_ -#define STORAGE_LEVELDB_INCLUDE_ITERATOR_H_ - -#include "leveldb/slice.h" -#include "leveldb/status.h" - -namespace leveldb { - -class Iterator { - public: - Iterator(); - virtual ~Iterator(); - - // An iterator is either positioned at a key/value pair, or - // not valid. This method returns true iff the iterator is valid. - virtual bool Valid() const = 0; - - // Position at the first key in the source. The iterator is Valid() - // after this call iff the source is not empty. - virtual void SeekToFirst() = 0; - - // Position at the last key in the source. The iterator is - // Valid() after this call iff the source is not empty. - virtual void SeekToLast() = 0; - - // Position at the first key in the source that at or past target - // The iterator is Valid() after this call iff the source contains - // an entry that comes at or past target. - virtual void Seek(const Slice& target) = 0; - - // Moves to the next entry in the source. After this call, Valid() is - // true iff the iterator was not positioned at the last entry in the source. - // REQUIRES: Valid() - virtual void Next() = 0; - - // Moves to the previous entry in the source. After this call, Valid() is - // true iff the iterator was not positioned at the first entry in source. - // REQUIRES: Valid() - virtual void Prev() = 0; - - // Return the key for the current entry. The underlying storage for - // the returned slice is valid only until the next modification of - // the iterator. - // REQUIRES: Valid() - virtual Slice key() const = 0; - - // Return the value for the current entry. The underlying storage for - // the returned slice is valid only until the next modification of - // the iterator. - // REQUIRES: !AtEnd() && !AtStart() - virtual Slice value() const = 0; - - // If an error has occurred, return it. Else return an ok status. - virtual Status status() const = 0; - - // Clients are allowed to register function/arg1/arg2 triples that - // will be invoked when this iterator is destroyed. - // - // Note that unlike all of the preceding methods, this method is - // not abstract and therefore clients should not override it. - typedef void (*CleanupFunction)(void* arg1, void* arg2); - void RegisterCleanup(CleanupFunction function, void* arg1, void* arg2); - - private: - struct Cleanup { - CleanupFunction function; - void* arg1; - void* arg2; - Cleanup* next; - }; - Cleanup cleanup_; - - // No copying allowed - Iterator(const Iterator&); - void operator=(const Iterator&); -}; - -// Return an empty iterator (yields nothing). -extern Iterator* NewEmptyIterator(); - -// Return an empty iterator with the specified status. -extern Iterator* NewErrorIterator(const Status& status); - -} // namespace leveldb - -#endif // STORAGE_LEVELDB_INCLUDE_ITERATOR_H_ diff --git a/leveldb/include/leveldb/options.h b/leveldb/include/leveldb/options.h deleted file mode 100644 index fdda718..0000000 --- a/leveldb/include/leveldb/options.h +++ /dev/null @@ -1,195 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#ifndef STORAGE_LEVELDB_INCLUDE_OPTIONS_H_ -#define STORAGE_LEVELDB_INCLUDE_OPTIONS_H_ - -#include - -namespace leveldb { - -class Cache; -class Comparator; -class Env; -class FilterPolicy; -class Logger; -class Snapshot; - -// DB contents are stored in a set of blocks, each of which holds a -// sequence of key,value pairs. Each block may be compressed before -// being stored in a file. The following enum describes which -// compression method (if any) is used to compress a block. -enum CompressionType { - // NOTE: do not change the values of existing entries, as these are - // part of the persistent format on disk. - kNoCompression = 0x0, - kSnappyCompression = 0x1 -}; - -// Options to control the behavior of a database (passed to DB::Open) -struct Options { - // ------------------- - // Parameters that affect behavior - - // Comparator used to define the order of keys in the table. - // Default: a comparator that uses lexicographic byte-wise ordering - // - // REQUIRES: The client must ensure that the comparator supplied - // here has the same name and orders keys *exactly* the same as the - // comparator provided to previous open calls on the same DB. - const Comparator* comparator; - - // If true, the database will be created if it is missing. - // Default: false - bool create_if_missing; - - // If true, an error is raised if the database already exists. - // Default: false - bool error_if_exists; - - // If true, the implementation will do aggressive checking of the - // data it is processing and will stop early if it detects any - // errors. This may have unforeseen ramifications: for example, a - // corruption of one DB entry may cause a large number of entries to - // become unreadable or for the entire DB to become unopenable. - // Default: false - bool paranoid_checks; - - // Use the specified object to interact with the environment, - // e.g. to read/write files, schedule background work, etc. - // Default: Env::Default() - Env* env; - - // Any internal progress/error information generated by the db will - // be written to info_log if it is non-NULL, or to a file stored - // in the same directory as the DB contents if info_log is NULL. - // Default: NULL - Logger* info_log; - - // ------------------- - // Parameters that affect performance - - // Amount of data to build up in memory (backed by an unsorted log - // on disk) before converting to a sorted on-disk file. - // - // Larger values increase performance, especially during bulk loads. - // Up to two write buffers may be held in memory at the same time, - // so you may wish to adjust this parameter to control memory usage. - // Also, a larger write buffer will result in a longer recovery time - // the next time the database is opened. - // - // Default: 4MB - size_t write_buffer_size; - - // Number of open files that can be used by the DB. You may need to - // increase this if your database has a large working set (budget - // one open file per 2MB of working set). - // - // Default: 1000 - int max_open_files; - - // Control over blocks (user data is stored in a set of blocks, and - // a block is the unit of reading from disk). - - // If non-NULL, use the specified cache for blocks. - // If NULL, leveldb will automatically create and use an 8MB internal cache. - // Default: NULL - Cache* block_cache; - - // Approximate size of user data packed per block. Note that the - // block size specified here corresponds to uncompressed data. The - // actual size of the unit read from disk may be smaller if - // compression is enabled. This parameter can be changed dynamically. - // - // Default: 4K - size_t block_size; - - // Number of keys between restart points for delta encoding of keys. - // This parameter can be changed dynamically. Most clients should - // leave this parameter alone. - // - // Default: 16 - int block_restart_interval; - - // Compress blocks using the specified compression algorithm. This - // parameter can be changed dynamically. - // - // Default: kSnappyCompression, which gives lightweight but fast - // compression. - // - // Typical speeds of kSnappyCompression on an Intel(R) Core(TM)2 2.4GHz: - // ~200-500MB/s compression - // ~400-800MB/s decompression - // Note that these speeds are significantly faster than most - // persistent storage speeds, and therefore it is typically never - // worth switching to kNoCompression. Even if the input data is - // incompressible, the kSnappyCompression implementation will - // efficiently detect that and will switch to uncompressed mode. - CompressionType compression; - - // If non-NULL, use the specified filter policy to reduce disk reads. - // Many applications will benefit from passing the result of - // NewBloomFilterPolicy() here. - // - // Default: NULL - const FilterPolicy* filter_policy; - - // Create an Options object with default values for all fields. - Options(); -}; - -// Options that control read operations -struct ReadOptions { - // If true, all data read from underlying storage will be - // verified against corresponding checksums. - // Default: false - bool verify_checksums; - - // Should the data read for this iteration be cached in memory? - // Callers may wish to set this field to false for bulk scans. - // Default: true - bool fill_cache; - - // If "snapshot" is non-NULL, read as of the supplied snapshot - // (which must belong to the DB that is being read and which must - // not have been released). If "snapshot" is NULL, use an impliicit - // snapshot of the state at the beginning of this read operation. - // Default: NULL - const Snapshot* snapshot; - - ReadOptions() - : verify_checksums(false), - fill_cache(true), - snapshot(NULL) { - } -}; - -// Options that control write operations -struct WriteOptions { - // If true, the write will be flushed from the operating system - // buffer cache (by calling WritableFile::Sync()) before the write - // is considered complete. If this flag is true, writes will be - // slower. - // - // If this flag is false, and the machine crashes, some recent - // writes may be lost. Note that if it is just the process that - // crashes (i.e., the machine does not reboot), no writes will be - // lost even if sync==false. - // - // In other words, a DB write with sync==false has similar - // crash semantics as the "write()" system call. A DB write - // with sync==true has similar crash semantics to a "write()" - // system call followed by "fsync()". - // - // Default: false - bool sync; - - WriteOptions() - : sync(false) { - } -}; - -} // namespace leveldb - -#endif // STORAGE_LEVELDB_INCLUDE_OPTIONS_H_ diff --git a/leveldb/include/leveldb/slice.h b/leveldb/include/leveldb/slice.h deleted file mode 100644 index 74ea8fa..0000000 --- a/leveldb/include/leveldb/slice.h +++ /dev/null @@ -1,109 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. -// -// Slice is a simple structure containing a pointer into some external -// storage and a size. The user of a Slice must ensure that the slice -// is not used after the corresponding external storage has been -// deallocated. -// -// Multiple threads can invoke const methods on a Slice without -// external synchronization, but if any of the threads may call a -// non-const method, all threads accessing the same Slice must use -// external synchronization. - -#ifndef STORAGE_LEVELDB_INCLUDE_SLICE_H_ -#define STORAGE_LEVELDB_INCLUDE_SLICE_H_ - -#include -#include -#include -#include - -namespace leveldb { - -class Slice { - public: - // Create an empty slice. - Slice() : data_(""), size_(0) { } - - // Create a slice that refers to d[0,n-1]. - Slice(const char* d, size_t n) : data_(d), size_(n) { } - - // Create a slice that refers to the contents of "s" - Slice(const std::string& s) : data_(s.data()), size_(s.size()) { } - - // Create a slice that refers to s[0,strlen(s)-1] - Slice(const char* s) : data_(s), size_(strlen(s)) { } - - // Return a pointer to the beginning of the referenced data - const char* data() const { return data_; } - - // Return the length (in bytes) of the referenced data - size_t size() const { return size_; } - - // Return true iff the length of the referenced data is zero - bool empty() const { return size_ == 0; } - - // Return the ith byte in the referenced data. - // REQUIRES: n < size() - char operator[](size_t n) const { - assert(n < size()); - return data_[n]; - } - - // Change this slice to refer to an empty array - void clear() { data_ = ""; size_ = 0; } - - // Drop the first "n" bytes from this slice. - void remove_prefix(size_t n) { - assert(n <= size()); - data_ += n; - size_ -= n; - } - - // Return a string that contains the copy of the referenced data. - std::string ToString() const { return std::string(data_, size_); } - - // Three-way comparison. Returns value: - // < 0 iff "*this" < "b", - // == 0 iff "*this" == "b", - // > 0 iff "*this" > "b" - int compare(const Slice& b) const; - - // Return true iff "x" is a prefix of "*this" - bool starts_with(const Slice& x) const { - return ((size_ >= x.size_) && - (memcmp(data_, x.data_, x.size_) == 0)); - } - - private: - const char* data_; - size_t size_; - - // Intentionally copyable -}; - -inline bool operator==(const Slice& x, const Slice& y) { - return ((x.size() == y.size()) && - (memcmp(x.data(), y.data(), x.size()) == 0)); -} - -inline bool operator!=(const Slice& x, const Slice& y) { - return !(x == y); -} - -inline int Slice::compare(const Slice& b) const { - const int min_len = (size_ < b.size_) ? size_ : b.size_; - int r = memcmp(data_, b.data_, min_len); - if (r == 0) { - if (size_ < b.size_) r = -1; - else if (size_ > b.size_) r = +1; - } - return r; -} - -} // namespace leveldb - - -#endif // STORAGE_LEVELDB_INCLUDE_SLICE_H_ diff --git a/leveldb/include/leveldb/status.h b/leveldb/include/leveldb/status.h deleted file mode 100644 index 11dbd4b..0000000 --- a/leveldb/include/leveldb/status.h +++ /dev/null @@ -1,106 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. -// -// A Status encapsulates the result of an operation. It may indicate success, -// or it may indicate an error with an associated error message. -// -// Multiple threads can invoke const methods on a Status without -// external synchronization, but if any of the threads may call a -// non-const method, all threads accessing the same Status must use -// external synchronization. - -#ifndef STORAGE_LEVELDB_INCLUDE_STATUS_H_ -#define STORAGE_LEVELDB_INCLUDE_STATUS_H_ - -#include -#include "leveldb/slice.h" - -namespace leveldb { - -class Status { - public: - // Create a success status. - Status() : state_(NULL) { } - ~Status() { delete[] state_; } - - // Copy the specified status. - Status(const Status& s); - void operator=(const Status& s); - - // Return a success status. - static Status OK() { return Status(); } - - // Return error status of an appropriate type. - static Status NotFound(const Slice& msg, const Slice& msg2 = Slice()) { - return Status(kNotFound, msg, msg2); - } - static Status Corruption(const Slice& msg, const Slice& msg2 = Slice()) { - return Status(kCorruption, msg, msg2); - } - static Status NotSupported(const Slice& msg, const Slice& msg2 = Slice()) { - return Status(kNotSupported, msg, msg2); - } - static Status InvalidArgument(const Slice& msg, const Slice& msg2 = Slice()) { - return Status(kInvalidArgument, msg, msg2); - } - static Status IOError(const Slice& msg, const Slice& msg2 = Slice()) { - return Status(kIOError, msg, msg2); - } - - // Returns true iff the status indicates success. - bool ok() const { return (state_ == NULL); } - - // Returns true iff the status indicates a NotFound error. - bool IsNotFound() const { return code() == kNotFound; } - - // Returns true iff the status indicates a Corruption error. - bool IsCorruption() const { return code() == kCorruption; } - - // Returns true iff the status indicates an IOError. - bool IsIOError() const { return code() == kIOError; } - - // Return a string representation of this status suitable for printing. - // Returns the string "OK" for success. - std::string ToString() const; - - private: - // OK status has a NULL state_. Otherwise, state_ is a new[] array - // of the following form: - // state_[0..3] == length of message - // state_[4] == code - // state_[5..] == message - const char* state_; - - enum Code { - kOk = 0, - kNotFound = 1, - kCorruption = 2, - kNotSupported = 3, - kInvalidArgument = 4, - kIOError = 5 - }; - - Code code() const { - return (state_ == NULL) ? kOk : static_cast(state_[4]); - } - - Status(Code code, const Slice& msg, const Slice& msg2); - static const char* CopyState(const char* s); -}; - -inline Status::Status(const Status& s) { - state_ = (s.state_ == NULL) ? NULL : CopyState(s.state_); -} -inline void Status::operator=(const Status& s) { - // The following condition catches both aliasing (when this == &s), - // and the common case where both s and *this are ok. - if (state_ != s.state_) { - delete[] state_; - state_ = (s.state_ == NULL) ? NULL : CopyState(s.state_); - } -} - -} // namespace leveldb - -#endif // STORAGE_LEVELDB_INCLUDE_STATUS_H_ diff --git a/leveldb/include/leveldb/table.h b/leveldb/include/leveldb/table.h deleted file mode 100644 index a9746c3..0000000 --- a/leveldb/include/leveldb/table.h +++ /dev/null @@ -1,85 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#ifndef STORAGE_LEVELDB_INCLUDE_TABLE_H_ -#define STORAGE_LEVELDB_INCLUDE_TABLE_H_ - -#include -#include "leveldb/iterator.h" - -namespace leveldb { - -class Block; -class BlockHandle; -class Footer; -struct Options; -class RandomAccessFile; -struct ReadOptions; -class TableCache; - -// A Table is a sorted map from strings to strings. Tables are -// immutable and persistent. A Table may be safely accessed from -// multiple threads without external synchronization. -class Table { - public: - // Attempt to open the table that is stored in bytes [0..file_size) - // of "file", and read the metadata entries necessary to allow - // retrieving data from the table. - // - // If successful, returns ok and sets "*table" to the newly opened - // table. The client should delete "*table" when no longer needed. - // If there was an error while initializing the table, sets "*table" - // to NULL and returns a non-ok status. Does not take ownership of - // "*source", but the client must ensure that "source" remains live - // for the duration of the returned table's lifetime. - // - // *file must remain live while this Table is in use. - static Status Open(const Options& options, - RandomAccessFile* file, - uint64_t file_size, - Table** table); - - ~Table(); - - // Returns a new iterator over the table contents. - // The result of NewIterator() is initially invalid (caller must - // call one of the Seek methods on the iterator before using it). - Iterator* NewIterator(const ReadOptions&) const; - - // Given a key, return an approximate byte offset in the file where - // the data for that key begins (or would begin if the key were - // present in the file). The returned value is in terms of file - // bytes, and so includes effects like compression of the underlying data. - // E.g., the approximate offset of the last key in the table will - // be close to the file length. - uint64_t ApproximateOffsetOf(const Slice& key) const; - - private: - struct Rep; - Rep* rep_; - - explicit Table(Rep* rep) { rep_ = rep; } - static Iterator* BlockReader(void*, const ReadOptions&, const Slice&); - - // Calls (*handle_result)(arg, ...) with the entry found after a call - // to Seek(key). May not make such a call if filter policy says - // that key is not present. - friend class TableCache; - Status InternalGet( - const ReadOptions&, const Slice& key, - void* arg, - void (*handle_result)(void* arg, const Slice& k, const Slice& v)); - - - void ReadMeta(const Footer& footer); - void ReadFilter(const Slice& filter_handle_value); - - // No copying allowed - Table(const Table&); - void operator=(const Table&); -}; - -} // namespace leveldb - -#endif // STORAGE_LEVELDB_INCLUDE_TABLE_H_ diff --git a/leveldb/include/leveldb/table_builder.h b/leveldb/include/leveldb/table_builder.h deleted file mode 100644 index 5fd1dc7..0000000 --- a/leveldb/include/leveldb/table_builder.h +++ /dev/null @@ -1,92 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. -// -// TableBuilder provides the interface used to build a Table -// (an immutable and sorted map from keys to values). -// -// Multiple threads can invoke const methods on a TableBuilder without -// external synchronization, but if any of the threads may call a -// non-const method, all threads accessing the same TableBuilder must use -// external synchronization. - -#ifndef STORAGE_LEVELDB_INCLUDE_TABLE_BUILDER_H_ -#define STORAGE_LEVELDB_INCLUDE_TABLE_BUILDER_H_ - -#include -#include "leveldb/options.h" -#include "leveldb/status.h" - -namespace leveldb { - -class BlockBuilder; -class BlockHandle; -class WritableFile; - -class TableBuilder { - public: - // Create a builder that will store the contents of the table it is - // building in *file. Does not close the file. It is up to the - // caller to close the file after calling Finish(). - TableBuilder(const Options& options, WritableFile* file); - - // REQUIRES: Either Finish() or Abandon() has been called. - ~TableBuilder(); - - // Change the options used by this builder. Note: only some of the - // option fields can be changed after construction. If a field is - // not allowed to change dynamically and its value in the structure - // passed to the constructor is different from its value in the - // structure passed to this method, this method will return an error - // without changing any fields. - Status ChangeOptions(const Options& options); - - // Add key,value to the table being constructed. - // REQUIRES: key is after any previously added key according to comparator. - // REQUIRES: Finish(), Abandon() have not been called - void Add(const Slice& key, const Slice& value); - - // Advanced operation: flush any buffered key/value pairs to file. - // Can be used to ensure that two adjacent entries never live in - // the same data block. Most clients should not need to use this method. - // REQUIRES: Finish(), Abandon() have not been called - void Flush(); - - // Return non-ok iff some error has been detected. - Status status() const; - - // Finish building the table. Stops using the file passed to the - // constructor after this function returns. - // REQUIRES: Finish(), Abandon() have not been called - Status Finish(); - - // Indicate that the contents of this builder should be abandoned. Stops - // using the file passed to the constructor after this function returns. - // If the caller is not going to call Finish(), it must call Abandon() - // before destroying this builder. - // REQUIRES: Finish(), Abandon() have not been called - void Abandon(); - - // Number of calls to Add() so far. - uint64_t NumEntries() const; - - // Size of the file generated so far. If invoked after a successful - // Finish() call, returns the size of the final generated file. - uint64_t FileSize() const; - - private: - bool ok() const { return status().ok(); } - void WriteBlock(BlockBuilder* block, BlockHandle* handle); - void WriteRawBlock(const Slice& data, CompressionType, BlockHandle* handle); - - struct Rep; - Rep* rep_; - - // No copying allowed - TableBuilder(const TableBuilder&); - void operator=(const TableBuilder&); -}; - -} // namespace leveldb - -#endif // STORAGE_LEVELDB_INCLUDE_TABLE_BUILDER_H_ diff --git a/leveldb/include/leveldb/write_batch.h b/leveldb/include/leveldb/write_batch.h deleted file mode 100644 index ee9aab6..0000000 --- a/leveldb/include/leveldb/write_batch.h +++ /dev/null @@ -1,64 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. -// -// WriteBatch holds a collection of updates to apply atomically to a DB. -// -// The updates are applied in the order in which they are added -// to the WriteBatch. For example, the value of "key" will be "v3" -// after the following batch is written: -// -// batch.Put("key", "v1"); -// batch.Delete("key"); -// batch.Put("key", "v2"); -// batch.Put("key", "v3"); -// -// Multiple threads can invoke const methods on a WriteBatch without -// external synchronization, but if any of the threads may call a -// non-const method, all threads accessing the same WriteBatch must use -// external synchronization. - -#ifndef STORAGE_LEVELDB_INCLUDE_WRITE_BATCH_H_ -#define STORAGE_LEVELDB_INCLUDE_WRITE_BATCH_H_ - -#include -#include "leveldb/status.h" - -namespace leveldb { - -class Slice; - -class WriteBatch { - public: - WriteBatch(); - ~WriteBatch(); - - // Store the mapping "key->value" in the database. - void Put(const Slice& key, const Slice& value); - - // If the database contains a mapping for "key", erase it. Else do nothing. - void Delete(const Slice& key); - - // Clear all updates buffered in this batch. - void Clear(); - - // Support for iterating over the contents of a batch. - class Handler { - public: - virtual ~Handler(); - virtual void Put(const Slice& key, const Slice& value) = 0; - virtual void Delete(const Slice& key) = 0; - }; - Status Iterate(Handler* handler) const; - - private: - friend class WriteBatchInternal; - - std::string rep_; // See comment in write_batch.cc for the format of rep_ - - // Intentionally copyable -}; - -} // namespace leveldb - -#endif // STORAGE_LEVELDB_INCLUDE_WRITE_BATCH_H_ diff --git a/leveldb/port/README b/leveldb/port/README deleted file mode 100644 index 422563e..0000000 --- a/leveldb/port/README +++ /dev/null @@ -1,10 +0,0 @@ -This directory contains interfaces and implementations that isolate the -rest of the package from platform details. - -Code in the rest of the package includes "port.h" from this directory. -"port.h" in turn includes a platform specific "port_.h" file -that provides the platform specific implementation. - -See port_posix.h for an example of what must be provided in a platform -specific header file. - diff --git a/leveldb/port/atomic_pointer.h b/leveldb/port/atomic_pointer.h deleted file mode 100644 index c58bffb..0000000 --- a/leveldb/port/atomic_pointer.h +++ /dev/null @@ -1,152 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -// AtomicPointer provides storage for a lock-free pointer. -// Platform-dependent implementation of AtomicPointer: -// - If the platform provides a cheap barrier, we use it with raw pointers -// - If cstdatomic is present (on newer versions of gcc, it is), we use -// a cstdatomic-based AtomicPointer. However we prefer the memory -// barrier based version, because at least on a gcc 4.4 32-bit build -// on linux, we have encountered a buggy -// implementation. Also, some implementations are much -// slower than a memory-barrier based implementation (~16ns for -// based acquire-load vs. ~1ns for a barrier based -// acquire-load). -// This code is based on atomicops-internals-* in Google's perftools: -// http://code.google.com/p/google-perftools/source/browse/#svn%2Ftrunk%2Fsrc%2Fbase - -#ifndef PORT_ATOMIC_POINTER_H_ -#define PORT_ATOMIC_POINTER_H_ - -#include -#ifdef LEVELDB_CSTDATOMIC_PRESENT -#include -#endif -#ifdef OS_WIN -#include -#endif -#ifdef OS_MACOSX -#include -#endif - -#if defined(_M_X64) || defined(__x86_64__) -#define ARCH_CPU_X86_FAMILY 1 -#elif defined(_M_IX86) || defined(__i386__) || defined(__i386) -#define ARCH_CPU_X86_FAMILY 1 -#elif defined(__ARMEL__) -#define ARCH_CPU_ARM_FAMILY 1 -#endif - -namespace leveldb { -namespace port { - -// Define MemoryBarrier() if available -// Windows on x86 -#if defined(OS_WIN) && defined(COMPILER_MSVC) && defined(ARCH_CPU_X86_FAMILY) -// windows.h already provides a MemoryBarrier(void) macro -// http://msdn.microsoft.com/en-us/library/ms684208(v=vs.85).aspx -#define LEVELDB_HAVE_MEMORY_BARRIER - -// Gcc on x86 -#elif defined(ARCH_CPU_X86_FAMILY) && defined(__GNUC__) -inline void MemoryBarrier() { - // See http://gcc.gnu.org/ml/gcc/2003-04/msg01180.html for a discussion on - // this idiom. Also see http://en.wikipedia.org/wiki/Memory_ordering. - __asm__ __volatile__("" : : : "memory"); -} -#define LEVELDB_HAVE_MEMORY_BARRIER - -// Sun Studio -#elif defined(ARCH_CPU_X86_FAMILY) && defined(__SUNPRO_CC) -inline void MemoryBarrier() { - // See http://gcc.gnu.org/ml/gcc/2003-04/msg01180.html for a discussion on - // this idiom. Also see http://en.wikipedia.org/wiki/Memory_ordering. - asm volatile("" : : : "memory"); -} -#define LEVELDB_HAVE_MEMORY_BARRIER - -// Mac OS -#elif defined(OS_MACOSX) -inline void MemoryBarrier() { - OSMemoryBarrier(); -} -#define LEVELDB_HAVE_MEMORY_BARRIER - -// ARM Linux -#elif defined(ARCH_CPU_ARM_FAMILY) && defined(__linux__) -typedef void (*LinuxKernelMemoryBarrierFunc)(void); -// The Linux ARM kernel provides a highly optimized device-specific memory -// barrier function at a fixed memory address that is mapped in every -// user-level process. -// -// This beats using CPU-specific instructions which are, on single-core -// devices, un-necessary and very costly (e.g. ARMv7-A "dmb" takes more -// than 180ns on a Cortex-A8 like the one on a Nexus One). Benchmarking -// shows that the extra function call cost is completely negligible on -// multi-core devices. -// -inline void MemoryBarrier() { - (*(LinuxKernelMemoryBarrierFunc)0xffff0fa0)(); -} -#define LEVELDB_HAVE_MEMORY_BARRIER - -#endif - -// AtomicPointer built using platform-specific MemoryBarrier() -#if defined(LEVELDB_HAVE_MEMORY_BARRIER) -class AtomicPointer { - private: - void* rep_; - public: - AtomicPointer() { } - explicit AtomicPointer(void* p) : rep_(p) {} - inline void* NoBarrier_Load() const { return rep_; } - inline void NoBarrier_Store(void* v) { rep_ = v; } - inline void* Acquire_Load() const { - void* result = rep_; - MemoryBarrier(); - return result; - } - inline void Release_Store(void* v) { - MemoryBarrier(); - rep_ = v; - } -}; - -// AtomicPointer based on -#elif defined(LEVELDB_CSTDATOMIC_PRESENT) -class AtomicPointer { - private: - std::atomic rep_; - public: - AtomicPointer() { } - explicit AtomicPointer(void* v) : rep_(v) { } - inline void* Acquire_Load() const { - return rep_.load(std::memory_order_acquire); - } - inline void Release_Store(void* v) { - rep_.store(v, std::memory_order_release); - } - inline void* NoBarrier_Load() const { - return rep_.load(std::memory_order_relaxed); - } - inline void NoBarrier_Store(void* v) { - rep_.store(v, std::memory_order_relaxed); - } -}; - -// We have neither MemoryBarrier(), nor -#else -#error Please implement AtomicPointer for this platform. - -#endif - -#undef LEVELDB_HAVE_MEMORY_BARRIER -#undef ARCH_CPU_X86_FAMILY -#undef ARCH_CPU_ARM_FAMILY - -} // namespace port -} // namespace leveldb - -#endif // PORT_ATOMIC_POINTER_H_ diff --git a/leveldb/port/port.h b/leveldb/port/port.h deleted file mode 100644 index e667db4..0000000 --- a/leveldb/port/port.h +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#ifndef STORAGE_LEVELDB_PORT_PORT_H_ -#define STORAGE_LEVELDB_PORT_PORT_H_ - -#include - -// Include the appropriate platform specific file below. If you are -// porting to a new platform, see "port_example.h" for documentation -// of what the new port_.h file must provide. -#if defined(LEVELDB_PLATFORM_POSIX) -# include "port/port_posix.h" -#elif defined(LEVELDB_PLATFORM_CHROMIUM) -# include "port/port_chromium.h" -#endif - -#endif // STORAGE_LEVELDB_PORT_PORT_H_ diff --git a/leveldb/port/port_example.h b/leveldb/port/port_example.h deleted file mode 100644 index ab9e489..0000000 --- a/leveldb/port/port_example.h +++ /dev/null @@ -1,135 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. -// -// This file contains the specification, but not the implementations, -// of the types/operations/etc. that should be defined by a platform -// specific port_.h file. Use this file as a reference for -// how to port this package to a new platform. - -#ifndef STORAGE_LEVELDB_PORT_PORT_EXAMPLE_H_ -#define STORAGE_LEVELDB_PORT_PORT_EXAMPLE_H_ - -namespace leveldb { -namespace port { - -// TODO(jorlow): Many of these belong more in the environment class rather than -// here. We should try moving them and see if it affects perf. - -// The following boolean constant must be true on a little-endian machine -// and false otherwise. -static const bool kLittleEndian = true /* or some other expression */; - -// ------------------ Threading ------------------- - -// A Mutex represents an exclusive lock. -class Mutex { - public: - Mutex(); - ~Mutex(); - - // Lock the mutex. Waits until other lockers have exited. - // Will deadlock if the mutex is already locked by this thread. - void Lock(); - - // Unlock the mutex. - // REQUIRES: This mutex was locked by this thread. - void Unlock(); - - // Optionally crash if this thread does not hold this mutex. - // The implementation must be fast, especially if NDEBUG is - // defined. The implementation is allowed to skip all checks. - void AssertHeld(); -}; - -class CondVar { - public: - explicit CondVar(Mutex* mu); - ~CondVar(); - - // Atomically release *mu and block on this condition variable until - // either a call to SignalAll(), or a call to Signal() that picks - // this thread to wakeup. - // REQUIRES: this thread holds *mu - void Wait(); - - // If there are some threads waiting, wake up at least one of them. - void Signal(); - - // Wake up all waiting threads. - void SignallAll(); -}; - -// Thread-safe initialization. -// Used as follows: -// static port::OnceType init_control = LEVELDB_ONCE_INIT; -// static void Initializer() { ... do something ...; } -// ... -// port::InitOnce(&init_control, &Initializer); -typedef intptr_t OnceType; -#define LEVELDB_ONCE_INIT 0 -extern void InitOnce(port::OnceType*, void (*initializer)()); - -// A type that holds a pointer that can be read or written atomically -// (i.e., without word-tearing.) -class AtomicPointer { - private: - intptr_t rep_; - public: - // Initialize to arbitrary value - AtomicPointer(); - - // Initialize to hold v - explicit AtomicPointer(void* v) : rep_(v) { } - - // Read and return the stored pointer with the guarantee that no - // later memory access (read or write) by this thread can be - // reordered ahead of this read. - void* Acquire_Load() const; - - // Set v as the stored pointer with the guarantee that no earlier - // memory access (read or write) by this thread can be reordered - // after this store. - void Release_Store(void* v); - - // Read the stored pointer with no ordering guarantees. - void* NoBarrier_Load() const; - - // Set va as the stored pointer with no ordering guarantees. - void NoBarrier_Store(void* v); -}; - -// ------------------ Compression ------------------- - -// Store the snappy compression of "input[0,input_length-1]" in *output. -// Returns false if snappy is not supported by this port. -extern bool Snappy_Compress(const char* input, size_t input_length, - std::string* output); - -// If input[0,input_length-1] looks like a valid snappy compressed -// buffer, store the size of the uncompressed data in *result and -// return true. Else return false. -extern bool Snappy_GetUncompressedLength(const char* input, size_t length, - size_t* result); - -// Attempt to snappy uncompress input[0,input_length-1] into *output. -// Returns true if successful, false if the input is invalid lightweight -// compressed data. -// -// REQUIRES: at least the first "n" bytes of output[] must be writable -// where "n" is the result of a successful call to -// Snappy_GetUncompressedLength. -extern bool Snappy_Uncompress(const char* input_data, size_t input_length, - char* output); - -// ------------------ Miscellaneous ------------------- - -// If heap profiling is not supported, returns false. -// Else repeatedly calls (*func)(arg, data, n) and then returns true. -// The concatenation of all "data[0,n-1]" fragments is the heap profile. -extern bool GetHeapProfile(void (*func)(void*, const char*, int), void* arg); - -} // namespace port -} // namespace leveldb - -#endif // STORAGE_LEVELDB_PORT_PORT_EXAMPLE_H_ diff --git a/leveldb/port/port_posix.cc b/leveldb/port/port_posix.cc deleted file mode 100644 index 5ba127a..0000000 --- a/leveldb/port/port_posix.cc +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "port/port_posix.h" - -#include -#include -#include -#include "util/logging.h" - -namespace leveldb { -namespace port { - -static void PthreadCall(const char* label, int result) { - if (result != 0) { - fprintf(stderr, "pthread %s: %s\n", label, strerror(result)); - abort(); - } -} - -Mutex::Mutex() { PthreadCall("init mutex", pthread_mutex_init(&mu_, NULL)); } - -Mutex::~Mutex() { PthreadCall("destroy mutex", pthread_mutex_destroy(&mu_)); } - -void Mutex::Lock() { PthreadCall("lock", pthread_mutex_lock(&mu_)); } - -void Mutex::Unlock() { PthreadCall("unlock", pthread_mutex_unlock(&mu_)); } - -CondVar::CondVar(Mutex* mu) - : mu_(mu) { - PthreadCall("init cv", pthread_cond_init(&cv_, NULL)); -} - -CondVar::~CondVar() { PthreadCall("destroy cv", pthread_cond_destroy(&cv_)); } - -void CondVar::Wait() { - PthreadCall("wait", pthread_cond_wait(&cv_, &mu_->mu_)); -} - -void CondVar::Signal() { - PthreadCall("signal", pthread_cond_signal(&cv_)); -} - -void CondVar::SignalAll() { - PthreadCall("broadcast", pthread_cond_broadcast(&cv_)); -} - -void InitOnce(OnceType* once, void (*initializer)()) { - PthreadCall("once", pthread_once(once, initializer)); -} - -} // namespace port -} // namespace leveldb diff --git a/leveldb/port/port_posix.h b/leveldb/port/port_posix.h deleted file mode 100644 index 654a4b9..0000000 --- a/leveldb/port/port_posix.h +++ /dev/null @@ -1,144 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. -// -// See port_example.h for documentation for the following types/functions. - -#ifndef STORAGE_LEVELDB_PORT_PORT_POSIX_H_ -#define STORAGE_LEVELDB_PORT_PORT_POSIX_H_ - -#undef PLATFORM_IS_LITTLE_ENDIAN -#if defined(OS_MACOSX) - #include - #if defined(__DARWIN_LITTLE_ENDIAN) && defined(__DARWIN_BYTE_ORDER) - #define PLATFORM_IS_LITTLE_ENDIAN \ - (__DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN) - #endif -#elif defined(OS_SOLARIS) - #include - #ifdef _LITTLE_ENDIAN - #define PLATFORM_IS_LITTLE_ENDIAN true - #else - #define PLATFORM_IS_LITTLE_ENDIAN false - #endif -#elif defined(OS_FREEBSD) || defined(OS_OPENBSD) || defined(OS_NETBSD) ||\ - defined(OS_DRAGONFLYBSD) || defined(OS_ANDROID) - #include - #include -#else - #include -#endif -#include -#ifdef SNAPPY -#include -#endif -#include -#include -#include "port/atomic_pointer.h" - -#ifndef PLATFORM_IS_LITTLE_ENDIAN -#define PLATFORM_IS_LITTLE_ENDIAN (__BYTE_ORDER == __LITTLE_ENDIAN) -#endif - -#if defined(OS_MACOSX) || defined(OS_SOLARIS) || defined(OS_FREEBSD) ||\ - defined(OS_NETBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLYBSD) ||\ - defined(OS_ANDROID) -// Use fread/fwrite/fflush on platforms without _unlocked variants -#define fread_unlocked fread -#define fwrite_unlocked fwrite -#define fflush_unlocked fflush -#endif - -#if defined(OS_MACOSX) || defined(OS_FREEBSD) ||\ - defined(OS_OPENBSD) || defined(OS_DRAGONFLYBSD) -// Use fsync() on platforms without fdatasync() -#define fdatasync fsync -#endif - -#if defined(OS_ANDROID) && __ANDROID_API__ < 9 -// fdatasync() was only introduced in API level 9 on Android. Use fsync() -// when targetting older platforms. -#define fdatasync fsync -#endif - -namespace leveldb { -namespace port { - -static const bool kLittleEndian = PLATFORM_IS_LITTLE_ENDIAN; -#undef PLATFORM_IS_LITTLE_ENDIAN - -class CondVar; - -class Mutex { - public: - Mutex(); - ~Mutex(); - - void Lock(); - void Unlock(); - void AssertHeld() { } - - private: - friend class CondVar; - pthread_mutex_t mu_; - - // No copying - Mutex(const Mutex&); - void operator=(const Mutex&); -}; - -class CondVar { - public: - explicit CondVar(Mutex* mu); - ~CondVar(); - void Wait(); - void Signal(); - void SignalAll(); - private: - pthread_cond_t cv_; - Mutex* mu_; -}; - -typedef pthread_once_t OnceType; -#define LEVELDB_ONCE_INIT PTHREAD_ONCE_INIT -extern void InitOnce(OnceType* once, void (*initializer)()); - -inline bool Snappy_Compress(const char* input, size_t length, - ::std::string* output) { -#ifdef SNAPPY - output->resize(snappy::MaxCompressedLength(length)); - size_t outlen; - snappy::RawCompress(input, length, &(*output)[0], &outlen); - output->resize(outlen); - return true; -#endif - - return false; -} - -inline bool Snappy_GetUncompressedLength(const char* input, size_t length, - size_t* result) { -#ifdef SNAPPY - return snappy::GetUncompressedLength(input, length, result); -#else - return false; -#endif -} - -inline bool Snappy_Uncompress(const char* input, size_t length, - char* output) { -#ifdef SNAPPY - return snappy::RawUncompress(input, length, output); -#else - return false; -#endif -} - -inline bool GetHeapProfile(void (*func)(void*, const char*, int), void* arg) { - return false; -} - -} // namespace port -} // namespace leveldb - -#endif // STORAGE_LEVELDB_PORT_PORT_POSIX_H_ diff --git a/leveldb/port/win/stdint.h b/leveldb/port/win/stdint.h deleted file mode 100644 index 39edd0d..0000000 --- a/leveldb/port/win/stdint.h +++ /dev/null @@ -1,24 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -// MSVC didn't ship with this file until the 2010 version. - -#ifndef STORAGE_LEVELDB_PORT_WIN_STDINT_H_ -#define STORAGE_LEVELDB_PORT_WIN_STDINT_H_ - -#if !defined(_MSC_VER) -#error This file should only be included when compiling with MSVC. -#endif - -// Define C99 equivalent types. -typedef signed char int8_t; -typedef signed short int16_t; -typedef signed int int32_t; -typedef signed long long int64_t; -typedef unsigned char uint8_t; -typedef unsigned short uint16_t; -typedef unsigned int uint32_t; -typedef unsigned long long uint64_t; - -#endif // STORAGE_LEVELDB_PORT_WIN_STDINT_H_ diff --git a/leveldb/table/block.cc b/leveldb/table/block.cc deleted file mode 100644 index 199d453..0000000 --- a/leveldb/table/block.cc +++ /dev/null @@ -1,267 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. -// -// Decodes the blocks generated by block_builder.cc. - -#include "table/block.h" - -#include -#include -#include "leveldb/comparator.h" -#include "table/format.h" -#include "util/coding.h" -#include "util/logging.h" - -namespace leveldb { - -inline uint32_t Block::NumRestarts() const { - assert(size_ >= 2*sizeof(uint32_t)); - return DecodeFixed32(data_ + size_ - sizeof(uint32_t)); -} - -Block::Block(const BlockContents& contents) - : data_(contents.data.data()), - size_(contents.data.size()), - owned_(contents.heap_allocated) { - if (size_ < sizeof(uint32_t)) { - size_ = 0; // Error marker - } else { - restart_offset_ = size_ - (1 + NumRestarts()) * sizeof(uint32_t); - if (restart_offset_ > size_ - sizeof(uint32_t)) { - // The size is too small for NumRestarts() and therefore - // restart_offset_ wrapped around. - size_ = 0; - } - } -} - -Block::~Block() { - if (owned_) { - delete[] data_; - } -} - -// Helper routine: decode the next block entry starting at "p", -// storing the number of shared key bytes, non_shared key bytes, -// and the length of the value in "*shared", "*non_shared", and -// "*value_length", respectively. Will not derefence past "limit". -// -// If any errors are detected, returns NULL. Otherwise, returns a -// pointer to the key delta (just past the three decoded values). -static inline const char* DecodeEntry(const char* p, const char* limit, - uint32_t* shared, - uint32_t* non_shared, - uint32_t* value_length) { - if (limit - p < 3) return NULL; - *shared = reinterpret_cast(p)[0]; - *non_shared = reinterpret_cast(p)[1]; - *value_length = reinterpret_cast(p)[2]; - if ((*shared | *non_shared | *value_length) < 128) { - // Fast path: all three values are encoded in one byte each - p += 3; - } else { - if ((p = GetVarint32Ptr(p, limit, shared)) == NULL) return NULL; - if ((p = GetVarint32Ptr(p, limit, non_shared)) == NULL) return NULL; - if ((p = GetVarint32Ptr(p, limit, value_length)) == NULL) return NULL; - } - - if (static_cast(limit - p) < (*non_shared + *value_length)) { - return NULL; - } - return p; -} - -class Block::Iter : public Iterator { - private: - const Comparator* const comparator_; - const char* const data_; // underlying block contents - uint32_t const restarts_; // Offset of restart array (list of fixed32) - uint32_t const num_restarts_; // Number of uint32_t entries in restart array - - // current_ is offset in data_ of current entry. >= restarts_ if !Valid - uint32_t current_; - uint32_t restart_index_; // Index of restart block in which current_ falls - std::string key_; - Slice value_; - Status status_; - - inline int Compare(const Slice& a, const Slice& b) const { - return comparator_->Compare(a, b); - } - - // Return the offset in data_ just past the end of the current entry. - inline uint32_t NextEntryOffset() const { - return (value_.data() + value_.size()) - data_; - } - - uint32_t GetRestartPoint(uint32_t index) { - assert(index < num_restarts_); - return DecodeFixed32(data_ + restarts_ + index * sizeof(uint32_t)); - } - - void SeekToRestartPoint(uint32_t index) { - key_.clear(); - restart_index_ = index; - // current_ will be fixed by ParseNextKey(); - - // ParseNextKey() starts at the end of value_, so set value_ accordingly - uint32_t offset = GetRestartPoint(index); - value_ = Slice(data_ + offset, 0); - } - - public: - Iter(const Comparator* comparator, - const char* data, - uint32_t restarts, - uint32_t num_restarts) - : comparator_(comparator), - data_(data), - restarts_(restarts), - num_restarts_(num_restarts), - current_(restarts_), - restart_index_(num_restarts_) { - assert(num_restarts_ > 0); - } - - virtual bool Valid() const { return current_ < restarts_; } - virtual Status status() const { return status_; } - virtual Slice key() const { - assert(Valid()); - return key_; - } - virtual Slice value() const { - assert(Valid()); - return value_; - } - - virtual void Next() { - assert(Valid()); - ParseNextKey(); - } - - virtual void Prev() { - assert(Valid()); - - // Scan backwards to a restart point before current_ - const uint32_t original = current_; - while (GetRestartPoint(restart_index_) >= original) { - if (restart_index_ == 0) { - // No more entries - current_ = restarts_; - restart_index_ = num_restarts_; - return; - } - restart_index_--; - } - - SeekToRestartPoint(restart_index_); - do { - // Loop until end of current entry hits the start of original entry - } while (ParseNextKey() && NextEntryOffset() < original); - } - - virtual void Seek(const Slice& target) { - // Binary search in restart array to find the first restart point - // with a key >= target - uint32_t left = 0; - uint32_t right = num_restarts_ - 1; - while (left < right) { - uint32_t mid = (left + right + 1) / 2; - uint32_t region_offset = GetRestartPoint(mid); - uint32_t shared, non_shared, value_length; - const char* key_ptr = DecodeEntry(data_ + region_offset, - data_ + restarts_, - &shared, &non_shared, &value_length); - if (key_ptr == NULL || (shared != 0)) { - CorruptionError(); - return; - } - Slice mid_key(key_ptr, non_shared); - if (Compare(mid_key, target) < 0) { - // Key at "mid" is smaller than "target". Therefore all - // blocks before "mid" are uninteresting. - left = mid; - } else { - // Key at "mid" is >= "target". Therefore all blocks at or - // after "mid" are uninteresting. - right = mid - 1; - } - } - - // Linear search (within restart block) for first key >= target - SeekToRestartPoint(left); - while (true) { - if (!ParseNextKey()) { - return; - } - if (Compare(key_, target) >= 0) { - return; - } - } - } - - virtual void SeekToFirst() { - SeekToRestartPoint(0); - ParseNextKey(); - } - - virtual void SeekToLast() { - SeekToRestartPoint(num_restarts_ - 1); - while (ParseNextKey() && NextEntryOffset() < restarts_) { - // Keep skipping - } - } - - private: - void CorruptionError() { - current_ = restarts_; - restart_index_ = num_restarts_; - status_ = Status::Corruption("bad entry in block"); - key_.clear(); - value_.clear(); - } - - bool ParseNextKey() { - current_ = NextEntryOffset(); - const char* p = data_ + current_; - const char* limit = data_ + restarts_; // Restarts come right after data - if (p >= limit) { - // No more entries to return. Mark as invalid. - current_ = restarts_; - restart_index_ = num_restarts_; - return false; - } - - // Decode next entry - uint32_t shared, non_shared, value_length; - p = DecodeEntry(p, limit, &shared, &non_shared, &value_length); - if (p == NULL || key_.size() < shared) { - CorruptionError(); - return false; - } else { - key_.resize(shared); - key_.append(p, non_shared); - value_ = Slice(p + non_shared, value_length); - while (restart_index_ + 1 < num_restarts_ && - GetRestartPoint(restart_index_ + 1) < current_) { - ++restart_index_; - } - return true; - } - } -}; - -Iterator* Block::NewIterator(const Comparator* cmp) { - if (size_ < 2*sizeof(uint32_t)) { - return NewErrorIterator(Status::Corruption("bad block contents")); - } - const uint32_t num_restarts = NumRestarts(); - if (num_restarts == 0) { - return NewEmptyIterator(); - } else { - return new Iter(cmp, data_, restart_offset_, num_restarts); - } -} - -} // namespace leveldb diff --git a/leveldb/table/block.h b/leveldb/table/block.h deleted file mode 100644 index 2493eb9..0000000 --- a/leveldb/table/block.h +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#ifndef STORAGE_LEVELDB_TABLE_BLOCK_H_ -#define STORAGE_LEVELDB_TABLE_BLOCK_H_ - -#include -#include -#include "leveldb/iterator.h" - -namespace leveldb { - -struct BlockContents; -class Comparator; - -class Block { - public: - // Initialize the block with the specified contents. - explicit Block(const BlockContents& contents); - - ~Block(); - - size_t size() const { return size_; } - Iterator* NewIterator(const Comparator* comparator); - - private: - uint32_t NumRestarts() const; - - const char* data_; - size_t size_; - uint32_t restart_offset_; // Offset in data_ of restart array - bool owned_; // Block owns data_[] - - // No copying allowed - Block(const Block&); - void operator=(const Block&); - - class Iter; -}; - -} // namespace leveldb - -#endif // STORAGE_LEVELDB_TABLE_BLOCK_H_ diff --git a/leveldb/table/block_builder.cc b/leveldb/table/block_builder.cc deleted file mode 100644 index db660cd..0000000 --- a/leveldb/table/block_builder.cc +++ /dev/null @@ -1,109 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. -// -// BlockBuilder generates blocks where keys are prefix-compressed: -// -// When we store a key, we drop the prefix shared with the previous -// string. This helps reduce the space requirement significantly. -// Furthermore, once every K keys, we do not apply the prefix -// compression and store the entire key. We call this a "restart -// point". The tail end of the block stores the offsets of all of the -// restart points, and can be used to do a binary search when looking -// for a particular key. Values are stored as-is (without compression) -// immediately following the corresponding key. -// -// An entry for a particular key-value pair has the form: -// shared_bytes: varint32 -// unshared_bytes: varint32 -// value_length: varint32 -// key_delta: char[unshared_bytes] -// value: char[value_length] -// shared_bytes == 0 for restart points. -// -// The trailer of the block has the form: -// restarts: uint32[num_restarts] -// num_restarts: uint32 -// restarts[i] contains the offset within the block of the ith restart point. - -#include "table/block_builder.h" - -#include -#include -#include "leveldb/comparator.h" -#include "leveldb/table_builder.h" -#include "util/coding.h" - -namespace leveldb { - -BlockBuilder::BlockBuilder(const Options* options) - : options_(options), - restarts_(), - counter_(0), - finished_(false) { - assert(options->block_restart_interval >= 1); - restarts_.push_back(0); // First restart point is at offset 0 -} - -void BlockBuilder::Reset() { - buffer_.clear(); - restarts_.clear(); - restarts_.push_back(0); // First restart point is at offset 0 - counter_ = 0; - finished_ = false; - last_key_.clear(); -} - -size_t BlockBuilder::CurrentSizeEstimate() const { - return (buffer_.size() + // Raw data buffer - restarts_.size() * sizeof(uint32_t) + // Restart array - sizeof(uint32_t)); // Restart array length -} - -Slice BlockBuilder::Finish() { - // Append restart array - for (size_t i = 0; i < restarts_.size(); i++) { - PutFixed32(&buffer_, restarts_[i]); - } - PutFixed32(&buffer_, restarts_.size()); - finished_ = true; - return Slice(buffer_); -} - -void BlockBuilder::Add(const Slice& key, const Slice& value) { - Slice last_key_piece(last_key_); - assert(!finished_); - assert(counter_ <= options_->block_restart_interval); - assert(buffer_.empty() // No values yet? - || options_->comparator->Compare(key, last_key_piece) > 0); - size_t shared = 0; - if (counter_ < options_->block_restart_interval) { - // See how much sharing to do with previous string - const size_t min_length = std::min(last_key_piece.size(), key.size()); - while ((shared < min_length) && (last_key_piece[shared] == key[shared])) { - shared++; - } - } else { - // Restart compression - restarts_.push_back(buffer_.size()); - counter_ = 0; - } - const size_t non_shared = key.size() - shared; - - // Add "" to buffer_ - PutVarint32(&buffer_, shared); - PutVarint32(&buffer_, non_shared); - PutVarint32(&buffer_, value.size()); - - // Add string delta to buffer_ followed by value - buffer_.append(key.data() + shared, non_shared); - buffer_.append(value.data(), value.size()); - - // Update state - last_key_.resize(shared); - last_key_.append(key.data() + shared, non_shared); - assert(Slice(last_key_) == key); - counter_++; -} - -} // namespace leveldb diff --git a/leveldb/table/block_builder.h b/leveldb/table/block_builder.h deleted file mode 100644 index 5b545bd..0000000 --- a/leveldb/table/block_builder.h +++ /dev/null @@ -1,57 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#ifndef STORAGE_LEVELDB_TABLE_BLOCK_BUILDER_H_ -#define STORAGE_LEVELDB_TABLE_BLOCK_BUILDER_H_ - -#include - -#include -#include "leveldb/slice.h" - -namespace leveldb { - -struct Options; - -class BlockBuilder { - public: - explicit BlockBuilder(const Options* options); - - // Reset the contents as if the BlockBuilder was just constructed. - void Reset(); - - // REQUIRES: Finish() has not been callled since the last call to Reset(). - // REQUIRES: key is larger than any previously added key - void Add(const Slice& key, const Slice& value); - - // Finish building the block and return a slice that refers to the - // block contents. The returned slice will remain valid for the - // lifetime of this builder or until Reset() is called. - Slice Finish(); - - // Returns an estimate of the current (uncompressed) size of the block - // we are building. - size_t CurrentSizeEstimate() const; - - // Return true iff no entries have been added since the last Reset() - bool empty() const { - return buffer_.empty(); - } - - private: - const Options* options_; - std::string buffer_; // Destination buffer - std::vector restarts_; // Restart points - int counter_; // Number of entries emitted since restart - bool finished_; // Has Finish() been called? - std::string last_key_; - - // No copying allowed - BlockBuilder(const BlockBuilder&); - void operator=(const BlockBuilder&); -}; - -} // namespace leveldb - -#endif // STORAGE_LEVELDB_TABLE_BLOCK_BUILDER_H_ diff --git a/leveldb/table/filter_block.cc b/leveldb/table/filter_block.cc deleted file mode 100644 index 203e15c..0000000 --- a/leveldb/table/filter_block.cc +++ /dev/null @@ -1,111 +0,0 @@ -// Copyright (c) 2012 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "table/filter_block.h" - -#include "leveldb/filter_policy.h" -#include "util/coding.h" - -namespace leveldb { - -// See doc/table_format.txt for an explanation of the filter block format. - -// Generate new filter every 2KB of data -static const size_t kFilterBaseLg = 11; -static const size_t kFilterBase = 1 << kFilterBaseLg; - -FilterBlockBuilder::FilterBlockBuilder(const FilterPolicy* policy) - : policy_(policy) { -} - -void FilterBlockBuilder::StartBlock(uint64_t block_offset) { - uint64_t filter_index = (block_offset / kFilterBase); - assert(filter_index >= filter_offsets_.size()); - while (filter_index > filter_offsets_.size()) { - GenerateFilter(); - } -} - -void FilterBlockBuilder::AddKey(const Slice& key) { - Slice k = key; - start_.push_back(keys_.size()); - keys_.append(k.data(), k.size()); -} - -Slice FilterBlockBuilder::Finish() { - if (!start_.empty()) { - GenerateFilter(); - } - - // Append array of per-filter offsets - const uint32_t array_offset = result_.size(); - for (size_t i = 0; i < filter_offsets_.size(); i++) { - PutFixed32(&result_, filter_offsets_[i]); - } - - PutFixed32(&result_, array_offset); - result_.push_back(kFilterBaseLg); // Save encoding parameter in result - return Slice(result_); -} - -void FilterBlockBuilder::GenerateFilter() { - const size_t num_keys = start_.size(); - if (num_keys == 0) { - // Fast path if there are no keys for this filter - filter_offsets_.push_back(result_.size()); - return; - } - - // Make list of keys from flattened key structure - start_.push_back(keys_.size()); // Simplify length computation - tmp_keys_.resize(num_keys); - for (size_t i = 0; i < num_keys; i++) { - const char* base = keys_.data() + start_[i]; - size_t length = start_[i+1] - start_[i]; - tmp_keys_[i] = Slice(base, length); - } - - // Generate filter for current set of keys and append to result_. - filter_offsets_.push_back(result_.size()); - policy_->CreateFilter(&tmp_keys_[0], num_keys, &result_); - - tmp_keys_.clear(); - keys_.clear(); - start_.clear(); -} - -FilterBlockReader::FilterBlockReader(const FilterPolicy* policy, - const Slice& contents) - : policy_(policy), - data_(NULL), - offset_(NULL), - num_(0), - base_lg_(0) { - size_t n = contents.size(); - if (n < 5) return; // 1 byte for base_lg_ and 4 for start of offset array - base_lg_ = contents[n-1]; - uint32_t last_word = DecodeFixed32(contents.data() + n - 5); - if (last_word > n - 5) return; - data_ = contents.data(); - offset_ = data_ + last_word; - num_ = (n - 5 - last_word) / 4; -} - -bool FilterBlockReader::KeyMayMatch(uint64_t block_offset, const Slice& key) { - uint64_t index = block_offset >> base_lg_; - if (index < num_) { - uint32_t start = DecodeFixed32(offset_ + index*4); - uint32_t limit = DecodeFixed32(offset_ + index*4 + 4); - if (start <= limit && limit <= (offset_ - data_)) { - Slice filter = Slice(data_ + start, limit - start); - return policy_->KeyMayMatch(key, filter); - } else if (start == limit) { - // Empty filters do not match any keys - return false; - } - } - return true; // Errors are treated as potential matches -} - -} diff --git a/leveldb/table/filter_block.h b/leveldb/table/filter_block.h deleted file mode 100644 index c67d010..0000000 --- a/leveldb/table/filter_block.h +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright (c) 2012 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. -// -// A filter block is stored near the end of a Table file. It contains -// filters (e.g., bloom filters) for all data blocks in the table combined -// into a single filter block. - -#ifndef STORAGE_LEVELDB_TABLE_FILTER_BLOCK_H_ -#define STORAGE_LEVELDB_TABLE_FILTER_BLOCK_H_ - -#include -#include -#include -#include -#include "leveldb/slice.h" -#include "util/hash.h" - -namespace leveldb { - -class FilterPolicy; - -// A FilterBlockBuilder is used to construct all of the filters for a -// particular Table. It generates a single string which is stored as -// a special block in the Table. -// -// The sequence of calls to FilterBlockBuilder must match the regexp: -// (StartBlock AddKey*)* Finish -class FilterBlockBuilder { - public: - explicit FilterBlockBuilder(const FilterPolicy*); - - void StartBlock(uint64_t block_offset); - void AddKey(const Slice& key); - Slice Finish(); - - private: - void GenerateFilter(); - - const FilterPolicy* policy_; - std::string keys_; // Flattened key contents - std::vector start_; // Starting index in keys_ of each key - std::string result_; // Filter data computed so far - std::vector tmp_keys_; // policy_->CreateFilter() argument - std::vector filter_offsets_; - - // No copying allowed - FilterBlockBuilder(const FilterBlockBuilder&); - void operator=(const FilterBlockBuilder&); -}; - -class FilterBlockReader { - public: - // REQUIRES: "contents" and *policy must stay live while *this is live. - FilterBlockReader(const FilterPolicy* policy, const Slice& contents); - bool KeyMayMatch(uint64_t block_offset, const Slice& key); - - private: - const FilterPolicy* policy_; - const char* data_; // Pointer to filter data (at block-start) - const char* offset_; // Pointer to beginning of offset array (at block-end) - size_t num_; // Number of entries in offset array - size_t base_lg_; // Encoding parameter (see kFilterBaseLg in .cc file) -}; - -} - -#endif // STORAGE_LEVELDB_TABLE_FILTER_BLOCK_H_ diff --git a/leveldb/table/filter_block_test.cc b/leveldb/table/filter_block_test.cc deleted file mode 100644 index 3a2a07c..0000000 --- a/leveldb/table/filter_block_test.cc +++ /dev/null @@ -1,128 +0,0 @@ -// Copyright (c) 2012 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "table/filter_block.h" - -#include "leveldb/filter_policy.h" -#include "util/coding.h" -#include "util/hash.h" -#include "util/logging.h" -#include "util/testharness.h" -#include "util/testutil.h" - -namespace leveldb { - -// For testing: emit an array with one hash value per key -class TestHashFilter : public FilterPolicy { - public: - virtual const char* Name() const { - return "TestHashFilter"; - } - - virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const { - for (int i = 0; i < n; i++) { - uint32_t h = Hash(keys[i].data(), keys[i].size(), 1); - PutFixed32(dst, h); - } - } - - virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const { - uint32_t h = Hash(key.data(), key.size(), 1); - for (int i = 0; i + 4 <= filter.size(); i += 4) { - if (h == DecodeFixed32(filter.data() + i)) { - return true; - } - } - return false; - } -}; - -class FilterBlockTest { - public: - TestHashFilter policy_; -}; - -TEST(FilterBlockTest, EmptyBuilder) { - FilterBlockBuilder builder(&policy_); - Slice block = builder.Finish(); - ASSERT_EQ("\\x00\\x00\\x00\\x00\\x0b", EscapeString(block)); - FilterBlockReader reader(&policy_, block); - ASSERT_TRUE(reader.KeyMayMatch(0, "foo")); - ASSERT_TRUE(reader.KeyMayMatch(100000, "foo")); -} - -TEST(FilterBlockTest, SingleChunk) { - FilterBlockBuilder builder(&policy_); - builder.StartBlock(100); - builder.AddKey("foo"); - builder.AddKey("bar"); - builder.AddKey("box"); - builder.StartBlock(200); - builder.AddKey("box"); - builder.StartBlock(300); - builder.AddKey("hello"); - Slice block = builder.Finish(); - FilterBlockReader reader(&policy_, block); - ASSERT_TRUE(reader.KeyMayMatch(100, "foo")); - ASSERT_TRUE(reader.KeyMayMatch(100, "bar")); - ASSERT_TRUE(reader.KeyMayMatch(100, "box")); - ASSERT_TRUE(reader.KeyMayMatch(100, "hello")); - ASSERT_TRUE(reader.KeyMayMatch(100, "foo")); - ASSERT_TRUE(! reader.KeyMayMatch(100, "missing")); - ASSERT_TRUE(! reader.KeyMayMatch(100, "other")); -} - -TEST(FilterBlockTest, MultiChunk) { - FilterBlockBuilder builder(&policy_); - - // First filter - builder.StartBlock(0); - builder.AddKey("foo"); - builder.StartBlock(2000); - builder.AddKey("bar"); - - // Second filter - builder.StartBlock(3100); - builder.AddKey("box"); - - // Third filter is empty - - // Last filter - builder.StartBlock(9000); - builder.AddKey("box"); - builder.AddKey("hello"); - - Slice block = builder.Finish(); - FilterBlockReader reader(&policy_, block); - - // Check first filter - ASSERT_TRUE(reader.KeyMayMatch(0, "foo")); - ASSERT_TRUE(reader.KeyMayMatch(2000, "bar")); - ASSERT_TRUE(! reader.KeyMayMatch(0, "box")); - ASSERT_TRUE(! reader.KeyMayMatch(0, "hello")); - - // Check second filter - ASSERT_TRUE(reader.KeyMayMatch(3100, "box")); - ASSERT_TRUE(! reader.KeyMayMatch(3100, "foo")); - ASSERT_TRUE(! reader.KeyMayMatch(3100, "bar")); - ASSERT_TRUE(! reader.KeyMayMatch(3100, "hello")); - - // Check third filter (empty) - ASSERT_TRUE(! reader.KeyMayMatch(4100, "foo")); - ASSERT_TRUE(! reader.KeyMayMatch(4100, "bar")); - ASSERT_TRUE(! reader.KeyMayMatch(4100, "box")); - ASSERT_TRUE(! reader.KeyMayMatch(4100, "hello")); - - // Check last filter - ASSERT_TRUE(reader.KeyMayMatch(9000, "box")); - ASSERT_TRUE(reader.KeyMayMatch(9000, "hello")); - ASSERT_TRUE(! reader.KeyMayMatch(9000, "foo")); - ASSERT_TRUE(! reader.KeyMayMatch(9000, "bar")); -} - -} // namespace leveldb - -int main(int argc, char** argv) { - return leveldb::test::RunAllTests(); -} diff --git a/leveldb/table/format.cc b/leveldb/table/format.cc deleted file mode 100644 index cda1dec..0000000 --- a/leveldb/table/format.cc +++ /dev/null @@ -1,145 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "table/format.h" - -#include "leveldb/env.h" -#include "port/port.h" -#include "table/block.h" -#include "util/coding.h" -#include "util/crc32c.h" - -namespace leveldb { - -void BlockHandle::EncodeTo(std::string* dst) const { - // Sanity check that all fields have been set - assert(offset_ != ~static_cast(0)); - assert(size_ != ~static_cast(0)); - PutVarint64(dst, offset_); - PutVarint64(dst, size_); -} - -Status BlockHandle::DecodeFrom(Slice* input) { - if (GetVarint64(input, &offset_) && - GetVarint64(input, &size_)) { - return Status::OK(); - } else { - return Status::Corruption("bad block handle"); - } -} - -void Footer::EncodeTo(std::string* dst) const { -#ifndef NDEBUG - const size_t original_size = dst->size(); -#endif - metaindex_handle_.EncodeTo(dst); - index_handle_.EncodeTo(dst); - dst->resize(2 * BlockHandle::kMaxEncodedLength); // Padding - PutFixed32(dst, static_cast(kTableMagicNumber & 0xffffffffu)); - PutFixed32(dst, static_cast(kTableMagicNumber >> 32)); - assert(dst->size() == original_size + kEncodedLength); -} - -Status Footer::DecodeFrom(Slice* input) { - const char* magic_ptr = input->data() + kEncodedLength - 8; - const uint32_t magic_lo = DecodeFixed32(magic_ptr); - const uint32_t magic_hi = DecodeFixed32(magic_ptr + 4); - const uint64_t magic = ((static_cast(magic_hi) << 32) | - (static_cast(magic_lo))); - if (magic != kTableMagicNumber) { - return Status::InvalidArgument("not an sstable (bad magic number)"); - } - - Status result = metaindex_handle_.DecodeFrom(input); - if (result.ok()) { - result = index_handle_.DecodeFrom(input); - } - if (result.ok()) { - // We skip over any leftover data (just padding for now) in "input" - const char* end = magic_ptr + 8; - *input = Slice(end, input->data() + input->size() - end); - } - return result; -} - -Status ReadBlock(RandomAccessFile* file, - const ReadOptions& options, - const BlockHandle& handle, - BlockContents* result) { - result->data = Slice(); - result->cachable = false; - result->heap_allocated = false; - - // Read the block contents as well as the type/crc footer. - // See table_builder.cc for the code that built this structure. - size_t n = static_cast(handle.size()); - char* buf = new char[n + kBlockTrailerSize]; - Slice contents; - Status s = file->Read(handle.offset(), n + kBlockTrailerSize, &contents, buf); - if (!s.ok()) { - delete[] buf; - return s; - } - if (contents.size() != n + kBlockTrailerSize) { - delete[] buf; - return Status::Corruption("truncated block read"); - } - - // Check the crc of the type and the block contents - const char* data = contents.data(); // Pointer to where Read put the data - if (options.verify_checksums) { - const uint32_t crc = crc32c::Unmask(DecodeFixed32(data + n + 1)); - const uint32_t actual = crc32c::Value(data, n + 1); - if (actual != crc) { - delete[] buf; - s = Status::Corruption("block checksum mismatch"); - return s; - } - } - - switch (data[n]) { - case kNoCompression: - if (data != buf) { - // File implementation gave us pointer to some other data. - // Use it directly under the assumption that it will be live - // while the file is open. - delete[] buf; - result->data = Slice(data, n); - result->heap_allocated = false; - result->cachable = false; // Do not double-cache - } else { - result->data = Slice(buf, n); - result->heap_allocated = true; - result->cachable = true; - } - - // Ok - break; - case kSnappyCompression: { - size_t ulength = 0; - if (!port::Snappy_GetUncompressedLength(data, n, &ulength)) { - delete[] buf; - return Status::Corruption("corrupted compressed block contents"); - } - char* ubuf = new char[ulength]; - if (!port::Snappy_Uncompress(data, n, ubuf)) { - delete[] buf; - delete[] ubuf; - return Status::Corruption("corrupted compressed block contents"); - } - delete[] buf; - result->data = Slice(ubuf, ulength); - result->heap_allocated = true; - result->cachable = true; - break; - } - default: - delete[] buf; - return Status::Corruption("bad block type"); - } - - return Status::OK(); -} - -} // namespace leveldb diff --git a/leveldb/table/format.h b/leveldb/table/format.h deleted file mode 100644 index 6c0b80c..0000000 --- a/leveldb/table/format.h +++ /dev/null @@ -1,108 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#ifndef STORAGE_LEVELDB_TABLE_FORMAT_H_ -#define STORAGE_LEVELDB_TABLE_FORMAT_H_ - -#include -#include -#include "leveldb/slice.h" -#include "leveldb/status.h" -#include "leveldb/table_builder.h" - -namespace leveldb { - -class Block; -class RandomAccessFile; -struct ReadOptions; - -// BlockHandle is a pointer to the extent of a file that stores a data -// block or a meta block. -class BlockHandle { - public: - BlockHandle(); - - // The offset of the block in the file. - uint64_t offset() const { return offset_; } - void set_offset(uint64_t offset) { offset_ = offset; } - - // The size of the stored block - uint64_t size() const { return size_; } - void set_size(uint64_t size) { size_ = size; } - - void EncodeTo(std::string* dst) const; - Status DecodeFrom(Slice* input); - - // Maximum encoding length of a BlockHandle - enum { kMaxEncodedLength = 10 + 10 }; - - private: - uint64_t offset_; - uint64_t size_; -}; - -// Footer encapsulates the fixed information stored at the tail -// end of every table file. -class Footer { - public: - Footer() { } - - // The block handle for the metaindex block of the table - const BlockHandle& metaindex_handle() const { return metaindex_handle_; } - void set_metaindex_handle(const BlockHandle& h) { metaindex_handle_ = h; } - - // The block handle for the index block of the table - const BlockHandle& index_handle() const { - return index_handle_; - } - void set_index_handle(const BlockHandle& h) { - index_handle_ = h; - } - - void EncodeTo(std::string* dst) const; - Status DecodeFrom(Slice* input); - - // Encoded length of a Footer. Note that the serialization of a - // Footer will always occupy exactly this many bytes. It consists - // of two block handles and a magic number. - enum { - kEncodedLength = 2*BlockHandle::kMaxEncodedLength + 8 - }; - - private: - BlockHandle metaindex_handle_; - BlockHandle index_handle_; -}; - -// kTableMagicNumber was picked by running -// echo http://code.google.com/p/leveldb/ | sha1sum -// and taking the leading 64 bits. -static const uint64_t kTableMagicNumber = 0xdb4775248b80fb57ull; - -// 1-byte type + 32-bit crc -static const size_t kBlockTrailerSize = 5; - -struct BlockContents { - Slice data; // Actual contents of data - bool cachable; // True iff data can be cached - bool heap_allocated; // True iff caller should delete[] data.data() -}; - -// Read the block identified by "handle" from "file". On failure -// return non-OK. On success fill *result and return OK. -extern Status ReadBlock(RandomAccessFile* file, - const ReadOptions& options, - const BlockHandle& handle, - BlockContents* result); - -// Implementation details follow. Clients should ignore, - -inline BlockHandle::BlockHandle() - : offset_(~static_cast(0)), - size_(~static_cast(0)) { -} - -} // namespace leveldb - -#endif // STORAGE_LEVELDB_TABLE_FORMAT_H_ diff --git a/leveldb/table/iterator.cc b/leveldb/table/iterator.cc deleted file mode 100644 index 3d1c87f..0000000 --- a/leveldb/table/iterator.cc +++ /dev/null @@ -1,67 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "leveldb/iterator.h" - -namespace leveldb { - -Iterator::Iterator() { - cleanup_.function = NULL; - cleanup_.next = NULL; -} - -Iterator::~Iterator() { - if (cleanup_.function != NULL) { - (*cleanup_.function)(cleanup_.arg1, cleanup_.arg2); - for (Cleanup* c = cleanup_.next; c != NULL; ) { - (*c->function)(c->arg1, c->arg2); - Cleanup* next = c->next; - delete c; - c = next; - } - } -} - -void Iterator::RegisterCleanup(CleanupFunction func, void* arg1, void* arg2) { - assert(func != NULL); - Cleanup* c; - if (cleanup_.function == NULL) { - c = &cleanup_; - } else { - c = new Cleanup; - c->next = cleanup_.next; - cleanup_.next = c; - } - c->function = func; - c->arg1 = arg1; - c->arg2 = arg2; -} - -namespace { -class EmptyIterator : public Iterator { - public: - EmptyIterator(const Status& s) : status_(s) { } - virtual bool Valid() const { return false; } - virtual void Seek(const Slice& target) { } - virtual void SeekToFirst() { } - virtual void SeekToLast() { } - virtual void Next() { assert(false); } - virtual void Prev() { assert(false); } - Slice key() const { assert(false); return Slice(); } - Slice value() const { assert(false); return Slice(); } - virtual Status status() const { return status_; } - private: - Status status_; -}; -} // namespace - -Iterator* NewEmptyIterator() { - return new EmptyIterator(Status::OK()); -} - -Iterator* NewErrorIterator(const Status& status) { - return new EmptyIterator(status); -} - -} // namespace leveldb diff --git a/leveldb/table/iterator_wrapper.h b/leveldb/table/iterator_wrapper.h deleted file mode 100644 index 9e16b3d..0000000 --- a/leveldb/table/iterator_wrapper.h +++ /dev/null @@ -1,63 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#ifndef STORAGE_LEVELDB_TABLE_ITERATOR_WRAPPER_H_ -#define STORAGE_LEVELDB_TABLE_ITERATOR_WRAPPER_H_ - -namespace leveldb { - -// A internal wrapper class with an interface similar to Iterator that -// caches the valid() and key() results for an underlying iterator. -// This can help avoid virtual function calls and also gives better -// cache locality. -class IteratorWrapper { - public: - IteratorWrapper(): iter_(NULL), valid_(false) { } - explicit IteratorWrapper(Iterator* iter): iter_(NULL) { - Set(iter); - } - ~IteratorWrapper() { delete iter_; } - Iterator* iter() const { return iter_; } - - // Takes ownership of "iter" and will delete it when destroyed, or - // when Set() is invoked again. - void Set(Iterator* iter) { - delete iter_; - iter_ = iter; - if (iter_ == NULL) { - valid_ = false; - } else { - Update(); - } - } - - - // Iterator interface methods - bool Valid() const { return valid_; } - Slice key() const { assert(Valid()); return key_; } - Slice value() const { assert(Valid()); return iter_->value(); } - // Methods below require iter() != NULL - Status status() const { assert(iter_); return iter_->status(); } - void Next() { assert(iter_); iter_->Next(); Update(); } - void Prev() { assert(iter_); iter_->Prev(); Update(); } - void Seek(const Slice& k) { assert(iter_); iter_->Seek(k); Update(); } - void SeekToFirst() { assert(iter_); iter_->SeekToFirst(); Update(); } - void SeekToLast() { assert(iter_); iter_->SeekToLast(); Update(); } - - private: - void Update() { - valid_ = iter_->Valid(); - if (valid_) { - key_ = iter_->key(); - } - } - - Iterator* iter_; - bool valid_; - Slice key_; -}; - -} // namespace leveldb - -#endif // STORAGE_LEVELDB_TABLE_ITERATOR_WRAPPER_H_ diff --git a/leveldb/table/merger.cc b/leveldb/table/merger.cc deleted file mode 100644 index 2dde4dc..0000000 --- a/leveldb/table/merger.cc +++ /dev/null @@ -1,197 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "table/merger.h" - -#include "leveldb/comparator.h" -#include "leveldb/iterator.h" -#include "table/iterator_wrapper.h" - -namespace leveldb { - -namespace { -class MergingIterator : public Iterator { - public: - MergingIterator(const Comparator* comparator, Iterator** children, int n) - : comparator_(comparator), - children_(new IteratorWrapper[n]), - n_(n), - current_(NULL), - direction_(kForward) { - for (int i = 0; i < n; i++) { - children_[i].Set(children[i]); - } - } - - virtual ~MergingIterator() { - delete[] children_; - } - - virtual bool Valid() const { - return (current_ != NULL); - } - - virtual void SeekToFirst() { - for (int i = 0; i < n_; i++) { - children_[i].SeekToFirst(); - } - FindSmallest(); - direction_ = kForward; - } - - virtual void SeekToLast() { - for (int i = 0; i < n_; i++) { - children_[i].SeekToLast(); - } - FindLargest(); - direction_ = kReverse; - } - - virtual void Seek(const Slice& target) { - for (int i = 0; i < n_; i++) { - children_[i].Seek(target); - } - FindSmallest(); - direction_ = kForward; - } - - virtual void Next() { - assert(Valid()); - - // Ensure that all children are positioned after key(). - // If we are moving in the forward direction, it is already - // true for all of the non-current_ children since current_ is - // the smallest child and key() == current_->key(). Otherwise, - // we explicitly position the non-current_ children. - if (direction_ != kForward) { - for (int i = 0; i < n_; i++) { - IteratorWrapper* child = &children_[i]; - if (child != current_) { - child->Seek(key()); - if (child->Valid() && - comparator_->Compare(key(), child->key()) == 0) { - child->Next(); - } - } - } - direction_ = kForward; - } - - current_->Next(); - FindSmallest(); - } - - virtual void Prev() { - assert(Valid()); - - // Ensure that all children are positioned before key(). - // If we are moving in the reverse direction, it is already - // true for all of the non-current_ children since current_ is - // the largest child and key() == current_->key(). Otherwise, - // we explicitly position the non-current_ children. - if (direction_ != kReverse) { - for (int i = 0; i < n_; i++) { - IteratorWrapper* child = &children_[i]; - if (child != current_) { - child->Seek(key()); - if (child->Valid()) { - // Child is at first entry >= key(). Step back one to be < key() - child->Prev(); - } else { - // Child has no entries >= key(). Position at last entry. - child->SeekToLast(); - } - } - } - direction_ = kReverse; - } - - current_->Prev(); - FindLargest(); - } - - virtual Slice key() const { - assert(Valid()); - return current_->key(); - } - - virtual Slice value() const { - assert(Valid()); - return current_->value(); - } - - virtual Status status() const { - Status status; - for (int i = 0; i < n_; i++) { - status = children_[i].status(); - if (!status.ok()) { - break; - } - } - return status; - } - - private: - void FindSmallest(); - void FindLargest(); - - // We might want to use a heap in case there are lots of children. - // For now we use a simple array since we expect a very small number - // of children in leveldb. - const Comparator* comparator_; - IteratorWrapper* children_; - int n_; - IteratorWrapper* current_; - - // Which direction is the iterator moving? - enum Direction { - kForward, - kReverse - }; - Direction direction_; -}; - -void MergingIterator::FindSmallest() { - IteratorWrapper* smallest = NULL; - for (int i = 0; i < n_; i++) { - IteratorWrapper* child = &children_[i]; - if (child->Valid()) { - if (smallest == NULL) { - smallest = child; - } else if (comparator_->Compare(child->key(), smallest->key()) < 0) { - smallest = child; - } - } - } - current_ = smallest; -} - -void MergingIterator::FindLargest() { - IteratorWrapper* largest = NULL; - for (int i = n_-1; i >= 0; i--) { - IteratorWrapper* child = &children_[i]; - if (child->Valid()) { - if (largest == NULL) { - largest = child; - } else if (comparator_->Compare(child->key(), largest->key()) > 0) { - largest = child; - } - } - } - current_ = largest; -} -} // namespace - -Iterator* NewMergingIterator(const Comparator* cmp, Iterator** list, int n) { - assert(n >= 0); - if (n == 0) { - return NewEmptyIterator(); - } else if (n == 1) { - return list[0]; - } else { - return new MergingIterator(cmp, list, n); - } -} - -} // namespace leveldb diff --git a/leveldb/table/merger.h b/leveldb/table/merger.h deleted file mode 100644 index 91ddd80..0000000 --- a/leveldb/table/merger.h +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#ifndef STORAGE_LEVELDB_TABLE_MERGER_H_ -#define STORAGE_LEVELDB_TABLE_MERGER_H_ - -namespace leveldb { - -class Comparator; -class Iterator; - -// Return an iterator that provided the union of the data in -// children[0,n-1]. Takes ownership of the child iterators and -// will delete them when the result iterator is deleted. -// -// The result does no duplicate suppression. I.e., if a particular -// key is present in K child iterators, it will be yielded K times. -// -// REQUIRES: n >= 0 -extern Iterator* NewMergingIterator( - const Comparator* comparator, Iterator** children, int n); - -} // namespace leveldb - -#endif // STORAGE_LEVELDB_TABLE_MERGER_H_ diff --git a/leveldb/table/table.cc b/leveldb/table/table.cc deleted file mode 100644 index dbd6d3a..0000000 --- a/leveldb/table/table.cc +++ /dev/null @@ -1,276 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "leveldb/table.h" - -#include "leveldb/cache.h" -#include "leveldb/comparator.h" -#include "leveldb/env.h" -#include "leveldb/filter_policy.h" -#include "leveldb/options.h" -#include "table/block.h" -#include "table/filter_block.h" -#include "table/format.h" -#include "table/two_level_iterator.h" -#include "util/coding.h" - -namespace leveldb { - -struct Table::Rep { - ~Rep() { - delete filter; - delete [] filter_data; - delete index_block; - } - - Options options; - Status status; - RandomAccessFile* file; - uint64_t cache_id; - FilterBlockReader* filter; - const char* filter_data; - - BlockHandle metaindex_handle; // Handle to metaindex_block: saved from footer - Block* index_block; -}; - -Status Table::Open(const Options& options, - RandomAccessFile* file, - uint64_t size, - Table** table) { - *table = NULL; - if (size < Footer::kEncodedLength) { - return Status::InvalidArgument("file is too short to be an sstable"); - } - - char footer_space[Footer::kEncodedLength]; - Slice footer_input; - Status s = file->Read(size - Footer::kEncodedLength, Footer::kEncodedLength, - &footer_input, footer_space); - if (!s.ok()) return s; - - Footer footer; - s = footer.DecodeFrom(&footer_input); - if (!s.ok()) return s; - - // Read the index block - BlockContents contents; - Block* index_block = NULL; - if (s.ok()) { - s = ReadBlock(file, ReadOptions(), footer.index_handle(), &contents); - if (s.ok()) { - index_block = new Block(contents); - } - } - - if (s.ok()) { - // We've successfully read the footer and the index block: we're - // ready to serve requests. - Rep* rep = new Table::Rep; - rep->options = options; - rep->file = file; - rep->metaindex_handle = footer.metaindex_handle(); - rep->index_block = index_block; - rep->cache_id = (options.block_cache ? options.block_cache->NewId() : 0); - rep->filter_data = NULL; - rep->filter = NULL; - *table = new Table(rep); - (*table)->ReadMeta(footer); - } else { - if (index_block) delete index_block; - } - - return s; -} - -void Table::ReadMeta(const Footer& footer) { - if (rep_->options.filter_policy == NULL) { - return; // Do not need any metadata - } - - // TODO(sanjay): Skip this if footer.metaindex_handle() size indicates - // it is an empty block. - ReadOptions opt; - BlockContents contents; - if (!ReadBlock(rep_->file, opt, footer.metaindex_handle(), &contents).ok()) { - // Do not propagate errors since meta info is not needed for operation - return; - } - Block* meta = new Block(contents); - - Iterator* iter = meta->NewIterator(BytewiseComparator()); - std::string key = "filter."; - key.append(rep_->options.filter_policy->Name()); - iter->Seek(key); - if (iter->Valid() && iter->key() == Slice(key)) { - ReadFilter(iter->value()); - } - delete iter; - delete meta; -} - -void Table::ReadFilter(const Slice& filter_handle_value) { - Slice v = filter_handle_value; - BlockHandle filter_handle; - if (!filter_handle.DecodeFrom(&v).ok()) { - return; - } - - // We might want to unify with ReadBlock() if we start - // requiring checksum verification in Table::Open. - ReadOptions opt; - BlockContents block; - if (!ReadBlock(rep_->file, opt, filter_handle, &block).ok()) { - return; - } - if (block.heap_allocated) { - rep_->filter_data = block.data.data(); // Will need to delete later - } - rep_->filter = new FilterBlockReader(rep_->options.filter_policy, block.data); -} - -Table::~Table() { - delete rep_; -} - -static void DeleteBlock(void* arg, void* ignored) { - delete reinterpret_cast(arg); -} - -static void DeleteCachedBlock(const Slice& key, void* value) { - Block* block = reinterpret_cast(value); - delete block; -} - -static void ReleaseBlock(void* arg, void* h) { - Cache* cache = reinterpret_cast(arg); - Cache::Handle* handle = reinterpret_cast(h); - cache->Release(handle); -} - -// Convert an index iterator value (i.e., an encoded BlockHandle) -// into an iterator over the contents of the corresponding block. -Iterator* Table::BlockReader(void* arg, - const ReadOptions& options, - const Slice& index_value) { - Table* table = reinterpret_cast(arg); - Cache* block_cache = table->rep_->options.block_cache; - Block* block = NULL; - Cache::Handle* cache_handle = NULL; - - BlockHandle handle; - Slice input = index_value; - Status s = handle.DecodeFrom(&input); - // We intentionally allow extra stuff in index_value so that we - // can add more features in the future. - - if (s.ok()) { - BlockContents contents; - if (block_cache != NULL) { - char cache_key_buffer[16]; - EncodeFixed64(cache_key_buffer, table->rep_->cache_id); - EncodeFixed64(cache_key_buffer+8, handle.offset()); - Slice key(cache_key_buffer, sizeof(cache_key_buffer)); - cache_handle = block_cache->Lookup(key); - if (cache_handle != NULL) { - block = reinterpret_cast(block_cache->Value(cache_handle)); - } else { - s = ReadBlock(table->rep_->file, options, handle, &contents); - if (s.ok()) { - block = new Block(contents); - if (contents.cachable && options.fill_cache) { - cache_handle = block_cache->Insert( - key, block, block->size(), &DeleteCachedBlock); - } - } - } - } else { - s = ReadBlock(table->rep_->file, options, handle, &contents); - if (s.ok()) { - block = new Block(contents); - } - } - } - - Iterator* iter; - if (block != NULL) { - iter = block->NewIterator(table->rep_->options.comparator); - if (cache_handle == NULL) { - iter->RegisterCleanup(&DeleteBlock, block, NULL); - } else { - iter->RegisterCleanup(&ReleaseBlock, block_cache, cache_handle); - } - } else { - iter = NewErrorIterator(s); - } - return iter; -} - -Iterator* Table::NewIterator(const ReadOptions& options) const { - return NewTwoLevelIterator( - rep_->index_block->NewIterator(rep_->options.comparator), - &Table::BlockReader, const_cast(this), options); -} - -Status Table::InternalGet(const ReadOptions& options, const Slice& k, - void* arg, - void (*saver)(void*, const Slice&, const Slice&)) { - Status s; - Iterator* iiter = rep_->index_block->NewIterator(rep_->options.comparator); - iiter->Seek(k); - if (iiter->Valid()) { - Slice handle_value = iiter->value(); - FilterBlockReader* filter = rep_->filter; - BlockHandle handle; - if (filter != NULL && - handle.DecodeFrom(&handle_value).ok() && - !filter->KeyMayMatch(handle.offset(), k)) { - // Not found - } else { - Slice handle = iiter->value(); - Iterator* block_iter = BlockReader(this, options, iiter->value()); - block_iter->Seek(k); - if (block_iter->Valid()) { - (*saver)(arg, block_iter->key(), block_iter->value()); - } - s = block_iter->status(); - delete block_iter; - } - } - if (s.ok()) { - s = iiter->status(); - } - delete iiter; - return s; -} - - -uint64_t Table::ApproximateOffsetOf(const Slice& key) const { - Iterator* index_iter = - rep_->index_block->NewIterator(rep_->options.comparator); - index_iter->Seek(key); - uint64_t result; - if (index_iter->Valid()) { - BlockHandle handle; - Slice input = index_iter->value(); - Status s = handle.DecodeFrom(&input); - if (s.ok()) { - result = handle.offset(); - } else { - // Strange: we can't decode the block handle in the index block. - // We'll just return the offset of the metaindex block, which is - // close to the whole file size for this case. - result = rep_->metaindex_handle.offset(); - } - } else { - // key is past the last key in the file. Approximate the offset - // by returning the offset of the metaindex block (which is - // right near the end of the file). - result = rep_->metaindex_handle.offset(); - } - delete index_iter; - return result; -} - -} // namespace leveldb diff --git a/leveldb/table/table_builder.cc b/leveldb/table/table_builder.cc deleted file mode 100644 index 62002c8..0000000 --- a/leveldb/table/table_builder.cc +++ /dev/null @@ -1,270 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "leveldb/table_builder.h" - -#include -#include "leveldb/comparator.h" -#include "leveldb/env.h" -#include "leveldb/filter_policy.h" -#include "leveldb/options.h" -#include "table/block_builder.h" -#include "table/filter_block.h" -#include "table/format.h" -#include "util/coding.h" -#include "util/crc32c.h" - -namespace leveldb { - -struct TableBuilder::Rep { - Options options; - Options index_block_options; - WritableFile* file; - uint64_t offset; - Status status; - BlockBuilder data_block; - BlockBuilder index_block; - std::string last_key; - int64_t num_entries; - bool closed; // Either Finish() or Abandon() has been called. - FilterBlockBuilder* filter_block; - - // We do not emit the index entry for a block until we have seen the - // first key for the next data block. This allows us to use shorter - // keys in the index block. For example, consider a block boundary - // between the keys "the quick brown fox" and "the who". We can use - // "the r" as the key for the index block entry since it is >= all - // entries in the first block and < all entries in subsequent - // blocks. - // - // Invariant: r->pending_index_entry is true only if data_block is empty. - bool pending_index_entry; - BlockHandle pending_handle; // Handle to add to index block - - std::string compressed_output; - - Rep(const Options& opt, WritableFile* f) - : options(opt), - index_block_options(opt), - file(f), - offset(0), - data_block(&options), - index_block(&index_block_options), - num_entries(0), - closed(false), - filter_block(opt.filter_policy == NULL ? NULL - : new FilterBlockBuilder(opt.filter_policy)), - pending_index_entry(false) { - index_block_options.block_restart_interval = 1; - } -}; - -TableBuilder::TableBuilder(const Options& options, WritableFile* file) - : rep_(new Rep(options, file)) { - if (rep_->filter_block != NULL) { - rep_->filter_block->StartBlock(0); - } -} - -TableBuilder::~TableBuilder() { - assert(rep_->closed); // Catch errors where caller forgot to call Finish() - delete rep_->filter_block; - delete rep_; -} - -Status TableBuilder::ChangeOptions(const Options& options) { - // Note: if more fields are added to Options, update - // this function to catch changes that should not be allowed to - // change in the middle of building a Table. - if (options.comparator != rep_->options.comparator) { - return Status::InvalidArgument("changing comparator while building table"); - } - - // Note that any live BlockBuilders point to rep_->options and therefore - // will automatically pick up the updated options. - rep_->options = options; - rep_->index_block_options = options; - rep_->index_block_options.block_restart_interval = 1; - return Status::OK(); -} - -void TableBuilder::Add(const Slice& key, const Slice& value) { - Rep* r = rep_; - assert(!r->closed); - if (!ok()) return; - if (r->num_entries > 0) { - assert(r->options.comparator->Compare(key, Slice(r->last_key)) > 0); - } - - if (r->pending_index_entry) { - assert(r->data_block.empty()); - r->options.comparator->FindShortestSeparator(&r->last_key, key); - std::string handle_encoding; - r->pending_handle.EncodeTo(&handle_encoding); - r->index_block.Add(r->last_key, Slice(handle_encoding)); - r->pending_index_entry = false; - } - - if (r->filter_block != NULL) { - r->filter_block->AddKey(key); - } - - r->last_key.assign(key.data(), key.size()); - r->num_entries++; - r->data_block.Add(key, value); - - const size_t estimated_block_size = r->data_block.CurrentSizeEstimate(); - if (estimated_block_size >= r->options.block_size) { - Flush(); - } -} - -void TableBuilder::Flush() { - Rep* r = rep_; - assert(!r->closed); - if (!ok()) return; - if (r->data_block.empty()) return; - assert(!r->pending_index_entry); - WriteBlock(&r->data_block, &r->pending_handle); - if (ok()) { - r->pending_index_entry = true; - r->status = r->file->Flush(); - } - if (r->filter_block != NULL) { - r->filter_block->StartBlock(r->offset); - } -} - -void TableBuilder::WriteBlock(BlockBuilder* block, BlockHandle* handle) { - // File format contains a sequence of blocks where each block has: - // block_data: uint8[n] - // type: uint8 - // crc: uint32 - assert(ok()); - Rep* r = rep_; - Slice raw = block->Finish(); - - Slice block_contents; - CompressionType type = r->options.compression; - // TODO(postrelease): Support more compression options: zlib? - switch (type) { - case kNoCompression: - block_contents = raw; - break; - - case kSnappyCompression: { - std::string* compressed = &r->compressed_output; - if (port::Snappy_Compress(raw.data(), raw.size(), compressed) && - compressed->size() < raw.size() - (raw.size() / 8u)) { - block_contents = *compressed; - } else { - // Snappy not supported, or compressed less than 12.5%, so just - // store uncompressed form - block_contents = raw; - type = kNoCompression; - } - break; - } - } - WriteRawBlock(block_contents, type, handle); - r->compressed_output.clear(); - block->Reset(); -} - -void TableBuilder::WriteRawBlock(const Slice& block_contents, - CompressionType type, - BlockHandle* handle) { - Rep* r = rep_; - handle->set_offset(r->offset); - handle->set_size(block_contents.size()); - r->status = r->file->Append(block_contents); - if (r->status.ok()) { - char trailer[kBlockTrailerSize]; - trailer[0] = type; - uint32_t crc = crc32c::Value(block_contents.data(), block_contents.size()); - crc = crc32c::Extend(crc, trailer, 1); // Extend crc to cover block type - EncodeFixed32(trailer+1, crc32c::Mask(crc)); - r->status = r->file->Append(Slice(trailer, kBlockTrailerSize)); - if (r->status.ok()) { - r->offset += block_contents.size() + kBlockTrailerSize; - } - } -} - -Status TableBuilder::status() const { - return rep_->status; -} - -Status TableBuilder::Finish() { - Rep* r = rep_; - Flush(); - assert(!r->closed); - r->closed = true; - - BlockHandle filter_block_handle, metaindex_block_handle, index_block_handle; - - // Write filter block - if (ok() && r->filter_block != NULL) { - WriteRawBlock(r->filter_block->Finish(), kNoCompression, - &filter_block_handle); - } - - // Write metaindex block - if (ok()) { - BlockBuilder meta_index_block(&r->options); - if (r->filter_block != NULL) { - // Add mapping from "filter.Name" to location of filter data - std::string key = "filter."; - key.append(r->options.filter_policy->Name()); - std::string handle_encoding; - filter_block_handle.EncodeTo(&handle_encoding); - meta_index_block.Add(key, handle_encoding); - } - - // TODO(postrelease): Add stats and other meta blocks - WriteBlock(&meta_index_block, &metaindex_block_handle); - } - - // Write index block - if (ok()) { - if (r->pending_index_entry) { - r->options.comparator->FindShortSuccessor(&r->last_key); - std::string handle_encoding; - r->pending_handle.EncodeTo(&handle_encoding); - r->index_block.Add(r->last_key, Slice(handle_encoding)); - r->pending_index_entry = false; - } - WriteBlock(&r->index_block, &index_block_handle); - } - - // Write footer - if (ok()) { - Footer footer; - footer.set_metaindex_handle(metaindex_block_handle); - footer.set_index_handle(index_block_handle); - std::string footer_encoding; - footer.EncodeTo(&footer_encoding); - r->status = r->file->Append(footer_encoding); - if (r->status.ok()) { - r->offset += footer_encoding.size(); - } - } - return r->status; -} - -void TableBuilder::Abandon() { - Rep* r = rep_; - assert(!r->closed); - r->closed = true; -} - -uint64_t TableBuilder::NumEntries() const { - return rep_->num_entries; -} - -uint64_t TableBuilder::FileSize() const { - return rep_->offset; -} - -} // namespace leveldb diff --git a/leveldb/table/table_test.cc b/leveldb/table/table_test.cc deleted file mode 100644 index 57cea25..0000000 --- a/leveldb/table/table_test.cc +++ /dev/null @@ -1,838 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "leveldb/table.h" - -#include -#include -#include "db/dbformat.h" -#include "db/memtable.h" -#include "db/write_batch_internal.h" -#include "leveldb/db.h" -#include "leveldb/env.h" -#include "leveldb/iterator.h" -#include "leveldb/table_builder.h" -#include "table/block.h" -#include "table/block_builder.h" -#include "table/format.h" -#include "util/random.h" -#include "util/testharness.h" -#include "util/testutil.h" - -namespace leveldb { - -// Return reverse of "key". -// Used to test non-lexicographic comparators. -static std::string Reverse(const Slice& key) { - std::string str(key.ToString()); - std::string rev(""); - for (std::string::reverse_iterator rit = str.rbegin(); - rit != str.rend(); ++rit) { - rev.push_back(*rit); - } - return rev; -} - -namespace { -class ReverseKeyComparator : public Comparator { - public: - virtual const char* Name() const { - return "leveldb.ReverseBytewiseComparator"; - } - - virtual int Compare(const Slice& a, const Slice& b) const { - return BytewiseComparator()->Compare(Reverse(a), Reverse(b)); - } - - virtual void FindShortestSeparator( - std::string* start, - const Slice& limit) const { - std::string s = Reverse(*start); - std::string l = Reverse(limit); - BytewiseComparator()->FindShortestSeparator(&s, l); - *start = Reverse(s); - } - - virtual void FindShortSuccessor(std::string* key) const { - std::string s = Reverse(*key); - BytewiseComparator()->FindShortSuccessor(&s); - *key = Reverse(s); - } -}; -} // namespace -static ReverseKeyComparator reverse_key_comparator; - -static void Increment(const Comparator* cmp, std::string* key) { - if (cmp == BytewiseComparator()) { - key->push_back('\0'); - } else { - assert(cmp == &reverse_key_comparator); - std::string rev = Reverse(*key); - rev.push_back('\0'); - *key = Reverse(rev); - } -} - -// An STL comparator that uses a Comparator -namespace { -struct STLLessThan { - const Comparator* cmp; - - STLLessThan() : cmp(BytewiseComparator()) { } - STLLessThan(const Comparator* c) : cmp(c) { } - bool operator()(const std::string& a, const std::string& b) const { - return cmp->Compare(Slice(a), Slice(b)) < 0; - } -}; -} // namespace - -class StringSink: public WritableFile { - public: - ~StringSink() { } - - const std::string& contents() const { return contents_; } - - virtual Status Close() { return Status::OK(); } - virtual Status Flush() { return Status::OK(); } - virtual Status Sync() { return Status::OK(); } - - virtual Status Append(const Slice& data) { - contents_.append(data.data(), data.size()); - return Status::OK(); - } - - private: - std::string contents_; -}; - - -class StringSource: public RandomAccessFile { - public: - StringSource(const Slice& contents) - : contents_(contents.data(), contents.size()) { - } - - virtual ~StringSource() { } - - uint64_t Size() const { return contents_.size(); } - - virtual Status Read(uint64_t offset, size_t n, Slice* result, - char* scratch) const { - if (offset > contents_.size()) { - return Status::InvalidArgument("invalid Read offset"); - } - if (offset + n > contents_.size()) { - n = contents_.size() - offset; - } - memcpy(scratch, &contents_[offset], n); - *result = Slice(scratch, n); - return Status::OK(); - } - - private: - std::string contents_; -}; - -typedef std::map KVMap; - -// Helper class for tests to unify the interface between -// BlockBuilder/TableBuilder and Block/Table. -class Constructor { - public: - explicit Constructor(const Comparator* cmp) : data_(STLLessThan(cmp)) { } - virtual ~Constructor() { } - - void Add(const std::string& key, const Slice& value) { - data_[key] = value.ToString(); - } - - // Finish constructing the data structure with all the keys that have - // been added so far. Returns the keys in sorted order in "*keys" - // and stores the key/value pairs in "*kvmap" - void Finish(const Options& options, - std::vector* keys, - KVMap* kvmap) { - *kvmap = data_; - keys->clear(); - for (KVMap::const_iterator it = data_.begin(); - it != data_.end(); - ++it) { - keys->push_back(it->first); - } - data_.clear(); - Status s = FinishImpl(options, *kvmap); - ASSERT_TRUE(s.ok()) << s.ToString(); - } - - // Construct the data structure from the data in "data" - virtual Status FinishImpl(const Options& options, const KVMap& data) = 0; - - virtual Iterator* NewIterator() const = 0; - - virtual const KVMap& data() { return data_; } - - virtual DB* db() const { return NULL; } // Overridden in DBConstructor - - private: - KVMap data_; -}; - -class BlockConstructor: public Constructor { - public: - explicit BlockConstructor(const Comparator* cmp) - : Constructor(cmp), - comparator_(cmp), - block_(NULL) { } - ~BlockConstructor() { - delete block_; - } - virtual Status FinishImpl(const Options& options, const KVMap& data) { - delete block_; - block_ = NULL; - BlockBuilder builder(&options); - - for (KVMap::const_iterator it = data.begin(); - it != data.end(); - ++it) { - builder.Add(it->first, it->second); - } - // Open the block - data_ = builder.Finish().ToString(); - BlockContents contents; - contents.data = data_; - contents.cachable = false; - contents.heap_allocated = false; - block_ = new Block(contents); - return Status::OK(); - } - virtual Iterator* NewIterator() const { - return block_->NewIterator(comparator_); - } - - private: - const Comparator* comparator_; - std::string data_; - Block* block_; - - BlockConstructor(); -}; - -class TableConstructor: public Constructor { - public: - TableConstructor(const Comparator* cmp) - : Constructor(cmp), - source_(NULL), table_(NULL) { - } - ~TableConstructor() { - Reset(); - } - virtual Status FinishImpl(const Options& options, const KVMap& data) { - Reset(); - StringSink sink; - TableBuilder builder(options, &sink); - - for (KVMap::const_iterator it = data.begin(); - it != data.end(); - ++it) { - builder.Add(it->first, it->second); - ASSERT_TRUE(builder.status().ok()); - } - Status s = builder.Finish(); - ASSERT_TRUE(s.ok()) << s.ToString(); - - ASSERT_EQ(sink.contents().size(), builder.FileSize()); - - // Open the table - source_ = new StringSource(sink.contents()); - Options table_options; - table_options.comparator = options.comparator; - return Table::Open(table_options, source_, sink.contents().size(), &table_); - } - - virtual Iterator* NewIterator() const { - return table_->NewIterator(ReadOptions()); - } - - uint64_t ApproximateOffsetOf(const Slice& key) const { - return table_->ApproximateOffsetOf(key); - } - - private: - void Reset() { - delete table_; - delete source_; - table_ = NULL; - source_ = NULL; - } - - StringSource* source_; - Table* table_; - - TableConstructor(); -}; - -// A helper class that converts internal format keys into user keys -class KeyConvertingIterator: public Iterator { - public: - explicit KeyConvertingIterator(Iterator* iter) : iter_(iter) { } - virtual ~KeyConvertingIterator() { delete iter_; } - virtual bool Valid() const { return iter_->Valid(); } - virtual void Seek(const Slice& target) { - ParsedInternalKey ikey(target, kMaxSequenceNumber, kTypeValue); - std::string encoded; - AppendInternalKey(&encoded, ikey); - iter_->Seek(encoded); - } - virtual void SeekToFirst() { iter_->SeekToFirst(); } - virtual void SeekToLast() { iter_->SeekToLast(); } - virtual void Next() { iter_->Next(); } - virtual void Prev() { iter_->Prev(); } - - virtual Slice key() const { - assert(Valid()); - ParsedInternalKey key; - if (!ParseInternalKey(iter_->key(), &key)) { - status_ = Status::Corruption("malformed internal key"); - return Slice("corrupted key"); - } - return key.user_key; - } - - virtual Slice value() const { return iter_->value(); } - virtual Status status() const { - return status_.ok() ? iter_->status() : status_; - } - - private: - mutable Status status_; - Iterator* iter_; - - // No copying allowed - KeyConvertingIterator(const KeyConvertingIterator&); - void operator=(const KeyConvertingIterator&); -}; - -class MemTableConstructor: public Constructor { - public: - explicit MemTableConstructor(const Comparator* cmp) - : Constructor(cmp), - internal_comparator_(cmp) { - memtable_ = new MemTable(internal_comparator_); - memtable_->Ref(); - } - ~MemTableConstructor() { - memtable_->Unref(); - } - virtual Status FinishImpl(const Options& options, const KVMap& data) { - memtable_->Unref(); - memtable_ = new MemTable(internal_comparator_); - memtable_->Ref(); - int seq = 1; - for (KVMap::const_iterator it = data.begin(); - it != data.end(); - ++it) { - memtable_->Add(seq, kTypeValue, it->first, it->second); - seq++; - } - return Status::OK(); - } - virtual Iterator* NewIterator() const { - return new KeyConvertingIterator(memtable_->NewIterator()); - } - - private: - InternalKeyComparator internal_comparator_; - MemTable* memtable_; -}; - -class DBConstructor: public Constructor { - public: - explicit DBConstructor(const Comparator* cmp) - : Constructor(cmp), - comparator_(cmp) { - db_ = NULL; - NewDB(); - } - ~DBConstructor() { - delete db_; - } - virtual Status FinishImpl(const Options& options, const KVMap& data) { - delete db_; - db_ = NULL; - NewDB(); - for (KVMap::const_iterator it = data.begin(); - it != data.end(); - ++it) { - WriteBatch batch; - batch.Put(it->first, it->second); - ASSERT_TRUE(db_->Write(WriteOptions(), &batch).ok()); - } - return Status::OK(); - } - virtual Iterator* NewIterator() const { - return db_->NewIterator(ReadOptions()); - } - - virtual DB* db() const { return db_; } - - private: - void NewDB() { - std::string name = test::TmpDir() + "/table_testdb"; - - Options options; - options.comparator = comparator_; - Status status = DestroyDB(name, options); - ASSERT_TRUE(status.ok()) << status.ToString(); - - options.create_if_missing = true; - options.error_if_exists = true; - options.write_buffer_size = 10000; // Something small to force merging - status = DB::Open(options, name, &db_); - ASSERT_TRUE(status.ok()) << status.ToString(); - } - - const Comparator* comparator_; - DB* db_; -}; - -enum TestType { - TABLE_TEST, - BLOCK_TEST, - MEMTABLE_TEST, - DB_TEST -}; - -struct TestArgs { - TestType type; - bool reverse_compare; - int restart_interval; -}; - -static const TestArgs kTestArgList[] = { - { TABLE_TEST, false, 16 }, - { TABLE_TEST, false, 1 }, - { TABLE_TEST, false, 1024 }, - { TABLE_TEST, true, 16 }, - { TABLE_TEST, true, 1 }, - { TABLE_TEST, true, 1024 }, - - { BLOCK_TEST, false, 16 }, - { BLOCK_TEST, false, 1 }, - { BLOCK_TEST, false, 1024 }, - { BLOCK_TEST, true, 16 }, - { BLOCK_TEST, true, 1 }, - { BLOCK_TEST, true, 1024 }, - - // Restart interval does not matter for memtables - { MEMTABLE_TEST, false, 16 }, - { MEMTABLE_TEST, true, 16 }, - - // Do not bother with restart interval variations for DB - { DB_TEST, false, 16 }, - { DB_TEST, true, 16 }, -}; -static const int kNumTestArgs = sizeof(kTestArgList) / sizeof(kTestArgList[0]); - -class Harness { - public: - Harness() : constructor_(NULL) { } - - void Init(const TestArgs& args) { - delete constructor_; - constructor_ = NULL; - options_ = Options(); - - options_.block_restart_interval = args.restart_interval; - // Use shorter block size for tests to exercise block boundary - // conditions more. - options_.block_size = 256; - if (args.reverse_compare) { - options_.comparator = &reverse_key_comparator; - } - switch (args.type) { - case TABLE_TEST: - constructor_ = new TableConstructor(options_.comparator); - break; - case BLOCK_TEST: - constructor_ = new BlockConstructor(options_.comparator); - break; - case MEMTABLE_TEST: - constructor_ = new MemTableConstructor(options_.comparator); - break; - case DB_TEST: - constructor_ = new DBConstructor(options_.comparator); - break; - } - } - - ~Harness() { - delete constructor_; - } - - void Add(const std::string& key, const std::string& value) { - constructor_->Add(key, value); - } - - void Test(Random* rnd) { - std::vector keys; - KVMap data; - constructor_->Finish(options_, &keys, &data); - - TestForwardScan(keys, data); - TestBackwardScan(keys, data); - TestRandomAccess(rnd, keys, data); - } - - void TestForwardScan(const std::vector& keys, - const KVMap& data) { - Iterator* iter = constructor_->NewIterator(); - ASSERT_TRUE(!iter->Valid()); - iter->SeekToFirst(); - for (KVMap::const_iterator model_iter = data.begin(); - model_iter != data.end(); - ++model_iter) { - ASSERT_EQ(ToString(data, model_iter), ToString(iter)); - iter->Next(); - } - ASSERT_TRUE(!iter->Valid()); - delete iter; - } - - void TestBackwardScan(const std::vector& keys, - const KVMap& data) { - Iterator* iter = constructor_->NewIterator(); - ASSERT_TRUE(!iter->Valid()); - iter->SeekToLast(); - for (KVMap::const_reverse_iterator model_iter = data.rbegin(); - model_iter != data.rend(); - ++model_iter) { - ASSERT_EQ(ToString(data, model_iter), ToString(iter)); - iter->Prev(); - } - ASSERT_TRUE(!iter->Valid()); - delete iter; - } - - void TestRandomAccess(Random* rnd, - const std::vector& keys, - const KVMap& data) { - static const bool kVerbose = false; - Iterator* iter = constructor_->NewIterator(); - ASSERT_TRUE(!iter->Valid()); - KVMap::const_iterator model_iter = data.begin(); - if (kVerbose) fprintf(stderr, "---\n"); - for (int i = 0; i < 200; i++) { - const int toss = rnd->Uniform(5); - switch (toss) { - case 0: { - if (iter->Valid()) { - if (kVerbose) fprintf(stderr, "Next\n"); - iter->Next(); - ++model_iter; - ASSERT_EQ(ToString(data, model_iter), ToString(iter)); - } - break; - } - - case 1: { - if (kVerbose) fprintf(stderr, "SeekToFirst\n"); - iter->SeekToFirst(); - model_iter = data.begin(); - ASSERT_EQ(ToString(data, model_iter), ToString(iter)); - break; - } - - case 2: { - std::string key = PickRandomKey(rnd, keys); - model_iter = data.lower_bound(key); - if (kVerbose) fprintf(stderr, "Seek '%s'\n", - EscapeString(key).c_str()); - iter->Seek(Slice(key)); - ASSERT_EQ(ToString(data, model_iter), ToString(iter)); - break; - } - - case 3: { - if (iter->Valid()) { - if (kVerbose) fprintf(stderr, "Prev\n"); - iter->Prev(); - if (model_iter == data.begin()) { - model_iter = data.end(); // Wrap around to invalid value - } else { - --model_iter; - } - ASSERT_EQ(ToString(data, model_iter), ToString(iter)); - } - break; - } - - case 4: { - if (kVerbose) fprintf(stderr, "SeekToLast\n"); - iter->SeekToLast(); - if (keys.empty()) { - model_iter = data.end(); - } else { - std::string last = data.rbegin()->first; - model_iter = data.lower_bound(last); - } - ASSERT_EQ(ToString(data, model_iter), ToString(iter)); - break; - } - } - } - delete iter; - } - - std::string ToString(const KVMap& data, const KVMap::const_iterator& it) { - if (it == data.end()) { - return "END"; - } else { - return "'" + it->first + "->" + it->second + "'"; - } - } - - std::string ToString(const KVMap& data, - const KVMap::const_reverse_iterator& it) { - if (it == data.rend()) { - return "END"; - } else { - return "'" + it->first + "->" + it->second + "'"; - } - } - - std::string ToString(const Iterator* it) { - if (!it->Valid()) { - return "END"; - } else { - return "'" + it->key().ToString() + "->" + it->value().ToString() + "'"; - } - } - - std::string PickRandomKey(Random* rnd, const std::vector& keys) { - if (keys.empty()) { - return "foo"; - } else { - const int index = rnd->Uniform(keys.size()); - std::string result = keys[index]; - switch (rnd->Uniform(3)) { - case 0: - // Return an existing key - break; - case 1: { - // Attempt to return something smaller than an existing key - if (result.size() > 0 && result[result.size()-1] > '\0') { - result[result.size()-1]--; - } - break; - } - case 2: { - // Return something larger than an existing key - Increment(options_.comparator, &result); - break; - } - } - return result; - } - } - - // Returns NULL if not running against a DB - DB* db() const { return constructor_->db(); } - - private: - Options options_; - Constructor* constructor_; -}; - -// Test the empty key -TEST(Harness, SimpleEmptyKey) { - for (int i = 0; i < kNumTestArgs; i++) { - Init(kTestArgList[i]); - Random rnd(test::RandomSeed() + 1); - Add("", "v"); - Test(&rnd); - } -} - -TEST(Harness, SimpleSingle) { - for (int i = 0; i < kNumTestArgs; i++) { - Init(kTestArgList[i]); - Random rnd(test::RandomSeed() + 2); - Add("abc", "v"); - Test(&rnd); - } -} - -TEST(Harness, SimpleMulti) { - for (int i = 0; i < kNumTestArgs; i++) { - Init(kTestArgList[i]); - Random rnd(test::RandomSeed() + 3); - Add("abc", "v"); - Add("abcd", "v"); - Add("ac", "v2"); - Test(&rnd); - } -} - -TEST(Harness, SimpleSpecialKey) { - for (int i = 0; i < kNumTestArgs; i++) { - Init(kTestArgList[i]); - Random rnd(test::RandomSeed() + 4); - Add("\xff\xff", "v3"); - Test(&rnd); - } -} - -TEST(Harness, Randomized) { - for (int i = 0; i < kNumTestArgs; i++) { - Init(kTestArgList[i]); - Random rnd(test::RandomSeed() + 5); - for (int num_entries = 0; num_entries < 2000; - num_entries += (num_entries < 50 ? 1 : 200)) { - if ((num_entries % 10) == 0) { - fprintf(stderr, "case %d of %d: num_entries = %d\n", - (i + 1), int(kNumTestArgs), num_entries); - } - for (int e = 0; e < num_entries; e++) { - std::string v; - Add(test::RandomKey(&rnd, rnd.Skewed(4)), - test::RandomString(&rnd, rnd.Skewed(5), &v).ToString()); - } - Test(&rnd); - } - } -} - -TEST(Harness, RandomizedLongDB) { - Random rnd(test::RandomSeed()); - TestArgs args = { DB_TEST, false, 16 }; - Init(args); - int num_entries = 100000; - for (int e = 0; e < num_entries; e++) { - std::string v; - Add(test::RandomKey(&rnd, rnd.Skewed(4)), - test::RandomString(&rnd, rnd.Skewed(5), &v).ToString()); - } - Test(&rnd); - - // We must have created enough data to force merging - int files = 0; - for (int level = 0; level < config::kNumLevels; level++) { - std::string value; - char name[100]; - snprintf(name, sizeof(name), "leveldb.num-files-at-level%d", level); - ASSERT_TRUE(db()->GetProperty(name, &value)); - files += atoi(value.c_str()); - } - ASSERT_GT(files, 0); -} - -class MemTableTest { }; - -TEST(MemTableTest, Simple) { - InternalKeyComparator cmp(BytewiseComparator()); - MemTable* memtable = new MemTable(cmp); - memtable->Ref(); - WriteBatch batch; - WriteBatchInternal::SetSequence(&batch, 100); - batch.Put(std::string("k1"), std::string("v1")); - batch.Put(std::string("k2"), std::string("v2")); - batch.Put(std::string("k3"), std::string("v3")); - batch.Put(std::string("largekey"), std::string("vlarge")); - ASSERT_TRUE(WriteBatchInternal::InsertInto(&batch, memtable).ok()); - - Iterator* iter = memtable->NewIterator(); - iter->SeekToFirst(); - while (iter->Valid()) { - fprintf(stderr, "key: '%s' -> '%s'\n", - iter->key().ToString().c_str(), - iter->value().ToString().c_str()); - iter->Next(); - } - - delete iter; - memtable->Unref(); -} - -static bool Between(uint64_t val, uint64_t low, uint64_t high) { - bool result = (val >= low) && (val <= high); - if (!result) { - fprintf(stderr, "Value %llu is not in range [%llu, %llu]\n", - (unsigned long long)(val), - (unsigned long long)(low), - (unsigned long long)(high)); - } - return result; -} - -class TableTest { }; - -TEST(TableTest, ApproximateOffsetOfPlain) { - TableConstructor c(BytewiseComparator()); - c.Add("k01", "hello"); - c.Add("k02", "hello2"); - c.Add("k03", std::string(10000, 'x')); - c.Add("k04", std::string(200000, 'x')); - c.Add("k05", std::string(300000, 'x')); - c.Add("k06", "hello3"); - c.Add("k07", std::string(100000, 'x')); - std::vector keys; - KVMap kvmap; - Options options; - options.block_size = 1024; - options.compression = kNoCompression; - c.Finish(options, &keys, &kvmap); - - ASSERT_TRUE(Between(c.ApproximateOffsetOf("abc"), 0, 0)); - ASSERT_TRUE(Between(c.ApproximateOffsetOf("k01"), 0, 0)); - ASSERT_TRUE(Between(c.ApproximateOffsetOf("k01a"), 0, 0)); - ASSERT_TRUE(Between(c.ApproximateOffsetOf("k02"), 0, 0)); - ASSERT_TRUE(Between(c.ApproximateOffsetOf("k03"), 0, 0)); - ASSERT_TRUE(Between(c.ApproximateOffsetOf("k04"), 10000, 11000)); - ASSERT_TRUE(Between(c.ApproximateOffsetOf("k04a"), 210000, 211000)); - ASSERT_TRUE(Between(c.ApproximateOffsetOf("k05"), 210000, 211000)); - ASSERT_TRUE(Between(c.ApproximateOffsetOf("k06"), 510000, 511000)); - ASSERT_TRUE(Between(c.ApproximateOffsetOf("k07"), 510000, 511000)); - ASSERT_TRUE(Between(c.ApproximateOffsetOf("xyz"), 610000, 612000)); - -} - -static bool SnappyCompressionSupported() { - std::string out; - Slice in = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"; - return port::Snappy_Compress(in.data(), in.size(), &out); -} - -TEST(TableTest, ApproximateOffsetOfCompressed) { - if (!SnappyCompressionSupported()) { - fprintf(stderr, "skipping compression tests\n"); - return; - } - - Random rnd(301); - TableConstructor c(BytewiseComparator()); - std::string tmp; - c.Add("k01", "hello"); - c.Add("k02", test::CompressibleString(&rnd, 0.25, 10000, &tmp)); - c.Add("k03", "hello3"); - c.Add("k04", test::CompressibleString(&rnd, 0.25, 10000, &tmp)); - std::vector keys; - KVMap kvmap; - Options options; - options.block_size = 1024; - options.compression = kSnappyCompression; - c.Finish(options, &keys, &kvmap); - - ASSERT_TRUE(Between(c.ApproximateOffsetOf("abc"), 0, 0)); - ASSERT_TRUE(Between(c.ApproximateOffsetOf("k01"), 0, 0)); - ASSERT_TRUE(Between(c.ApproximateOffsetOf("k02"), 0, 0)); - ASSERT_TRUE(Between(c.ApproximateOffsetOf("k03"), 2000, 3000)); - ASSERT_TRUE(Between(c.ApproximateOffsetOf("k04"), 2000, 3000)); - ASSERT_TRUE(Between(c.ApproximateOffsetOf("xyz"), 4000, 6000)); -} - -} // namespace leveldb - -int main(int argc, char** argv) { - return leveldb::test::RunAllTests(); -} diff --git a/leveldb/table/two_level_iterator.cc b/leveldb/table/two_level_iterator.cc deleted file mode 100644 index 7822eba..0000000 --- a/leveldb/table/two_level_iterator.cc +++ /dev/null @@ -1,182 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "table/two_level_iterator.h" - -#include "leveldb/table.h" -#include "table/block.h" -#include "table/format.h" -#include "table/iterator_wrapper.h" - -namespace leveldb { - -namespace { - -typedef Iterator* (*BlockFunction)(void*, const ReadOptions&, const Slice&); - -class TwoLevelIterator: public Iterator { - public: - TwoLevelIterator( - Iterator* index_iter, - BlockFunction block_function, - void* arg, - const ReadOptions& options); - - virtual ~TwoLevelIterator(); - - virtual void Seek(const Slice& target); - virtual void SeekToFirst(); - virtual void SeekToLast(); - virtual void Next(); - virtual void Prev(); - - virtual bool Valid() const { - return data_iter_.Valid(); - } - virtual Slice key() const { - assert(Valid()); - return data_iter_.key(); - } - virtual Slice value() const { - assert(Valid()); - return data_iter_.value(); - } - virtual Status status() const { - // It'd be nice if status() returned a const Status& instead of a Status - if (!index_iter_.status().ok()) { - return index_iter_.status(); - } else if (data_iter_.iter() != NULL && !data_iter_.status().ok()) { - return data_iter_.status(); - } else { - return status_; - } - } - - private: - void SaveError(const Status& s) { - if (status_.ok() && !s.ok()) status_ = s; - } - void SkipEmptyDataBlocksForward(); - void SkipEmptyDataBlocksBackward(); - void SetDataIterator(Iterator* data_iter); - void InitDataBlock(); - - BlockFunction block_function_; - void* arg_; - const ReadOptions options_; - Status status_; - IteratorWrapper index_iter_; - IteratorWrapper data_iter_; // May be NULL - // If data_iter_ is non-NULL, then "data_block_handle_" holds the - // "index_value" passed to block_function_ to create the data_iter_. - std::string data_block_handle_; -}; - -TwoLevelIterator::TwoLevelIterator( - Iterator* index_iter, - BlockFunction block_function, - void* arg, - const ReadOptions& options) - : block_function_(block_function), - arg_(arg), - options_(options), - index_iter_(index_iter), - data_iter_(NULL) { -} - -TwoLevelIterator::~TwoLevelIterator() { -} - -void TwoLevelIterator::Seek(const Slice& target) { - index_iter_.Seek(target); - InitDataBlock(); - if (data_iter_.iter() != NULL) data_iter_.Seek(target); - SkipEmptyDataBlocksForward(); -} - -void TwoLevelIterator::SeekToFirst() { - index_iter_.SeekToFirst(); - InitDataBlock(); - if (data_iter_.iter() != NULL) data_iter_.SeekToFirst(); - SkipEmptyDataBlocksForward(); -} - -void TwoLevelIterator::SeekToLast() { - index_iter_.SeekToLast(); - InitDataBlock(); - if (data_iter_.iter() != NULL) data_iter_.SeekToLast(); - SkipEmptyDataBlocksBackward(); -} - -void TwoLevelIterator::Next() { - assert(Valid()); - data_iter_.Next(); - SkipEmptyDataBlocksForward(); -} - -void TwoLevelIterator::Prev() { - assert(Valid()); - data_iter_.Prev(); - SkipEmptyDataBlocksBackward(); -} - - -void TwoLevelIterator::SkipEmptyDataBlocksForward() { - while (data_iter_.iter() == NULL || !data_iter_.Valid()) { - // Move to next block - if (!index_iter_.Valid()) { - SetDataIterator(NULL); - return; - } - index_iter_.Next(); - InitDataBlock(); - if (data_iter_.iter() != NULL) data_iter_.SeekToFirst(); - } -} - -void TwoLevelIterator::SkipEmptyDataBlocksBackward() { - while (data_iter_.iter() == NULL || !data_iter_.Valid()) { - // Move to next block - if (!index_iter_.Valid()) { - SetDataIterator(NULL); - return; - } - index_iter_.Prev(); - InitDataBlock(); - if (data_iter_.iter() != NULL) data_iter_.SeekToLast(); - } -} - -void TwoLevelIterator::SetDataIterator(Iterator* data_iter) { - if (data_iter_.iter() != NULL) SaveError(data_iter_.status()); - data_iter_.Set(data_iter); -} - -void TwoLevelIterator::InitDataBlock() { - if (!index_iter_.Valid()) { - SetDataIterator(NULL); - } else { - Slice handle = index_iter_.value(); - if (data_iter_.iter() != NULL && handle.compare(data_block_handle_) == 0) { - // data_iter_ is already constructed with this iterator, so - // no need to change anything - } else { - Iterator* iter = (*block_function_)(arg_, options_, handle); - data_block_handle_.assign(handle.data(), handle.size()); - SetDataIterator(iter); - } - } -} - -} // namespace - -Iterator* NewTwoLevelIterator( - Iterator* index_iter, - BlockFunction block_function, - void* arg, - const ReadOptions& options) { - return new TwoLevelIterator(index_iter, block_function, arg, options); -} - -} // namespace leveldb diff --git a/leveldb/table/two_level_iterator.h b/leveldb/table/two_level_iterator.h deleted file mode 100644 index 629ca34..0000000 --- a/leveldb/table/two_level_iterator.h +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#ifndef STORAGE_LEVELDB_TABLE_TWO_LEVEL_ITERATOR_H_ -#define STORAGE_LEVELDB_TABLE_TWO_LEVEL_ITERATOR_H_ - -#include "leveldb/iterator.h" - -namespace leveldb { - -struct ReadOptions; - -// Return a new two level iterator. A two-level iterator contains an -// index iterator whose values point to a sequence of blocks where -// each block is itself a sequence of key,value pairs. The returned -// two-level iterator yields the concatenation of all key/value pairs -// in the sequence of blocks. Takes ownership of "index_iter" and -// will delete it when no longer needed. -// -// Uses a supplied function to convert an index_iter value into -// an iterator over the contents of the corresponding block. -extern Iterator* NewTwoLevelIterator( - Iterator* index_iter, - Iterator* (*block_function)( - void* arg, - const ReadOptions& options, - const Slice& index_value), - void* arg, - const ReadOptions& options); - -} // namespace leveldb - -#endif // STORAGE_LEVELDB_TABLE_TWO_LEVEL_ITERATOR_H_ diff --git a/leveldb/util/arena.cc b/leveldb/util/arena.cc deleted file mode 100644 index 9551d6a..0000000 --- a/leveldb/util/arena.cc +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "util/arena.h" -#include - -namespace leveldb { - -static const int kBlockSize = 4096; - -Arena::Arena() { - blocks_memory_ = 0; - alloc_ptr_ = NULL; // First allocation will allocate a block - alloc_bytes_remaining_ = 0; -} - -Arena::~Arena() { - for (size_t i = 0; i < blocks_.size(); i++) { - delete[] blocks_[i]; - } -} - -char* Arena::AllocateFallback(size_t bytes) { - if (bytes > kBlockSize / 4) { - // Object is more than a quarter of our block size. Allocate it separately - // to avoid wasting too much space in leftover bytes. - char* result = AllocateNewBlock(bytes); - return result; - } - - // We waste the remaining space in the current block. - alloc_ptr_ = AllocateNewBlock(kBlockSize); - alloc_bytes_remaining_ = kBlockSize; - - char* result = alloc_ptr_; - alloc_ptr_ += bytes; - alloc_bytes_remaining_ -= bytes; - return result; -} - -char* Arena::AllocateAligned(size_t bytes) { - const int align = sizeof(void*); // We'll align to pointer size - assert((align & (align-1)) == 0); // Pointer size should be a power of 2 - size_t current_mod = reinterpret_cast(alloc_ptr_) & (align-1); - size_t slop = (current_mod == 0 ? 0 : align - current_mod); - size_t needed = bytes + slop; - char* result; - if (needed <= alloc_bytes_remaining_) { - result = alloc_ptr_ + slop; - alloc_ptr_ += needed; - alloc_bytes_remaining_ -= needed; - } else { - // AllocateFallback always returned aligned memory - result = AllocateFallback(bytes); - } - assert((reinterpret_cast(result) & (align-1)) == 0); - return result; -} - -char* Arena::AllocateNewBlock(size_t block_bytes) { - char* result = new char[block_bytes]; - blocks_memory_ += block_bytes; - blocks_.push_back(result); - return result; -} - -} // namespace leveldb diff --git a/leveldb/util/arena.h b/leveldb/util/arena.h deleted file mode 100644 index 8f7dde2..0000000 --- a/leveldb/util/arena.h +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#ifndef STORAGE_LEVELDB_UTIL_ARENA_H_ -#define STORAGE_LEVELDB_UTIL_ARENA_H_ - -#include -#include -#include -#include - -namespace leveldb { - -class Arena { - public: - Arena(); - ~Arena(); - - // Return a pointer to a newly allocated memory block of "bytes" bytes. - char* Allocate(size_t bytes); - - // Allocate memory with the normal alignment guarantees provided by malloc - char* AllocateAligned(size_t bytes); - - // Returns an estimate of the total memory usage of data allocated - // by the arena (including space allocated but not yet used for user - // allocations). - size_t MemoryUsage() const { - return blocks_memory_ + blocks_.capacity() * sizeof(char*); - } - - private: - char* AllocateFallback(size_t bytes); - char* AllocateNewBlock(size_t block_bytes); - - // Allocation state - char* alloc_ptr_; - size_t alloc_bytes_remaining_; - - // Array of new[] allocated memory blocks - std::vector blocks_; - - // Bytes of memory in blocks allocated so far - size_t blocks_memory_; - - // No copying allowed - Arena(const Arena&); - void operator=(const Arena&); -}; - -inline char* Arena::Allocate(size_t bytes) { - // The semantics of what to return are a bit messy if we allow - // 0-byte allocations, so we disallow them here (we don't need - // them for our internal use). - assert(bytes > 0); - if (bytes <= alloc_bytes_remaining_) { - char* result = alloc_ptr_; - alloc_ptr_ += bytes; - alloc_bytes_remaining_ -= bytes; - return result; - } - return AllocateFallback(bytes); -} - -} // namespace leveldb - -#endif // STORAGE_LEVELDB_UTIL_ARENA_H_ diff --git a/leveldb/util/arena_test.cc b/leveldb/util/arena_test.cc deleted file mode 100644 index 63d1778..0000000 --- a/leveldb/util/arena_test.cc +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "util/arena.h" - -#include "util/random.h" -#include "util/testharness.h" - -namespace leveldb { - -class ArenaTest { }; - -TEST(ArenaTest, Empty) { - Arena arena; -} - -TEST(ArenaTest, Simple) { - std::vector > allocated; - Arena arena; - const int N = 100000; - size_t bytes = 0; - Random rnd(301); - for (int i = 0; i < N; i++) { - size_t s; - if (i % (N / 10) == 0) { - s = i; - } else { - s = rnd.OneIn(4000) ? rnd.Uniform(6000) : - (rnd.OneIn(10) ? rnd.Uniform(100) : rnd.Uniform(20)); - } - if (s == 0) { - // Our arena disallows size 0 allocations. - s = 1; - } - char* r; - if (rnd.OneIn(10)) { - r = arena.AllocateAligned(s); - } else { - r = arena.Allocate(s); - } - - for (int b = 0; b < s; b++) { - // Fill the "i"th allocation with a known bit pattern - r[b] = i % 256; - } - bytes += s; - allocated.push_back(std::make_pair(s, r)); - ASSERT_GE(arena.MemoryUsage(), bytes); - if (i > N/10) { - ASSERT_LE(arena.MemoryUsage(), bytes * 1.10); - } - } - for (int i = 0; i < allocated.size(); i++) { - size_t num_bytes = allocated[i].first; - const char* p = allocated[i].second; - for (int b = 0; b < num_bytes; b++) { - // Check the "i"th allocation for the known bit pattern - ASSERT_EQ(int(p[b]) & 0xff, i % 256); - } - } -} - -} // namespace leveldb - -int main(int argc, char** argv) { - return leveldb::test::RunAllTests(); -} diff --git a/leveldb/util/bloom.cc b/leveldb/util/bloom.cc deleted file mode 100644 index d7941cd..0000000 --- a/leveldb/util/bloom.cc +++ /dev/null @@ -1,95 +0,0 @@ -// Copyright (c) 2012 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "leveldb/filter_policy.h" - -#include "leveldb/slice.h" -#include "util/hash.h" - -namespace leveldb { - -namespace { -static uint32_t BloomHash(const Slice& key) { - return Hash(key.data(), key.size(), 0xbc9f1d34); -} - -class BloomFilterPolicy : public FilterPolicy { - private: - size_t bits_per_key_; - size_t k_; - - public: - explicit BloomFilterPolicy(int bits_per_key) - : bits_per_key_(bits_per_key) { - // We intentionally round down to reduce probing cost a little bit - k_ = static_cast(bits_per_key * 0.69); // 0.69 =~ ln(2) - if (k_ < 1) k_ = 1; - if (k_ > 30) k_ = 30; - } - - virtual const char* Name() const { - return "leveldb.BuiltinBloomFilter"; - } - - virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const { - // Compute bloom filter size (in both bits and bytes) - size_t bits = n * bits_per_key_; - - // For small n, we can see a very high false positive rate. Fix it - // by enforcing a minimum bloom filter length. - if (bits < 64) bits = 64; - - size_t bytes = (bits + 7) / 8; - bits = bytes * 8; - - const size_t init_size = dst->size(); - dst->resize(init_size + bytes, 0); - dst->push_back(static_cast(k_)); // Remember # of probes in filter - char* array = &(*dst)[init_size]; - for (size_t i = 0; i < n; i++) { - // Use double-hashing to generate a sequence of hash values. - // See analysis in [Kirsch,Mitzenmacher 2006]. - uint32_t h = BloomHash(keys[i]); - const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits - for (size_t j = 0; j < k_; j++) { - const uint32_t bitpos = h % bits; - array[bitpos/8] |= (1 << (bitpos % 8)); - h += delta; - } - } - } - - virtual bool KeyMayMatch(const Slice& key, const Slice& bloom_filter) const { - const size_t len = bloom_filter.size(); - if (len < 2) return false; - - const char* array = bloom_filter.data(); - const size_t bits = (len - 1) * 8; - - // Use the encoded k so that we can read filters generated by - // bloom filters created using different parameters. - const size_t k = array[len-1]; - if (k > 30) { - // Reserved for potentially new encodings for short bloom filters. - // Consider it a match. - return true; - } - - uint32_t h = BloomHash(key); - const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits - for (size_t j = 0; j < k; j++) { - const uint32_t bitpos = h % bits; - if ((array[bitpos/8] & (1 << (bitpos % 8))) == 0) return false; - h += delta; - } - return true; - } -}; -} - -const FilterPolicy* NewBloomFilterPolicy(int bits_per_key) { - return new BloomFilterPolicy(bits_per_key); -} - -} // namespace leveldb diff --git a/leveldb/util/bloom_test.cc b/leveldb/util/bloom_test.cc deleted file mode 100644 index 4a6ea1b..0000000 --- a/leveldb/util/bloom_test.cc +++ /dev/null @@ -1,159 +0,0 @@ -// Copyright (c) 2012 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "leveldb/filter_policy.h" - -#include "util/logging.h" -#include "util/testharness.h" -#include "util/testutil.h" - -namespace leveldb { - -static const int kVerbose = 1; - -static Slice Key(int i, char* buffer) { - memcpy(buffer, &i, sizeof(i)); - return Slice(buffer, sizeof(i)); -} - -class BloomTest { - private: - const FilterPolicy* policy_; - std::string filter_; - std::vector keys_; - - public: - BloomTest() : policy_(NewBloomFilterPolicy(10)) { } - - ~BloomTest() { - delete policy_; - } - - void Reset() { - keys_.clear(); - filter_.clear(); - } - - void Add(const Slice& s) { - keys_.push_back(s.ToString()); - } - - void Build() { - std::vector key_slices; - for (size_t i = 0; i < keys_.size(); i++) { - key_slices.push_back(Slice(keys_[i])); - } - filter_.clear(); - policy_->CreateFilter(&key_slices[0], key_slices.size(), &filter_); - keys_.clear(); - if (kVerbose >= 2) DumpFilter(); - } - - size_t FilterSize() const { - return filter_.size(); - } - - void DumpFilter() { - fprintf(stderr, "F("); - for (size_t i = 0; i+1 < filter_.size(); i++) { - const unsigned int c = static_cast(filter_[i]); - for (int j = 0; j < 8; j++) { - fprintf(stderr, "%c", (c & (1 <KeyMayMatch(s, filter_); - } - - double FalsePositiveRate() { - char buffer[sizeof(int)]; - int result = 0; - for (int i = 0; i < 10000; i++) { - if (Matches(Key(i + 1000000000, buffer))) { - result++; - } - } - return result / 10000.0; - } -}; - -TEST(BloomTest, EmptyFilter) { - ASSERT_TRUE(! Matches("hello")); - ASSERT_TRUE(! Matches("world")); -} - -TEST(BloomTest, Small) { - Add("hello"); - Add("world"); - ASSERT_TRUE(Matches("hello")); - ASSERT_TRUE(Matches("world")); - ASSERT_TRUE(! Matches("x")); - ASSERT_TRUE(! Matches("foo")); -} - -static int NextLength(int length) { - if (length < 10) { - length += 1; - } else if (length < 100) { - length += 10; - } else if (length < 1000) { - length += 100; - } else { - length += 1000; - } - return length; -} - -TEST(BloomTest, VaryingLengths) { - char buffer[sizeof(int)]; - - // Count number of filters that significantly exceed the false positive rate - int mediocre_filters = 0; - int good_filters = 0; - - for (int length = 1; length <= 10000; length = NextLength(length)) { - Reset(); - for (int i = 0; i < length; i++) { - Add(Key(i, buffer)); - } - Build(); - - ASSERT_LE(FilterSize(), (length * 10 / 8) + 40) << length; - - // All added keys must match - for (int i = 0; i < length; i++) { - ASSERT_TRUE(Matches(Key(i, buffer))) - << "Length " << length << "; key " << i; - } - - // Check false positive rate - double rate = FalsePositiveRate(); - if (kVerbose >= 1) { - fprintf(stderr, "False positives: %5.2f%% @ length = %6d ; bytes = %6d\n", - rate*100.0, length, static_cast(FilterSize())); - } - ASSERT_LE(rate, 0.02); // Must not be over 2% - if (rate > 0.0125) mediocre_filters++; // Allowed, but not too often - else good_filters++; - } - if (kVerbose >= 1) { - fprintf(stderr, "Filters: %d good, %d mediocre\n", - good_filters, mediocre_filters); - } - ASSERT_LE(mediocre_filters, good_filters/5); -} - -// Different bits-per-byte - -} // namespace leveldb - -int main(int argc, char** argv) { - return leveldb::test::RunAllTests(); -} diff --git a/leveldb/util/cache.cc b/leveldb/util/cache.cc deleted file mode 100644 index 24f1f63..0000000 --- a/leveldb/util/cache.cc +++ /dev/null @@ -1,328 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include -#include -#include - -#include "leveldb/cache.h" -#include "port/port.h" -#include "util/hash.h" -#include "util/mutexlock.h" - -namespace leveldb { - -Cache::~Cache() { -} - -namespace { - -// LRU cache implementation - -// An entry is a variable length heap-allocated structure. Entries -// are kept in a circular doubly linked list ordered by access time. -struct LRUHandle { - void* value; - void (*deleter)(const Slice&, void* value); - LRUHandle* next_hash; - LRUHandle* next; - LRUHandle* prev; - size_t charge; // TODO(opt): Only allow uint32_t? - size_t key_length; - uint32_t refs; - uint32_t hash; // Hash of key(); used for fast sharding and comparisons - char key_data[1]; // Beginning of key - - Slice key() const { - // For cheaper lookups, we allow a temporary Handle object - // to store a pointer to a key in "value". - if (next == this) { - return *(reinterpret_cast(value)); - } else { - return Slice(key_data, key_length); - } - } -}; - -// We provide our own simple hash table since it removes a whole bunch -// of porting hacks and is also faster than some of the built-in hash -// table implementations in some of the compiler/runtime combinations -// we have tested. E.g., readrandom speeds up by ~5% over the g++ -// 4.4.3's builtin hashtable. -class HandleTable { - public: - HandleTable() : length_(0), elems_(0), list_(NULL) { Resize(); } - ~HandleTable() { delete[] list_; } - - LRUHandle* Lookup(const Slice& key, uint32_t hash) { - return *FindPointer(key, hash); - } - - LRUHandle* Insert(LRUHandle* h) { - LRUHandle** ptr = FindPointer(h->key(), h->hash); - LRUHandle* old = *ptr; - h->next_hash = (old == NULL ? NULL : old->next_hash); - *ptr = h; - if (old == NULL) { - ++elems_; - if (elems_ > length_) { - // Since each cache entry is fairly large, we aim for a small - // average linked list length (<= 1). - Resize(); - } - } - return old; - } - - LRUHandle* Remove(const Slice& key, uint32_t hash) { - LRUHandle** ptr = FindPointer(key, hash); - LRUHandle* result = *ptr; - if (result != NULL) { - *ptr = result->next_hash; - --elems_; - } - return result; - } - - private: - // The table consists of an array of buckets where each bucket is - // a linked list of cache entries that hash into the bucket. - uint32_t length_; - uint32_t elems_; - LRUHandle** list_; - - // Return a pointer to slot that points to a cache entry that - // matches key/hash. If there is no such cache entry, return a - // pointer to the trailing slot in the corresponding linked list. - LRUHandle** FindPointer(const Slice& key, uint32_t hash) { - LRUHandle** ptr = &list_[hash & (length_ - 1)]; - while (*ptr != NULL && - ((*ptr)->hash != hash || key != (*ptr)->key())) { - ptr = &(*ptr)->next_hash; - } - return ptr; - } - - void Resize() { - uint32_t new_length = 4; - while (new_length < elems_) { - new_length *= 2; - } - LRUHandle** new_list = new LRUHandle*[new_length]; - memset(new_list, 0, sizeof(new_list[0]) * new_length); - uint32_t count = 0; - for (uint32_t i = 0; i < length_; i++) { - LRUHandle* h = list_[i]; - while (h != NULL) { - LRUHandle* next = h->next_hash; - Slice key = h->key(); - uint32_t hash = h->hash; - LRUHandle** ptr = &new_list[hash & (new_length - 1)]; - h->next_hash = *ptr; - *ptr = h; - h = next; - count++; - } - } - assert(elems_ == count); - delete[] list_; - list_ = new_list; - length_ = new_length; - } -}; - -// A single shard of sharded cache. -class LRUCache { - public: - LRUCache(); - ~LRUCache(); - - // Separate from constructor so caller can easily make an array of LRUCache - void SetCapacity(size_t capacity) { capacity_ = capacity; } - - // Like Cache methods, but with an extra "hash" parameter. - Cache::Handle* Insert(const Slice& key, uint32_t hash, - void* value, size_t charge, - void (*deleter)(const Slice& key, void* value)); - Cache::Handle* Lookup(const Slice& key, uint32_t hash); - void Release(Cache::Handle* handle); - void Erase(const Slice& key, uint32_t hash); - - private: - void LRU_Remove(LRUHandle* e); - void LRU_Append(LRUHandle* e); - void Unref(LRUHandle* e); - - // Initialized before use. - size_t capacity_; - - // mutex_ protects the following state. - port::Mutex mutex_; - size_t usage_; - uint64_t last_id_; - - // Dummy head of LRU list. - // lru.prev is newest entry, lru.next is oldest entry. - LRUHandle lru_; - - HandleTable table_; -}; - -LRUCache::LRUCache() - : usage_(0), - last_id_(0) { - // Make empty circular linked list - lru_.next = &lru_; - lru_.prev = &lru_; -} - -LRUCache::~LRUCache() { - for (LRUHandle* e = lru_.next; e != &lru_; ) { - LRUHandle* next = e->next; - assert(e->refs == 1); // Error if caller has an unreleased handle - Unref(e); - e = next; - } -} - -void LRUCache::Unref(LRUHandle* e) { - assert(e->refs > 0); - e->refs--; - if (e->refs <= 0) { - usage_ -= e->charge; - (*e->deleter)(e->key(), e->value); - free(e); - } -} - -void LRUCache::LRU_Remove(LRUHandle* e) { - e->next->prev = e->prev; - e->prev->next = e->next; -} - -void LRUCache::LRU_Append(LRUHandle* e) { - // Make "e" newest entry by inserting just before lru_ - e->next = &lru_; - e->prev = lru_.prev; - e->prev->next = e; - e->next->prev = e; -} - -Cache::Handle* LRUCache::Lookup(const Slice& key, uint32_t hash) { - MutexLock l(&mutex_); - LRUHandle* e = table_.Lookup(key, hash); - if (e != NULL) { - e->refs++; - LRU_Remove(e); - LRU_Append(e); - } - return reinterpret_cast(e); -} - -void LRUCache::Release(Cache::Handle* handle) { - MutexLock l(&mutex_); - Unref(reinterpret_cast(handle)); -} - -Cache::Handle* LRUCache::Insert( - const Slice& key, uint32_t hash, void* value, size_t charge, - void (*deleter)(const Slice& key, void* value)) { - MutexLock l(&mutex_); - - LRUHandle* e = reinterpret_cast( - malloc(sizeof(LRUHandle)-1 + key.size())); - e->value = value; - e->deleter = deleter; - e->charge = charge; - e->key_length = key.size(); - e->hash = hash; - e->refs = 2; // One from LRUCache, one for the returned handle - memcpy(e->key_data, key.data(), key.size()); - LRU_Append(e); - usage_ += charge; - - LRUHandle* old = table_.Insert(e); - if (old != NULL) { - LRU_Remove(old); - Unref(old); - } - - while (usage_ > capacity_ && lru_.next != &lru_) { - LRUHandle* old = lru_.next; - LRU_Remove(old); - table_.Remove(old->key(), old->hash); - Unref(old); - } - - return reinterpret_cast(e); -} - -void LRUCache::Erase(const Slice& key, uint32_t hash) { - MutexLock l(&mutex_); - LRUHandle* e = table_.Remove(key, hash); - if (e != NULL) { - LRU_Remove(e); - Unref(e); - } -} - -static const int kNumShardBits = 4; -static const int kNumShards = 1 << kNumShardBits; - -class ShardedLRUCache : public Cache { - private: - LRUCache shard_[kNumShards]; - port::Mutex id_mutex_; - uint64_t last_id_; - - static inline uint32_t HashSlice(const Slice& s) { - return Hash(s.data(), s.size(), 0); - } - - static uint32_t Shard(uint32_t hash) { - return hash >> (32 - kNumShardBits); - } - - public: - explicit ShardedLRUCache(size_t capacity) - : last_id_(0) { - const size_t per_shard = (capacity + (kNumShards - 1)) / kNumShards; - for (int s = 0; s < kNumShards; s++) { - shard_[s].SetCapacity(per_shard); - } - } - virtual ~ShardedLRUCache() { } - virtual Handle* Insert(const Slice& key, void* value, size_t charge, - void (*deleter)(const Slice& key, void* value)) { - const uint32_t hash = HashSlice(key); - return shard_[Shard(hash)].Insert(key, hash, value, charge, deleter); - } - virtual Handle* Lookup(const Slice& key) { - const uint32_t hash = HashSlice(key); - return shard_[Shard(hash)].Lookup(key, hash); - } - virtual void Release(Handle* handle) { - LRUHandle* h = reinterpret_cast(handle); - shard_[Shard(h->hash)].Release(handle); - } - virtual void Erase(const Slice& key) { - const uint32_t hash = HashSlice(key); - shard_[Shard(hash)].Erase(key, hash); - } - virtual void* Value(Handle* handle) { - return reinterpret_cast(handle)->value; - } - virtual uint64_t NewId() { - MutexLock l(&id_mutex_); - return ++(last_id_); - } -}; - -} // end anonymous namespace - -Cache* NewLRUCache(size_t capacity) { - return new ShardedLRUCache(capacity); -} - -} // namespace leveldb diff --git a/leveldb/util/cache_test.cc b/leveldb/util/cache_test.cc deleted file mode 100644 index 4371671..0000000 --- a/leveldb/util/cache_test.cc +++ /dev/null @@ -1,186 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "leveldb/cache.h" - -#include -#include "util/coding.h" -#include "util/testharness.h" - -namespace leveldb { - -// Conversions between numeric keys/values and the types expected by Cache. -static std::string EncodeKey(int k) { - std::string result; - PutFixed32(&result, k); - return result; -} -static int DecodeKey(const Slice& k) { - assert(k.size() == 4); - return DecodeFixed32(k.data()); -} -static void* EncodeValue(uintptr_t v) { return reinterpret_cast(v); } -static int DecodeValue(void* v) { return reinterpret_cast(v); } - -class CacheTest { - public: - static CacheTest* current_; - - static void Deleter(const Slice& key, void* v) { - current_->deleted_keys_.push_back(DecodeKey(key)); - current_->deleted_values_.push_back(DecodeValue(v)); - } - - static const int kCacheSize = 1000; - std::vector deleted_keys_; - std::vector deleted_values_; - Cache* cache_; - - CacheTest() : cache_(NewLRUCache(kCacheSize)) { - current_ = this; - } - - ~CacheTest() { - delete cache_; - } - - int Lookup(int key) { - Cache::Handle* handle = cache_->Lookup(EncodeKey(key)); - const int r = (handle == NULL) ? -1 : DecodeValue(cache_->Value(handle)); - if (handle != NULL) { - cache_->Release(handle); - } - return r; - } - - void Insert(int key, int value, int charge = 1) { - cache_->Release(cache_->Insert(EncodeKey(key), EncodeValue(value), charge, - &CacheTest::Deleter)); - } - - void Erase(int key) { - cache_->Erase(EncodeKey(key)); - } -}; -CacheTest* CacheTest::current_; - -TEST(CacheTest, HitAndMiss) { - ASSERT_EQ(-1, Lookup(100)); - - Insert(100, 101); - ASSERT_EQ(101, Lookup(100)); - ASSERT_EQ(-1, Lookup(200)); - ASSERT_EQ(-1, Lookup(300)); - - Insert(200, 201); - ASSERT_EQ(101, Lookup(100)); - ASSERT_EQ(201, Lookup(200)); - ASSERT_EQ(-1, Lookup(300)); - - Insert(100, 102); - ASSERT_EQ(102, Lookup(100)); - ASSERT_EQ(201, Lookup(200)); - ASSERT_EQ(-1, Lookup(300)); - - ASSERT_EQ(1, deleted_keys_.size()); - ASSERT_EQ(100, deleted_keys_[0]); - ASSERT_EQ(101, deleted_values_[0]); -} - -TEST(CacheTest, Erase) { - Erase(200); - ASSERT_EQ(0, deleted_keys_.size()); - - Insert(100, 101); - Insert(200, 201); - Erase(100); - ASSERT_EQ(-1, Lookup(100)); - ASSERT_EQ(201, Lookup(200)); - ASSERT_EQ(1, deleted_keys_.size()); - ASSERT_EQ(100, deleted_keys_[0]); - ASSERT_EQ(101, deleted_values_[0]); - - Erase(100); - ASSERT_EQ(-1, Lookup(100)); - ASSERT_EQ(201, Lookup(200)); - ASSERT_EQ(1, deleted_keys_.size()); -} - -TEST(CacheTest, EntriesArePinned) { - Insert(100, 101); - Cache::Handle* h1 = cache_->Lookup(EncodeKey(100)); - ASSERT_EQ(101, DecodeValue(cache_->Value(h1))); - - Insert(100, 102); - Cache::Handle* h2 = cache_->Lookup(EncodeKey(100)); - ASSERT_EQ(102, DecodeValue(cache_->Value(h2))); - ASSERT_EQ(0, deleted_keys_.size()); - - cache_->Release(h1); - ASSERT_EQ(1, deleted_keys_.size()); - ASSERT_EQ(100, deleted_keys_[0]); - ASSERT_EQ(101, deleted_values_[0]); - - Erase(100); - ASSERT_EQ(-1, Lookup(100)); - ASSERT_EQ(1, deleted_keys_.size()); - - cache_->Release(h2); - ASSERT_EQ(2, deleted_keys_.size()); - ASSERT_EQ(100, deleted_keys_[1]); - ASSERT_EQ(102, deleted_values_[1]); -} - -TEST(CacheTest, EvictionPolicy) { - Insert(100, 101); - Insert(200, 201); - - // Frequently used entry must be kept around - for (int i = 0; i < kCacheSize + 100; i++) { - Insert(1000+i, 2000+i); - ASSERT_EQ(2000+i, Lookup(1000+i)); - ASSERT_EQ(101, Lookup(100)); - } - ASSERT_EQ(101, Lookup(100)); - ASSERT_EQ(-1, Lookup(200)); -} - -TEST(CacheTest, HeavyEntries) { - // Add a bunch of light and heavy entries and then count the combined - // size of items still in the cache, which must be approximately the - // same as the total capacity. - const int kLight = 1; - const int kHeavy = 10; - int added = 0; - int index = 0; - while (added < 2*kCacheSize) { - const int weight = (index & 1) ? kLight : kHeavy; - Insert(index, 1000+index, weight); - added += weight; - index++; - } - - int cached_weight = 0; - for (int i = 0; i < index; i++) { - const int weight = (i & 1 ? kLight : kHeavy); - int r = Lookup(i); - if (r >= 0) { - cached_weight += weight; - ASSERT_EQ(1000+i, r); - } - } - ASSERT_LE(cached_weight, kCacheSize + kCacheSize/10); -} - -TEST(CacheTest, NewId) { - uint64_t a = cache_->NewId(); - uint64_t b = cache_->NewId(); - ASSERT_NE(a, b); -} - -} // namespace leveldb - -int main(int argc, char** argv) { - return leveldb::test::RunAllTests(); -} diff --git a/leveldb/util/coding.cc b/leveldb/util/coding.cc deleted file mode 100644 index dbd7a65..0000000 --- a/leveldb/util/coding.cc +++ /dev/null @@ -1,194 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "util/coding.h" - -namespace leveldb { - -void EncodeFixed32(char* buf, uint32_t value) { -#if __BYTE_ORDER == __LITTLE_ENDIAN - memcpy(buf, &value, sizeof(value)); -#else - buf[0] = value & 0xff; - buf[1] = (value >> 8) & 0xff; - buf[2] = (value >> 16) & 0xff; - buf[3] = (value >> 24) & 0xff; -#endif -} - -void EncodeFixed64(char* buf, uint64_t value) { -#if __BYTE_ORDER == __LITTLE_ENDIAN - memcpy(buf, &value, sizeof(value)); -#else - buf[0] = value & 0xff; - buf[1] = (value >> 8) & 0xff; - buf[2] = (value >> 16) & 0xff; - buf[3] = (value >> 24) & 0xff; - buf[4] = (value >> 32) & 0xff; - buf[5] = (value >> 40) & 0xff; - buf[6] = (value >> 48) & 0xff; - buf[7] = (value >> 56) & 0xff; -#endif -} - -void PutFixed32(std::string* dst, uint32_t value) { - char buf[sizeof(value)]; - EncodeFixed32(buf, value); - dst->append(buf, sizeof(buf)); -} - -void PutFixed64(std::string* dst, uint64_t value) { - char buf[sizeof(value)]; - EncodeFixed64(buf, value); - dst->append(buf, sizeof(buf)); -} - -char* EncodeVarint32(char* dst, uint32_t v) { - // Operate on characters as unsigneds - unsigned char* ptr = reinterpret_cast(dst); - static const int B = 128; - if (v < (1<<7)) { - *(ptr++) = v; - } else if (v < (1<<14)) { - *(ptr++) = v | B; - *(ptr++) = v>>7; - } else if (v < (1<<21)) { - *(ptr++) = v | B; - *(ptr++) = (v>>7) | B; - *(ptr++) = v>>14; - } else if (v < (1<<28)) { - *(ptr++) = v | B; - *(ptr++) = (v>>7) | B; - *(ptr++) = (v>>14) | B; - *(ptr++) = v>>21; - } else { - *(ptr++) = v | B; - *(ptr++) = (v>>7) | B; - *(ptr++) = (v>>14) | B; - *(ptr++) = (v>>21) | B; - *(ptr++) = v>>28; - } - return reinterpret_cast(ptr); -} - -void PutVarint32(std::string* dst, uint32_t v) { - char buf[5]; - char* ptr = EncodeVarint32(buf, v); - dst->append(buf, ptr - buf); -} - -char* EncodeVarint64(char* dst, uint64_t v) { - static const int B = 128; - unsigned char* ptr = reinterpret_cast(dst); - while (v >= B) { - *(ptr++) = (v & (B-1)) | B; - v >>= 7; - } - *(ptr++) = static_cast(v); - return reinterpret_cast(ptr); -} - -void PutVarint64(std::string* dst, uint64_t v) { - char buf[10]; - char* ptr = EncodeVarint64(buf, v); - dst->append(buf, ptr - buf); -} - -void PutLengthPrefixedSlice(std::string* dst, const Slice& value) { - PutVarint32(dst, value.size()); - dst->append(value.data(), value.size()); -} - -int VarintLength(uint64_t v) { - int len = 1; - while (v >= 128) { - v >>= 7; - len++; - } - return len; -} - -const char* GetVarint32PtrFallback(const char* p, - const char* limit, - uint32_t* value) { - uint32_t result = 0; - for (uint32_t shift = 0; shift <= 28 && p < limit; shift += 7) { - uint32_t byte = *(reinterpret_cast(p)); - p++; - if (byte & 128) { - // More bytes are present - result |= ((byte & 127) << shift); - } else { - result |= (byte << shift); - *value = result; - return reinterpret_cast(p); - } - } - return NULL; -} - -bool GetVarint32(Slice* input, uint32_t* value) { - const char* p = input->data(); - const char* limit = p + input->size(); - const char* q = GetVarint32Ptr(p, limit, value); - if (q == NULL) { - return false; - } else { - *input = Slice(q, limit - q); - return true; - } -} - -const char* GetVarint64Ptr(const char* p, const char* limit, uint64_t* value) { - uint64_t result = 0; - for (uint32_t shift = 0; shift <= 63 && p < limit; shift += 7) { - uint64_t byte = *(reinterpret_cast(p)); - p++; - if (byte & 128) { - // More bytes are present - result |= ((byte & 127) << shift); - } else { - result |= (byte << shift); - *value = result; - return reinterpret_cast(p); - } - } - return NULL; -} - -bool GetVarint64(Slice* input, uint64_t* value) { - const char* p = input->data(); - const char* limit = p + input->size(); - const char* q = GetVarint64Ptr(p, limit, value); - if (q == NULL) { - return false; - } else { - *input = Slice(q, limit - q); - return true; - } -} - -const char* GetLengthPrefixedSlice(const char* p, const char* limit, - Slice* result) { - uint32_t len; - p = GetVarint32Ptr(p, limit, &len); - if (p == NULL) return NULL; - if (p + len > limit) return NULL; - *result = Slice(p, len); - return p + len; -} - -bool GetLengthPrefixedSlice(Slice* input, Slice* result) { - uint32_t len; - if (GetVarint32(input, &len) && - input->size() >= len) { - *result = Slice(input->data(), len); - input->remove_prefix(len); - return true; - } else { - return false; - } -} - -} // namespace leveldb diff --git a/leveldb/util/coding.h b/leveldb/util/coding.h deleted file mode 100644 index 3993c4a..0000000 --- a/leveldb/util/coding.h +++ /dev/null @@ -1,104 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. -// -// Endian-neutral encoding: -// * Fixed-length numbers are encoded with least-significant byte first -// * In addition we support variable length "varint" encoding -// * Strings are encoded prefixed by their length in varint format - -#ifndef STORAGE_LEVELDB_UTIL_CODING_H_ -#define STORAGE_LEVELDB_UTIL_CODING_H_ - -#include -#include -#include -#include "leveldb/slice.h" -#include "port/port.h" - -namespace leveldb { - -// Standard Put... routines append to a string -extern void PutFixed32(std::string* dst, uint32_t value); -extern void PutFixed64(std::string* dst, uint64_t value); -extern void PutVarint32(std::string* dst, uint32_t value); -extern void PutVarint64(std::string* dst, uint64_t value); -extern void PutLengthPrefixedSlice(std::string* dst, const Slice& value); - -// Standard Get... routines parse a value from the beginning of a Slice -// and advance the slice past the parsed value. -extern bool GetVarint32(Slice* input, uint32_t* value); -extern bool GetVarint64(Slice* input, uint64_t* value); -extern bool GetLengthPrefixedSlice(Slice* input, Slice* result); - -// Pointer-based variants of GetVarint... These either store a value -// in *v and return a pointer just past the parsed value, or return -// NULL on error. These routines only look at bytes in the range -// [p..limit-1] -extern const char* GetVarint32Ptr(const char* p,const char* limit, uint32_t* v); -extern const char* GetVarint64Ptr(const char* p,const char* limit, uint64_t* v); - -// Returns the length of the varint32 or varint64 encoding of "v" -extern int VarintLength(uint64_t v); - -// Lower-level versions of Put... that write directly into a character buffer -// REQUIRES: dst has enough space for the value being written -extern void EncodeFixed32(char* dst, uint32_t value); -extern void EncodeFixed64(char* dst, uint64_t value); - -// Lower-level versions of Put... that write directly into a character buffer -// and return a pointer just past the last byte written. -// REQUIRES: dst has enough space for the value being written -extern char* EncodeVarint32(char* dst, uint32_t value); -extern char* EncodeVarint64(char* dst, uint64_t value); - -// Lower-level versions of Get... that read directly from a character buffer -// without any bounds checking. - -inline uint32_t DecodeFixed32(const char* ptr) { - if (port::kLittleEndian) { - // Load the raw bytes - uint32_t result; - memcpy(&result, ptr, sizeof(result)); // gcc optimizes this to a plain load - return result; - } else { - return ((static_cast(static_cast(ptr[0]))) - | (static_cast(static_cast(ptr[1])) << 8) - | (static_cast(static_cast(ptr[2])) << 16) - | (static_cast(static_cast(ptr[3])) << 24)); - } -} - -inline uint64_t DecodeFixed64(const char* ptr) { - if (port::kLittleEndian) { - // Load the raw bytes - uint64_t result; - memcpy(&result, ptr, sizeof(result)); // gcc optimizes this to a plain load - return result; - } else { - uint64_t lo = DecodeFixed32(ptr); - uint64_t hi = DecodeFixed32(ptr + 4); - return (hi << 32) | lo; - } -} - -// Internal routine for use by fallback path of GetVarint32Ptr -extern const char* GetVarint32PtrFallback(const char* p, - const char* limit, - uint32_t* value); -inline const char* GetVarint32Ptr(const char* p, - const char* limit, - uint32_t* value) { - if (p < limit) { - uint32_t result = *(reinterpret_cast(p)); - if ((result & 128) == 0) { - *value = result; - return p + 1; - } - } - return GetVarint32PtrFallback(p, limit, value); -} - -} // namespace leveldb - -#endif // STORAGE_LEVELDB_UTIL_CODING_H_ diff --git a/leveldb/util/coding_test.cc b/leveldb/util/coding_test.cc deleted file mode 100644 index 2c52b17..0000000 --- a/leveldb/util/coding_test.cc +++ /dev/null @@ -1,196 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "util/coding.h" - -#include "util/testharness.h" - -namespace leveldb { - -class Coding { }; - -TEST(Coding, Fixed32) { - std::string s; - for (uint32_t v = 0; v < 100000; v++) { - PutFixed32(&s, v); - } - - const char* p = s.data(); - for (uint32_t v = 0; v < 100000; v++) { - uint32_t actual = DecodeFixed32(p); - ASSERT_EQ(v, actual); - p += sizeof(uint32_t); - } -} - -TEST(Coding, Fixed64) { - std::string s; - for (int power = 0; power <= 63; power++) { - uint64_t v = static_cast(1) << power; - PutFixed64(&s, v - 1); - PutFixed64(&s, v + 0); - PutFixed64(&s, v + 1); - } - - const char* p = s.data(); - for (int power = 0; power <= 63; power++) { - uint64_t v = static_cast(1) << power; - uint64_t actual; - actual = DecodeFixed64(p); - ASSERT_EQ(v-1, actual); - p += sizeof(uint64_t); - - actual = DecodeFixed64(p); - ASSERT_EQ(v+0, actual); - p += sizeof(uint64_t); - - actual = DecodeFixed64(p); - ASSERT_EQ(v+1, actual); - p += sizeof(uint64_t); - } -} - -// Test that encoding routines generate little-endian encodings -TEST(Coding, EncodingOutput) { - std::string dst; - PutFixed32(&dst, 0x04030201); - ASSERT_EQ(4, dst.size()); - ASSERT_EQ(0x01, static_cast(dst[0])); - ASSERT_EQ(0x02, static_cast(dst[1])); - ASSERT_EQ(0x03, static_cast(dst[2])); - ASSERT_EQ(0x04, static_cast(dst[3])); - - dst.clear(); - PutFixed64(&dst, 0x0807060504030201ull); - ASSERT_EQ(8, dst.size()); - ASSERT_EQ(0x01, static_cast(dst[0])); - ASSERT_EQ(0x02, static_cast(dst[1])); - ASSERT_EQ(0x03, static_cast(dst[2])); - ASSERT_EQ(0x04, static_cast(dst[3])); - ASSERT_EQ(0x05, static_cast(dst[4])); - ASSERT_EQ(0x06, static_cast(dst[5])); - ASSERT_EQ(0x07, static_cast(dst[6])); - ASSERT_EQ(0x08, static_cast(dst[7])); -} - -TEST(Coding, Varint32) { - std::string s; - for (uint32_t i = 0; i < (32 * 32); i++) { - uint32_t v = (i / 32) << (i % 32); - PutVarint32(&s, v); - } - - const char* p = s.data(); - const char* limit = p + s.size(); - for (uint32_t i = 0; i < (32 * 32); i++) { - uint32_t expected = (i / 32) << (i % 32); - uint32_t actual; - const char* start = p; - p = GetVarint32Ptr(p, limit, &actual); - ASSERT_TRUE(p != NULL); - ASSERT_EQ(expected, actual); - ASSERT_EQ(VarintLength(actual), p - start); - } - ASSERT_EQ(p, s.data() + s.size()); -} - -TEST(Coding, Varint64) { - // Construct the list of values to check - std::vector values; - // Some special values - values.push_back(0); - values.push_back(100); - values.push_back(~static_cast(0)); - values.push_back(~static_cast(0) - 1); - for (uint32_t k = 0; k < 64; k++) { - // Test values near powers of two - const uint64_t power = 1ull << k; - values.push_back(power); - values.push_back(power-1); - values.push_back(power+1); - }; - - std::string s; - for (int i = 0; i < values.size(); i++) { - PutVarint64(&s, values[i]); - } - - const char* p = s.data(); - const char* limit = p + s.size(); - for (int i = 0; i < values.size(); i++) { - ASSERT_TRUE(p < limit); - uint64_t actual; - const char* start = p; - p = GetVarint64Ptr(p, limit, &actual); - ASSERT_TRUE(p != NULL); - ASSERT_EQ(values[i], actual); - ASSERT_EQ(VarintLength(actual), p - start); - } - ASSERT_EQ(p, limit); - -} - -TEST(Coding, Varint32Overflow) { - uint32_t result; - std::string input("\x81\x82\x83\x84\x85\x11"); - ASSERT_TRUE(GetVarint32Ptr(input.data(), input.data() + input.size(), &result) - == NULL); -} - -TEST(Coding, Varint32Truncation) { - uint32_t large_value = (1u << 31) + 100; - std::string s; - PutVarint32(&s, large_value); - uint32_t result; - for (int len = 0; len < s.size() - 1; len++) { - ASSERT_TRUE(GetVarint32Ptr(s.data(), s.data() + len, &result) == NULL); - } - ASSERT_TRUE(GetVarint32Ptr(s.data(), s.data() + s.size(), &result) != NULL); - ASSERT_EQ(large_value, result); -} - -TEST(Coding, Varint64Overflow) { - uint64_t result; - std::string input("\x81\x82\x83\x84\x85\x81\x82\x83\x84\x85\x11"); - ASSERT_TRUE(GetVarint64Ptr(input.data(), input.data() + input.size(), &result) - == NULL); -} - -TEST(Coding, Varint64Truncation) { - uint64_t large_value = (1ull << 63) + 100ull; - std::string s; - PutVarint64(&s, large_value); - uint64_t result; - for (int len = 0; len < s.size() - 1; len++) { - ASSERT_TRUE(GetVarint64Ptr(s.data(), s.data() + len, &result) == NULL); - } - ASSERT_TRUE(GetVarint64Ptr(s.data(), s.data() + s.size(), &result) != NULL); - ASSERT_EQ(large_value, result); -} - -TEST(Coding, Strings) { - std::string s; - PutLengthPrefixedSlice(&s, Slice("")); - PutLengthPrefixedSlice(&s, Slice("foo")); - PutLengthPrefixedSlice(&s, Slice("bar")); - PutLengthPrefixedSlice(&s, Slice(std::string(200, 'x'))); - - Slice input(s); - Slice v; - ASSERT_TRUE(GetLengthPrefixedSlice(&input, &v)); - ASSERT_EQ("", v.ToString()); - ASSERT_TRUE(GetLengthPrefixedSlice(&input, &v)); - ASSERT_EQ("foo", v.ToString()); - ASSERT_TRUE(GetLengthPrefixedSlice(&input, &v)); - ASSERT_EQ("bar", v.ToString()); - ASSERT_TRUE(GetLengthPrefixedSlice(&input, &v)); - ASSERT_EQ(std::string(200, 'x'), v.ToString()); - ASSERT_EQ("", input.ToString()); -} - -} // namespace leveldb - -int main(int argc, char** argv) { - return leveldb::test::RunAllTests(); -} diff --git a/leveldb/util/comparator.cc b/leveldb/util/comparator.cc deleted file mode 100644 index 4b7b572..0000000 --- a/leveldb/util/comparator.cc +++ /dev/null @@ -1,81 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include -#include -#include "leveldb/comparator.h" -#include "leveldb/slice.h" -#include "port/port.h" -#include "util/logging.h" - -namespace leveldb { - -Comparator::~Comparator() { } - -namespace { -class BytewiseComparatorImpl : public Comparator { - public: - BytewiseComparatorImpl() { } - - virtual const char* Name() const { - return "leveldb.BytewiseComparator"; - } - - virtual int Compare(const Slice& a, const Slice& b) const { - return a.compare(b); - } - - virtual void FindShortestSeparator( - std::string* start, - const Slice& limit) const { - // Find length of common prefix - size_t min_length = std::min(start->size(), limit.size()); - size_t diff_index = 0; - while ((diff_index < min_length) && - ((*start)[diff_index] == limit[diff_index])) { - diff_index++; - } - - if (diff_index >= min_length) { - // Do not shorten if one string is a prefix of the other - } else { - uint8_t diff_byte = static_cast((*start)[diff_index]); - if (diff_byte < static_cast(0xff) && - diff_byte + 1 < static_cast(limit[diff_index])) { - (*start)[diff_index]++; - start->resize(diff_index + 1); - assert(Compare(*start, limit) < 0); - } - } - } - - virtual void FindShortSuccessor(std::string* key) const { - // Find first character that can be incremented - size_t n = key->size(); - for (size_t i = 0; i < n; i++) { - const uint8_t byte = (*key)[i]; - if (byte != static_cast(0xff)) { - (*key)[i] = byte + 1; - key->resize(i+1); - return; - } - } - // *key is a run of 0xffs. Leave it alone. - } -}; -} // namespace - -static port::OnceType once = LEVELDB_ONCE_INIT; -static const Comparator* bytewise; - -static void InitModule() { - bytewise = new BytewiseComparatorImpl; -} - -const Comparator* BytewiseComparator() { - port::InitOnce(&once, InitModule); - return bytewise; -} - -} // namespace leveldb diff --git a/leveldb/util/crc32c.cc b/leveldb/util/crc32c.cc deleted file mode 100644 index 6db9e77..0000000 --- a/leveldb/util/crc32c.cc +++ /dev/null @@ -1,332 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. -// -// A portable implementation of crc32c, optimized to handle -// four bytes at a time. - -#include "util/crc32c.h" - -#include -#include "util/coding.h" - -namespace leveldb { -namespace crc32c { - -static const uint32_t table0_[256] = { - 0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4, - 0xc79a971f, 0x35f1141c, 0x26a1e7e8, 0xd4ca64eb, - 0x8ad958cf, 0x78b2dbcc, 0x6be22838, 0x9989ab3b, - 0x4d43cfd0, 0xbf284cd3, 0xac78bf27, 0x5e133c24, - 0x105ec76f, 0xe235446c, 0xf165b798, 0x030e349b, - 0xd7c45070, 0x25afd373, 0x36ff2087, 0xc494a384, - 0x9a879fa0, 0x68ec1ca3, 0x7bbcef57, 0x89d76c54, - 0x5d1d08bf, 0xaf768bbc, 0xbc267848, 0x4e4dfb4b, - 0x20bd8ede, 0xd2d60ddd, 0xc186fe29, 0x33ed7d2a, - 0xe72719c1, 0x154c9ac2, 0x061c6936, 0xf477ea35, - 0xaa64d611, 0x580f5512, 0x4b5fa6e6, 0xb93425e5, - 0x6dfe410e, 0x9f95c20d, 0x8cc531f9, 0x7eaeb2fa, - 0x30e349b1, 0xc288cab2, 0xd1d83946, 0x23b3ba45, - 0xf779deae, 0x05125dad, 0x1642ae59, 0xe4292d5a, - 0xba3a117e, 0x4851927d, 0x5b016189, 0xa96ae28a, - 0x7da08661, 0x8fcb0562, 0x9c9bf696, 0x6ef07595, - 0x417b1dbc, 0xb3109ebf, 0xa0406d4b, 0x522bee48, - 0x86e18aa3, 0x748a09a0, 0x67dafa54, 0x95b17957, - 0xcba24573, 0x39c9c670, 0x2a993584, 0xd8f2b687, - 0x0c38d26c, 0xfe53516f, 0xed03a29b, 0x1f682198, - 0x5125dad3, 0xa34e59d0, 0xb01eaa24, 0x42752927, - 0x96bf4dcc, 0x64d4cecf, 0x77843d3b, 0x85efbe38, - 0xdbfc821c, 0x2997011f, 0x3ac7f2eb, 0xc8ac71e8, - 0x1c661503, 0xee0d9600, 0xfd5d65f4, 0x0f36e6f7, - 0x61c69362, 0x93ad1061, 0x80fde395, 0x72966096, - 0xa65c047d, 0x5437877e, 0x4767748a, 0xb50cf789, - 0xeb1fcbad, 0x197448ae, 0x0a24bb5a, 0xf84f3859, - 0x2c855cb2, 0xdeeedfb1, 0xcdbe2c45, 0x3fd5af46, - 0x7198540d, 0x83f3d70e, 0x90a324fa, 0x62c8a7f9, - 0xb602c312, 0x44694011, 0x5739b3e5, 0xa55230e6, - 0xfb410cc2, 0x092a8fc1, 0x1a7a7c35, 0xe811ff36, - 0x3cdb9bdd, 0xceb018de, 0xdde0eb2a, 0x2f8b6829, - 0x82f63b78, 0x709db87b, 0x63cd4b8f, 0x91a6c88c, - 0x456cac67, 0xb7072f64, 0xa457dc90, 0x563c5f93, - 0x082f63b7, 0xfa44e0b4, 0xe9141340, 0x1b7f9043, - 0xcfb5f4a8, 0x3dde77ab, 0x2e8e845f, 0xdce5075c, - 0x92a8fc17, 0x60c37f14, 0x73938ce0, 0x81f80fe3, - 0x55326b08, 0xa759e80b, 0xb4091bff, 0x466298fc, - 0x1871a4d8, 0xea1a27db, 0xf94ad42f, 0x0b21572c, - 0xdfeb33c7, 0x2d80b0c4, 0x3ed04330, 0xccbbc033, - 0xa24bb5a6, 0x502036a5, 0x4370c551, 0xb11b4652, - 0x65d122b9, 0x97baa1ba, 0x84ea524e, 0x7681d14d, - 0x2892ed69, 0xdaf96e6a, 0xc9a99d9e, 0x3bc21e9d, - 0xef087a76, 0x1d63f975, 0x0e330a81, 0xfc588982, - 0xb21572c9, 0x407ef1ca, 0x532e023e, 0xa145813d, - 0x758fe5d6, 0x87e466d5, 0x94b49521, 0x66df1622, - 0x38cc2a06, 0xcaa7a905, 0xd9f75af1, 0x2b9cd9f2, - 0xff56bd19, 0x0d3d3e1a, 0x1e6dcdee, 0xec064eed, - 0xc38d26c4, 0x31e6a5c7, 0x22b65633, 0xd0ddd530, - 0x0417b1db, 0xf67c32d8, 0xe52cc12c, 0x1747422f, - 0x49547e0b, 0xbb3ffd08, 0xa86f0efc, 0x5a048dff, - 0x8ecee914, 0x7ca56a17, 0x6ff599e3, 0x9d9e1ae0, - 0xd3d3e1ab, 0x21b862a8, 0x32e8915c, 0xc083125f, - 0x144976b4, 0xe622f5b7, 0xf5720643, 0x07198540, - 0x590ab964, 0xab613a67, 0xb831c993, 0x4a5a4a90, - 0x9e902e7b, 0x6cfbad78, 0x7fab5e8c, 0x8dc0dd8f, - 0xe330a81a, 0x115b2b19, 0x020bd8ed, 0xf0605bee, - 0x24aa3f05, 0xd6c1bc06, 0xc5914ff2, 0x37faccf1, - 0x69e9f0d5, 0x9b8273d6, 0x88d28022, 0x7ab90321, - 0xae7367ca, 0x5c18e4c9, 0x4f48173d, 0xbd23943e, - 0xf36e6f75, 0x0105ec76, 0x12551f82, 0xe03e9c81, - 0x34f4f86a, 0xc69f7b69, 0xd5cf889d, 0x27a40b9e, - 0x79b737ba, 0x8bdcb4b9, 0x988c474d, 0x6ae7c44e, - 0xbe2da0a5, 0x4c4623a6, 0x5f16d052, 0xad7d5351 -}; -static const uint32_t table1_[256] = { - 0x00000000, 0x13a29877, 0x274530ee, 0x34e7a899, - 0x4e8a61dc, 0x5d28f9ab, 0x69cf5132, 0x7a6dc945, - 0x9d14c3b8, 0x8eb65bcf, 0xba51f356, 0xa9f36b21, - 0xd39ea264, 0xc03c3a13, 0xf4db928a, 0xe7790afd, - 0x3fc5f181, 0x2c6769f6, 0x1880c16f, 0x0b225918, - 0x714f905d, 0x62ed082a, 0x560aa0b3, 0x45a838c4, - 0xa2d13239, 0xb173aa4e, 0x859402d7, 0x96369aa0, - 0xec5b53e5, 0xfff9cb92, 0xcb1e630b, 0xd8bcfb7c, - 0x7f8be302, 0x6c297b75, 0x58ced3ec, 0x4b6c4b9b, - 0x310182de, 0x22a31aa9, 0x1644b230, 0x05e62a47, - 0xe29f20ba, 0xf13db8cd, 0xc5da1054, 0xd6788823, - 0xac154166, 0xbfb7d911, 0x8b507188, 0x98f2e9ff, - 0x404e1283, 0x53ec8af4, 0x670b226d, 0x74a9ba1a, - 0x0ec4735f, 0x1d66eb28, 0x298143b1, 0x3a23dbc6, - 0xdd5ad13b, 0xcef8494c, 0xfa1fe1d5, 0xe9bd79a2, - 0x93d0b0e7, 0x80722890, 0xb4958009, 0xa737187e, - 0xff17c604, 0xecb55e73, 0xd852f6ea, 0xcbf06e9d, - 0xb19da7d8, 0xa23f3faf, 0x96d89736, 0x857a0f41, - 0x620305bc, 0x71a19dcb, 0x45463552, 0x56e4ad25, - 0x2c896460, 0x3f2bfc17, 0x0bcc548e, 0x186eccf9, - 0xc0d23785, 0xd370aff2, 0xe797076b, 0xf4359f1c, - 0x8e585659, 0x9dface2e, 0xa91d66b7, 0xbabffec0, - 0x5dc6f43d, 0x4e646c4a, 0x7a83c4d3, 0x69215ca4, - 0x134c95e1, 0x00ee0d96, 0x3409a50f, 0x27ab3d78, - 0x809c2506, 0x933ebd71, 0xa7d915e8, 0xb47b8d9f, - 0xce1644da, 0xddb4dcad, 0xe9537434, 0xfaf1ec43, - 0x1d88e6be, 0x0e2a7ec9, 0x3acdd650, 0x296f4e27, - 0x53028762, 0x40a01f15, 0x7447b78c, 0x67e52ffb, - 0xbf59d487, 0xacfb4cf0, 0x981ce469, 0x8bbe7c1e, - 0xf1d3b55b, 0xe2712d2c, 0xd69685b5, 0xc5341dc2, - 0x224d173f, 0x31ef8f48, 0x050827d1, 0x16aabfa6, - 0x6cc776e3, 0x7f65ee94, 0x4b82460d, 0x5820de7a, - 0xfbc3faf9, 0xe861628e, 0xdc86ca17, 0xcf245260, - 0xb5499b25, 0xa6eb0352, 0x920cabcb, 0x81ae33bc, - 0x66d73941, 0x7575a136, 0x419209af, 0x523091d8, - 0x285d589d, 0x3bffc0ea, 0x0f186873, 0x1cbaf004, - 0xc4060b78, 0xd7a4930f, 0xe3433b96, 0xf0e1a3e1, - 0x8a8c6aa4, 0x992ef2d3, 0xadc95a4a, 0xbe6bc23d, - 0x5912c8c0, 0x4ab050b7, 0x7e57f82e, 0x6df56059, - 0x1798a91c, 0x043a316b, 0x30dd99f2, 0x237f0185, - 0x844819fb, 0x97ea818c, 0xa30d2915, 0xb0afb162, - 0xcac27827, 0xd960e050, 0xed8748c9, 0xfe25d0be, - 0x195cda43, 0x0afe4234, 0x3e19eaad, 0x2dbb72da, - 0x57d6bb9f, 0x447423e8, 0x70938b71, 0x63311306, - 0xbb8de87a, 0xa82f700d, 0x9cc8d894, 0x8f6a40e3, - 0xf50789a6, 0xe6a511d1, 0xd242b948, 0xc1e0213f, - 0x26992bc2, 0x353bb3b5, 0x01dc1b2c, 0x127e835b, - 0x68134a1e, 0x7bb1d269, 0x4f567af0, 0x5cf4e287, - 0x04d43cfd, 0x1776a48a, 0x23910c13, 0x30339464, - 0x4a5e5d21, 0x59fcc556, 0x6d1b6dcf, 0x7eb9f5b8, - 0x99c0ff45, 0x8a626732, 0xbe85cfab, 0xad2757dc, - 0xd74a9e99, 0xc4e806ee, 0xf00fae77, 0xe3ad3600, - 0x3b11cd7c, 0x28b3550b, 0x1c54fd92, 0x0ff665e5, - 0x759baca0, 0x663934d7, 0x52de9c4e, 0x417c0439, - 0xa6050ec4, 0xb5a796b3, 0x81403e2a, 0x92e2a65d, - 0xe88f6f18, 0xfb2df76f, 0xcfca5ff6, 0xdc68c781, - 0x7b5fdfff, 0x68fd4788, 0x5c1aef11, 0x4fb87766, - 0x35d5be23, 0x26772654, 0x12908ecd, 0x013216ba, - 0xe64b1c47, 0xf5e98430, 0xc10e2ca9, 0xd2acb4de, - 0xa8c17d9b, 0xbb63e5ec, 0x8f844d75, 0x9c26d502, - 0x449a2e7e, 0x5738b609, 0x63df1e90, 0x707d86e7, - 0x0a104fa2, 0x19b2d7d5, 0x2d557f4c, 0x3ef7e73b, - 0xd98eedc6, 0xca2c75b1, 0xfecbdd28, 0xed69455f, - 0x97048c1a, 0x84a6146d, 0xb041bcf4, 0xa3e32483 -}; -static const uint32_t table2_[256] = { - 0x00000000, 0xa541927e, 0x4f6f520d, 0xea2ec073, - 0x9edea41a, 0x3b9f3664, 0xd1b1f617, 0x74f06469, - 0x38513ec5, 0x9d10acbb, 0x773e6cc8, 0xd27ffeb6, - 0xa68f9adf, 0x03ce08a1, 0xe9e0c8d2, 0x4ca15aac, - 0x70a27d8a, 0xd5e3eff4, 0x3fcd2f87, 0x9a8cbdf9, - 0xee7cd990, 0x4b3d4bee, 0xa1138b9d, 0x045219e3, - 0x48f3434f, 0xedb2d131, 0x079c1142, 0xa2dd833c, - 0xd62de755, 0x736c752b, 0x9942b558, 0x3c032726, - 0xe144fb14, 0x4405696a, 0xae2ba919, 0x0b6a3b67, - 0x7f9a5f0e, 0xdadbcd70, 0x30f50d03, 0x95b49f7d, - 0xd915c5d1, 0x7c5457af, 0x967a97dc, 0x333b05a2, - 0x47cb61cb, 0xe28af3b5, 0x08a433c6, 0xade5a1b8, - 0x91e6869e, 0x34a714e0, 0xde89d493, 0x7bc846ed, - 0x0f382284, 0xaa79b0fa, 0x40577089, 0xe516e2f7, - 0xa9b7b85b, 0x0cf62a25, 0xe6d8ea56, 0x43997828, - 0x37691c41, 0x92288e3f, 0x78064e4c, 0xdd47dc32, - 0xc76580d9, 0x622412a7, 0x880ad2d4, 0x2d4b40aa, - 0x59bb24c3, 0xfcfab6bd, 0x16d476ce, 0xb395e4b0, - 0xff34be1c, 0x5a752c62, 0xb05bec11, 0x151a7e6f, - 0x61ea1a06, 0xc4ab8878, 0x2e85480b, 0x8bc4da75, - 0xb7c7fd53, 0x12866f2d, 0xf8a8af5e, 0x5de93d20, - 0x29195949, 0x8c58cb37, 0x66760b44, 0xc337993a, - 0x8f96c396, 0x2ad751e8, 0xc0f9919b, 0x65b803e5, - 0x1148678c, 0xb409f5f2, 0x5e273581, 0xfb66a7ff, - 0x26217bcd, 0x8360e9b3, 0x694e29c0, 0xcc0fbbbe, - 0xb8ffdfd7, 0x1dbe4da9, 0xf7908dda, 0x52d11fa4, - 0x1e704508, 0xbb31d776, 0x511f1705, 0xf45e857b, - 0x80aee112, 0x25ef736c, 0xcfc1b31f, 0x6a802161, - 0x56830647, 0xf3c29439, 0x19ec544a, 0xbcadc634, - 0xc85da25d, 0x6d1c3023, 0x8732f050, 0x2273622e, - 0x6ed23882, 0xcb93aafc, 0x21bd6a8f, 0x84fcf8f1, - 0xf00c9c98, 0x554d0ee6, 0xbf63ce95, 0x1a225ceb, - 0x8b277743, 0x2e66e53d, 0xc448254e, 0x6109b730, - 0x15f9d359, 0xb0b84127, 0x5a968154, 0xffd7132a, - 0xb3764986, 0x1637dbf8, 0xfc191b8b, 0x595889f5, - 0x2da8ed9c, 0x88e97fe2, 0x62c7bf91, 0xc7862def, - 0xfb850ac9, 0x5ec498b7, 0xb4ea58c4, 0x11abcaba, - 0x655baed3, 0xc01a3cad, 0x2a34fcde, 0x8f756ea0, - 0xc3d4340c, 0x6695a672, 0x8cbb6601, 0x29faf47f, - 0x5d0a9016, 0xf84b0268, 0x1265c21b, 0xb7245065, - 0x6a638c57, 0xcf221e29, 0x250cde5a, 0x804d4c24, - 0xf4bd284d, 0x51fcba33, 0xbbd27a40, 0x1e93e83e, - 0x5232b292, 0xf77320ec, 0x1d5de09f, 0xb81c72e1, - 0xccec1688, 0x69ad84f6, 0x83834485, 0x26c2d6fb, - 0x1ac1f1dd, 0xbf8063a3, 0x55aea3d0, 0xf0ef31ae, - 0x841f55c7, 0x215ec7b9, 0xcb7007ca, 0x6e3195b4, - 0x2290cf18, 0x87d15d66, 0x6dff9d15, 0xc8be0f6b, - 0xbc4e6b02, 0x190ff97c, 0xf321390f, 0x5660ab71, - 0x4c42f79a, 0xe90365e4, 0x032da597, 0xa66c37e9, - 0xd29c5380, 0x77ddc1fe, 0x9df3018d, 0x38b293f3, - 0x7413c95f, 0xd1525b21, 0x3b7c9b52, 0x9e3d092c, - 0xeacd6d45, 0x4f8cff3b, 0xa5a23f48, 0x00e3ad36, - 0x3ce08a10, 0x99a1186e, 0x738fd81d, 0xd6ce4a63, - 0xa23e2e0a, 0x077fbc74, 0xed517c07, 0x4810ee79, - 0x04b1b4d5, 0xa1f026ab, 0x4bdee6d8, 0xee9f74a6, - 0x9a6f10cf, 0x3f2e82b1, 0xd50042c2, 0x7041d0bc, - 0xad060c8e, 0x08479ef0, 0xe2695e83, 0x4728ccfd, - 0x33d8a894, 0x96993aea, 0x7cb7fa99, 0xd9f668e7, - 0x9557324b, 0x3016a035, 0xda386046, 0x7f79f238, - 0x0b899651, 0xaec8042f, 0x44e6c45c, 0xe1a75622, - 0xdda47104, 0x78e5e37a, 0x92cb2309, 0x378ab177, - 0x437ad51e, 0xe63b4760, 0x0c158713, 0xa954156d, - 0xe5f54fc1, 0x40b4ddbf, 0xaa9a1dcc, 0x0fdb8fb2, - 0x7b2bebdb, 0xde6a79a5, 0x3444b9d6, 0x91052ba8 -}; -static const uint32_t table3_[256] = { - 0x00000000, 0xdd45aab8, 0xbf672381, 0x62228939, - 0x7b2231f3, 0xa6679b4b, 0xc4451272, 0x1900b8ca, - 0xf64463e6, 0x2b01c95e, 0x49234067, 0x9466eadf, - 0x8d665215, 0x5023f8ad, 0x32017194, 0xef44db2c, - 0xe964b13d, 0x34211b85, 0x560392bc, 0x8b463804, - 0x924680ce, 0x4f032a76, 0x2d21a34f, 0xf06409f7, - 0x1f20d2db, 0xc2657863, 0xa047f15a, 0x7d025be2, - 0x6402e328, 0xb9474990, 0xdb65c0a9, 0x06206a11, - 0xd725148b, 0x0a60be33, 0x6842370a, 0xb5079db2, - 0xac072578, 0x71428fc0, 0x136006f9, 0xce25ac41, - 0x2161776d, 0xfc24ddd5, 0x9e0654ec, 0x4343fe54, - 0x5a43469e, 0x8706ec26, 0xe524651f, 0x3861cfa7, - 0x3e41a5b6, 0xe3040f0e, 0x81268637, 0x5c632c8f, - 0x45639445, 0x98263efd, 0xfa04b7c4, 0x27411d7c, - 0xc805c650, 0x15406ce8, 0x7762e5d1, 0xaa274f69, - 0xb327f7a3, 0x6e625d1b, 0x0c40d422, 0xd1057e9a, - 0xaba65fe7, 0x76e3f55f, 0x14c17c66, 0xc984d6de, - 0xd0846e14, 0x0dc1c4ac, 0x6fe34d95, 0xb2a6e72d, - 0x5de23c01, 0x80a796b9, 0xe2851f80, 0x3fc0b538, - 0x26c00df2, 0xfb85a74a, 0x99a72e73, 0x44e284cb, - 0x42c2eeda, 0x9f874462, 0xfda5cd5b, 0x20e067e3, - 0x39e0df29, 0xe4a57591, 0x8687fca8, 0x5bc25610, - 0xb4868d3c, 0x69c32784, 0x0be1aebd, 0xd6a40405, - 0xcfa4bccf, 0x12e11677, 0x70c39f4e, 0xad8635f6, - 0x7c834b6c, 0xa1c6e1d4, 0xc3e468ed, 0x1ea1c255, - 0x07a17a9f, 0xdae4d027, 0xb8c6591e, 0x6583f3a6, - 0x8ac7288a, 0x57828232, 0x35a00b0b, 0xe8e5a1b3, - 0xf1e51979, 0x2ca0b3c1, 0x4e823af8, 0x93c79040, - 0x95e7fa51, 0x48a250e9, 0x2a80d9d0, 0xf7c57368, - 0xeec5cba2, 0x3380611a, 0x51a2e823, 0x8ce7429b, - 0x63a399b7, 0xbee6330f, 0xdcc4ba36, 0x0181108e, - 0x1881a844, 0xc5c402fc, 0xa7e68bc5, 0x7aa3217d, - 0x52a0c93f, 0x8fe56387, 0xedc7eabe, 0x30824006, - 0x2982f8cc, 0xf4c75274, 0x96e5db4d, 0x4ba071f5, - 0xa4e4aad9, 0x79a10061, 0x1b838958, 0xc6c623e0, - 0xdfc69b2a, 0x02833192, 0x60a1b8ab, 0xbde41213, - 0xbbc47802, 0x6681d2ba, 0x04a35b83, 0xd9e6f13b, - 0xc0e649f1, 0x1da3e349, 0x7f816a70, 0xa2c4c0c8, - 0x4d801be4, 0x90c5b15c, 0xf2e73865, 0x2fa292dd, - 0x36a22a17, 0xebe780af, 0x89c50996, 0x5480a32e, - 0x8585ddb4, 0x58c0770c, 0x3ae2fe35, 0xe7a7548d, - 0xfea7ec47, 0x23e246ff, 0x41c0cfc6, 0x9c85657e, - 0x73c1be52, 0xae8414ea, 0xcca69dd3, 0x11e3376b, - 0x08e38fa1, 0xd5a62519, 0xb784ac20, 0x6ac10698, - 0x6ce16c89, 0xb1a4c631, 0xd3864f08, 0x0ec3e5b0, - 0x17c35d7a, 0xca86f7c2, 0xa8a47efb, 0x75e1d443, - 0x9aa50f6f, 0x47e0a5d7, 0x25c22cee, 0xf8878656, - 0xe1873e9c, 0x3cc29424, 0x5ee01d1d, 0x83a5b7a5, - 0xf90696d8, 0x24433c60, 0x4661b559, 0x9b241fe1, - 0x8224a72b, 0x5f610d93, 0x3d4384aa, 0xe0062e12, - 0x0f42f53e, 0xd2075f86, 0xb025d6bf, 0x6d607c07, - 0x7460c4cd, 0xa9256e75, 0xcb07e74c, 0x16424df4, - 0x106227e5, 0xcd278d5d, 0xaf050464, 0x7240aedc, - 0x6b401616, 0xb605bcae, 0xd4273597, 0x09629f2f, - 0xe6264403, 0x3b63eebb, 0x59416782, 0x8404cd3a, - 0x9d0475f0, 0x4041df48, 0x22635671, 0xff26fcc9, - 0x2e238253, 0xf36628eb, 0x9144a1d2, 0x4c010b6a, - 0x5501b3a0, 0x88441918, 0xea669021, 0x37233a99, - 0xd867e1b5, 0x05224b0d, 0x6700c234, 0xba45688c, - 0xa345d046, 0x7e007afe, 0x1c22f3c7, 0xc167597f, - 0xc747336e, 0x1a0299d6, 0x782010ef, 0xa565ba57, - 0xbc65029d, 0x6120a825, 0x0302211c, 0xde478ba4, - 0x31035088, 0xec46fa30, 0x8e647309, 0x5321d9b1, - 0x4a21617b, 0x9764cbc3, 0xf54642fa, 0x2803e842 -}; - -// Used to fetch a naturally-aligned 32-bit word in little endian byte-order -static inline uint32_t LE_LOAD32(const uint8_t *p) { - return DecodeFixed32(reinterpret_cast(p)); -} - -uint32_t Extend(uint32_t crc, const char* buf, size_t size) { - const uint8_t *p = reinterpret_cast(buf); - const uint8_t *e = p + size; - uint32_t l = crc ^ 0xffffffffu; - -#define STEP1 do { \ - int c = (l & 0xff) ^ *p++; \ - l = table0_[c] ^ (l >> 8); \ -} while (0) -#define STEP4 do { \ - uint32_t c = l ^ LE_LOAD32(p); \ - p += 4; \ - l = table3_[c & 0xff] ^ \ - table2_[(c >> 8) & 0xff] ^ \ - table1_[(c >> 16) & 0xff] ^ \ - table0_[c >> 24]; \ -} while (0) - - // Point x at first 4-byte aligned byte in string. This might be - // just past the end of the string. - const uintptr_t pval = reinterpret_cast(p); - const uint8_t* x = reinterpret_cast(((pval + 3) >> 2) << 2); - if (x <= e) { - // Process bytes until finished or p is 4-byte aligned - while (p != x) { - STEP1; - } - } - // Process bytes 16 at a time - while ((e-p) >= 16) { - STEP4; STEP4; STEP4; STEP4; - } - // Process bytes 4 at a time - while ((e-p) >= 4) { - STEP4; - } - // Process the last few bytes - while (p != e) { - STEP1; - } -#undef STEP4 -#undef STEP1 - return l ^ 0xffffffffu; -} - -} // namespace crc32c -} // namespace leveldb diff --git a/leveldb/util/crc32c.h b/leveldb/util/crc32c.h deleted file mode 100644 index 1d7e5c0..0000000 --- a/leveldb/util/crc32c.h +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#ifndef STORAGE_LEVELDB_UTIL_CRC32C_H_ -#define STORAGE_LEVELDB_UTIL_CRC32C_H_ - -#include -#include - -namespace leveldb { -namespace crc32c { - -// Return the crc32c of concat(A, data[0,n-1]) where init_crc is the -// crc32c of some string A. Extend() is often used to maintain the -// crc32c of a stream of data. -extern uint32_t Extend(uint32_t init_crc, const char* data, size_t n); - -// Return the crc32c of data[0,n-1] -inline uint32_t Value(const char* data, size_t n) { - return Extend(0, data, n); -} - -static const uint32_t kMaskDelta = 0xa282ead8ul; - -// Return a masked representation of crc. -// -// Motivation: it is problematic to compute the CRC of a string that -// contains embedded CRCs. Therefore we recommend that CRCs stored -// somewhere (e.g., in files) should be masked before being stored. -inline uint32_t Mask(uint32_t crc) { - // Rotate right by 15 bits and add a constant. - return ((crc >> 15) | (crc << 17)) + kMaskDelta; -} - -// Return the crc whose masked representation is masked_crc. -inline uint32_t Unmask(uint32_t masked_crc) { - uint32_t rot = masked_crc - kMaskDelta; - return ((rot >> 17) | (rot << 15)); -} - -} // namespace crc32c -} // namespace leveldb - -#endif // STORAGE_LEVELDB_UTIL_CRC32C_H_ diff --git a/leveldb/util/crc32c_test.cc b/leveldb/util/crc32c_test.cc deleted file mode 100644 index 4b957ee..0000000 --- a/leveldb/util/crc32c_test.cc +++ /dev/null @@ -1,72 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "util/crc32c.h" -#include "util/testharness.h" - -namespace leveldb { -namespace crc32c { - -class CRC { }; - -TEST(CRC, StandardResults) { - // From rfc3720 section B.4. - char buf[32]; - - memset(buf, 0, sizeof(buf)); - ASSERT_EQ(0x8a9136aa, Value(buf, sizeof(buf))); - - memset(buf, 0xff, sizeof(buf)); - ASSERT_EQ(0x62a8ab43, Value(buf, sizeof(buf))); - - for (int i = 0; i < 32; i++) { - buf[i] = i; - } - ASSERT_EQ(0x46dd794e, Value(buf, sizeof(buf))); - - for (int i = 0; i < 32; i++) { - buf[i] = 31 - i; - } - ASSERT_EQ(0x113fdb5c, Value(buf, sizeof(buf))); - - unsigned char data[48] = { - 0x01, 0xc0, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0x14, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x04, 0x00, - 0x00, 0x00, 0x00, 0x14, - 0x00, 0x00, 0x00, 0x18, - 0x28, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, - }; - ASSERT_EQ(0xd9963a56, Value(reinterpret_cast(data), sizeof(data))); -} - -TEST(CRC, Values) { - ASSERT_NE(Value("a", 1), Value("foo", 3)); -} - -TEST(CRC, Extend) { - ASSERT_EQ(Value("hello world", 11), - Extend(Value("hello ", 6), "world", 5)); -} - -TEST(CRC, Mask) { - uint32_t crc = Value("foo", 3); - ASSERT_NE(crc, Mask(crc)); - ASSERT_NE(crc, Mask(Mask(crc))); - ASSERT_EQ(crc, Unmask(Mask(crc))); - ASSERT_EQ(crc, Unmask(Unmask(Mask(Mask(crc))))); -} - -} // namespace crc32c -} // namespace leveldb - -int main(int argc, char** argv) { - return leveldb::test::RunAllTests(); -} diff --git a/leveldb/util/env.cc b/leveldb/util/env.cc deleted file mode 100644 index c2600e9..0000000 --- a/leveldb/util/env.cc +++ /dev/null @@ -1,96 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "leveldb/env.h" - -namespace leveldb { - -Env::~Env() { -} - -SequentialFile::~SequentialFile() { -} - -RandomAccessFile::~RandomAccessFile() { -} - -WritableFile::~WritableFile() { -} - -Logger::~Logger() { -} - -FileLock::~FileLock() { -} - -void Log(Logger* info_log, const char* format, ...) { - if (info_log != NULL) { - va_list ap; - va_start(ap, format); - info_log->Logv(format, ap); - va_end(ap); - } -} - -static Status DoWriteStringToFile(Env* env, const Slice& data, - const std::string& fname, - bool should_sync) { - WritableFile* file; - Status s = env->NewWritableFile(fname, &file); - if (!s.ok()) { - return s; - } - s = file->Append(data); - if (s.ok() && should_sync) { - s = file->Sync(); - } - if (s.ok()) { - s = file->Close(); - } - delete file; // Will auto-close if we did not close above - if (!s.ok()) { - env->DeleteFile(fname); - } - return s; -} - -Status WriteStringToFile(Env* env, const Slice& data, - const std::string& fname) { - return DoWriteStringToFile(env, data, fname, false); -} - -Status WriteStringToFileSync(Env* env, const Slice& data, - const std::string& fname) { - return DoWriteStringToFile(env, data, fname, true); -} - -Status ReadFileToString(Env* env, const std::string& fname, std::string* data) { - data->clear(); - SequentialFile* file; - Status s = env->NewSequentialFile(fname, &file); - if (!s.ok()) { - return s; - } - static const int kBufferSize = 8192; - char* space = new char[kBufferSize]; - while (true) { - Slice fragment; - s = file->Read(kBufferSize, &fragment, space); - if (!s.ok()) { - break; - } - data->append(fragment.data(), fragment.size()); - if (fragment.empty()) { - break; - } - } - delete[] space; - delete file; - return s; -} - -EnvWrapper::~EnvWrapper() { -} - -} // namespace leveldb diff --git a/leveldb/util/env_posix.cc b/leveldb/util/env_posix.cc deleted file mode 100644 index cb1f6fc..0000000 --- a/leveldb/util/env_posix.cc +++ /dev/null @@ -1,609 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#if defined(LEVELDB_PLATFORM_ANDROID) -#include -#endif -#include "leveldb/env.h" -#include "leveldb/slice.h" -#include "port/port.h" -#include "util/logging.h" -#include "util/posix_logger.h" - -namespace leveldb { - -namespace { - -static Status IOError(const std::string& context, int err_number) { - return Status::IOError(context, strerror(err_number)); -} - -class PosixSequentialFile: public SequentialFile { - private: - std::string filename_; - FILE* file_; - - public: - PosixSequentialFile(const std::string& fname, FILE* f) - : filename_(fname), file_(f) { } - virtual ~PosixSequentialFile() { fclose(file_); } - - virtual Status Read(size_t n, Slice* result, char* scratch) { - Status s; - size_t r = fread_unlocked(scratch, 1, n, file_); - *result = Slice(scratch, r); - if (r < n) { - if (feof(file_)) { - // We leave status as ok if we hit the end of the file - } else { - // A partial read with an error: return a non-ok status - s = IOError(filename_, errno); - } - } - return s; - } - - virtual Status Skip(uint64_t n) { - if (fseek(file_, n, SEEK_CUR)) { - return IOError(filename_, errno); - } - return Status::OK(); - } -}; - -// pread() based random-access -class PosixRandomAccessFile: public RandomAccessFile { - private: - std::string filename_; - int fd_; - - public: - PosixRandomAccessFile(const std::string& fname, int fd) - : filename_(fname), fd_(fd) { } - virtual ~PosixRandomAccessFile() { close(fd_); } - - virtual Status Read(uint64_t offset, size_t n, Slice* result, - char* scratch) const { - Status s; - ssize_t r = pread(fd_, scratch, n, static_cast(offset)); - *result = Slice(scratch, (r < 0) ? 0 : r); - if (r < 0) { - // An error: return a non-ok status - s = IOError(filename_, errno); - } - return s; - } -}; - -// mmap() based random-access -class PosixMmapReadableFile: public RandomAccessFile { - private: - std::string filename_; - void* mmapped_region_; - size_t length_; - - public: - // base[0,length-1] contains the mmapped contents of the file. - PosixMmapReadableFile(const std::string& fname, void* base, size_t length) - : filename_(fname), mmapped_region_(base), length_(length) { } - virtual ~PosixMmapReadableFile() { munmap(mmapped_region_, length_); } - - virtual Status Read(uint64_t offset, size_t n, Slice* result, - char* scratch) const { - Status s; - if (offset + n > length_) { - *result = Slice(); - s = IOError(filename_, EINVAL); - } else { - *result = Slice(reinterpret_cast(mmapped_region_) + offset, n); - } - return s; - } -}; - -// We preallocate up to an extra megabyte and use memcpy to append new -// data to the file. This is safe since we either properly close the -// file before reading from it, or for log files, the reading code -// knows enough to skip zero suffixes. -class PosixMmapFile : public WritableFile { - private: - std::string filename_; - int fd_; - size_t page_size_; - size_t map_size_; // How much extra memory to map at a time - char* base_; // The mapped region - char* limit_; // Limit of the mapped region - char* dst_; // Where to write next (in range [base_,limit_]) - char* last_sync_; // Where have we synced up to - uint64_t file_offset_; // Offset of base_ in file - - // Have we done an munmap of unsynced data? - bool pending_sync_; - - // Roundup x to a multiple of y - static size_t Roundup(size_t x, size_t y) { - return ((x + y - 1) / y) * y; - } - - size_t TruncateToPageBoundary(size_t s) { - s -= (s & (page_size_ - 1)); - assert((s % page_size_) == 0); - return s; - } - - bool UnmapCurrentRegion() { - bool result = true; - if (base_ != NULL) { - if (last_sync_ < limit_) { - // Defer syncing this data until next Sync() call, if any - pending_sync_ = true; - } - if (munmap(base_, limit_ - base_) != 0) { - result = false; - } - file_offset_ += limit_ - base_; - base_ = NULL; - limit_ = NULL; - last_sync_ = NULL; - dst_ = NULL; - - // Increase the amount we map the next time, but capped at 1MB - if (map_size_ < (1<<20)) { - map_size_ *= 2; - } - } - return result; - } - - bool MapNewRegion() { - assert(base_ == NULL); - if (ftruncate(fd_, file_offset_ + map_size_) < 0) { - return false; - } - void* ptr = mmap(NULL, map_size_, PROT_READ | PROT_WRITE, MAP_SHARED, - fd_, file_offset_); - if (ptr == MAP_FAILED) { - return false; - } - base_ = reinterpret_cast(ptr); - limit_ = base_ + map_size_; - dst_ = base_; - last_sync_ = base_; - return true; - } - - public: - PosixMmapFile(const std::string& fname, int fd, size_t page_size) - : filename_(fname), - fd_(fd), - page_size_(page_size), - map_size_(Roundup(65536, page_size)), - base_(NULL), - limit_(NULL), - dst_(NULL), - last_sync_(NULL), - file_offset_(0), - pending_sync_(false) { - assert((page_size & (page_size - 1)) == 0); - } - - - ~PosixMmapFile() { - if (fd_ >= 0) { - PosixMmapFile::Close(); - } - } - - virtual Status Append(const Slice& data) { - const char* src = data.data(); - size_t left = data.size(); - while (left > 0) { - assert(base_ <= dst_); - assert(dst_ <= limit_); - size_t avail = limit_ - dst_; - if (avail == 0) { - if (!UnmapCurrentRegion() || - !MapNewRegion()) { - return IOError(filename_, errno); - } - } - - size_t n = (left <= avail) ? left : avail; - memcpy(dst_, src, n); - dst_ += n; - src += n; - left -= n; - } - return Status::OK(); - } - - virtual Status Close() { - Status s; - size_t unused = limit_ - dst_; - if (!UnmapCurrentRegion()) { - s = IOError(filename_, errno); - } else if (unused > 0) { - // Trim the extra space at the end of the file - if (ftruncate(fd_, file_offset_ - unused) < 0) { - s = IOError(filename_, errno); - } - } - - if (close(fd_) < 0) { - if (s.ok()) { - s = IOError(filename_, errno); - } - } - - fd_ = -1; - base_ = NULL; - limit_ = NULL; - return s; - } - - virtual Status Flush() { - return Status::OK(); - } - - virtual Status Sync() { - Status s; - - if (pending_sync_) { - // Some unmapped data was not synced - pending_sync_ = false; - if (fdatasync(fd_) < 0) { - s = IOError(filename_, errno); - } - } - - if (dst_ > last_sync_) { - // Find the beginnings of the pages that contain the first and last - // bytes to be synced. - size_t p1 = TruncateToPageBoundary(last_sync_ - base_); - size_t p2 = TruncateToPageBoundary(dst_ - base_ - 1); - last_sync_ = dst_; - if (msync(base_ + p1, p2 - p1 + page_size_, MS_SYNC) < 0) { - s = IOError(filename_, errno); - } - } - - return s; - } -}; - -static int LockOrUnlock(int fd, bool lock) { - errno = 0; - struct flock f; - memset(&f, 0, sizeof(f)); - f.l_type = (lock ? F_WRLCK : F_UNLCK); - f.l_whence = SEEK_SET; - f.l_start = 0; - f.l_len = 0; // Lock/unlock entire file - return fcntl(fd, F_SETLK, &f); -} - -class PosixFileLock : public FileLock { - public: - int fd_; -}; - -class PosixEnv : public Env { - public: - PosixEnv(); - virtual ~PosixEnv() { - fprintf(stderr, "Destroying Env::Default()\n"); - exit(1); - } - - virtual Status NewSequentialFile(const std::string& fname, - SequentialFile** result) { - FILE* f = fopen(fname.c_str(), "r"); - if (f == NULL) { - *result = NULL; - return IOError(fname, errno); - } else { - *result = new PosixSequentialFile(fname, f); - return Status::OK(); - } - } - - virtual Status NewRandomAccessFile(const std::string& fname, - RandomAccessFile** result) { - *result = NULL; - Status s; - int fd = open(fname.c_str(), O_RDONLY); - if (fd < 0) { - s = IOError(fname, errno); - } else if (sizeof(void*) >= 8) { - // Use mmap when virtual address-space is plentiful. - uint64_t size; - s = GetFileSize(fname, &size); - if (s.ok()) { - void* base = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, 0); - if (base != MAP_FAILED) { - *result = new PosixMmapReadableFile(fname, base, size); - } else { - s = IOError(fname, errno); - } - } - close(fd); - } else { - *result = new PosixRandomAccessFile(fname, fd); - } - return s; - } - - virtual Status NewWritableFile(const std::string& fname, - WritableFile** result) { - Status s; - const int fd = open(fname.c_str(), O_CREAT | O_RDWR | O_TRUNC, 0644); - if (fd < 0) { - *result = NULL; - s = IOError(fname, errno); - } else { - *result = new PosixMmapFile(fname, fd, page_size_); - } - return s; - } - - virtual bool FileExists(const std::string& fname) { - return access(fname.c_str(), F_OK) == 0; - } - - virtual Status GetChildren(const std::string& dir, - std::vector* result) { - result->clear(); - DIR* d = opendir(dir.c_str()); - if (d == NULL) { - return IOError(dir, errno); - } - struct dirent* entry; - while ((entry = readdir(d)) != NULL) { - result->push_back(entry->d_name); - } - closedir(d); - return Status::OK(); - } - - virtual Status DeleteFile(const std::string& fname) { - Status result; - if (unlink(fname.c_str()) != 0) { - result = IOError(fname, errno); - } - return result; - }; - - virtual Status CreateDir(const std::string& name) { - Status result; - if (mkdir(name.c_str(), 0755) != 0) { - result = IOError(name, errno); - } - return result; - }; - - virtual Status DeleteDir(const std::string& name) { - Status result; - if (rmdir(name.c_str()) != 0) { - result = IOError(name, errno); - } - return result; - }; - - virtual Status GetFileSize(const std::string& fname, uint64_t* size) { - Status s; - struct stat sbuf; - if (stat(fname.c_str(), &sbuf) != 0) { - *size = 0; - s = IOError(fname, errno); - } else { - *size = sbuf.st_size; - } - return s; - } - - virtual Status RenameFile(const std::string& src, const std::string& target) { - Status result; - if (rename(src.c_str(), target.c_str()) != 0) { - result = IOError(src, errno); - } - return result; - } - - virtual Status LockFile(const std::string& fname, FileLock** lock) { - *lock = NULL; - Status result; - int fd = open(fname.c_str(), O_RDWR | O_CREAT, 0644); - if (fd < 0) { - result = IOError(fname, errno); - } else if (LockOrUnlock(fd, true) == -1) { - result = IOError("lock " + fname, errno); - close(fd); - } else { - PosixFileLock* my_lock = new PosixFileLock; - my_lock->fd_ = fd; - *lock = my_lock; - } - return result; - } - - virtual Status UnlockFile(FileLock* lock) { - PosixFileLock* my_lock = reinterpret_cast(lock); - Status result; - if (LockOrUnlock(my_lock->fd_, false) == -1) { - result = IOError("unlock", errno); - } - close(my_lock->fd_); - delete my_lock; - return result; - } - - virtual void Schedule(void (*function)(void*), void* arg); - - virtual void StartThread(void (*function)(void* arg), void* arg); - - virtual Status GetTestDirectory(std::string* result) { - const char* env = getenv("TEST_TMPDIR"); - if (env && env[0] != '\0') { - *result = env; - } else { - char buf[100]; - snprintf(buf, sizeof(buf), "/tmp/leveldbtest-%d", int(geteuid())); - *result = buf; - } - // Directory may already exist - CreateDir(*result); - return Status::OK(); - } - - static uint64_t gettid() { - pthread_t tid = pthread_self(); - uint64_t thread_id = 0; - memcpy(&thread_id, &tid, std::min(sizeof(thread_id), sizeof(tid))); - return thread_id; - } - - virtual Status NewLogger(const std::string& fname, Logger** result) { - FILE* f = fopen(fname.c_str(), "w"); - if (f == NULL) { - *result = NULL; - return IOError(fname, errno); - } else { - *result = new PosixLogger(f, &PosixEnv::gettid); - return Status::OK(); - } - } - - virtual uint64_t NowMicros() { - struct timeval tv; - gettimeofday(&tv, NULL); - return static_cast(tv.tv_sec) * 1000000 + tv.tv_usec; - } - - virtual void SleepForMicroseconds(int micros) { - usleep(micros); - } - - private: - void PthreadCall(const char* label, int result) { - if (result != 0) { - fprintf(stderr, "pthread %s: %s\n", label, strerror(result)); - exit(1); - } - } - - // BGThread() is the body of the background thread - void BGThread(); - static void* BGThreadWrapper(void* arg) { - reinterpret_cast(arg)->BGThread(); - return NULL; - } - - size_t page_size_; - pthread_mutex_t mu_; - pthread_cond_t bgsignal_; - pthread_t bgthread_; - bool started_bgthread_; - - // Entry per Schedule() call - struct BGItem { void* arg; void (*function)(void*); }; - typedef std::deque BGQueue; - BGQueue queue_; -}; - -PosixEnv::PosixEnv() : page_size_(getpagesize()), - started_bgthread_(false) { - PthreadCall("mutex_init", pthread_mutex_init(&mu_, NULL)); - PthreadCall("cvar_init", pthread_cond_init(&bgsignal_, NULL)); -} - -void PosixEnv::Schedule(void (*function)(void*), void* arg) { - PthreadCall("lock", pthread_mutex_lock(&mu_)); - - // Start background thread if necessary - if (!started_bgthread_) { - started_bgthread_ = true; - PthreadCall( - "create thread", - pthread_create(&bgthread_, NULL, &PosixEnv::BGThreadWrapper, this)); - } - - // If the queue is currently empty, the background thread may currently be - // waiting. - if (queue_.empty()) { - PthreadCall("signal", pthread_cond_signal(&bgsignal_)); - } - - // Add to priority queue - queue_.push_back(BGItem()); - queue_.back().function = function; - queue_.back().arg = arg; - - PthreadCall("unlock", pthread_mutex_unlock(&mu_)); -} - -void PosixEnv::BGThread() { - while (true) { - // Wait until there is an item that is ready to run - PthreadCall("lock", pthread_mutex_lock(&mu_)); - while (queue_.empty()) { - PthreadCall("wait", pthread_cond_wait(&bgsignal_, &mu_)); - } - - void (*function)(void*) = queue_.front().function; - void* arg = queue_.front().arg; - queue_.pop_front(); - - PthreadCall("unlock", pthread_mutex_unlock(&mu_)); - (*function)(arg); - } -} - -namespace { -struct StartThreadState { - void (*user_function)(void*); - void* arg; -}; -} -static void* StartThreadWrapper(void* arg) { - StartThreadState* state = reinterpret_cast(arg); - state->user_function(state->arg); - delete state; - return NULL; -} - -void PosixEnv::StartThread(void (*function)(void* arg), void* arg) { - pthread_t t; - StartThreadState* state = new StartThreadState; - state->user_function = function; - state->arg = arg; - PthreadCall("start thread", - pthread_create(&t, NULL, &StartThreadWrapper, state)); -} - -} // namespace - -static pthread_once_t once = PTHREAD_ONCE_INIT; -static Env* default_env; -static void InitDefaultEnv() { default_env = new PosixEnv; } - -Env* Env::Default() { - pthread_once(&once, InitDefaultEnv); - return default_env; -} - -} // namespace leveldb diff --git a/leveldb/util/env_test.cc b/leveldb/util/env_test.cc deleted file mode 100644 index b72cb44..0000000 --- a/leveldb/util/env_test.cc +++ /dev/null @@ -1,104 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "leveldb/env.h" - -#include "port/port.h" -#include "util/testharness.h" - -namespace leveldb { - -static const int kDelayMicros = 100000; - -class EnvPosixTest { - private: - port::Mutex mu_; - std::string events_; - - public: - Env* env_; - EnvPosixTest() : env_(Env::Default()) { } -}; - -static void SetBool(void* ptr) { - reinterpret_cast(ptr)->NoBarrier_Store(ptr); -} - -TEST(EnvPosixTest, RunImmediately) { - port::AtomicPointer called (NULL); - env_->Schedule(&SetBool, &called); - Env::Default()->SleepForMicroseconds(kDelayMicros); - ASSERT_TRUE(called.NoBarrier_Load() != NULL); -} - -TEST(EnvPosixTest, RunMany) { - port::AtomicPointer last_id (NULL); - - struct CB { - port::AtomicPointer* last_id_ptr; // Pointer to shared slot - uintptr_t id; // Order# for the execution of this callback - - CB(port::AtomicPointer* p, int i) : last_id_ptr(p), id(i) { } - - static void Run(void* v) { - CB* cb = reinterpret_cast(v); - void* cur = cb->last_id_ptr->NoBarrier_Load(); - ASSERT_EQ(cb->id-1, reinterpret_cast(cur)); - cb->last_id_ptr->Release_Store(reinterpret_cast(cb->id)); - } - }; - - // Schedule in different order than start time - CB cb1(&last_id, 1); - CB cb2(&last_id, 2); - CB cb3(&last_id, 3); - CB cb4(&last_id, 4); - env_->Schedule(&CB::Run, &cb1); - env_->Schedule(&CB::Run, &cb2); - env_->Schedule(&CB::Run, &cb3); - env_->Schedule(&CB::Run, &cb4); - - Env::Default()->SleepForMicroseconds(kDelayMicros); - void* cur = last_id.Acquire_Load(); - ASSERT_EQ(4, reinterpret_cast(cur)); -} - -struct State { - port::Mutex mu; - int val; - int num_running; -}; - -static void ThreadBody(void* arg) { - State* s = reinterpret_cast(arg); - s->mu.Lock(); - s->val += 1; - s->num_running -= 1; - s->mu.Unlock(); -} - -TEST(EnvPosixTest, StartThread) { - State state; - state.val = 0; - state.num_running = 3; - for (int i = 0; i < 3; i++) { - env_->StartThread(&ThreadBody, &state); - } - while (true) { - state.mu.Lock(); - int num = state.num_running; - state.mu.Unlock(); - if (num == 0) { - break; - } - Env::Default()->SleepForMicroseconds(kDelayMicros); - } - ASSERT_EQ(state.val, 3); -} - -} // namespace leveldb - -int main(int argc, char** argv) { - return leveldb::test::RunAllTests(); -} diff --git a/leveldb/util/filter_policy.cc b/leveldb/util/filter_policy.cc deleted file mode 100644 index 7b045c8..0000000 --- a/leveldb/util/filter_policy.cc +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) 2012 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "leveldb/filter_policy.h" - -namespace leveldb { - -FilterPolicy::~FilterPolicy() { } - -} // namespace leveldb diff --git a/leveldb/util/hash.cc b/leveldb/util/hash.cc deleted file mode 100644 index ba18180..0000000 --- a/leveldb/util/hash.cc +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include -#include "util/coding.h" -#include "util/hash.h" - -namespace leveldb { - -uint32_t Hash(const char* data, size_t n, uint32_t seed) { - // Similar to murmur hash - const uint32_t m = 0xc6a4a793; - const uint32_t r = 24; - const char* limit = data + n; - uint32_t h = seed ^ (n * m); - - // Pick up four bytes at a time - while (data + 4 <= limit) { - uint32_t w = DecodeFixed32(data); - data += 4; - h += w; - h *= m; - h ^= (h >> 16); - } - - // Pick up remaining bytes - switch (limit - data) { - case 3: - h += data[2] << 16; - // fall through - case 2: - h += data[1] << 8; - // fall through - case 1: - h += data[0]; - h *= m; - h ^= (h >> r); - break; - } - return h; -} - - -} // namespace leveldb diff --git a/leveldb/util/hash.h b/leveldb/util/hash.h deleted file mode 100644 index 8889d56..0000000 --- a/leveldb/util/hash.h +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. -// -// Simple hash function used for internal data structures - -#ifndef STORAGE_LEVELDB_UTIL_HASH_H_ -#define STORAGE_LEVELDB_UTIL_HASH_H_ - -#include -#include - -namespace leveldb { - -extern uint32_t Hash(const char* data, size_t n, uint32_t seed); - -} - -#endif // STORAGE_LEVELDB_UTIL_HASH_H_ diff --git a/leveldb/util/histogram.cc b/leveldb/util/histogram.cc deleted file mode 100644 index bb95f58..0000000 --- a/leveldb/util/histogram.cc +++ /dev/null @@ -1,139 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include -#include -#include "port/port.h" -#include "util/histogram.h" - -namespace leveldb { - -const double Histogram::kBucketLimit[kNumBuckets] = { - 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 16, 18, 20, 25, 30, 35, 40, 45, - 50, 60, 70, 80, 90, 100, 120, 140, 160, 180, 200, 250, 300, 350, 400, 450, - 500, 600, 700, 800, 900, 1000, 1200, 1400, 1600, 1800, 2000, 2500, 3000, - 3500, 4000, 4500, 5000, 6000, 7000, 8000, 9000, 10000, 12000, 14000, - 16000, 18000, 20000, 25000, 30000, 35000, 40000, 45000, 50000, 60000, - 70000, 80000, 90000, 100000, 120000, 140000, 160000, 180000, 200000, - 250000, 300000, 350000, 400000, 450000, 500000, 600000, 700000, 800000, - 900000, 1000000, 1200000, 1400000, 1600000, 1800000, 2000000, 2500000, - 3000000, 3500000, 4000000, 4500000, 5000000, 6000000, 7000000, 8000000, - 9000000, 10000000, 12000000, 14000000, 16000000, 18000000, 20000000, - 25000000, 30000000, 35000000, 40000000, 45000000, 50000000, 60000000, - 70000000, 80000000, 90000000, 100000000, 120000000, 140000000, 160000000, - 180000000, 200000000, 250000000, 300000000, 350000000, 400000000, - 450000000, 500000000, 600000000, 700000000, 800000000, 900000000, - 1000000000, 1200000000, 1400000000, 1600000000, 1800000000, 2000000000, - 2500000000.0, 3000000000.0, 3500000000.0, 4000000000.0, 4500000000.0, - 5000000000.0, 6000000000.0, 7000000000.0, 8000000000.0, 9000000000.0, - 1e200, -}; - -void Histogram::Clear() { - min_ = kBucketLimit[kNumBuckets-1]; - max_ = 0; - num_ = 0; - sum_ = 0; - sum_squares_ = 0; - for (int i = 0; i < kNumBuckets; i++) { - buckets_[i] = 0; - } -} - -void Histogram::Add(double value) { - // Linear search is fast enough for our usage in db_bench - int b = 0; - while (b < kNumBuckets - 1 && kBucketLimit[b] <= value) { - b++; - } - buckets_[b] += 1.0; - if (min_ > value) min_ = value; - if (max_ < value) max_ = value; - num_++; - sum_ += value; - sum_squares_ += (value * value); -} - -void Histogram::Merge(const Histogram& other) { - if (other.min_ < min_) min_ = other.min_; - if (other.max_ > max_) max_ = other.max_; - num_ += other.num_; - sum_ += other.sum_; - sum_squares_ += other.sum_squares_; - for (int b = 0; b < kNumBuckets; b++) { - buckets_[b] += other.buckets_[b]; - } -} - -double Histogram::Median() const { - return Percentile(50.0); -} - -double Histogram::Percentile(double p) const { - double threshold = num_ * (p / 100.0); - double sum = 0; - for (int b = 0; b < kNumBuckets; b++) { - sum += buckets_[b]; - if (sum >= threshold) { - // Scale linearly within this bucket - double left_point = (b == 0) ? 0 : kBucketLimit[b-1]; - double right_point = kBucketLimit[b]; - double left_sum = sum - buckets_[b]; - double right_sum = sum; - double pos = (threshold - left_sum) / (right_sum - left_sum); - double r = left_point + (right_point - left_point) * pos; - if (r < min_) r = min_; - if (r > max_) r = max_; - return r; - } - } - return max_; -} - -double Histogram::Average() const { - if (num_ == 0.0) return 0; - return sum_ / num_; -} - -double Histogram::StandardDeviation() const { - if (num_ == 0.0) return 0; - double variance = (sum_squares_ * num_ - sum_ * sum_) / (num_ * num_); - return sqrt(variance); -} - -std::string Histogram::ToString() const { - std::string r; - char buf[200]; - snprintf(buf, sizeof(buf), - "Count: %.0f Average: %.4f StdDev: %.2f\n", - num_, Average(), StandardDeviation()); - r.append(buf); - snprintf(buf, sizeof(buf), - "Min: %.4f Median: %.4f Max: %.4f\n", - (num_ == 0.0 ? 0.0 : min_), Median(), max_); - r.append(buf); - r.append("------------------------------------------------------\n"); - const double mult = 100.0 / num_; - double sum = 0; - for (int b = 0; b < kNumBuckets; b++) { - if (buckets_[b] <= 0.0) continue; - sum += buckets_[b]; - snprintf(buf, sizeof(buf), - "[ %7.0f, %7.0f ) %7.0f %7.3f%% %7.3f%% ", - ((b == 0) ? 0.0 : kBucketLimit[b-1]), // left - kBucketLimit[b], // right - buckets_[b], // count - mult * buckets_[b], // percentage - mult * sum); // cumulative percentage - r.append(buf); - - // Add hash marks based on percentage; 20 marks for 100%. - int marks = static_cast(20*(buckets_[b] / num_) + 0.5); - r.append(marks, '#'); - r.push_back('\n'); - } - return r; -} - -} // namespace leveldb diff --git a/leveldb/util/histogram.h b/leveldb/util/histogram.h deleted file mode 100644 index 1ef9f3c..0000000 --- a/leveldb/util/histogram.h +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#ifndef STORAGE_LEVELDB_UTIL_HISTOGRAM_H_ -#define STORAGE_LEVELDB_UTIL_HISTOGRAM_H_ - -#include - -namespace leveldb { - -class Histogram { - public: - Histogram() { } - ~Histogram() { } - - void Clear(); - void Add(double value); - void Merge(const Histogram& other); - - std::string ToString() const; - - private: - double min_; - double max_; - double num_; - double sum_; - double sum_squares_; - - enum { kNumBuckets = 154 }; - static const double kBucketLimit[kNumBuckets]; - double buckets_[kNumBuckets]; - - double Median() const; - double Percentile(double p) const; - double Average() const; - double StandardDeviation() const; -}; - -} // namespace leveldb - -#endif // STORAGE_LEVELDB_UTIL_HISTOGRAM_H_ diff --git a/leveldb/util/logging.cc b/leveldb/util/logging.cc deleted file mode 100644 index 22cf278..0000000 --- a/leveldb/util/logging.cc +++ /dev/null @@ -1,81 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "util/logging.h" - -#include -#include -#include -#include -#include "leveldb/env.h" -#include "leveldb/slice.h" - -namespace leveldb { - -void AppendNumberTo(std::string* str, uint64_t num) { - char buf[30]; - snprintf(buf, sizeof(buf), "%llu", (unsigned long long) num); - str->append(buf); -} - -void AppendEscapedStringTo(std::string* str, const Slice& value) { - for (size_t i = 0; i < value.size(); i++) { - char c = value[i]; - if (c >= ' ' && c <= '~') { - str->push_back(c); - } else { - char buf[10]; - snprintf(buf, sizeof(buf), "\\x%02x", - static_cast(c) & 0xff); - str->append(buf); - } - } -} - -std::string NumberToString(uint64_t num) { - std::string r; - AppendNumberTo(&r, num); - return r; -} - -std::string EscapeString(const Slice& value) { - std::string r; - AppendEscapedStringTo(&r, value); - return r; -} - -bool ConsumeChar(Slice* in, char c) { - if (!in->empty() && (*in)[0] == c) { - in->remove_prefix(1); - return true; - } else { - return false; - } -} - -bool ConsumeDecimalNumber(Slice* in, uint64_t* val) { - uint64_t v = 0; - int digits = 0; - while (!in->empty()) { - char c = (*in)[0]; - if (c >= '0' && c <= '9') { - ++digits; - const int delta = (c - '0'); - static const uint64_t kMaxUint64 = ~static_cast(0); - if (v > kMaxUint64/10 || - (v == kMaxUint64/10 && delta > kMaxUint64%10)) { - // Overflow - return false; - } - v = (v * 10) + delta; - in->remove_prefix(1); - } else { - break; - } - } - *val = v; - return (digits > 0); -} - -} // namespace leveldb diff --git a/leveldb/util/logging.h b/leveldb/util/logging.h deleted file mode 100644 index b0c5da8..0000000 --- a/leveldb/util/logging.h +++ /dev/null @@ -1,47 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. -// -// Must not be included from any .h files to avoid polluting the namespace -// with macros. - -#ifndef STORAGE_LEVELDB_UTIL_LOGGING_H_ -#define STORAGE_LEVELDB_UTIL_LOGGING_H_ - -#include -#include -#include -#include "port/port.h" - -namespace leveldb { - -class Slice; -class WritableFile; - -// Append a human-readable printout of "num" to *str -extern void AppendNumberTo(std::string* str, uint64_t num); - -// Append a human-readable printout of "value" to *str. -// Escapes any non-printable characters found in "value". -extern void AppendEscapedStringTo(std::string* str, const Slice& value); - -// Return a human-readable printout of "num" -extern std::string NumberToString(uint64_t num); - -// Return a human-readable version of "value". -// Escapes any non-printable characters found in "value". -extern std::string EscapeString(const Slice& value); - -// If *in starts with "c", advances *in past the first character and -// returns true. Otherwise, returns false. -extern bool ConsumeChar(Slice* in, char c); - -// Parse a human-readable number from "*in" into *value. On success, -// advances "*in" past the consumed number and sets "*val" to the -// numeric value. Otherwise, returns false and leaves *in in an -// unspecified state. -extern bool ConsumeDecimalNumber(Slice* in, uint64_t* val); - -} // namespace leveldb - -#endif // STORAGE_LEVELDB_UTIL_LOGGING_H_ diff --git a/leveldb/util/mutexlock.h b/leveldb/util/mutexlock.h deleted file mode 100644 index c3f3306..0000000 --- a/leveldb/util/mutexlock.h +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#ifndef STORAGE_LEVELDB_UTIL_MUTEXLOCK_H_ -#define STORAGE_LEVELDB_UTIL_MUTEXLOCK_H_ - -#include "port/port.h" - -namespace leveldb { - -// Helper class that locks a mutex on construction and unlocks the mutex when -// the destructor of the MutexLock object is invoked. -// -// Typical usage: -// -// void MyClass::MyMethod() { -// MutexLock l(&mu_); // mu_ is an instance variable -// ... some complex code, possibly with multiple return paths ... -// } - -class MutexLock { - public: - explicit MutexLock(port::Mutex *mu) : mu_(mu) { - this->mu_->Lock(); - } - ~MutexLock() { this->mu_->Unlock(); } - - private: - port::Mutex *const mu_; - // No copying allowed - MutexLock(const MutexLock&); - void operator=(const MutexLock&); -}; - -} // namespace leveldb - - -#endif // STORAGE_LEVELDB_UTIL_MUTEXLOCK_H_ diff --git a/leveldb/util/options.cc b/leveldb/util/options.cc deleted file mode 100644 index 76af5b9..0000000 --- a/leveldb/util/options.cc +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "leveldb/options.h" - -#include "leveldb/comparator.h" -#include "leveldb/env.h" - -namespace leveldb { - -Options::Options() - : comparator(BytewiseComparator()), - create_if_missing(false), - error_if_exists(false), - paranoid_checks(false), - env(Env::Default()), - info_log(NULL), - write_buffer_size(4<<20), - max_open_files(1000), - block_cache(NULL), - block_size(4096), - block_restart_interval(16), - compression(kSnappyCompression), - filter_policy(NULL) { -} - - -} // namespace leveldb diff --git a/leveldb/util/posix_logger.h b/leveldb/util/posix_logger.h deleted file mode 100644 index 9741b1a..0000000 --- a/leveldb/util/posix_logger.h +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. -// -// Logger implementation that can be shared by all environments -// where enough posix functionality is available. - -#ifndef STORAGE_LEVELDB_UTIL_POSIX_LOGGER_H_ -#define STORAGE_LEVELDB_UTIL_POSIX_LOGGER_H_ - -#include -#include -#include -#include -#include "leveldb/env.h" - -namespace leveldb { - -class PosixLogger : public Logger { - private: - FILE* file_; - uint64_t (*gettid_)(); // Return the thread id for the current thread - public: - PosixLogger(FILE* f, uint64_t (*gettid)()) : file_(f), gettid_(gettid) { } - virtual ~PosixLogger() { - fclose(file_); - } - virtual void Logv(const char* format, va_list ap) { - const uint64_t thread_id = (*gettid_)(); - - // We try twice: the first time with a fixed-size stack allocated buffer, - // and the second time with a much larger dynamically allocated buffer. - char buffer[500]; - for (int iter = 0; iter < 2; iter++) { - char* base; - int bufsize; - if (iter == 0) { - bufsize = sizeof(buffer); - base = buffer; - } else { - bufsize = 30000; - base = new char[bufsize]; - } - char* p = base; - char* limit = base + bufsize; - - struct timeval now_tv; - gettimeofday(&now_tv, NULL); - const time_t seconds = now_tv.tv_sec; - struct tm t; - localtime_r(&seconds, &t); - p += snprintf(p, limit - p, - "%04d/%02d/%02d-%02d:%02d:%02d.%06d %llx ", - t.tm_year + 1900, - t.tm_mon + 1, - t.tm_mday, - t.tm_hour, - t.tm_min, - t.tm_sec, - static_cast(now_tv.tv_usec), - static_cast(thread_id)); - - // Print the message - if (p < limit) { - va_list backup_ap; - va_copy(backup_ap, ap); - p += vsnprintf(p, limit - p, format, backup_ap); - va_end(backup_ap); - } - - // Truncate to available space if necessary - if (p >= limit) { - if (iter == 0) { - continue; // Try again with larger buffer - } else { - p = limit - 1; - } - } - - // Add newline if necessary - if (p == base || p[-1] != '\n') { - *p++ = '\n'; - } - - assert(p <= limit); - fwrite(base, 1, p - base, file_); - fflush(file_); - if (base != buffer) { - delete[] base; - } - break; - } - } -}; - -} // namespace leveldb - -#endif // STORAGE_LEVELDB_UTIL_POSIX_LOGGER_H_ diff --git a/leveldb/util/random.h b/leveldb/util/random.h deleted file mode 100644 index 0753824..0000000 --- a/leveldb/util/random.h +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#ifndef STORAGE_LEVELDB_UTIL_RANDOM_H_ -#define STORAGE_LEVELDB_UTIL_RANDOM_H_ - -#include - -namespace leveldb { - -// A very simple random number generator. Not especially good at -// generating truly random bits, but good enough for our needs in this -// package. -class Random { - private: - uint32_t seed_; - public: - explicit Random(uint32_t s) : seed_(s & 0x7fffffffu) { } - uint32_t Next() { - static const uint32_t M = 2147483647L; // 2^31-1 - static const uint64_t A = 16807; // bits 14, 8, 7, 5, 2, 1, 0 - // We are computing - // seed_ = (seed_ * A) % M, where M = 2^31-1 - // - // seed_ must not be zero or M, or else all subsequent computed values - // will be zero or M respectively. For all other values, seed_ will end - // up cycling through every number in [1,M-1] - uint64_t product = seed_ * A; - - // Compute (product % M) using the fact that ((x << 31) % M) == x. - seed_ = static_cast((product >> 31) + (product & M)); - // The first reduction may overflow by 1 bit, so we may need to - // repeat. mod == M is not possible; using > allows the faster - // sign-bit-based test. - if (seed_ > M) { - seed_ -= M; - } - return seed_; - } - // Returns a uniformly distributed value in the range [0..n-1] - // REQUIRES: n > 0 - uint32_t Uniform(int n) { return Next() % n; } - - // Randomly returns true ~"1/n" of the time, and false otherwise. - // REQUIRES: n > 0 - bool OneIn(int n) { return (Next() % n) == 0; } - - // Skewed: pick "base" uniformly from range [0,max_log] and then - // return "base" random bits. The effect is to pick a number in the - // range [0,2^max_log-1] with exponential bias towards smaller numbers. - uint32_t Skewed(int max_log) { - return Uniform(1 << Uniform(max_log + 1)); - } -}; - -} // namespace leveldb - -#endif // STORAGE_LEVELDB_UTIL_RANDOM_H_ diff --git a/leveldb/util/status.cc b/leveldb/util/status.cc deleted file mode 100644 index a44f35b..0000000 --- a/leveldb/util/status.cc +++ /dev/null @@ -1,75 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include -#include "port/port.h" -#include "leveldb/status.h" - -namespace leveldb { - -const char* Status::CopyState(const char* state) { - uint32_t size; - memcpy(&size, state, sizeof(size)); - char* result = new char[size + 5]; - memcpy(result, state, size + 5); - return result; -} - -Status::Status(Code code, const Slice& msg, const Slice& msg2) { - assert(code != kOk); - const uint32_t len1 = msg.size(); - const uint32_t len2 = msg2.size(); - const uint32_t size = len1 + (len2 ? (2 + len2) : 0); - char* result = new char[size + 5]; - memcpy(result, &size, sizeof(size)); - result[4] = static_cast(code); - memcpy(result + 5, msg.data(), len1); - if (len2) { - result[5 + len1] = ':'; - result[6 + len1] = ' '; - memcpy(result + 7 + len1, msg2.data(), len2); - } - state_ = result; -} - -std::string Status::ToString() const { - if (state_ == NULL) { - return "OK"; - } else { - char tmp[30]; - const char* type; - switch (code()) { - case kOk: - type = "OK"; - break; - case kNotFound: - type = "NotFound: "; - break; - case kCorruption: - type = "Corruption: "; - break; - case kNotSupported: - type = "Not implemented: "; - break; - case kInvalidArgument: - type = "Invalid argument: "; - break; - case kIOError: - type = "IO error: "; - break; - default: - snprintf(tmp, sizeof(tmp), "Unknown code(%d): ", - static_cast(code())); - type = tmp; - break; - } - std::string result(type); - uint32_t length; - memcpy(&length, state_, sizeof(length)); - result.append(state_ + 5, length); - return result; - } -} - -} // namespace leveldb diff --git a/leveldb/util/testharness.cc b/leveldb/util/testharness.cc deleted file mode 100644 index eb1bdd5..0000000 --- a/leveldb/util/testharness.cc +++ /dev/null @@ -1,77 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "util/testharness.h" - -#include -#include -#include -#include - -namespace leveldb { -namespace test { - -namespace { -struct Test { - const char* base; - const char* name; - void (*func)(); -}; -std::vector* tests; -} - -bool RegisterTest(const char* base, const char* name, void (*func)()) { - if (tests == NULL) { - tests = new std::vector; - } - Test t; - t.base = base; - t.name = name; - t.func = func; - tests->push_back(t); - return true; -} - -int RunAllTests() { - const char* matcher = getenv("LEVELDB_TESTS"); - - int num = 0; - if (tests != NULL) { - for (int i = 0; i < tests->size(); i++) { - const Test& t = (*tests)[i]; - if (matcher != NULL) { - std::string name = t.base; - name.push_back('.'); - name.append(t.name); - if (strstr(name.c_str(), matcher) == NULL) { - continue; - } - } - fprintf(stderr, "==== Test %s.%s\n", t.base, t.name); - (*t.func)(); - ++num; - } - } - fprintf(stderr, "==== PASSED %d tests\n", num); - return 0; -} - -std::string TmpDir() { - std::string dir; - Status s = Env::Default()->GetTestDirectory(&dir); - ASSERT_TRUE(s.ok()) << s.ToString(); - return dir; -} - -int RandomSeed() { - const char* env = getenv("TEST_RANDOM_SEED"); - int result = (env != NULL ? atoi(env) : 301); - if (result <= 0) { - result = 301; - } - return result; -} - -} // namespace test -} // namespace leveldb diff --git a/leveldb/util/testharness.h b/leveldb/util/testharness.h deleted file mode 100644 index da4fe68..0000000 --- a/leveldb/util/testharness.h +++ /dev/null @@ -1,138 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#ifndef STORAGE_LEVELDB_UTIL_TESTHARNESS_H_ -#define STORAGE_LEVELDB_UTIL_TESTHARNESS_H_ - -#include -#include -#include -#include "leveldb/env.h" -#include "leveldb/slice.h" -#include "util/random.h" - -namespace leveldb { -namespace test { - -// Run some of the tests registered by the TEST() macro. If the -// environment variable "LEVELDB_TESTS" is not set, runs all tests. -// Otherwise, runs only the tests whose name contains the value of -// "LEVELDB_TESTS" as a substring. E.g., suppose the tests are: -// TEST(Foo, Hello) { ... } -// TEST(Foo, World) { ... } -// LEVELDB_TESTS=Hello will run the first test -// LEVELDB_TESTS=o will run both tests -// LEVELDB_TESTS=Junk will run no tests -// -// Returns 0 if all tests pass. -// Dies or returns a non-zero value if some test fails. -extern int RunAllTests(); - -// Return the directory to use for temporary storage. -extern std::string TmpDir(); - -// Return a randomization seed for this run. Typically returns the -// same number on repeated invocations of this binary, but automated -// runs may be able to vary the seed. -extern int RandomSeed(); - -// An instance of Tester is allocated to hold temporary state during -// the execution of an assertion. -class Tester { - private: - bool ok_; - const char* fname_; - int line_; - std::stringstream ss_; - - public: - Tester(const char* f, int l) - : ok_(true), fname_(f), line_(l) { - } - - ~Tester() { - if (!ok_) { - fprintf(stderr, "%s:%d:%s\n", fname_, line_, ss_.str().c_str()); - exit(1); - } - } - - Tester& Is(bool b, const char* msg) { - if (!b) { - ss_ << " Assertion failure " << msg; - ok_ = false; - } - return *this; - } - - Tester& IsOk(const Status& s) { - if (!s.ok()) { - ss_ << " " << s.ToString(); - ok_ = false; - } - return *this; - } - -#define BINARY_OP(name,op) \ - template \ - Tester& name(const X& x, const Y& y) { \ - if (! (x op y)) { \ - ss_ << " failed: " << x << (" " #op " ") << y; \ - ok_ = false; \ - } \ - return *this; \ - } - - BINARY_OP(IsEq, ==) - BINARY_OP(IsNe, !=) - BINARY_OP(IsGe, >=) - BINARY_OP(IsGt, >) - BINARY_OP(IsLe, <=) - BINARY_OP(IsLt, <) -#undef BINARY_OP - - // Attach the specified value to the error message if an error has occurred - template - Tester& operator<<(const V& value) { - if (!ok_) { - ss_ << " " << value; - } - return *this; - } -}; - -#define ASSERT_TRUE(c) ::leveldb::test::Tester(__FILE__, __LINE__).Is((c), #c) -#define ASSERT_OK(s) ::leveldb::test::Tester(__FILE__, __LINE__).IsOk((s)) -#define ASSERT_EQ(a,b) ::leveldb::test::Tester(__FILE__, __LINE__).IsEq((a),(b)) -#define ASSERT_NE(a,b) ::leveldb::test::Tester(__FILE__, __LINE__).IsNe((a),(b)) -#define ASSERT_GE(a,b) ::leveldb::test::Tester(__FILE__, __LINE__).IsGe((a),(b)) -#define ASSERT_GT(a,b) ::leveldb::test::Tester(__FILE__, __LINE__).IsGt((a),(b)) -#define ASSERT_LE(a,b) ::leveldb::test::Tester(__FILE__, __LINE__).IsLe((a),(b)) -#define ASSERT_LT(a,b) ::leveldb::test::Tester(__FILE__, __LINE__).IsLt((a),(b)) - -#define TCONCAT(a,b) TCONCAT1(a,b) -#define TCONCAT1(a,b) a##b - -#define TEST(base,name) \ -class TCONCAT(_Test_,name) : public base { \ - public: \ - void _Run(); \ - static void _RunIt() { \ - TCONCAT(_Test_,name) t; \ - t._Run(); \ - } \ -}; \ -bool TCONCAT(_Test_ignored_,name) = \ - ::leveldb::test::RegisterTest(#base, #name, &TCONCAT(_Test_,name)::_RunIt); \ -void TCONCAT(_Test_,name)::_Run() - -// Register the specified test. Typically not used directly, but -// invoked via the macro expansion of TEST. -extern bool RegisterTest(const char* base, const char* name, void (*func)()); - - -} // namespace test -} // namespace leveldb - -#endif // STORAGE_LEVELDB_UTIL_TESTHARNESS_H_ diff --git a/leveldb/util/testutil.cc b/leveldb/util/testutil.cc deleted file mode 100644 index 538d095..0000000 --- a/leveldb/util/testutil.cc +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#include "util/testutil.h" - -#include "util/random.h" - -namespace leveldb { -namespace test { - -Slice RandomString(Random* rnd, int len, std::string* dst) { - dst->resize(len); - for (int i = 0; i < len; i++) { - (*dst)[i] = static_cast(' ' + rnd->Uniform(95)); // ' ' .. '~' - } - return Slice(*dst); -} - -std::string RandomKey(Random* rnd, int len) { - // Make sure to generate a wide variety of characters so we - // test the boundary conditions for short-key optimizations. - static const char kTestChars[] = { - '\0', '\1', 'a', 'b', 'c', 'd', 'e', '\xfd', '\xfe', '\xff' - }; - std::string result; - for (int i = 0; i < len; i++) { - result += kTestChars[rnd->Uniform(sizeof(kTestChars))]; - } - return result; -} - - -extern Slice CompressibleString(Random* rnd, double compressed_fraction, - int len, std::string* dst) { - int raw = static_cast(len * compressed_fraction); - if (raw < 1) raw = 1; - std::string raw_data; - RandomString(rnd, raw, &raw_data); - - // Duplicate the random data until we have filled "len" bytes - dst->clear(); - while (dst->size() < len) { - dst->append(raw_data); - } - dst->resize(len); - return Slice(*dst); -} - -} // namespace test -} // namespace leveldb diff --git a/leveldb/util/testutil.h b/leveldb/util/testutil.h deleted file mode 100644 index 824e655..0000000 --- a/leveldb/util/testutil.h +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. - -#ifndef STORAGE_LEVELDB_UTIL_TESTUTIL_H_ -#define STORAGE_LEVELDB_UTIL_TESTUTIL_H_ - -#include "leveldb/env.h" -#include "leveldb/slice.h" -#include "util/random.h" - -namespace leveldb { -namespace test { - -// Store in *dst a random string of length "len" and return a Slice that -// references the generated data. -extern Slice RandomString(Random* rnd, int len, std::string* dst); - -// Return a random key with the specified length that may contain interesting -// characters (e.g. \x00, \xff, etc.). -extern std::string RandomKey(Random* rnd, int len); - -// Store in *dst a string of length "len" that will compress to -// "N*compressed_fraction" bytes and return a Slice that references -// the generated data. -extern Slice CompressibleString(Random* rnd, double compressed_fraction, - int len, std::string* dst); - -// A wrapper that allows injection of errors. -class ErrorEnv : public EnvWrapper { - public: - bool writable_file_error_; - int num_writable_file_errors_; - - ErrorEnv() : EnvWrapper(Env::Default()), - writable_file_error_(false), - num_writable_file_errors_(0) { } - - virtual Status NewWritableFile(const std::string& fname, - WritableFile** result) { - if (writable_file_error_) { - ++num_writable_file_errors_; - *result = NULL; - return Status::IOError(fname, "fake error"); - } - return target()->NewWritableFile(fname, result); - } -}; - -} // namespace test -} // namespace leveldb - -#endif // STORAGE_LEVELDB_UTIL_TESTUTIL_H_