Skip to content
This repository was archived by the owner on Jan 21, 2026. It is now read-only.

Commit 919ab18

Browse files
authored
Merge pull request #1 from topling/memtable_as_log_index
[pull] memtable_as_log_index from topling:memtable_as_log_index
2 parents 20a3be1 + 1d6dc8b commit 919ab18

30 files changed

+1336
-190
lines changed

Makefile

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,10 @@ USE_RTTI=1
2525
ROCKSDB_USE_IO_URING=0
2626
ROCKSDB_DISABLE_TCMALLOC=1
2727
SKIP_FORMAT_BUCK_CHECKS=1
28+
ifneq ($(shell command -v ld.gold),)
29+
LDFLAGS += -fuse-ld=gold
30+
#LDFLAGS += -Wl,--icf=all # only reduces size by 3.2%
31+
endif
2832
# end topling specific
2933

3034
# Transform parallel LOG output into something more readable.
@@ -2615,6 +2619,10 @@ gen-pc:
26152619
# ---------------------------------------------------------------------------
26162620
# Jni stuff
26172621
# ---------------------------------------------------------------------------
2622+
ifndef JAVA_HOME
2623+
JAVA_HOME := $(shell javac -J-XshowSettings:properties -version 2>&1 | awk '/java.home/{print $$NF}')
2624+
$(warning Auto detected JAVA_HOME = ${JAVA_HOME}, if it is not true please set JAVA_HOME)
2625+
endif
26182626
JAVA_INCLUDE = -I$(JAVA_HOME)/include/ -I$(JAVA_HOME)/include/linux
26192627
ifeq ($(PLATFORM), OS_SOLARIS)
26202628
ARCH := $(shell isainfo -b)
@@ -2966,7 +2974,7 @@ ifeq ($(JAVA_HOME),)
29662974
endif
29672975
$(AM_V_at)rm -f ./java/target/$(ROCKSDBJNILIB)
29682976
$(AM_V_at)$(CXX) $(CXXFLAGS) -shared -fPIC -o ./java/target/$(ROCKSDBJNILIB) $(ALL_JNI_NATIVE_OBJECTS) $(LIB_OBJECTS) $(JAVA_LDFLAGS) $(LDFLAGS)
2969-
$(AM_V_at)cp -a ${TOPLING_CORE_DIR}/${BUILD_ROOT}/lib_shared/*${COMPILER}*-r.so java/target
2977+
$(AM_V_at)cp -a ${TOPLING_CORE_DIR}/${BUILD_ROOT}/lib_shared/*${COMPILER}*-${BUILD_TYPE_SIG}.so java/target
29702978
$(AM_V_at)cp -a sideplugin/rockside/src/topling/web/{style.css,index.html} java/target
29712979
ifeq ($(STRIP_DEBUG_INFO),1)
29722980
$(AM_V_at)strip java/target/*.so

db/c.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,8 @@ struct rocksdb_perfcontext_t {
285285
struct rocksdb_pinnableslice_t {
286286
PinnableSlice rep;
287287
};
288+
static_assert(sizeof(rocksdb_pinnableslice_t) == sizeof(PinnableSlice));
289+
static_assert(sizeof(rocksdb_pinnableslice_t) == 96, "see _opaque_data_ in c.h");
288290
struct rocksdb_transactiondb_options_t {
289291
TransactionDBOptions rep;
290292
};

db/db_impl/db_impl_open.cc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,11 @@ DBOptions SanitizeOptions(const std::string& dbname, const DBOptions& src,
5050
if (result.memtable_as_log_index) {
5151
result.recycle_log_file_num = 0;
5252
result.manual_wal_flush = false;
53+
#if !defined(ROCKSDB_UNIT_TEST)
54+
// keep infrequently written CFs from referencing too many WALs while
55+
// frequently written CFs are writing a lot of data
56+
result.atomic_flush = true;
57+
#endif
5358
}
5459

5560
// result.max_open_files means an "infinite" open files.

include/rocksdb/c.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,11 @@ struct rocksdb_slice_t {
148148
size_t size;
149149
};
150150
typedef struct rocksdb_slice_t rocksdb_slice_t;
151+
struct rocksdb_pinnableslice_t {
152+
const char* data;
153+
size_t size;
154+
unsigned char _opaque_data_[80];
155+
};
151156
#endif
152157

153158
/* DB operations */

include/rocksdb/write_batch.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -486,6 +486,7 @@ class WriteBatch : public WriteBatchBase {
486486
bool HasProtectionInfo() const { return prot_info_ != nullptr; }
487487

488488
private:
489+
protected:
489490
friend class WriteBatchInternal;
490491
friend class LocalSavePoint;
491492
// TODO(myabandeh): this is needed for a hack to collapse the write batch and

java/Makefile

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -256,7 +256,7 @@ JAVA_CMD := $(JAVA_HOME)/bin/java
256256
else
257257
JAVA_CMD := java
258258
endif
259-
JAVA_CMD := env LD_PRELOAD=libjemalloc.so:target/librocksdbjni-linux64.so ${JAVA_CMD}
259+
JAVA_CMD := env LD_PRELOAD=libjemalloc.so:librocksdbjni-linux64.so LD_LIBRARY_PATH=target:${LD_LIBRARY_PATH} ${JAVA_CMD}
260260
endif
261261

262262
ifeq ($(JAVAC_CMD),)
@@ -291,8 +291,18 @@ JAVAC_VERSION_GE_MIN := $(shell [ $(JAVAC_MAJOR_VERSION) -ge $(MIN_JAVAC_MAJOR_V
291291
# Set the default JAVA_ARGS to "" for DEBUG_LEVEL=0
292292
JAVA_ARGS ?=
293293

294+
ifeq (${ROCKSDB_FORCE_DIRECT_BUFFER_ZERO_COPY},true)
295+
JAVA_ARGS += --add-opens java.base/jdk.internal.ref=ALL-UNNAMED
296+
endif
297+
JAVA_ARGS += --add-opens java.base/java.nio=ALL-UNNAMED
298+
294299
JAVAC_ARGS ?=
295300

301+
JAVAC_ARGS += --add-exports java.base/jdk.internal.misc=ALL-UNNAMED
302+
303+
JAVADOC_CMD += --add-exports java.base/jdk.internal.misc=ALL-UNNAMED
304+
JAVADOC_CMD += -Xdoclint:none
305+
296306
# Read plugin configuration
297307
PLUGIN_PATH = ../plugin
298308
ROCKSDB_PLUGIN_MKS = $(foreach plugin, $(ROCKSDB_PLUGINS), $(PLUGIN_PATH)/$(plugin)/*.mk)
@@ -322,6 +332,8 @@ endif
322332
# When debugging add -Xcheck:jni to the java args
323333
ifneq ($(DEBUG_LEVEL),0)
324334
JAVA_ARGS += -ea -Xcheck:jni
335+
endif
336+
ifeq (${ENABLE_JAVA_LINT},1)
325337
JAVAC_ARGS += -Xlint:deprecation -Xlint:unchecked
326338
endif
327339

java/jtest.sh

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
2+
export ROCKSDB_GET_PREFER_ZERO_COPY=true
3+
export ROCKSDB_FORCE_DIRECT_BUFFER_ZERO_COPY=true
4+
export TOPLINGDB_EAGER_FETCH_VALUE=true
5+
6+
#export CXX=g++-12
7+
#export CC=gcc-12
8+
#export DEBUG_LEVEL=1
9+
10+
export CPU="-march=native"
11+
export UPDATE_REPO=0
12+
export BUILD_PREFIX=../build-toplingdb/
13+
export PREFIX=/opt
14+
export EXTRA_CXXFLAGS='-DROCKSDB_DYNAMIC_CREATE_CF -DTOPLINGDB_WITH_TIMESTAMP -DTOPLINGDB_WITH_WIDE_COLUMNS -DTOPLINGDB_WITH_FABRICATED_COMPLEXITY -DROCKSDB_UNIT_TEST'
15+
#export RUN_JAVA_TESTS='%PerfContextTest'
16+
17+
make -j`nproc` jtest

java/rocksjni/kv_helper.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -284,8 +284,8 @@ class JDirectBufferPinnableSlice {
284284
class Iterator; // declare
285285
struct JZeroCopyIter {
286286
Iterator* iter;
287-
Slice key;
288-
Slice value;
287+
Slice key{nullptr, 0};
288+
Slice value{nullptr, 0};
289289
bool Valid() const { return key.data_ != nullptr; }
290290
~JZeroCopyIter();
291291
JZeroCopyIter(Iterator* it) : iter(it) {}

java/rocksjni/options.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8713,6 +8713,7 @@ JNIEXPORT void JNICALL Java_org_rocksdb_ReadOptions_finishZeroCopy0
87138713
{
87148714
auto* opt = reinterpret_cast<ROCKSDB_NAMESPACE::ReadOptionsWithValue*>(jhandle);
87158715
ROCKSDB_VERIFY(opt->internal_is_in_pinning_section == true);
8716+
opt->m_multi_get.ClearPinningList();
87168717
opt->ClearZeroCopyList();
87178718
opt->FinishPin();
87188719
}

java/rocksjni/portal.h

Lines changed: 63 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -8956,58 +8956,82 @@ class BlockBasedTableOptionsJni
89568956
}
89578957
};
89588958

8959-
struct ReadOptionsWithValue : public ReadOptions {
8960-
using ReadOptions::ReadOptions;
8961-
// do not copy m_inuse_list & m_zfree_list
8962-
ReadOptionsWithValue(const ReadOptionsWithValue& y) : ReadOptions(y) {}
8963-
ReadOptionsWithValue& operator=(const ReadOptionsWithValue& y) {
8964-
ReadOptions::operator=(y);
8965-
// do not copy m_inuse_list & m_zfree_list
8966-
return *this;
8967-
}
8968-
struct PinnableSliceNode : PinnableSlice {
8969-
PinnableSliceNode* next;
8959+
/// Object must provide a Reset() member: ClearPinningList() calls it on every pinned node
8960+
template<class Object>
8961+
class ObjectPool {
8962+
struct ListNode : Object {
8963+
ListNode* next;
89708964
};
8971-
struct PinnableSliceList {
8972-
~PinnableSliceList() {
8965+
struct ListMeta {
8966+
~ListMeta() {
89738967
for (auto node = m_head; node; ) {
89748968
auto next = node->next;
89758969
delete node;
89768970
node = next;
89778971
}
89788972
}
8979-
PinnableSliceNode* m_head = nullptr;
8973+
ListNode* m_head = nullptr;
89808974
size_t m_size = 0;
89818975
};
8982-
size_t ZeroCopyListLen() const { return m_inuse_list.m_size; }
8983-
auto NewPinnableSlice() const {
8984-
if (auto p = m_zfree_list.m_head) {
8985-
m_zfree_list.m_head = p->next;
8986-
m_zfree_list.m_size--;
8987-
return std::unique_ptr<PinnableSliceNode>(p);
8988-
}
8989-
return std::unique_ptr<PinnableSliceNode>(new PinnableSliceNode());
8990-
}
8991-
void RegisterZeroCopy(std::unique_ptr<PinnableSliceNode>&& node) const {
8992-
node->next = m_inuse_list.m_head;
8993-
m_inuse_list.m_head = node.release();
8994-
m_inuse_list.m_size++;
8995-
}
8996-
void ClearZeroCopyList() {
8997-
auto pp = &m_inuse_list.m_head;
8976+
ListMeta m_pinning_list;
8977+
ListMeta m_pooling_list;
8978+
public:
8979+
~ObjectPool() {
8980+
ROCKSDB_ASSERT_EQ(m_pinning_list.m_size, 0);
8981+
}
8982+
auto NewObjectUniquePtr() {
8983+
if (auto p = m_pooling_list.m_head) {
8984+
m_pooling_list.m_head = p->next;
8985+
m_pooling_list.m_size--;
8986+
return std::unique_ptr<ListNode>(p);
8987+
}
8988+
return std::unique_ptr<ListNode>(new ListNode());
8989+
}
8990+
void PinObject(std::unique_ptr<ListNode>&& node) {
8991+
node->next = m_pinning_list.m_head;
8992+
m_pinning_list.m_head = node.release();
8993+
m_pinning_list.m_size++;
8994+
}
8995+
void ClearPinningList() {
8996+
auto pp = &m_pinning_list.m_head;
89988997
while (auto p = *pp)
89998998
p->Reset(), pp = &p->next;
9000-
*pp = m_zfree_list.m_head;
9001-
m_zfree_list.m_head = m_inuse_list.m_head;
9002-
m_zfree_list.m_size += m_inuse_list.m_size;
9003-
new (&m_inuse_list) PinnableSliceList();
8999+
*pp = m_pooling_list.m_head;
9000+
m_pooling_list.m_head = m_pinning_list.m_head;
9001+
m_pooling_list.m_size += m_pinning_list.m_size;
9002+
new (&m_pinning_list) ListMeta();
90049003
}
9005-
~ReadOptionsWithValue() {
9006-
ROCKSDB_ASSERT_EQ(m_inuse_list.m_size, 0);
9004+
size_t PinningListLen() const { return m_pinning_list.m_size; }
9005+
};
9006+
9007+
struct ReadOptionsWithValue : public ReadOptions {
9008+
using ReadOptions::ReadOptions;
9009+
// do not copy m_single_get & m_multi_get
9010+
ReadOptionsWithValue(const ReadOptionsWithValue& y) : ReadOptions(y) {}
9011+
ReadOptionsWithValue& operator=(const ReadOptionsWithValue& y) {
9012+
ReadOptions::operator=(y);
9013+
// do not copy m_single_get & m_multi_get
9014+
return *this;
90079015
}
9008-
private:
9009-
mutable PinnableSliceList m_inuse_list;
9010-
mutable PinnableSliceList m_zfree_list;
9016+
size_t ZeroCopyListLen() const { return m_single_get.PinningListLen(); }
9017+
auto NewPinnableSlice() {
9018+
return m_single_get.NewObjectUniquePtr();
9019+
}
9020+
template<class PinnableSliceNode>
9021+
void RegisterZeroCopy(std::unique_ptr<PinnableSliceNode>&& node) {
9022+
m_single_get.PinObject(std::move(node));
9023+
}
9024+
void ClearZeroCopyList() {
9025+
m_single_get.ClearPinningList();
9026+
}
9027+
struct MultiGetVector : std::vector<PinnableSlice> {
9028+
void Reset() {
9029+
for (PinnableSlice& x : *this)
9030+
x.Reset();
9031+
}
9032+
};
9033+
ObjectPool<PinnableSlice> m_single_get;
9034+
ObjectPool<MultiGetVector> m_multi_get;
90119035
};
90129036

90139037
} // namespace ROCKSDB_NAMESPACE

0 commit comments

Comments
 (0)