Skip to content

Commit 817a18d

Browse files
authored
feat(rocksdb): support building RocksDB with the HDFS plugin in the Pegasus server (#2362)
#2361 Enable building RocksDB with the HDFS plugin through the following steps: 1. Introduce the [rocksdb-hdfs-env](https://github.com/riversand963/rocksdb-hdfs-env) plugin when building RocksDB as a third-party dependency. 2. Configure the Java and Hadoop environment variables, as well as the dynamic library search paths, to support compiling and linking the plugin. 3. Apply patches to fix issues encountered during compilation.
1 parent a9a11da commit 817a18d

File tree

5 files changed

+93
-8
lines changed

5 files changed

+93
-8
lines changed

.github/actions/rebuild_thirdparty_if_needed/action.yaml

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,15 +37,18 @@ runs:
3737
# Build third-parties and leave some necessary libraries and source.
3838
run: |
3939
rm -f /root/thirdparties-src.zip
40+
../admin_tools/download_hadoop.sh hadoop-bin
41+
rm -rf hadoop-bin/share/doc
42+
mv hadoop-bin ..
43+
# The RocksDB HDFS plugin (rocksdb-hdfs-env) in thirdparty relies on ${HADOOP_HOME}
44+
# environment variable to locate the libraries to link against.
45+
export HADOOP_HOME="$(dirname "$(pwd)")"/hadoop-bin
4046
mkdir build
4147
cmake -DCMAKE_BUILD_TYPE=Release -DROCKSDB_PORTABLE=1 -DUSE_JEMALLOC=${USE_JEMALLOC} -DENABLE_ASAN=${ENABLE_ASAN} -B build/
4248
cmake --build build/ -j $(nproc)
4349
rm -rf build/Build build/Download/[a-y]* build/Source/[a-g]* build/Source/[i-q]* build/Source/[s-z]*
4450
find ./ -name '*CMakeFiles*' -type d -exec rm -rf "{}" +
45-
../admin_tools/download_hadoop.sh hadoop-bin
4651
../admin_tools/download_zk.sh zookeeper-bin
47-
rm -rf hadoop-bin/share/doc
4852
rm -rf zookeeper-bin/docs
49-
mv hadoop-bin ..
5053
mv zookeeper-bin ..
5154
shell: bash

cmake_modules/BaseFunctions.cmake

Lines changed: 37 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -325,6 +325,42 @@ function(dsn_setup_include_path)#TODO(huangwei5): remove this
325325
include_directories(${THIRDPARTY_INSTALL_DIR}/include)
326326
endfunction(dsn_setup_include_path)
327327

328+
# Validate the JDK pointed to by JAVA_HOME and register the directories that hold the
# JVM shared libraries (libjvm, libjava, libverify) as link directories.
#
# Inputs (CMake variables, typically passed on the command line with -D):
#   ARCH_TYPE - name of the architecture sub-directory used by the legacy JDK layout,
#               i.e. ${JAVA_HOME}/jre/lib/${ARCH_TYPE} (not used for the macOS paths).
#   JAVA_HOME - root directory of the JDK installation.
#
# Two layouts are recognized, matching the existence checks below:
#   legacy: ${JAVA_HOME}/jre/lib/${ARCH_TYPE}/server (${JAVA_HOME}/jre/lib/server on macOS)
#   modern: ${JAVA_HOME}/lib/server
#
# Configuration stops with FATAL_ERROR when ARCH_TYPE or JAVA_HOME is undefined, when
# JAVA_HOME does not exist, or when libjvm is found in neither layout.
#
# Example:
#   dsn_setup_java_libs()
function(dsn_setup_java_libs)
    if (NOT DEFINED ARCH_TYPE)
        message(FATAL_ERROR "ARCH_TYPE is not defined. Please configure with -DARCH_TYPE=...")
    endif()

    if (NOT DEFINED JAVA_HOME)
        message(FATAL_ERROR "JAVA_HOME is not defined. Please configure with -DJAVA_HOME=...")
    endif()

    if (NOT EXISTS "${JAVA_HOME}")
        message(FATAL_ERROR "JAVA_HOME does not exist: ${JAVA_HOME}")
    endif()

    message(STATUS "JAVA_HOME = ${JAVA_HOME}")

    # Only the legacy library directory and the shared-library suffix differ by platform.
    if (APPLE)
        set(jvm_lib_name "libjvm.dylib")
        set(legacy_lib_dir "${JAVA_HOME}/jre/lib")
    else()
        set(jvm_lib_name "libjvm.so")
        set(legacy_lib_dir "${JAVA_HOME}/jre/lib/${ARCH_TYPE}")
    endif()
    set(modern_lib_dir "${JAVA_HOME}/lib")

    # Collect the directories of every layout in which libjvm is actually present, so
    # that the link directories always agree with what the validation accepted.
    # The legacy layout is listed first to keep its precedence.
    set(jvm_link_dirs "")
    foreach (lib_dir IN ITEMS "${legacy_lib_dir}" "${modern_lib_dir}")
        if (EXISTS "${lib_dir}/server/${jvm_lib_name}")
            # <lib_dir>/server is where libjvm was found; <lib_dir> itself is added for
            # the sibling JVM libraries such as libjava and libverify.
            list(APPEND jvm_link_dirs "${lib_dir}/server" "${lib_dir}")
        endif()
    endforeach()

    if ("${jvm_link_dirs}" STREQUAL "")
        message(FATAL_ERROR "${jvm_lib_name} not found under JAVA_HOME: ${JAVA_HOME}")
    endif()

    # Provide directories to be searched for JVM libraries such as libjvm.so, libjava.so
    # and libverify.so.
    #
    # Currently these directories are used by the RocksDB HDFS plugin (rocksdb-hdfs-env)
    # in thirdparty to be searched while linking against JVM libraries for JNI.
    message(STATUS "JVM link directories = ${jvm_link_dirs}")
    link_directories(${jvm_link_dirs})
endfunction(dsn_setup_java_libs)
363+
328364
function(dsn_setup_thirdparty_libs)
329365
set(BOOST_ROOT ${THIRDPARTY_INSTALL_DIR})
330366
set(Boost_USE_MULTITHREADED ON)
@@ -360,10 +396,7 @@ function(dsn_setup_thirdparty_libs)
360396
endif()
361397
find_package(RocksDB REQUIRED)
362398

363-
# libhdfs
364-
find_package(JNI REQUIRED)
365-
message (STATUS "JAVA_JVM_LIBRARY=${JAVA_JVM_LIBRARY}")
366-
link_libraries(${JAVA_JVM_LIBRARY})
399+
dsn_setup_java_libs()
367400

368401
find_package(OpenSSL REQUIRED)
369402
include_directories(${OPENSSL_INCLUDE_DIR})

run.sh

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,17 @@ function run_build()
276276
echo "Build start time: `date`"
277277
start_time=`date +%s`
278278

279+
case "$(uname)" in
280+
Darwin)
281+
echo "Currently, macOS does not support ${ROOT}/admin_tools/config_hdfs.sh"
282+
;;
283+
*)
284+
# The RocksDB HDFS plugin (rocksdb-hdfs-env) in thirdparty relies on ${HADOOP_HOME}
285+
# environment variable to locate the libraries to link against.
286+
source "${ROOT}"/admin_tools/config_hdfs.sh
287+
;;
288+
esac
289+
279290
if [[ ${SKIP_THIRDPARTY} == "YES" ]]; then
280291
echo "Skip building third-parties..."
281292
else
@@ -301,6 +312,8 @@ function run_build()
301312
fi
302313

303314
CMAKE_OPTIONS="${CMAKE_OPTIONS}
315+
-DARCH_TYPE=${ARCH_TYPE}
316+
-DJAVA_HOME=${JAVA_HOME}
304317
-DENABLE_GCOV=${ENABLE_GCOV}
305318
-DENABLE_GPERF=${ENABLE_GPERF}
306319
-DBoost_NO_BOOST_CMAKE=ON

thirdparty/CMakeLists.txt

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -502,7 +502,7 @@ set(ROCKSDB_OPTIONS
502502
if (NOT APPLE)
503503
set(ROCKSDB_OPTIONS
504504
${ROCKSDB_OPTIONS}
505-
-DROCKSDB_PLUGINS=encfs)
505+
"-DROCKSDB_PLUGINS=encfs hdfs")
506506
endif ()
507507
ExternalProject_Add(rocksdb
508508
URL ${OSS_URL_PREFIX}/rocksdb-v8.5.3.tar.gz
@@ -511,6 +511,9 @@ ExternalProject_Add(rocksdb
511511
PATCH_COMMAND patch -p1 < ${TP_DIR}/fix_rocksdb-cmake-PORTABLE-option.patch
512512
COMMAND rm -rf ${TP_DIR}/build/Source/rocksdb/plugin/encfs
513513
COMMAND git clone -b main --depth=1 https://github.com/pegasus-kv/encfs.git ${TP_DIR}/build/Source/rocksdb/plugin/encfs
514+
COMMAND rm -rf ${TP_DIR}/build/Source/rocksdb/plugin/hdfs
515+
COMMAND git clone -b master --depth=1 https://github.com/riversand963/rocksdb-hdfs-env.git ${TP_DIR}/build/Source/rocksdb/plugin/hdfs
516+
COMMAND cd ${TP_DIR}/build/Source/rocksdb/plugin/hdfs && patch -p1 < ${TP_DIR}/fix_rocksdb-plugin-hdfs.patch
514517
DEPENDS googletest jemalloc lz4 snappy zstd
515518
CMAKE_ARGS ${ROCKSDB_OPTIONS}
516519
DOWNLOAD_EXTRACT_TIMESTAMP true
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
diff --git a/CMakeLists.txt b/CMakeLists.txt
2+
index c660747..5fef10e 100644
3+
--- a/CMakeLists.txt
4+
+++ b/CMakeLists.txt
5+
@@ -5,6 +5,7 @@ cmake_minimum_required(VERSION 3.4)
6+
# Windows is not supported.
7+
8+
set(hdfs_SOURCES "env_hdfs.cc" "env_hdfs_impl.cc" PARENT_SCOPE)
9+
+set(hdfs_HEADERS "env_hdfs.h" PARENT_SCOPE)
10+
set(hdfs_LIBS "hdfs" "dl" "verify" "java" "jvm" PARENT_SCOPE)
11+
set(hdfs_INCLUDE_PATHS "$ENV{JAVA_HOME}/include" "$ENV{JAVA_HOME}/include/linux" "$ENV{HADOOP_HOME}/include" PARENT_SCOPE)
12+
set(hdfs_LINK_PATHS "$ENV{JAVA_HOME}/jre/lib/amd64/server" "$ENV{JAVA_HOME}/jre/lib/amd64" "$ENV{HADOOP_HOME}/lib/native" PARENT_SCOPE)
13+
diff --git a/env_hdfs_impl.cc b/env_hdfs_impl.cc
14+
index 01574bc..3927e5d 100644
15+
--- a/env_hdfs_impl.cc
16+
+++ b/env_hdfs_impl.cc
17+
@@ -14,6 +14,7 @@
18+
#include <iostream>
19+
#include <sstream>
20+
#include "logging/logging.h"
21+
+#include "port/sys_time.h"
22+
#include "rocksdb/status.h"
23+
#include "util/string_util.h"
24+
25+
@@ -524,7 +525,7 @@ IOStatus HdfsFileSystem::FileExists(const std::string& fname,
26+
default: // anything else should be an error
27+
ROCKS_LOG_FATAL(mylog, "FileExists hdfsExists call failed");
28+
return IOStatus::IOError("hdfsExists call failed with error " +
29+
- ROCKSDB_NAMESPACE::ToString(value) + " on path " + fname + ".\n");
30+
+ std::to_string(value) + " on path " + fname + ".\n");
31+
}
32+
}
33+

0 commit comments

Comments
 (0)