Skip to content

Commit 979b6b3

Browse files
authored
arrow: Enable encrypted Parquet support (#14358)
Fuzzing encrypted Parquet reading efficiently will probably require more work so that proper decryption keys are configured. This is just a preparatory step (though it might already surface basic issues).
1 parent cb9fa98 commit 979b6b3

File tree

3 files changed

+27
-18
lines changed

3 files changed

+27
-18
lines changed

projects/arrow/Dockerfile

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,17 @@ RUN apt-get update -y -q && \
2323
flex \
2424
ninja-build
2525

26-
RUN wget https://github.com/boostorg/boost/releases/download/boost-1.87.0/boost-1.87.0-cmake.tar.gz && \
27-
tar -xvf boost-1.87.0-cmake.tar.gz && \
28-
cd boost-1.87.0/ && \
29-
CFLAGS="" CXXFLAGS="" ./bootstrap.sh && \
30-
CFLAGS="" CXXFLAGS="" ./b2 headers && \
31-
CFLAGS="" CXXFLAGS="" ./b2 --with-process runtime-link=static link=static variant=release install -j 10 && \
32-
cp -R -f boost/ /usr/include/
26+
ARG boost_version=1.89.0
27+
# uuid and locale needed for Thrift source build
28+
ARG boost_components=filesystem;system;process;multiprecision;crc;uuid;locale
29+
30+
RUN wget https://github.com/boostorg/boost/releases/download/boost-${boost_version}/boost-${boost_version}-cmake.tar.gz && \
31+
tar -xf boost-${boost_version}-cmake.tar.gz && \
32+
cd boost-${boost_version}/ && \
33+
mkdir -p build/ && \
34+
cd build/ && \
35+
cmake .. -GNinja -DBOOST_INCLUDE_LIBRARIES="${boost_components}" -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF && \
36+
cmake --build . --target install
3337

3438
RUN git clone --depth=1 --recurse-submodules \
3539
https://github.com/apache/arrow.git $SRC/arrow

projects/arrow/build.sh

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,21 +15,31 @@
1515
#
1616
################################################################################
1717

18+
# 1. Build instrumented OpenSSL
19+
20+
OPENSSL_VERSION=3.5.4
21+
22+
cd /root
23+
wget https://github.com/openssl/openssl/releases/download/openssl-${OPENSSL_VERSION}/openssl-${OPENSSL_VERSION}.tar.gz
24+
tar -xf openssl-${OPENSSL_VERSION}.tar.gz
25+
cd openssl-${OPENSSL_VERSION}
26+
./Configure no-apps no-docs no-tests no-shared
27+
make -j
28+
make install
29+
30+
# 2. Build Arrow C++ proper
31+
1832
ARROW=${SRC}/arrow/cpp
1933

2034
BUILD_DIR=${SRC}/build-dir
2135
mkdir -p ${BUILD_DIR}
2236
cd ${BUILD_DIR}
2337

24-
# The CMake build setup compiles and runs the Thrift compiler, but ASAN
25-
# would report leaks and error out.
26-
export ASAN_OPTIONS="detect_leaks=0"
27-
2838
cmake ${ARROW} -GNinja \
2939
-DCMAKE_BUILD_TYPE=Release \
3040
-DARROW_DEPENDENCY_SOURCE=BUNDLED \
41+
-DARROW_OPENSSL_USE_SHARED=off \
3142
-DBOOST_SOURCE=SYSTEM \
32-
-DBoost_USE_STATIC_RUNTIME=on \
3343
-DARROW_BOOST_USE_SHARED=off \
3444
-DCMAKE_C_FLAGS="${CFLAGS}" \
3545
-DCMAKE_CXX_FLAGS="${CXXFLAGS}" \
@@ -45,7 +55,7 @@ cmake ${ARROW} -GNinja \
4555
-DARROW_TEST_LINKAGE=static \
4656
-DPARQUET_BUILD_EXAMPLES=off \
4757
-DPARQUET_BUILD_EXECUTABLES=off \
48-
-DPARQUET_REQUIRE_ENCRYPTION=off \
58+
-DPARQUET_REQUIRE_ENCRYPTION=on \
4959
\
5060
-DARROW_CSV=on \
5161
-DARROW_JEMALLOC=off \
@@ -71,4 +81,3 @@ ${ARROW}/build-support/fuzzing/generate_corpuses.sh ${BUILD_DIR}/release
7181
find . -executable -name "*-fuzz" -exec cp -a -v '{}' ${OUT} \;
7282
# Copy seed corpuses
7383
find . -name "*-fuzz_seed_corpus.zip" -exec cp -a -v '{}' ${OUT} \;
74-

projects/arrow/replay_build.sh

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,6 @@ ARROW=${SRC}/arrow/cpp
2020
BUILD_DIR=${SRC}/build-dir
2121
cd ${BUILD_DIR}
2222

23-
# The CMake build setup compiles and runs the Thrift compiler, but ASAN
24-
# would report leaks and error out.
25-
export ASAN_OPTIONS="detect_leaks=0"
26-
2723
cmake --build . -j$(nproc)
2824

2925
# Copy fuzz targets

0 commit comments

Comments
 (0)