Skip to content

Commit 590a513

Browse files
authored
DX-114802: Fix Mac build to actually use VCPKG for LLVM. (#16)
* Change release naming scheme to add commit ids. Add action url to release Add action url to release * Mac vcpkg root setting. Old grpc uninstall causing error? fix vcpkg root error Fix vcpgkg llvm for mac. vcpkg vcpkg Go back to brew for build tools. vcpkg just for llvm Update macos path fix path to system files vcpkg vcpkg vcpkg fix re2 dependency Make JNI CMake args more robust - Check if protobuf_ep-install exists before adding Protobuf_ROOT - Check if re2_ep-install exists before adding re2_ROOT - This handles cases where system dependencies are used instead of bundled Use vcpkg RE2 instead of bundled RE2 for macOS Gandiva build vcpkg installs RE2 as a dependency of LLVM. When we use bundled RE2, the Arrow C++ build compiles Gandiva against vcpkg's RE2 headers (which use std::string_view API) but links against bundled RE2 (which uses StringPiece API), causing linker errors. This change uses vcpkg's RE2 for both compilation and linking when vcpkg LLVM is used, ensuring ABI compatibility. * Add username to tag fix release notes * Fix merge error * Fix quotes * fix env var addressing
1 parent fdfb66b commit 590a513

File tree

2 files changed

+87
-10
lines changed

2 files changed

+87
-10
lines changed

.github/workflows/jarbuild.yml

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -228,12 +228,13 @@ jobs:
228228
with:
229229
repository: Microsoft/vcpkg
230230
path: arrow/vcpkg
231+
fetch-depth: 0
231232
- name: Install vcpkg
232233
run: |
233234
cd arrow/vcpkg
234235
./bootstrap-vcpkg.sh
235-
echo "VCPKG_ROOT=${PWD}/arrow/vcpkg" >> ${GITHUB_ENV}
236-
echo "${PWD}/arrow/vcpkg" >> ${GITHUB_PATH}
236+
echo "VCPKG_ROOT_LOCAL=${PWD}" >> ${GITHUB_ENV}
237+
echo "${PWD}" >> ${GITHUB_PATH}
237238
- name: Clean up disk space
238239
run: |
239240
echo "=== Free disk space before cleanup ==="
@@ -284,25 +285,28 @@ jobs:
284285
brew bundle --file=arrow/cpp/Brewfile
285286
286287
# Clean up any existing LLVM installations in favor of vcpkg.
287-
brew uninstall llvm || :
288+
# Need to uninstall all versioned LLVM packages (llvm@18, llvm@17, etc.)
289+
for llvm_pkg in $(brew list | grep -E '^llvm(@[0-9]+)?$'); do
290+
brew uninstall "${llvm_pkg}" || :
291+
done
288292
289293
# We want to link aws-sdk-cpp statically but Homebrew's
290294
# aws-sdk-cpp provides only shared library. If we have
291295
# Homebrew's aws-sdk-cpp, our build mixes Homebrew's
292296
# aws-sdk-cpp and bundled aws-sdk-cpp. We uninstall Homebrew's
293297
# aws-sdk-cpp to ensure using only bundled aws-sdk-cpp.
294-
brew uninstall aws-sdk-cpp
298+
brew uninstall aws-sdk-cpp || :
295299
# We want to use bundled RE2 for static linking. If
296300
# Homebrew's RE2 is installed, its header file may be used.
297301
# We uninstall Homebrew's RE2 to ensure using bundled RE2.
298302
brew uninstall grpc || : # gRPC depends on RE2
299303
brew uninstall grpc@1.54 || : # gRPC 1.54 may be installed too
300-
brew uninstall re2
304+
brew uninstall re2 || :
301305
# We want to use bundled Protobuf for static linking. If
302306
# Homebrew's Protobuf is installed, its library file may be
303307
# used on test. We uninstall Homebrew's Protobuf to ensure using
304308
# bundled Protobuf.
305-
brew uninstall protobuf
309+
brew uninstall protobuf || :
306310
307311
echo ""
308312
echo "=== Free disk space before LLVM build ==="
@@ -312,7 +316,8 @@ jobs:
312316
# Use vcpkg to install LLVM.
313317
vcpkg install \
314318
--clean-after-build \
315-
--x-install-root=${VCPKG_ROOT}/installed \
319+
--vcpkg-root=${VCPKG_ROOT_LOCAL} \
320+
--x-install-root=${VCPKG_ROOT_LOCAL}/installed \
316321
--x-manifest-root=arrow/ci/vcpkg \
317322
--overlay-ports=arrow/ci/vcpkg/overlay/llvm/ \
318323
--x-feature=gandiva-llvm
@@ -558,7 +563,7 @@ jobs:
558563
# GH-499: How to create release notes?
559564
echo "Creating release: ${{ steps.commit_ids.outputs.release_tag }}"
560565
gh release create "${{ steps.commit_ids.outputs.release_tag }}" \
561-
-n "Action URL: $GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" \
566+
-n "Release ${{ steps.commit_ids.outputs.release_name }} RC${{ steps.commit_ids.outputs.rc }}<br>Triggered by: ${{ github.actor }}<br>Action URL: $GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID<br>arrow_branch: ${{github.event.inputs.ARROW_BRANCH}}<br>arrow_repo: ${{github.event.inputs.ARROW_REPO}}<br>release_tag_name: ${{github.event.inputs.RELEASE_TAG_NAME}}<br>arrow-java branch: ${{github.ref_name}}" \
562567
--prerelease \
563568
--repo ${GITHUB_REPOSITORY} \
564569
--title "Apache Arrow Java ${{ steps.commit_ids.outputs.version }} RC${{ steps.commit_ids.outputs.rc }} (arrow-java: ${{ steps.commit_ids.outputs.arrow_java_commit }}, arrow: ${{ steps.commit_ids.outputs.arrow_commit }})"

ci/scripts/jni_macos_build.sh

Lines changed: 74 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,55 @@ export ARROW_TEST_DATA="${arrow_dir}/testing/data"
7878
export PARQUET_TEST_DATA="${arrow_dir}/cpp/submodules/parquet-testing/data"
7979
export AWS_EC2_METADATA_DISABLED=TRUE
8080

81+
# Determine vcpkg triplet based on architecture
82+
vcpkg_arch="$(arch)"
83+
case "${vcpkg_arch}" in
84+
arm64)
85+
vcpkg_triplet="arm64-osx"
86+
;;
87+
i386|x86_64)
88+
vcpkg_triplet="x64-osx"
89+
;;
90+
*)
91+
vcpkg_triplet="arm64-osx"
92+
;;
93+
esac
94+
95+
# Set LLVM_DIR to point to vcpkg-installed LLVM if VCPKG_ROOT_LOCAL is set
96+
llvm_dir_arg=""
97+
gandiva_cxx_flags=""
98+
osx_sysroot_arg=""
99+
re2_source_arg="-Dre2_SOURCE=BUNDLED"
100+
if [ -n "${VCPKG_ROOT_LOCAL:-}" ]; then
101+
vcpkg_installed="${VCPKG_ROOT_LOCAL}/installed/${vcpkg_triplet}"
102+
llvm_cmake_dir="${vcpkg_installed}/share/llvm"
103+
if [ -d "${llvm_cmake_dir}" ]; then
104+
llvm_dir_arg="-DLLVM_DIR=${llvm_cmake_dir}"
105+
106+
# vcpkg's clang needs to know where to find system headers
107+
# Arrow's GandivaAddBitcode.cmake uses CMAKE_OSX_SYSROOT to set SDKROOT env var
108+
sdk_path="$(xcrun --show-sdk-path)"
109+
if [ -d "${sdk_path}" ]; then
110+
osx_sysroot_arg="-DCMAKE_OSX_SYSROOT=${sdk_path}"
111+
fi
112+
113+
# Also pass the C++ standard library include path via ARROW_GANDIVA_PC_CXX_FLAGS
114+
xcode_path="$(xcode-select -p)"
115+
cxx_include_path="${xcode_path}/Toolchains/XcodeDefault.xctoolchain/usr/include/c++/v1"
116+
if [ -d "${cxx_include_path}" ]; then
117+
gandiva_cxx_flags="-DARROW_GANDIVA_PC_CXX_FLAGS=-stdlib=libc++;-isystem;${cxx_include_path}"
118+
fi
119+
120+
# Use vcpkg's RE2 since it's installed as a dependency of LLVM
121+
# This ensures ABI compatibility - vcpkg's RE2 uses std::string_view API
122+
# which matches what vcpkg's LLVM and Abseil expect
123+
re2_cmake_dir="${vcpkg_installed}/share/re2"
124+
if [ -d "${re2_cmake_dir}" ]; then
125+
re2_source_arg="-Dre2_ROOT=${vcpkg_installed}"
126+
fi
127+
fi
128+
fi
129+
81130
cmake \
82131
-S "${arrow_dir}/cpp" \
83132
-B "${build_dir}/cpp" \
@@ -100,10 +149,13 @@ cmake \
100149
-DCMAKE_INSTALL_PREFIX="${install_dir}" \
101150
-DCMAKE_UNITY_BUILD="${CMAKE_UNITY_BUILD}" \
102151
-DGTest_SOURCE=BUNDLED \
152+
${llvm_dir_arg} \
153+
${osx_sysroot_arg} \
154+
${gandiva_cxx_flags} \
103155
-DPARQUET_BUILD_EXAMPLES=OFF \
104156
-DPARQUET_BUILD_EXECUTABLES=OFF \
105157
-DPARQUET_REQUIRE_ENCRYPTION=OFF \
106-
-Dre2_SOURCE=BUNDLED \
158+
${re2_source_arg} \
107159
-GNinja
108160
cmake --build "${build_dir}/cpp" --target install
109161
github_actions_group_end
@@ -125,7 +177,27 @@ if [ "${ARROW_RUN_TESTS:-}" == "ON" ]; then
125177
github_actions_group_end
126178
fi
127179

128-
export JAVA_JNI_CMAKE_ARGS="-DProtobuf_ROOT=${build_dir}/cpp/protobuf_ep-install"
180+
# Pass paths to dependencies so the JNI build can find them
181+
# Build up the JNI CMake args based on what's available
182+
jni_cmake_args="${llvm_dir_arg}"
183+
184+
# Add Protobuf path if bundled, otherwise CMake will find system Protobuf
185+
if [ -d "${build_dir}/cpp/protobuf_ep-install" ]; then
186+
jni_cmake_args="${jni_cmake_args} -DProtobuf_ROOT=${build_dir}/cpp/protobuf_ep-install"
187+
fi
188+
189+
# RE2 path for the JNI build - prefer vcpkg's RE2 if we used it for the C++ build,
190+
# otherwise fall back to bundled RE2 if available
191+
if [ -n "${VCPKG_ROOT_LOCAL:-}" ]; then
192+
vcpkg_re2_dir="${VCPKG_ROOT_LOCAL}/installed/${vcpkg_triplet}"
193+
if [ -d "${vcpkg_re2_dir}/share/re2" ]; then
194+
jni_cmake_args="${jni_cmake_args} -Dre2_ROOT=${vcpkg_re2_dir}"
195+
fi
196+
elif [ -d "${build_dir}/cpp/re2_ep-install" ]; then
197+
jni_cmake_args="${jni_cmake_args} -Dre2_ROOT=${build_dir}/cpp/re2_ep-install"
198+
fi
199+
200+
export JAVA_JNI_CMAKE_ARGS="${jni_cmake_args}"
129201
"${source_dir}/ci/scripts/jni_build.sh" \
130202
"${source_dir}" \
131203
"${install_dir}" \

0 commit comments

Comments
 (0)