Skip to content

Commit f16b6ac

Browse files
authored
GH-47399: [C++] Update bundled Apache ORC to 2.2.0 with Protobuf patch (#47408)
### Rationale for this change

apache/orc#2357 is needed for recent Protobuf.

### What changes are included in this PR?

* Update bundled Apache ORC to 2.2.0.
* apache/orc#2345 is needed for MSVC.
* Apply apache/orc#2357 because it's not released yet.
* Enable `CMP0077` for bundled dependencies by default to avoid `set(CACHE)`.

### Are these changes tested?

Yes.

### Are there any user-facing changes?

Yes.

* GitHub Issue: #47399

Authored-by: Sutou Kouhei <[email protected]>
Signed-off-by: Raúl Cumplido <[email protected]>
1 parent a9cdeaf commit f16b6ac

File tree

4 files changed

+187
-65
lines changed

4 files changed

+187
-65
lines changed

cpp/cmake_modules/ThirdpartyToolchain.cmake

Lines changed: 56 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -1024,6 +1024,14 @@ macro(prepare_fetchcontent)
10241024
# We should remove it once we have updated the dependencies:
10251025
# https://github.com/apache/arrow/issues/45985
10261026
set(CMAKE_POLICY_VERSION_MINIMUM 3.5)
1027+
# Use "NEW" for CMP0077 by default.
1028+
#
1029+
# https://cmake.org/cmake/help/latest/policy/CMP0077.html
1030+
#
1031+
# option() honors normal variables.
1032+
set(CMAKE_POLICY_DEFAULT_CMP0077
1033+
NEW
1034+
CACHE STRING "")
10271035
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "")
10281036

10291037
if(MSVC)
@@ -4599,8 +4607,26 @@ target_include_directories(arrow::hadoop INTERFACE "${HADOOP_HOME}/include")
45994607
# Apache ORC
46004608

46014609
function(build_orc)
4610+
list(APPEND CMAKE_MESSAGE_INDENT "Apache ORC: ")
4611+
46024612
message(STATUS "Building Apache ORC from source")
46034613

4614+
set(ORC_PATCHES)
4615+
if(MSVC)
4616+
# We can remove this once bundled Apache ORC is 2.2.1 or later.
4617+
list(APPEND ORC_PATCHES ${CMAKE_CURRENT_LIST_DIR}/orc-2345.patch)
4618+
endif()
4619+
if(Protobuf_VERSION VERSION_GREATER_EQUAL 32.0)
4620+
# We can remove this once bundled Apache ORC is 2.2.1 or later.
4621+
list(APPEND ORC_PATCHES ${CMAKE_CURRENT_LIST_DIR}/orc-2357.patch)
4622+
endif()
4623+
if(ORC_PATCHES)
4624+
find_program(PATCH patch REQUIRED)
4625+
set(ORC_PATCH_COMMAND ${PATCH} -p1 -i ${ORC_PATCHES})
4626+
else()
4627+
set(ORC_PATCH_COMMAND)
4628+
endif()
4629+
46044630
if(LZ4_VENDORED)
46054631
set(ORC_LZ4_TARGET lz4_static)
46064632
set(ORC_LZ4_ROOT "${lz4_SOURCE_DIR}")
@@ -4615,98 +4641,62 @@ function(build_orc)
46154641
if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.29)
46164642
fetchcontent_declare(orc
46174643
${FC_DECLARE_COMMON_OPTIONS}
4644+
PATCH_COMMAND ${ORC_PATCH_COMMAND}
46184645
URL ${ORC_SOURCE_URL}
46194646
URL_HASH "SHA256=${ARROW_ORC_BUILD_SHA256_CHECKSUM}")
46204647
prepare_fetchcontent()
46214648

46224649
set(CMAKE_UNITY_BUILD FALSE)
46234650

4624-
set(ORC_PREFER_STATIC_LZ4
4625-
OFF
4626-
CACHE BOOL "" FORCE)
4627-
set(LZ4_HOME
4628-
"${ORC_LZ4_ROOT}"
4629-
CACHE STRING "" FORCE)
4630-
set(LZ4_INCLUDE_DIR
4631-
"${ORC_LZ4_INCLUDE_DIR}"
4632-
CACHE STRING "" FORCE)
4633-
set(LZ4_LIBRARY
4634-
${ORC_LZ4_TARGET}
4635-
CACHE STRING "" FORCE)
4651+
set(ORC_PREFER_STATIC_LZ4 OFF)
4652+
set(LZ4_HOME "${ORC_LZ4_ROOT}")
4653+
set(LZ4_INCLUDE_DIR "${ORC_LZ4_INCLUDE_DIR}")
4654+
set(LZ4_LIBRARY ${ORC_LZ4_TARGET})
46364655

4637-
set(ORC_PREFER_STATIC_PROTOBUF
4638-
OFF
4639-
CACHE BOOL "" FORCE)
4656+
set(ORC_PREFER_STATIC_PROTOBUF OFF)
46404657
get_target_property(PROTOBUF_INCLUDE_DIR ${ARROW_PROTOBUF_LIBPROTOBUF}
46414658
INTERFACE_INCLUDE_DIRECTORIES)
46424659
get_filename_component(Protobuf_ROOT "${PROTOBUF_INCLUDE_DIR}" DIRECTORY)
4643-
set(PROTOBUF_HOME
4644-
${Protobuf_ROOT}
4645-
CACHE STRING "" FORCE)
4660+
set(PROTOBUF_HOME ${Protobuf_ROOT})
46464661
# ORC uses this.
46474662
target_include_directories(${ARROW_PROTOBUF_LIBPROTOC}
46484663
INTERFACE "${PROTOBUF_INCLUDE_DIR}")
46494664
set(PROTOBUF_EXECUTABLE ${ARROW_PROTOBUF_PROTOC})
46504665
set(PROTOBUF_LIBRARY ${ARROW_PROTOBUF_LIBPROTOBUF})
46514666
set(PROTOC_LIBRARY ${ARROW_PROTOBUF_LIBPROTOC})
46524667

4653-
set(ORC_PREFER_STATIC_SNAPPY
4654-
OFF
4655-
CACHE BOOL "" FORCE)
4668+
set(ORC_PREFER_STATIC_SNAPPY OFF)
46564669
get_target_property(SNAPPY_INCLUDE_DIR ${Snappy_TARGET} INTERFACE_INCLUDE_DIRECTORIES)
46574670
get_filename_component(Snappy_ROOT "${SNAPPY_INCLUDE_DIR}" DIRECTORY)
4658-
set(SNAPPY_HOME
4659-
${Snappy_ROOT}
4660-
CACHE STRING "" FORCE)
4661-
set(SNAPPY_LIBRARY
4662-
${Snappy_TARGET}
4663-
CACHE STRING "" FORCE)
4671+
set(SNAPPY_HOME ${Snappy_ROOT})
4672+
set(SNAPPY_LIBRARY ${Snappy_TARGET})
46644673

4665-
set(ORC_PREFER_STATIC_ZLIB
4666-
OFF
4667-
CACHE BOOL "" FORCE)
4674+
set(ORC_PREFER_STATIC_ZLIB OFF)
46684675
get_target_property(ZLIB_INCLUDE_DIR ZLIB::ZLIB INTERFACE_INCLUDE_DIRECTORIES)
46694676
get_filename_component(ZLIB_ROOT "${ZLIB_INCLUDE_DIR}" DIRECTORY)
4670-
set(ZLIB_HOME
4671-
${ZLIB_ROOT}
4672-
CACHE STRING "" FORCE)
4673-
# From CMake 3.21 onwards the set(CACHE) command does not remove any normal
4674-
# variable of the same name from the current scope. We have to manually remove
4675-
# the variable via unset to avoid ORC not finding the ZLIB_LIBRARY.
4677+
set(ZLIB_HOME ${ZLIB_ROOT})
4678+
# From CMake 3.21 onwards the set(CACHE) command does not remove
4679+
# any normal variable of the same name from the current scope. We
4680+
# have to manually remove the variable via unset to avoid ORC not
4681+
# finding the ZLIB_LIBRARY.
46764682
unset(ZLIB_LIBRARY)
46774683
set(ZLIB_LIBRARY
46784684
ZLIB::ZLIB
46794685
CACHE STRING "" FORCE)
46804686

4681-
set(ORC_PREFER_STATIC_ZSTD
4682-
OFF
4683-
CACHE BOOL "" FORCE)
4687+
set(ORC_PREFER_STATIC_ZSTD OFF)
46844688
get_target_property(ZSTD_INCLUDE_DIR ${ARROW_ZSTD_LIBZSTD}
46854689
INTERFACE_INCLUDE_DIRECTORIES)
46864690
get_filename_component(ZSTD_ROOT "${ZSTD_INCLUDE_DIR}" DIRECTORY)
4687-
set(ZSTD_HOME
4688-
${ZSTD_ROOT}
4689-
CACHE STRING "" FORCE)
4691+
set(ZSTD_HOME ${ZSTD_ROOT})
46904692
set(ZSTD_LIBRARY ${ARROW_ZSTD_LIBZSTD})
46914693

4692-
set(BUILD_CPP_TESTS
4693-
OFF
4694-
CACHE BOOL "" FORCE)
4695-
set(BUILD_JAVA
4696-
OFF
4697-
CACHE BOOL "" FORCE)
4698-
set(BUILD_LIBHDFSPP
4699-
OFF
4700-
CACHE BOOL "" FORCE)
4701-
set(BUILD_TOOLS
4702-
OFF
4703-
CACHE BOOL "" FORCE)
4704-
set(INSTALL_VENDORED_LIBS
4705-
OFF
4706-
CACHE BOOL "" FORCE)
4707-
set(STOP_BUILD_ON_WARNING
4708-
OFF
4709-
CACHE BOOL "" FORCE)
4694+
set(BUILD_CPP_TESTS OFF)
4695+
set(BUILD_JAVA OFF)
4696+
set(BUILD_LIBHDFSPP OFF)
4697+
set(BUILD_TOOLS OFF)
4698+
set(INSTALL_VENDORED_LIBS OFF)
4699+
set(STOP_BUILD_ON_WARNING OFF)
47104700

47114701
fetchcontent_makeavailable(orc)
47124702

@@ -4769,16 +4759,17 @@ function(build_orc)
47694759

47704760
externalproject_add(orc_ep
47714761
${EP_COMMON_OPTIONS}
4772-
URL ${ORC_SOURCE_URL}
4773-
URL_HASH "SHA256=${ARROW_ORC_BUILD_SHA256_CHECKSUM}"
47744762
BUILD_BYPRODUCTS ${ORC_STATIC_LIB}
47754763
CMAKE_ARGS ${ORC_CMAKE_ARGS}
47764764
DEPENDS ${ARROW_PROTOBUF_LIBPROTOBUF}
47774765
${ARROW_PROTOBUF_PROTOC}
47784766
${ARROW_ZSTD_LIBZSTD}
47794767
${Snappy_TARGET}
47804768
${ORC_LZ4_TARGET}
4781-
ZLIB::ZLIB)
4769+
ZLIB::ZLIB
4770+
PATCH_COMMAND ${ORC_PATCH_COMMAND}
4771+
URL ${ORC_SOURCE_URL}
4772+
URL_HASH "SHA256=${ARROW_ORC_BUILD_SHA256_CHECKSUM}")
47824773
add_library(orc::orc STATIC IMPORTED)
47834774
set_target_properties(orc::orc PROPERTIES IMPORTED_LOCATION "${ORC_STATIC_LIB}")
47844775
target_include_directories(orc::orc BEFORE INTERFACE "${ORC_INCLUDE_DIR}")
@@ -4806,6 +4797,8 @@ function(build_orc)
48064797
set(ARROW_BUNDLED_STATIC_LIBS
48074798
${ARROW_BUNDLED_STATIC_LIBS}
48084799
PARENT_SCOPE)
4800+
4801+
list(POP_BACK CMAKE_MESSAGE_INDENT)
48094802
endfunction()
48104803

48114804
if(ARROW_ORC)

cpp/cmake_modules/orc-2345.patch

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
From a76249e13a6e364e0507a12cb71abaaf1647252e Mon Sep 17 00:00:00 2001
19+
From: Yuriy Chernyshov <[email protected]>
20+
Date: Thu, 31 Jul 2025 13:20:15 +0200
21+
Subject: [PATCH] Fix Windows build
22+
23+
See
24+
https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/byteswap-uint64-byteswap-ulong-byteswap-ushort?view=msvc-170
25+
---
26+
c++/src/Geospatial.cc | 4 ++--
27+
1 file changed, 2 insertions(+), 2 deletions(-)
28+
29+
diff --git a/c++/src/Geospatial.cc b/c++/src/Geospatial.cc
30+
index 6d7d268703..2b110cacb6 100644
31+
--- a/c++/src/Geospatial.cc
32+
+++ b/c++/src/Geospatial.cc
33+
@@ -66,8 +66,8 @@ namespace orc::geospatial {
34+
35+
#if defined(_MSC_VER)
36+
#include <intrin.h> // IWYU pragma: keep
37+
-#define ORC_BYTE_SWAP64 _byteSwap_uint64
38+
-#define ORC_BYTE_SWAP32 _byteSwap_ulong
39+
+#define ORC_BYTE_SWAP64 _byteswap_uint64
40+
+#define ORC_BYTE_SWAP32 _byteswap_ulong
41+
#else
42+
#define ORC_BYTE_SWAP64 __builtin_bswap64
43+
#define ORC_BYTE_SWAP32 __builtin_bswap32

cpp/cmake_modules/orc-2357.patch

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
From a66baec5731b65a81189f48c242433d01580f344 Mon Sep 17 00:00:00 2001
19+
From: Dongjoon Hyun <[email protected]>
20+
Date: Fri, 15 Aug 2025 12:31:09 -0700
21+
Subject: [PATCH] ORC-1973: [C++] Use `int64_t` instead of
22+
`google::protobuf::int64`
23+
24+
---
25+
c++/src/io/InputStream.cc | 4 ++--
26+
c++/src/io/InputStream.hh | 2 +-
27+
c++/src/io/OutputStream.cc | 4 ++--
28+
c++/src/io/OutputStream.hh | 2 +-
29+
4 files changed, 6 insertions(+), 6 deletions(-)
30+
31+
diff --git a/c++/src/io/InputStream.cc b/c++/src/io/InputStream.cc
32+
index 06ef40bd4c..5e1dc00ccd 100644
33+
--- a/c++/src/io/InputStream.cc
34+
+++ b/c++/src/io/InputStream.cc
35+
@@ -112,8 +112,8 @@ namespace orc {
36+
return false;
37+
}
38+
39+
- google::protobuf::int64 SeekableArrayInputStream::ByteCount() const {
40+
- return static_cast<google::protobuf::int64>(position_);
41+
+ int64_t SeekableArrayInputStream::ByteCount() const {
42+
+ return static_cast<int64_t>(position_);
43+
}
44+
45+
void SeekableArrayInputStream::seek(PositionProvider& seekPosition) {
46+
diff --git a/c++/src/io/InputStream.hh b/c++/src/io/InputStream.hh
47+
index 07aa623b5f..8b251c9301 100644
48+
--- a/c++/src/io/InputStream.hh
49+
+++ b/c++/src/io/InputStream.hh
50+
@@ -72,7 +72,7 @@ namespace orc {
51+
virtual bool Next(const void** data, int* size) override;
52+
virtual void BackUp(int count) override;
53+
virtual bool Skip(int count) override;
54+
- virtual google::protobuf::int64 ByteCount() const override;
55+
+ virtual int64_t ByteCount() const override;
56+
virtual void seek(PositionProvider& position) override;
57+
virtual std::string getName() const override;
58+
};
59+
diff --git a/c++/src/io/OutputStream.cc b/c++/src/io/OutputStream.cc
60+
index fbf1ca61dd..a55050d122 100644
61+
--- a/c++/src/io/OutputStream.cc
62+
+++ b/c++/src/io/OutputStream.cc
63+
@@ -65,8 +65,8 @@ namespace orc {
64+
// PASS
65+
}
66+
67+
- google::protobuf::int64 BufferedOutputStream::ByteCount() const {
68+
- return static_cast<google::protobuf::int64>(dataBuffer_->size());
69+
+ int64_t BufferedOutputStream::ByteCount() const {
70+
+ return static_cast<int64_t>(dataBuffer_->size());
71+
}
72+
73+
bool BufferedOutputStream::WriteAliasedRaw(const void*, int) {
74+
diff --git a/c++/src/io/OutputStream.hh b/c++/src/io/OutputStream.hh
75+
index 6319de96d6..b029818125 100644
76+
--- a/c++/src/io/OutputStream.hh
77+
+++ b/c++/src/io/OutputStream.hh
78+
@@ -61,7 +61,7 @@ namespace orc {
79+
80+
virtual bool Next(void** data, int* size) override;
81+
virtual void BackUp(int count) override;
82+
- virtual google::protobuf::int64 ByteCount() const override;
83+
+ virtual int64_t ByteCount() const override;
84+
virtual bool WriteAliasedRaw(const void* data, int size) override;
85+
virtual bool AllowsAliasing() const override;
86+

cpp/thirdparty/versions.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,8 +90,8 @@ ARROW_OPENTELEMETRY_BUILD_VERSION=v1.21.0
9090
ARROW_OPENTELEMETRY_BUILD_SHA256_CHECKSUM=98e5546f577a11b52a57faed1f4cc60d8c1daa44760eba393f43eab5a8ec46a2
9191
ARROW_OPENTELEMETRY_PROTO_BUILD_VERSION=v1.7.0
9292
ARROW_OPENTELEMETRY_PROTO_BUILD_SHA256_CHECKSUM=11330d850f5e24d34c4246bc8cb21fcd311e7565d219195713455a576bb11bed
93-
ARROW_ORC_BUILD_VERSION=2.1.2
94-
ARROW_ORC_BUILD_SHA256_CHECKSUM=55451e65dea6ed42afb39fe33a88f9dcea8928dca0a0c9c23ef5545587810b4c
93+
ARROW_ORC_BUILD_VERSION=2.2.0
94+
ARROW_ORC_BUILD_SHA256_CHECKSUM=b15aca45a7e73ffbd1bbc36a78cd1422d41f07721092a25f43448e6e16f4763b
9595
ARROW_PROTOBUF_BUILD_VERSION=v21.3
9696
ARROW_PROTOBUF_BUILD_SHA256_CHECKSUM=2f723218f6cb709ae4cdc4fb5ed56a5951fc5d466f0128ce4c946b8c78c8c49f
9797
# Because of https://github.com/Tencent/rapidjson/pull/1323, we require

0 commit comments

Comments
 (0)