Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion contrib/arrow
Submodule arrow updated 6189 files
69 changes: 52 additions & 17 deletions contrib/arrow-cmake/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,17 +24,25 @@ if (NOT ENABLE_PARQUET)
return()
endif()

# Freebsd: ../contrib/arrow/cpp/src/arrow/util/bit-util.h:27:10: fatal error: endian.h: No such file or directory
if (OS_FREEBSD)
message (FATAL_ERROR "Using internal parquet library on FreeBSD is not supported")
# Support C11: default the C standard to 11 only when CMAKE_C_STANDARD has not
# already been defined, so a value chosen before this point is left untouched.
if(NOT DEFINED CMAKE_C_STANDARD)
set(CMAKE_C_STANDARD 11)
endif()

# This ensures that a standard higher than the minimum can be passed correctly:
# default to C++20 when nothing has been chosen yet, and fail the configure step
# when an older standard was requested.
#
# The variable is referenced by name instead of being expanded with ${...}, so
# a defined-but-empty value cannot collapse the condition into the malformed
# `elseif( VERSION_LESS 20)`. The value 98 is rejected explicitly because it
# compares numerically greater than 20 even though C++98 predates C++20.
if(NOT DEFINED CMAKE_CXX_STANDARD)
    set(CMAKE_CXX_STANDARD 20)
elseif(CMAKE_CXX_STANDARD VERSION_LESS 20 OR CMAKE_CXX_STANDARD EQUAL 98)
    message(FATAL_ERROR "Cannot set a CMAKE_CXX_STANDARD smaller than 20")
endif()

set (CMAKE_CXX_STANDARD 17)
# We require a C++20 compliant compiler
set(CMAKE_CXX_STANDARD_REQUIRED ON)

set(ARROW_VERSION "11.0.0")
set(ARROW_VERSION "23.0.0")
string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" ARROW_BASE_VERSION "${ARROW_VERSION}")

set(ARROW_VERSION_MAJOR "11")
set(ARROW_VERSION_MAJOR "23")
set(ARROW_VERSION_MINOR "0")
set(ARROW_VERSION_PATCH "0")

Expand Down Expand Up @@ -221,8 +229,8 @@ target_include_directories(_orc SYSTEM PRIVATE
set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src/arrow")

set(ARROW_GENERATED_SRC_DIR ${CMAKE_CURRENT_BINARY_DIR}/cpp/src)
configure_file(${LIBRARY_DIR}/util/config.h.cmake ${ARROW_GENERATED_SRC_DIR}/arrow/util/config.h)

# Instantiate Arrow's config header templates (config.h and config_internal.h)
# from the Arrow source tree into ${ARROW_GENERATED_SRC_DIR}/arrow/util.
# ESCAPE_QUOTES makes configure_file backslash-escape any quotes that appear in
# substituted values.
configure_file("${LIBRARY_DIR}/util/config.h.cmake" "${ARROW_GENERATED_SRC_DIR}/arrow/util/config.h" ESCAPE_QUOTES)
configure_file("${LIBRARY_DIR}/util/config_internal.h.cmake" "${ARROW_GENERATED_SRC_DIR}/arrow/util/config_internal.h" ESCAPE_QUOTES)

# arrow/cpp/src/arrow/CMakeLists.txt (ARROW_SRCS + ARROW_COMPUTE + ARROW_IPC)
# find . \( -iname \*.cc -o -iname \*.cpp -o -iname \*.c \) | sort | awk '{print "\"${LIBRARY_DIR}" substr($1,2) "\"" }' | grep -v 'test.cc' | grep -v 'json' | grep -v 'flight' \|
Expand Down Expand Up @@ -250,6 +258,7 @@ set(ARROW_SRCS
"${LIBRARY_DIR}/array/concatenate.cc"
"${LIBRARY_DIR}/array/data.cc"
"${LIBRARY_DIR}/array/diff.cc"
"${LIBRARY_DIR}/array/statistics.cc"
"${LIBRARY_DIR}/array/util.cc"
"${LIBRARY_DIR}/array/validate.cc"
"${LIBRARY_DIR}/buffer.cc"
Expand All @@ -276,7 +285,6 @@ set(ARROW_SRCS
"${LIBRARY_DIR}/compute/kernels/codegen_internal.cc"
"${LIBRARY_DIR}/compute/kernels/hash_aggregate.cc"
"${LIBRARY_DIR}/compute/kernels/ree_util_internal.cc"
"${LIBRARY_DIR}/compute/kernels/row_encoder.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_arithmetic.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_boolean.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_cast_boolean.cc"
Expand All @@ -298,6 +306,7 @@ set(ARROW_SRCS
"${LIBRARY_DIR}/compute/kernels/scalar_temporal_binary.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_temporal_unary.cc"
"${LIBRARY_DIR}/compute/kernels/scalar_validity.cc"
"${LIBRARY_DIR}/compute/kernels/temporal_internal.cc"
"${LIBRARY_DIR}/compute/kernels/util_internal.cc"
"${LIBRARY_DIR}/compute/kernels/vector_array_sort.cc"
"${LIBRARY_DIR}/compute/kernels/vector_cumulative_ops.cc"
Expand All @@ -313,6 +322,7 @@ set(ARROW_SRCS
"${LIBRARY_DIR}/compute/kernels/vector_selection_internal.cc"
"${LIBRARY_DIR}/compute/kernels/vector_selection_take_internal.cc"
"${LIBRARY_DIR}/compute/kernels/vector_sort.cc"
"${LIBRARY_DIR}/compute/kernels/vector_swizzle.cc"
"${LIBRARY_DIR}/compute/key_hash_internal.cc"
"${LIBRARY_DIR}/compute/key_map_internal.cc"
"${LIBRARY_DIR}/compute/light_array_internal.cc"
Expand All @@ -326,7 +336,11 @@ set(ARROW_SRCS
"${LIBRARY_DIR}/config.cc"
"${LIBRARY_DIR}/datum.cc"
"${LIBRARY_DIR}/device.cc"
"${LIBRARY_DIR}/device_allocation_type_set.cc"
"${LIBRARY_DIR}/extension_type.cc"
"${LIBRARY_DIR}/extension/bool8.cc"
"${LIBRARY_DIR}/extension/json.cc"
"${LIBRARY_DIR}/extension/uuid.cc"
"${LIBRARY_DIR}/integration/c_data_integration_internal.cc"
"${LIBRARY_DIR}/io/buffered.cc"
"${LIBRARY_DIR}/io/caching.cc"
Expand Down Expand Up @@ -374,7 +388,10 @@ set(ARROW_SRCS
"${LIBRARY_DIR}/util/bitmap_builders.cc"
"${LIBRARY_DIR}/util/bitmap_ops.cc"
"${LIBRARY_DIR}/util/bpacking.cc"
"${LIBRARY_DIR}/util/bpacking_scalar.cc"
"${LIBRARY_DIR}/util/bpacking_simd_default.cc"
"${LIBRARY_DIR}/util/byte_size.cc"
"${LIBRARY_DIR}/util/byte_stream_split_internal.cc"
"${LIBRARY_DIR}/util/cancel.cc"
"${LIBRARY_DIR}/util/compression.cc"
"${LIBRARY_DIR}/util/counting_semaphore.cc"
Expand All @@ -384,20 +401,25 @@ set(ARROW_SRCS
"${LIBRARY_DIR}/util/decimal.cc"
"${LIBRARY_DIR}/util/delimiting.cc"
"${LIBRARY_DIR}/util/dict_util.cc"
"${LIBRARY_DIR}/util/fixed_width_internal.cc"
"${LIBRARY_DIR}/util/float16.cc"
"${LIBRARY_DIR}/util/formatting.cc"
"${LIBRARY_DIR}/util/future.cc"
"${LIBRARY_DIR}/util/fuzz_internal.cc"
"${LIBRARY_DIR}/util/hashing.cc"
"${LIBRARY_DIR}/util/int_util.cc"
"${LIBRARY_DIR}/util/io_util.cc"
"${LIBRARY_DIR}/util/key_value_metadata.cc"
"${LIBRARY_DIR}/util/list_util.cc"
"${LIBRARY_DIR}/util/logger.cc"
"${LIBRARY_DIR}/util/logging.cc"
"${LIBRARY_DIR}/util/math_internal.cc"
"${LIBRARY_DIR}/util/memory.cc"
"${LIBRARY_DIR}/util/mutex.cc"
"${LIBRARY_DIR}/util/ree_util.cc"
"${LIBRARY_DIR}/util/secure_string.cc"
"${LIBRARY_DIR}/util/string.cc"
"${LIBRARY_DIR}/util/string_builder.cc"
"${LIBRARY_DIR}/util/string_util.cc"
"${LIBRARY_DIR}/util/task_group.cc"
"${LIBRARY_DIR}/util/tdigest.cc"
"${LIBRARY_DIR}/util/thread_pool.cc"
Expand All @@ -410,7 +432,7 @@ set(ARROW_SRCS
"${LIBRARY_DIR}/util/utf8.cc"
"${LIBRARY_DIR}/util/value_parsing.cc"
"${LIBRARY_DIR}/vendored/base64.cpp"
"${LIBRARY_DIR}/vendored/datetime/tz.cpp"
"${LIBRARY_DIR}/vendored/datetime.cpp"
"${LIBRARY_DIR}/vendored/double-conversion/bignum-dtoa.cc"
"${LIBRARY_DIR}/vendored/double-conversion/bignum.cc"
"${LIBRARY_DIR}/vendored/double-conversion/cached-powers.cc"
Expand Down Expand Up @@ -442,6 +464,12 @@ set(ARROW_SRCS
"${ARROW_SRC_DIR}/arrow/adapters/orc/options.cc"
)

# Brotli codec: define ARROW_WITH_BROTLI and prepend the Brotli compression
# source to ARROW_SRCS.
# NOTE(review): add_definitions is directory-scoped, so the macro is not limited
# to the Arrow target; kept as-is to match the other codec blocks in this file.
add_definitions(-DARROW_WITH_BROTLI)
SET(ARROW_SRCS "${LIBRARY_DIR}/util/compression_brotli.cc" ${ARROW_SRCS})

# BZ2 codec: define ARROW_WITH_BZ2 and prepend the BZ2 compression source to
# ARROW_SRCS.
add_definitions(-DARROW_WITH_BZ2)
SET(ARROW_SRCS "${LIBRARY_DIR}/util/compression_bz2.cc" ${ARROW_SRCS})

add_definitions(-DARROW_WITH_LZ4)
SET(ARROW_SRCS "${LIBRARY_DIR}/util/compression_lz4.cc" ${ARROW_SRCS})

Expand All @@ -454,9 +482,6 @@ SET(ARROW_SRCS "${LIBRARY_DIR}/util/compression_zlib.cc" ${ARROW_SRCS})
add_definitions(-DARROW_WITH_ZSTD)
SET(ARROW_SRCS "${LIBRARY_DIR}/util/compression_zstd.cc" ${ARROW_SRCS})

add_definitions(-DARROW_WITH_BROTLI)
SET(ARROW_SRCS "${LIBRARY_DIR}/util/compression_brotli.cc" ${ARROW_SRCS})


add_library(_arrow ${ARROW_SRCS})
add_library(ch_contrib::arrow ALIAS _arrow)
Expand All @@ -473,6 +498,8 @@ target_link_libraries(_arrow PRIVATE
ch_contrib::zlib
ch_contrib::zstd
ch_contrib::brotli
ch_contrib::bzip2
ch_contrib::curl
)
target_link_libraries(_arrow PUBLIC _orc)

Expand All @@ -493,19 +520,23 @@ set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src/parquet")
set(GEN_LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src/generated")
# arrow/cpp/src/parquet/CMakeLists.txt
set(PARQUET_SRCS
"${LIBRARY_DIR}/arrow/fuzz_internal.cc"
"${LIBRARY_DIR}/arrow/path_internal.cc"
"${LIBRARY_DIR}/arrow/reader.cc"
"${LIBRARY_DIR}/arrow/reader_internal.cc"
"${LIBRARY_DIR}/arrow/schema.cc"
"${LIBRARY_DIR}/arrow/schema_internal.cc"
"${LIBRARY_DIR}/arrow/variant_internal.cc"
"${LIBRARY_DIR}/arrow/writer.cc"
"${LIBRARY_DIR}/benchmark_util.cc"
"${LIBRARY_DIR}/bloom_filter.cc"
"${LIBRARY_DIR}/bloom_filter_reader.cc"
"${LIBRARY_DIR}/chunker_internal.cc"
"${LIBRARY_DIR}/column_reader.cc"
"${LIBRARY_DIR}/column_scanner.cc"
"${LIBRARY_DIR}/column_writer.cc"
"${LIBRARY_DIR}/encoding.cc"
"${LIBRARY_DIR}/decoder.cc"
"${LIBRARY_DIR}/encoder.cc"
"${LIBRARY_DIR}/encryption/crypto_factory.cc"
"${LIBRARY_DIR}/encryption/encryption.cc"
"${LIBRARY_DIR}/encryption/encryption_internal.cc"
Expand All @@ -525,6 +556,9 @@ set(PARQUET_SRCS
"${LIBRARY_DIR}/exception.cc"
"${LIBRARY_DIR}/file_reader.cc"
"${LIBRARY_DIR}/file_writer.cc"
"${LIBRARY_DIR}/geospatial/statistics.cc"
"${LIBRARY_DIR}/geospatial/util_internal.cc"
"${LIBRARY_DIR}/geospatial/util_json_internal.cc"
"${LIBRARY_DIR}/level_comparison.cc"
"${LIBRARY_DIR}/level_comparison_avx2.cc"
"${LIBRARY_DIR}/level_conversion.cc"
Expand All @@ -535,13 +569,13 @@ set(PARQUET_SRCS
"${LIBRARY_DIR}/printer.cc"
"${LIBRARY_DIR}/properties.cc"
"${LIBRARY_DIR}/schema.cc"
"${LIBRARY_DIR}/size_statistics.cc"
"${LIBRARY_DIR}/statistics.cc"
"${LIBRARY_DIR}/stream_reader.cc"
"${LIBRARY_DIR}/stream_writer.cc"
"${LIBRARY_DIR}/types.cc"
"${LIBRARY_DIR}/xxhasher.cc"

"${GEN_LIBRARY_DIR}/parquet_constants.cpp"
"${GEN_LIBRARY_DIR}/parquet_types.cpp"
)
#list(TRANSFORM PARQUET_SRCS PREPEND "${LIBRARY_DIR}/") # cmake 3.12
Expand All @@ -558,7 +592,8 @@ target_link_libraries(_parquet
PRIVATE
boost::headers_only
boost::regex
OpenSSL::Crypto OpenSSL::SSL)
OpenSSL::Crypto OpenSSL::SSL
ch_contrib::rapidjson)

if (SANITIZE STREQUAL "undefined")
target_compile_options(_parquet PRIVATE -fno-sanitize=undefined)
Expand Down
29 changes: 18 additions & 11 deletions contrib/arrow-cmake/cpp/src/arrow/util/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,25 +15,21 @@
// specific language governing permissions and limitations
// under the License.

#define ARROW_VERSION_MAJOR 11
#define ARROW_VERSION_MAJOR 23
#define ARROW_VERSION_MINOR 0
#define ARROW_VERSION_PATCH 0
// Packed numeric version: (major * 1000 + minor) * 1000 + patch.
// The whole expansion is wrapped in parentheses so the macro acts as a single
// operand; without them, expressions such as `ARROW_VERSION / 1000` or
// `ARROW_VERSION % 1000` would bind only to the trailing patch term.
#define ARROW_VERSION (((ARROW_VERSION_MAJOR * 1000) + ARROW_VERSION_MINOR) * 1000 + ARROW_VERSION_PATCH)

#define ARROW_VERSION_STRING "11.0.0"
#define ARROW_VERSION_STRING "23.0.0"

#define ARROW_SO_VERSION "1100"
#define ARROW_FULL_SO_VERSION "1100.0.0"
#define ARROW_SO_VERSION "2300"
#define ARROW_FULL_SO_VERSION "2300.0.0"

#define ARROW_CXX_COMPILER_ID "Clang"
#define ARROW_CXX_COMPILER_VERSION "ClickHouse"
#define ARROW_CXX_COMPILER_FLAGS ""

#define ARROW_BUILD_TYPE ""

#define ARROW_GIT_ID ""
#define ARROW_GIT_DESCRIPTION ""

#define ARROW_PACKAGE_KIND ""

/* #undef ARROW_COMPUTE */
Expand All @@ -47,15 +43,26 @@
/* #undef ARROW_JEMALLOC */
/* #undef ARROW_JEMALLOC_VENDORED */
/* #undef ARROW_JSON */
/* #undef ARROW_MIMALLOC */
/* #undef ARROW_ORC */
/* #undef ARROW_PARQUET */
/* #undef ARROW_SUBSTRAIT */

/* #undef ARROW_AZURE */
/* #undef ARROW_ENABLE_THREADING */
/* #undef ARROW_GCS */
/* #undef ARROW_HDFS */
/* #undef ARROW_S3 */
/* #undef ARROW_USE_GLOG */
/* #undef ARROW_USE_NATIVE_INT128 */
/* #undef ARROW_WITH_BROTLI */
/* #undef ARROW_WITH_BZ2 */
/* #undef ARROW_WITH_LZ4 */
/* #undef ARROW_WITH_MUSL */
/* #undef ARROW_WITH_OPENTELEMETRY */
/* #undef ARROW_WITH_UCX */

/* #undef GRPCPP_PP_INCLUDE */
/* #undef ARROW_WITH_RE2 */
/* #undef ARROW_WITH_SNAPPY */
/* #undef ARROW_WITH_UTF8PROC */
/* #undef ARROW_WITH_ZLIB */
/* #undef ARROW_WITH_ZSTD */
/* #undef PARQUET_REQUIRE_ENCRYPTION */
24 changes: 24 additions & 0 deletions contrib/arrow-cmake/cpp/src/arrow/util/config_internal.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

// These variables are not exposed as they can make compilation caching
// and incremental builds less efficient.

#define ARROW_CXX_COMPILER_FLAGS ""

#define ARROW_GIT_ID ""
#define ARROW_GIT_DESCRIPTION ""
11 changes: 5 additions & 6 deletions contrib/arrow-cmake/flight.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,6 @@ add_custom_command(
# protobuf-internal.cc
set(ARROW_FLIGHT_SRCS
${ARROW_FLIGHT_GENERATED_SRC_DIR}/Flight.pb.cc
${ARROW_FLIGHT_SRC_DIR}/transport/grpc/grpc_client.cc
${ARROW_FLIGHT_SRC_DIR}/transport/grpc/grpc_server.cc
${ARROW_FLIGHT_SRC_DIR}/transport/grpc/protocol_grpc_internal.cc
${ARROW_FLIGHT_SRC_DIR}/transport/grpc/serialization_internal.cc
${ARROW_FLIGHT_SRC_DIR}/transport/grpc/util_internal.cc
${ARROW_FLIGHT_SRC_DIR}/client.cc
${ARROW_FLIGHT_SRC_DIR}/client_cookie_middleware.cc
${ARROW_FLIGHT_SRC_DIR}/client_tracing_middleware.cc
Expand All @@ -50,10 +45,14 @@ set(ARROW_FLIGHT_SRCS
${ARROW_FLIGHT_SRC_DIR}/serialization_internal.cc
${ARROW_FLIGHT_SRC_DIR}/server.cc
${ARROW_FLIGHT_SRC_DIR}/server_auth.cc
${ARROW_FLIGHT_SRC_DIR}/server_middleware.cc
${ARROW_FLIGHT_SRC_DIR}/server_tracing_middleware.cc
${ARROW_FLIGHT_SRC_DIR}/transport.cc
${ARROW_FLIGHT_SRC_DIR}/transport_server.cc
${ARROW_FLIGHT_SRC_DIR}/transport/grpc/grpc_client.cc
${ARROW_FLIGHT_SRC_DIR}/transport/grpc/grpc_server.cc
${ARROW_FLIGHT_SRC_DIR}/transport/grpc/protocol_grpc_internal.cc
${ARROW_FLIGHT_SRC_DIR}/transport/grpc/serialization_internal.cc
${ARROW_FLIGHT_SRC_DIR}/transport/grpc/util_internal.cc
${ARROW_FLIGHT_SRC_DIR}/types.cc
)

Expand Down
2 changes: 1 addition & 1 deletion contrib/flatbuffers
Submodule flatbuffers updated 572 files
4 changes: 4 additions & 0 deletions docs/en/sql-reference/statements/system.md
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,10 @@ Clears the mark cache.

Clears the iceberg metadata cache.

## SYSTEM DROP PARQUET METADATA CACHE {#drop-parquet-metadata-cache}

Clears the Parquet metadata cache.

## SYSTEM CLEAR|DROP TEXT INDEX CACHES {#drop-text-index-caches}

Clears the text index's header, dictionary and postings caches.
Expand Down
16 changes: 16 additions & 0 deletions programs/local/LocalServer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,10 @@ namespace ServerSetting
extern const ServerSettingsUInt64 iceberg_metadata_files_cache_size;
extern const ServerSettingsUInt64 iceberg_metadata_files_cache_max_entries;
extern const ServerSettingsDouble iceberg_metadata_files_cache_size_ratio;
extern const ServerSettingsString parquet_metadata_cache_policy;
extern const ServerSettingsUInt64 parquet_metadata_cache_size;
extern const ServerSettingsUInt64 parquet_metadata_cache_max_entries;
extern const ServerSettingsDouble parquet_metadata_cache_size_ratio;
extern const ServerSettingsUInt64 max_active_parts_loading_thread_pool_size;
extern const ServerSettingsUInt64 max_io_thread_pool_free_size;
extern const ServerSettingsUInt64 max_io_thread_pool_size;
Expand Down Expand Up @@ -972,6 +976,18 @@ void LocalServer::processConfig()
}
global_context->setIcebergMetadataFilesCache(iceberg_metadata_files_cache_policy, iceberg_metadata_files_cache_size, iceberg_metadata_files_cache_max_entries, iceberg_metadata_files_cache_size_ratio);
#endif
#if USE_PARQUET
String parquet_metadata_cache_policy = server_settings[ServerSetting::parquet_metadata_cache_policy];
size_t parquet_metadata_cache_size = server_settings[ServerSetting::parquet_metadata_cache_size];
size_t parquet_metadata_cache_max_entries = server_settings[ServerSetting::parquet_metadata_cache_max_entries];
double parquet_metadata_cache_size_ratio = server_settings[ServerSetting::parquet_metadata_cache_size_ratio];
if (parquet_metadata_cache_size > max_cache_size)
{
parquet_metadata_cache_size = max_cache_size;
LOG_INFO(log, "Lowered Parquet metadata cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(parquet_metadata_cache_size));
}
global_context->setParquetMetadataCache(parquet_metadata_cache_policy, parquet_metadata_cache_size, parquet_metadata_cache_max_entries, parquet_metadata_cache_size_ratio);
#endif

Names allowed_disks_table_engines;
splitInto<','>(allowed_disks_table_engines, server_settings[ServerSetting::allowed_disks_for_table_engines].value);
Expand Down
Loading
Loading