diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt index 55addaa666c6c9..7ab17efcaf0bde 100644 --- a/be/CMakeLists.txt +++ b/be/CMakeLists.txt @@ -469,6 +469,7 @@ include_directories( ${GENSRC_DIR}/ ${THIRDPARTY_DIR}/include ${GPERFTOOLS_HOME}/include + ${THIRDPARTY_DIR}/include/paimon # paimon-cpp headers ) if ("${DORIS_JAVA_HOME}" STREQUAL "") diff --git a/be/cmake/thirdparty.cmake b/be/cmake/thirdparty.cmake index 9bb7b8ba748769..3a0aa5b0896f89 100644 --- a/be/cmake/thirdparty.cmake +++ b/be/cmake/thirdparty.cmake @@ -179,3 +179,17 @@ add_thirdparty(icudata LIB64) add_thirdparty(pugixml LIB64) + +# paimon-cpp libraries +# Main paimon library +add_thirdparty(paimon_static LIB64) +# File format libraries +add_thirdparty(paimon_parquet_file_format_static LIB64) +add_thirdparty(paimon_orc_file_format_static LIB64) +add_thirdparty(paimon_avro_file_format_static LIB64) +add_thirdparty(paimon_blob_file_format_static LIB64) +# File system libraries +add_thirdparty(paimon_local_file_system_static LIB64) +# Index libraries +add_thirdparty(paimon_file_index_static LIB64) +add_thirdparty(paimon_global_index_static LIB64) diff --git a/be/src/vec/exec/format/table/paimon_cpp_reader.cpp b/be/src/vec/exec/format/table/paimon_cpp_reader.cpp new file mode 100644 index 00000000000000..fa658606df4a00 --- /dev/null +++ b/be/src/vec/exec/format/table/paimon_cpp_reader.cpp @@ -0,0 +1,266 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
+// You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "paimon_cpp_reader.h"
+
+#include <algorithm>
+#include <utility>
+
+#include "arrow/c/bridge.h"
+#include "arrow/record_batch.h"
+#include "arrow/result.h"
+#include "paimon/defs.h"
+#include "paimon/memory/memory_pool.h"
+#include "paimon/read_context.h"
+#include "paimon/table/source/table_read.h"
+#include "vec/exec/format/table/paimon_doris_file_system.h"
+#include "runtime/descriptors.h"
+#include "runtime/runtime_state.h"
+#include "util/url_coding.h"
+#include "vec/core/block.h"
+#include "vec/core/column_with_type_and_name.h"
+
+namespace doris::vectorized {
+#include "common/compile_check_begin.h"
+
+namespace {
+// Name of the Arrow field that get_next_block() skips instead of copying it
+// into a block column.
+constexpr const char* VALUE_KIND_FIELD = "_VALUE_KIND";
+
+} // namespace
+
+// Stores the scan inputs, resolves the default time zone into _ctzz and picks
+// up the table-level row count from the scan range when it is set
+// (-1 otherwise, which disables the COUNT fast path in get_next_block()).
+PaimonCppReader::PaimonCppReader(const std::vector<SlotDescriptor*>& file_slot_descs,
+                                 RuntimeState* state, RuntimeProfile* profile,
+                                 const TFileRangeDesc& range,
+                                 const TFileScanRangeParams* range_params)
+        : _file_slot_descs(file_slot_descs),
+          _state(state),
+          _profile(profile),
+          _range(range),
+          _range_params(range_params) {
+    TimezoneUtils::find_cctz_time_zone(TimezoneUtils::default_time_zone, _ctzz);
+    if (range.__isset.table_format_params &&
+        range.table_format_params.__isset.table_level_row_count) {
+        _remaining_table_level_row_count = range.table_format_params.table_level_row_count;
+    } else {
+        _remaining_table_level_row_count = -1;
+    }
+}
+
+PaimonCppReader::~PaimonCppReader() = default;
+
+// Public entry point; all work happens in _init_paimon_reader().
+Status PaimonCppReader::init_reader() {
+    return _init_paimon_reader();
+}
+
+// Fills `block` with the next batch.
+//
+// - COUNT push-down with a known table-level row count: no data is read; every
+//   column is resized to min(remaining rows, batch_size) and eof is reported
+//   once the remaining count reaches zero.
+// - Otherwise: one batch is pulled from the paimon batch reader, imported as an
+//   Arrow record batch and copied column by column into the block, matching
+//   Arrow field names against block column names.
+//
+// Returns InternalError when the reader is not initialized, when paimon or the
+// Arrow import fails, when an Arrow field has no matching block column, or when
+// the Arrow-to-column conversion throws.
+Status PaimonCppReader::get_next_block(Block* block, size_t* read_rows, bool* eof) {
+    if (_push_down_agg_type == TPushAggOp::type::COUNT && _remaining_table_level_row_count >= 0) {
+        const auto rows = std::min(_remaining_table_level_row_count,
+                                   static_cast<int64_t>(_state->query_options().batch_size));
+        _remaining_table_level_row_count -= rows;
+        auto mutate_columns = block->mutate_columns();
+        for (auto& col : mutate_columns) {
+            col->resize(rows);
+        }
+        block->set_columns(std::move(mutate_columns));
+        *read_rows = rows;
+        *eof = (_remaining_table_level_row_count == 0);
+        return Status::OK();
+    }
+
+    if (!_batch_reader) {
+        return Status::InternalError("paimon-cpp reader is not initialized");
+    }
+
+    // The name -> position map is captured from the first block seen and reused.
+    if (_col_name_to_block_idx.empty()) {
+        _col_name_to_block_idx = block->get_name_to_pos_map();
+    }
+
+    auto batch_result = _batch_reader->NextBatch();
+    if (!batch_result.ok()) {
+        return Status::InternalError("paimon-cpp read batch failed: {}",
+                                     batch_result.status().ToString());
+    }
+    auto batch = std::move(batch_result).value();
+    if (paimon::BatchReader::IsEofBatch(batch)) {
+        *read_rows = 0;
+        *eof = true;
+        return Status::OK();
+    }
+
+    arrow::Result<std::shared_ptr<arrow::RecordBatch>> import_result =
+            arrow::ImportRecordBatch(batch.first.get(), batch.second.get());
+    if (!import_result.ok()) {
+        return Status::InternalError("failed to import paimon-cpp arrow batch: {}",
+                                     import_result.status().message());
+    }
+
+    auto record_batch = std::move(import_result).ValueUnsafe();
+    const auto num_rows = record_batch->num_rows();
+    const auto num_columns = record_batch->num_columns();
+    for (int c = 0; c < num_columns; ++c) {
+        const auto& field = record_batch->schema()->field(c);
+        if (field->name() == VALUE_KIND_FIELD) {
+            continue;
+        }
+
+        auto it = _col_name_to_block_idx.find(field->name());
+        if (it == _col_name_to_block_idx.end()) {
+            return Status::InternalError("paimon-cpp column {} not found in block", field->name());
+        }
+        const vectorized::ColumnWithTypeAndName& column_with_name =
+                block->get_by_position(it->second);
+        try {
+            RETURN_IF_ERROR(column_with_name.type->get_serde()->read_column_from_arrow(
+                    column_with_name.column->assume_mutable_ref(), record_batch->column(c).get(), 0,
+                    num_rows, _ctzz));
+        } catch (const Exception& e) {
+            return Status::InternalError("Failed to convert from arrow to block: {}", e.what());
+        }
+    }
+
+    *read_rows = num_rows;
+    *eof = false;
+    return Status::OK();
+}
+
+// Reports name and type of every requested slot. `missing_cols` is left untouched.
+Status PaimonCppReader::get_columns(std::unordered_map<std::string, DataTypePtr>* name_to_type,
+                                    std::unordered_set<std::string>* missing_cols) {
+    for (const auto& slot : _file_slot_descs) {
+        name_to_type->emplace(slot->col_name(), slot->type());
+    }
+    return Status::OK();
+}
+
+// Closes the paimon batch reader if one was created; always returns OK.
+Status PaimonCppReader::close() {
+    if (_batch_reader) {
+        _batch_reader->Close();
+    }
+    return Status::OK();
+}
+
+// Decodes the split, builds a paimon ReadContext for the table path (with the
+// projected columns and merged options) and creates the TableRead plus the
+// batch reader for this split. Any paimon failure is mapped to InternalError.
+Status PaimonCppReader::_init_paimon_reader() {
+    register_paimon_doris_file_system();
+    RETURN_IF_ERROR(_decode_split(&_split));
+
+    auto table_path_opt = _resolve_table_path();
+    if (!table_path_opt.has_value()) {
+        return Status::InternalError(
+                "paimon-cpp missing paimon_table; cannot resolve paimon table root path");
+    }
+    auto options = _build_options();
+    auto read_columns = _build_read_columns();
+
+    std::string table_path = std::move(table_path_opt.value());
+    paimon::ReadContextBuilder builder(table_path);
+    if (!read_columns.empty()) {
+        builder.SetReadSchema(read_columns);
+    }
+    if (!options.empty()) {
+        builder.SetOptions(options);
+    }
+
+    auto context_result = builder.Finish();
+    if (!context_result.ok()) {
+        return Status::InternalError("paimon-cpp build read context failed: {}",
+                                     context_result.status().ToString());
+    }
+    auto context = std::move(context_result).value();
+
+    auto table_read_result = paimon::TableRead::Create(std::move(context));
+    if (!table_read_result.ok()) {
+        return Status::InternalError("paimon-cpp create table read failed: {}",
+                                     table_read_result.status().ToString());
+    }
+    auto table_read = std::move(table_read_result).value();
+    auto reader_result = table_read->CreateReader(_split);
+    if (!reader_result.ok()) {
+        return Status::InternalError("paimon-cpp create reader failed: {}",
+                                     reader_result.status().ToString());
+    }
+    // Members are only assigned once both objects were created successfully.
+    _table_read = std::move(table_read);
+    _batch_reader = std::move(reader_result).value();
+    return Status::OK();
+}
+
+// Base64-decodes `paimon_split` from the scan range and deserializes it with
+// the default paimon memory pool. Fails with InternalError when the field is
+// missing, the base64 payload is invalid, or deserialization fails.
+Status PaimonCppReader::_decode_split(std::shared_ptr<paimon::Split>* split) {
+    if (!_range.__isset.table_format_params || !_range.table_format_params.__isset.paimon_params ||
+        !_range.table_format_params.paimon_params.__isset.paimon_split) {
+        return Status::InternalError("paimon-cpp missing paimon_split in scan range");
+    }
+    const auto& encoded_split = _range.table_format_params.paimon_params.paimon_split;
+    std::string decoded_split;
+    if (!base64_decode(encoded_split, &decoded_split)) {
+        return Status::InternalError("paimon-cpp base64 decode paimon_split failed");
+    }
+    auto pool = paimon::GetDefaultPool();
+    auto split_result =
+            paimon::Split::Deserialize(decoded_split.data(), decoded_split.size(), pool);
+    if (!split_result.ok()) {
+        return Status::InternalError("paimon-cpp deserialize split failed: {}",
+                                     split_result.status().ToString());
+    }
+    *split = std::move(split_result).value();
+    return Status::OK();
+}
+
+// Returns `paimon_table` from the scan range when it is set and non-empty,
+// std::nullopt otherwise.
+std::optional<std::string> PaimonCppReader::_resolve_table_path() const {
+    if (_range.__isset.table_format_params && _range.table_format_params.__isset.paimon_params &&
+        _range.table_format_params.paimon_params.__isset.paimon_table &&
+        !_range.table_format_params.paimon_params.paimon_table.empty()) {
+        return _range.table_format_params.paimon_params.paimon_table;
+    }
+    return std::nullopt;
+}
+
+// Column names of the requested slots, in slot order.
+std::vector<std::string> PaimonCppReader::_build_read_columns() const {
+    std::vector<std::string> columns;
+    columns.reserve(_file_slot_descs.size());
+    for (const auto& slot : _file_slot_descs) {
+        columns.emplace_back(slot->col_name());
+    }
+    return columns;
+}
+
+// Builds the option map handed to paimon. Later sources override earlier ones:
+//   1. paimon_options from the scan range,
+//   2. scan-level properties if present and non-empty, otherwise hadoop_conf
+//      from the scan range,
+//   3. the file system option, which is always forced to "doris".
+std::map<std::string, std::string> PaimonCppReader::_build_options() const {
+    std::map<std::string, std::string> options;
+    if (_range.__isset.table_format_params && _range.table_format_params.__isset.paimon_params &&
+        _range.table_format_params.paimon_params.__isset.paimon_options) {
+        options.insert(_range.table_format_params.paimon_params.paimon_options.begin(),
+                       _range.table_format_params.paimon_params.paimon_options.end());
+    }
+
+    if (_range_params && _range_params->__isset.properties && !_range_params->properties.empty()) {
+        for (const auto& kv : _range_params->properties) {
+            options[kv.first] = kv.second;
+        }
+    } else if (_range.__isset.table_format_params &&
+               _range.table_format_params.__isset.paimon_params &&
+               _range.table_format_params.paimon_params.__isset.hadoop_conf) {
+        for (const auto& kv : _range.table_format_params.paimon_params.hadoop_conf) {
+            options[kv.first] = kv.second;
+        }
+    }
+
+    options[paimon::Options::FILE_SYSTEM] = "doris";
+    return options;
+}
+
+#include "common/compile_check_end.h"
+} // namespace doris::vectorized
diff --git a/be/src/vec/exec/format/table/paimon_cpp_reader.h b/be/src/vec/exec/format/table/paimon_cpp_reader.h
new file mode 100644
index 00000000000000..73cd7c360d8f3c
--- /dev/null
+++ b/be/src/vec/exec/format/table/paimon_cpp_reader.h
@@ -0,0 +1,90 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <map>
+#include <memory>
+#include <optional>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+#include "cctz/time_zone.h"
+#include "common/status.h"
+#include "exec/olap_common.h"
+#include "paimon/reader/batch_reader.h"
+#include "paimon/table/source/split.h"
+#include "vec/exec/format/generic_reader.h"
+
+namespace paimon {
+class TableRead;
+} // namespace paimon
+
+namespace doris {
+class RuntimeProfile;
+class RuntimeState;
+class SlotDescriptor;
+} // namespace doris
+
+namespace doris::vectorized {
+#include "common/compile_check_begin.h"
+
+class Block;
+
+// GenericReader that reads one paimon split through the paimon-cpp library and
+// converts the Arrow batches it produces into Doris blocks.
+//
+// Usage: construct, call init_reader() once, then call get_next_block() until
+// it reports eof, and finally close().
+class PaimonCppReader : public GenericReader {
+    ENABLE_FACTORY_CREATOR(PaimonCppReader);
+
+public:
+    // `file_slot_descs` and `range` are stored by reference and `range_params`
+    // by pointer; all three must outlive this reader.
+    PaimonCppReader(const std::vector<SlotDescriptor*>& file_slot_descs, RuntimeState* state,
+                    RuntimeProfile* profile, const TFileRangeDesc& range,
+                    const TFileScanRangeParams* range_params);
+    ~PaimonCppReader() override;
+
+    // Creates the underlying paimon reader; must succeed before data is read.
+    Status init_reader();
+    // Reads the next batch into `block`; sets `read_rows` and `eof`.
+    Status get_next_block(Block* block, size_t* read_rows, bool* eof) override;
+    // Fills `name_to_type` from the requested slots.
+    Status get_columns(std::unordered_map<std::string, DataTypePtr>* name_to_type,
+                       std::unordered_set<std::string>* missing_cols) override;
+    Status close() override;
+
+private:
+    Status _init_paimon_reader();
+    // Decodes the base64 `paimon_split` of the scan range into `split`.
+    Status _decode_split(std::shared_ptr<paimon::Split>* split);
+    // Resolve paimon table root path for schema/manifest lookup.
+    std::optional<std::string> _resolve_table_path() const;
+    // Names of the columns to project, taken from the requested slots.
+    std::vector<std::string> _build_read_columns() const;
+    // Options passed to paimon-cpp, merged from the scan range and scan params.
+    std::map<std::string, std::string> _build_options() const;
+
+    const std::vector<SlotDescriptor*>& _file_slot_descs;
+    RuntimeState* _state = nullptr;
+    [[maybe_unused]] RuntimeProfile* _profile = nullptr;
+    const TFileRangeDesc& _range;
+    const TFileScanRangeParams* _range_params = nullptr;
+
+    std::shared_ptr<paimon::Split> _split;
+    std::unique_ptr<paimon::TableRead> _table_read;
+    std::unique_ptr<paimon::BatchReader> _batch_reader;
+
+    // Block column name -> position, captured from the first block read.
+    std::unordered_map<std::string, uint32_t> _col_name_to_block_idx;
+    // Rows still to be reported by the COUNT push-down path; -1 when the scan
+    // range carries no table-level row count.
+    int64_t _remaining_table_level_row_count = -1;
+    // Time zone passed to the Arrow -> column conversion.
+    cctz::time_zone _ctzz;
+};
+
+#include "common/compile_check_end.h"
+} // namespace doris::vectorized
diff --git a/be/src/vec/exec/format/table/paimon_doris_file_system.cpp b/be/src/vec/exec/format/table/paimon_doris_file_system.cpp
new file mode 100644
index 00000000000000..74374b4a84e9c6
--- /dev/null
+++ b/be/src/vec/exec/format/table/paimon_doris_file_system.cpp
@@ -0,0 +1,643 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "paimon_doris_file_system.h"
+
+#include <algorithm>
+#include <cctype>
+#include <functional>
+#include <map>
+#include <memory>
+#include <mutex>
+#include <string>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "common/status.h"
+#include "gen_cpp/Types_types.h"
+#include "io/file_factory.h"
+#include "io/fs/file_reader.h"
+#include "io/fs/file_system.h"
+#include "io/fs/file_writer.h"
+#include "io/fs/local_file_system.h"
+#include "paimon/factories/factory.h"
+#include "paimon/fs/file_system.h"
+#include "paimon/fs/file_system_factory.h"
+#include "paimon/result.h"
+#include "paimon/status.h"
+
+namespace paimon {
+
+// File-local helpers. They live in an unnamed namespace so they get internal
+// linkage and cannot clash with symbols of the statically linked paimon
+// libraries, which share the `paimon` namespace.
+namespace {
+
+// Scheme and authority components extracted from a path by parse_uri().
+struct ParsedUri {
+    std::string scheme;
+    std::string authority;
+};
+
+// Location of the scheme delimiter inside a path.
+struct SchemeDelimiter {
+    size_t pos = std::string::npos; // index of the ':'; npos when none was found
+    size_t len = 0;                 // 3 for "://", 2 for ":/"
+};
+
+// Finds the first "://" in `path`; if there is none, falls back to the first ":/".
+SchemeDelimiter find_scheme_delimiter(const std::string& path) {
+    if (size_t pos = path.find("://"); pos != std::string::npos) {
+        return {pos, 3};
+    }
+    if (size_t pos = path.find(":/"); pos != std::string::npos) {
+        return {pos, 2};
+    }
+    return {};
+}
+
+// Returns a lower-cased copy of `value`.
+std::string to_lower(std::string value) {
+    std::ranges::transform(value, value.begin(),
+                           [](unsigned char c) { return static_cast<char>(std::tolower(c)); });
+    return value;
+}
+
+// Splits `path` into a lower-cased scheme and an authority. Both are empty when
+// the path has no scheme delimiter or starts with one. The authority is the
+// text between the delimiter and the next '/', and is empty when the delimiter
+// is directly followed by '/' or by the end of the path.
+//
+// NOTE(review): for the single-slash form ("scheme:/a/b") the first path
+// segment ("a") is reported as the authority. Confirm this is intended before
+// relying on the authority of such paths.
+ParsedUri parse_uri(const std::string& path) {
+    ParsedUri parsed;
+    const SchemeDelimiter delim = find_scheme_delimiter(path);
+    if (delim.pos == std::string::npos || delim.pos == 0) {
+        return parsed;
+    }
+    parsed.scheme = to_lower(path.substr(0, delim.pos));
+    const size_t authority_start = delim.pos + delim.len;
+    if (authority_start >= path.size() || path[authority_start] == '/') {
+        return parsed;
+    }
+    const size_t next_slash = path.find('/', authority_start);
+    if (next_slash == std::string::npos) {
+        parsed.authority = path.substr(authority_start);
+    } else {
+        parsed.authority = path.substr(authority_start, next_slash - authority_start);
+    }
+    return parsed;
+}
+
+// Schemes that are served through the S3 file type.
+bool is_s3_scheme(const std::string& scheme) {
+    return scheme == "s3" || scheme == "s3a" || scheme == "s3n" || scheme == "oss" ||
+           scheme == "obs" || scheme == "cos" || scheme == "cosn" || scheme == "gs" ||
+           scheme == "abfs" || scheme == "abfss" || scheme == "wasb" || scheme == "wasbs";
+}
+
+// Schemes that are served through the HDFS file type.
+bool is_hdfs_scheme(const std::string& scheme) {
+    return scheme == "hdfs" || scheme == "viewfs" || scheme == "local";
+}
+
+bool is_http_scheme(const std::string& scheme) {
+    return scheme == "http" || scheme == "https";
+}
+
+// Maps a (lower-cased) scheme to the Doris file type used to open it.
+// An empty or unrecognized scheme falls back to HDFS.
+doris::TFileType::type map_scheme_to_file_type(const std::string& scheme) {
+    if (scheme.empty()) {
+        return doris::TFileType::FILE_HDFS;
+    }
+    if (scheme == "file") {
+        return doris::TFileType::FILE_LOCAL;
+    }
+    if (is_hdfs_scheme(scheme)) {
+        return doris::TFileType::FILE_HDFS;
+    }
+    if (is_s3_scheme(scheme)) {
+        return doris::TFileType::FILE_S3;
+    }
+    if (is_http_scheme(scheme)) {
+        return doris::TFileType::FILE_HTTP;
+    }
+    if (scheme == "ofs" || scheme == "gfs" || scheme == "jfs") {
+        return doris::TFileType::FILE_BROKER;
+    }
+    return doris::TFileType::FILE_HDFS;
+}
+
+// Returns `path` with its scheme replaced by `scheme` and a "://" delimiter;
+// a path without a scheme delimiter is returned unchanged.
+std::string replace_scheme(const std::string& path, const std::string& scheme) {
+    const SchemeDelimiter delim = find_scheme_delimiter(path);
+    if (delim.pos == std::string::npos) {
+        return path;
+    }
+    return scheme + "://" + path.substr(delim.pos + delim.len);
+}
+
+// Strips the "file:" prefix (and a "//authority" part, if present) so that the
+// result is a plain local path. Paths that do not start with "file:" are
+// returned unchanged; "file://authority" without a path yields "".
+std::string normalize_local_path(const std::string& path) {
+    if (!path.starts_with("file:")) {
+        return path;
+    }
+    constexpr size_t file_prefix_len = 5;
+    size_t start = file_prefix_len;
+    if (path.compare(start, 2, "//") == 0 && path.size() - start > 2) {
+        size_t next_slash = path.find('/', start + 2);
+        if (next_slash == std::string::npos) {
+            return "";
+        }
+        start = next_slash;
+    }
+    return path.substr(start);
+}
+
+// Rewrites `path` into the form passed to the Doris file system of `type`:
+// local paths lose their "file:" prefix, S3-compatible schemes other than "s3"
+// are rewritten to "s3://", everything else is passed through.
+std::string normalize_path_for_type(const std::string& path, const std::string& scheme,
+                                    doris::TFileType::type type) {
+    if (type == doris::TFileType::FILE_LOCAL) {
+        return normalize_local_path(path);
+    }
+    if (type == doris::TFileType::FILE_S3 && scheme != "s3" && !is_http_scheme(scheme)) {
+        return replace_scheme(path, "s3");
+    }
+    return path;
+}
+
+// Key under which a created file system is cached in DorisFileSystem.
+std::string build_fs_cache_key(doris::TFileType::type type, const ParsedUri& uri,
+                               const std::string& default_fs_name) {
+    switch (type) {
+    case doris::TFileType::FILE_LOCAL:
+        return "local";
+    case doris::TFileType::FILE_S3:
+        return "s3://" + uri.authority;
+    case doris::TFileType::FILE_HTTP:
+        // Keep the real scheme in the key: resolve_path() creates the HTTP file
+        // system with a scheme-specific "uri" property, so "http://host" and
+        // "https://host" must not share one cache entry.
+        return uri.scheme + "://" + uri.authority;
+    case doris::TFileType::FILE_BROKER:
+        return "broker";
+    case doris::TFileType::FILE_HDFS:
+    default:
+        if (!uri.scheme.empty() || !uri.authority.empty()) {
+            return uri.scheme + "://" + uri.authority;
+        }
+        return default_fs_name;
+    }
+}
+
+// Converts a Doris status into the closest paimon status; every error code
+// without an explicit mapping becomes IOError.
+paimon::Status to_paimon_status(const doris::Status& status) {
+    if (status.ok()) {
+        return paimon::Status::OK();
+    }
+    switch (status.code()) {
+    case doris::ErrorCode::NOT_FOUND:
+    case doris::ErrorCode::DIR_NOT_EXIST:
+        return paimon::Status::NotExist(status.to_string());
+    case doris::ErrorCode::ALREADY_EXIST:
+    case doris::ErrorCode::FILE_ALREADY_EXIST:
+        return paimon::Status::Exist(status.to_string());
+    case doris::ErrorCode::INVALID_ARGUMENT:
+    case doris::ErrorCode::INVALID_INPUT_SYNTAX:
+        return paimon::Status::Invalid(status.to_string());
+    case doris::ErrorCode::NOT_IMPLEMENTED_ERROR:
+        return paimon::Status::NotImplemented(status.to_string());
+    default:
+        return paimon::Status::IOError(status.to_string());
+    }
+}
+
+// Joins `base` and `child` with exactly one '/' between them (an empty base
+// yields `child`).
+std::string join_path(const std::string& base, const std::string& child) {
+    if (base.empty()) {
+        return child;
+    }
+    if (base.back() == '/') {
+        return base + child;
+    }
+    return base + "/" + child;
+}
+
+// Parent directory of a scheme-less path. Trailing slashes are ignored; the
+// result is "" when the path contains no '/' and "/" for entries directly
+// under the root.
+std::string parent_path_no_scheme(const std::string& path) {
+    if (path.empty()) {
+        return "";
+    }
+    size_t end = path.size();
+    while (end > 1 && path[end - 1] == '/') {
+        --end;
+    }
+    size_t pos = path.rfind('/', end - 1);
+    if (pos == std::string::npos) {
+        return "";
+    }
+    if (pos == 0) {
+        return "/";
+    }
+    return path.substr(0, pos);
+}
+
+// Parent directory of `path`, keeping "scheme://authority" in front of it.
+// Returns "" when the path has no path component after the authority or no
+// parent can be derived.
+std::string parent_path(const std::string& path) {
+    ParsedUri uri = parse_uri(path);
+    if (uri.scheme.empty()) {
+        return parent_path_no_scheme(path);
+    }
+    const SchemeDelimiter delim = find_scheme_delimiter(path);
+    if (delim.pos == std::string::npos) {
+        return parent_path_no_scheme(path);
+    }
+    size_t start = delim.pos + delim.len;
+    size_t slash = path.find('/', start);
+    if (slash == std::string::npos) {
+        return "";
+    }
+    std::string path_part = path.substr(slash);
+    std::string parent_part = parent_path_no_scheme(path_part);
+    if (parent_part.empty()) {
+        return "";
+    }
+    std::string prefix = uri.scheme + "://";
+    if (!uri.authority.empty()) {
+        prefix += uri.authority;
+    }
+    return prefix + parent_part;
+}
+
+} // namespace
+
+// paimon InputStream backed by a Doris FileReader. The stream keeps its own
+// read position; every read is issued as a positional read_at() call.
+class DorisInputStream : public InputStream {
+public:
+    DorisInputStream(doris::io::FileReaderSPtr reader, std::string path)
+            : reader_(std::move(reader)), path_(std::move(path)) {}
+
+    // Moves the stream position. Only a negative target is rejected; the
+    // target is not checked against the file size.
+    Status Seek(int64_t offset, SeekOrigin origin) override {
+        int64_t target = 0;
+        if (origin == SeekOrigin::FS_SEEK_SET) {
+            target = offset;
+        } else if (origin == SeekOrigin::FS_SEEK_CUR) {
+            target = position_ + offset;
+        } else if (origin == SeekOrigin::FS_SEEK_END) {
+            target = static_cast<int64_t>(reader_->size()) + offset;
+        } else {
+            return Status::Invalid("unknown seek origin");
+        }
+        if (target < 0) {
+            return Status::Invalid("seek position is negative");
+        }
+        position_ = target;
+        return Status::OK();
+    }
+
+    Result<int64_t> GetPos() const override { return position_; }
+
+    // Reads exactly `size` bytes at the current position and advances it.
+    // A short read is reported as IOError.
+    Result<int32_t> Read(char* buffer, uint32_t size) override {
+        size_t bytes_read = 0;
+        doris::Status status = reader_->read_at(position_, doris::Slice(buffer, size), &bytes_read);
+        if (!status.ok()) {
+            return to_paimon_status(status);
+        }
+        if (bytes_read != size) {
+            return Status::IOError("read size ", bytes_read, " != expected ", size);
+        }
+        position_ += static_cast<int64_t>(bytes_read);
+        return static_cast<int32_t>(bytes_read);
+    }
+
+    // Reads exactly `size` bytes at `offset` without touching the stream
+    // position. A short read is reported as IOError.
+    Result<int32_t> Read(char* buffer, uint32_t size, uint64_t offset) override {
+        size_t bytes_read = 0;
+        doris::Status status = reader_->read_at(offset, doris::Slice(buffer, size), &bytes_read);
+        if (!status.ok()) {
+            return to_paimon_status(status);
+        }
+        if (bytes_read != size) {
+            return Status::IOError("read size ", bytes_read, " != expected ", size);
+        }
+        return static_cast<int32_t>(bytes_read);
+    }
+
+    // Performs the read synchronously on the calling thread and then invokes
+    // `callback` with the resulting status.
+    void ReadAsync(char* buffer, uint32_t size, uint64_t offset,
+                   std::function<void(Status)>&& callback) override {
+        Result<int32_t> result = Read(buffer, size, offset);
+        Status status = Status::OK();
+        if (!result.ok()) {
+            status = result.status();
+        }
+        callback(status);
+    }
+
+    Result<std::string> GetUri() const override { return path_; }
+
+    Result<uint64_t> Length() const override { return static_cast<uint64_t>(reader_->size()); }
+
+    Status Close() override { return to_paimon_status(reader_->close()); }
+
+private:
+    doris::io::FileReaderSPtr reader_;
+    std::string path_;
+    int64_t position_ = 0;
+};
+
+// paimon OutputStream backed by a Doris FileWriter.
+class DorisOutputStream : public OutputStream {
+public:
+    DorisOutputStream(doris::io::FileWriterPtr writer, std::string path)
+            : writer_(std::move(writer)), path_(std::move(path)) {}
+
+    // Appends `size` bytes; on success the full size is reported as written.
+    Result<int32_t> Write(const char* buffer, uint32_t size) override {
+        doris::Status status = writer_->append(doris::Slice(buffer, size));
+        if (!status.ok()) {
+            return to_paimon_status(status);
+        }
+        return static_cast<int32_t>(size);
+    }
+
+    // No-op: nothing is forwarded to the underlying writer here.
+    Status Flush() override { return Status::OK(); }
+
+    Result<int64_t> GetPos() const override {
+        return static_cast<int64_t>(writer_->bytes_appended());
+    }
+
+    Result<std::string> GetUri() const override { return path_; }
+
+    Status Close() override { return to_paimon_status(writer_->close()); }
+
+private:
+    doris::io::FileWriterPtr writer_;
+    std::string path_;
+};
+
+// Minimal file status: path plus directory flag.
+class DorisBasicFileStatus : public BasicFileStatus {
+public:
+    DorisBasicFileStatus(std::string path, bool is_dir) : path_(std::move(path)), is_dir_(is_dir) {}
+
+    bool IsDir() const override { return is_dir_; }
+    std::string GetPath() const override { return path_; }
+
+private:
+    std::string path_;
+    bool is_dir_;
+};
+
+// Full file status: path, directory flag, length and modification time.
+class DorisFileStatus : public FileStatus {
+public:
+    DorisFileStatus(std::string path, bool is_dir, uint64_t length, int64_t mtime)
+            : path_(std::move(path)), is_dir_(is_dir), length_(length), mtime_(mtime) {}
+
+    uint64_t GetLen() const override { return length_; }
+    bool IsDir() const override { return is_dir_; }
+    std::string GetPath() const override { return path_; }
+    int64_t GetModificationTime() const override { return mtime_; }
+
+private:
+    std::string path_;
+    bool is_dir_;
+    uint64_t length_;
+    int64_t mtime_;
+};
+
+// paimon FileSystem implemented on top of Doris file systems. Each call
+// resolves the path to a Doris file system (chosen by URI scheme) plus a
+// normalized path; non-local file systems are created on first use and cached.
+class DorisFileSystem : public FileSystem {
+public:
+    // `options` are passed as properties to every Doris file system created by
+    // this instance; "fs.defaultFS", when present, is used as default fs name.
+    explicit DorisFileSystem(std::map<std::string, std::string> options)
+            : options_(std::move(options)) {
+        auto it = options_.find("fs.defaultFS");
+        if (it != options_.end()) {
+            default_fs_name_ = it->second;
+        }
+    }
+
+    Result<std::unique_ptr<InputStream>> Open(const std::string& path) const override {
+        PAIMON_ASSIGN_OR_RAISE(auto resolved, resolve_path(path));
+        auto& fs = resolved.first;
+        auto& normalized_path = resolved.second;
+        doris::io::FileReaderSPtr reader;
+        doris::io::FileReaderOptions reader_options = doris::io::FileReaderOptions::DEFAULT;
+        doris::Status status = fs->open_file(normalized_path, &reader, &reader_options);
+        if (!status.ok()) {
+            return to_paimon_status(status);
+        }
+        return std::make_unique<DorisInputStream>(std::move(reader), normalized_path);
+    }
+
+    // Creates `path` for writing. With overwrite == false an existing file is
+    // reported as Exist. The parent directory is created first when one can be
+    // derived from the path.
+    Result<std::unique_ptr<OutputStream>> Create(const std::string& path,
+                                                 bool overwrite) const override {
+        PAIMON_ASSIGN_OR_RAISE(auto resolved, resolve_path(path));
+        auto& fs = resolved.first;
+        auto& normalized_path = resolved.second;
+        if (!overwrite) {
+            bool exists = false;
+            doris::Status exists_status = fs->exists(normalized_path, &exists);
+            if (!exists_status.ok()) {
+                return to_paimon_status(exists_status);
+            }
+            if (exists) {
+                return Status::Exist("file already exists: ", normalized_path);
+            }
+        }
+        std::string parent = parent_path(normalized_path);
+        if (!parent.empty()) {
+            doris::Status mkdir_status = fs->create_directory(parent);
+            if (!mkdir_status.ok()) {
+                return to_paimon_status(mkdir_status);
+            }
+        }
+        doris::io::FileWriterPtr writer;
+        doris::Status status = fs->create_file(normalized_path, &writer);
+        if (!status.ok()) {
+            return to_paimon_status(status);
+        }
+        return std::make_unique<DorisOutputStream>(std::move(writer), normalized_path);
+    }
+
+    Status Mkdirs(const std::string& path) const override {
+        PAIMON_ASSIGN_OR_RAISE(auto resolved, resolve_path(path));
+        doris::Status status = resolved.first->create_directory(resolved.second);
+        return to_paimon_status(status);
+    }
+
+    // The rename is issued on the file system resolved for `src`.
+    Status Rename(const std::string& src, const std::string& dst) const override {
+        PAIMON_ASSIGN_OR_RAISE(auto src_resolved, resolve_path(src));
+        PAIMON_ASSIGN_OR_RAISE(auto dst_resolved, resolve_path(dst));
+        doris::Status status = src_resolved.first->rename(src_resolved.second, dst_resolved.second);
+        return to_paimon_status(status);
+    }
+
+    // Deletes `path`; a missing path is not an error. A path whose file_size()
+    // succeeds is deleted as a file. Otherwise it is deleted as a directory
+    // when `recursive` is set, and the file_size() error is returned when not.
+    Status Delete(const std::string& path, bool recursive = true) const override {
+        PAIMON_ASSIGN_OR_RAISE(auto resolved, resolve_path(path));
+        bool exists = false;
+        doris::Status exists_status = resolved.first->exists(resolved.second, &exists);
+        if (!exists_status.ok()) {
+            return to_paimon_status(exists_status);
+        }
+        if (!exists) {
+            return Status::OK();
+        }
+        int64_t size = 0;
+        doris::Status size_status = resolved.first->file_size(resolved.second, &size);
+        if (size_status.ok()) {
+            return to_paimon_status(resolved.first->delete_file(resolved.second));
+        }
+        if (recursive) {
+            return to_paimon_status(resolved.first->delete_directory(resolved.second));
+        }
+        return to_paimon_status(size_status);
+    }
+
+    // Returns the status of `path`, or NotExist. A path whose file_size()
+    // succeeds is reported as a file; otherwise it is listed and reported as a
+    // directory. The modification time is always reported as 0.
+    Result<std::unique_ptr<FileStatus>> GetFileStatus(const std::string& path) const override {
+        PAIMON_ASSIGN_OR_RAISE(auto resolved, resolve_path(path));
+        bool exists = false;
+        doris::Status exists_status = resolved.first->exists(resolved.second, &exists);
+        if (!exists_status.ok()) {
+            return to_paimon_status(exists_status);
+        }
+        if (!exists) {
+            return Status::NotExist("path not exists: ", resolved.second);
+        }
+        int64_t size = 0;
+        doris::Status size_status = resolved.first->file_size(resolved.second, &size);
+        if (size_status.ok()) {
+            return std::make_unique<DorisFileStatus>(resolved.second, false,
+                                                     static_cast<uint64_t>(size), 0);
+        }
+        std::vector<doris::io::FileInfo> files;
+        bool list_exists = false;
+        doris::Status list_status =
+                resolved.first->list(resolved.second, false, &files, &list_exists);
+        if (!list_status.ok()) {
+            return to_paimon_status(list_status);
+        }
+        if (!list_exists && files.empty()) {
+            return Status::NotExist("path not exists: ", resolved.second);
+        }
+        return std::make_unique<DorisFileStatus>(resolved.second, true, 0, 0);
+    }
+
+    // Appends one entry per child of `directory` to `status_list`. A missing
+    // directory yields no entries; a path that is a file yields IOError.
+    Status ListDir(const std::string& directory,
+                   std::vector<std::unique_ptr<BasicFileStatus>>* status_list) const override {
+        PAIMON_ASSIGN_OR_RAISE(auto resolved, resolve_path(directory));
+        auto file_status = GetFileStatus(directory);
+        if (file_status.ok() && !file_status.value()->IsDir()) {
+            return Status::IOError("path is not a directory: ", directory);
+        }
+        std::vector<doris::io::FileInfo> files;
+        bool exists = false;
+        doris::Status status = resolved.first->list(resolved.second, false, &files, &exists);
+        if (!status.ok()) {
+            return to_paimon_status(status);
+        }
+        if (!exists) {
+            return Status::OK();
+        }
+        status_list->reserve(status_list->size() + files.size());
+        for (const auto& file : files) {
+            status_list->emplace_back(std::make_unique<DorisBasicFileStatus>(
+                    join_path(resolved.second, file.file_name), !file.is_file));
+        }
+        return Status::OK();
+    }
+
+    // Like ListDir(), but with full statuses. A missing path yields no
+    // entries; a file yields its own status as the single entry.
+    Status ListFileStatus(const std::string& path,
+                          std::vector<std::unique_ptr<FileStatus>>* status_list) const override {
+        PAIMON_ASSIGN_OR_RAISE(auto resolved, resolve_path(path));
+        auto self_status = GetFileStatus(path);
+        if (!self_status.ok()) {
+            if (self_status.status().IsNotExist()) {
+                return Status::OK();
+            }
+            return self_status.status();
+        }
+        if (!self_status.value()->IsDir()) {
+            status_list->emplace_back(std::move(self_status).value());
+            return Status::OK();
+        }
+        std::vector<doris::io::FileInfo> files;
+        bool exists = false;
+        doris::Status list_status = resolved.first->list(resolved.second, false, &files, &exists);
+        if (!list_status.ok()) {
+            return to_paimon_status(list_status);
+        }
+        if (!exists) {
+            return Status::OK();
+        }
+        status_list->reserve(status_list->size() + files.size());
+        for (const auto& file : files) {
+            uint64_t length = file.is_file ? static_cast<uint64_t>(file.file_size) : 0;
+            status_list->emplace_back(std::make_unique<DorisFileStatus>(
+                    join_path(resolved.second, file.file_name), !file.is_file, length, 0));
+        }
+        return Status::OK();
+    }
+
+    Result<bool> Exists(const std::string& path) const override {
+        PAIMON_ASSIGN_OR_RAISE(auto resolved, resolve_path(path));
+        bool exists = false;
+        doris::Status status = resolved.first->exists(resolved.second, &exists);
+        if (!status.ok()) {
+            return to_paimon_status(status);
+        }
+        return exists;
+    }
+
+private:
+    // Maps `path` to (Doris file system, normalized path). Local paths use the
+    // global local file system. Other file systems are looked up in fs_cache_
+    // and created through FileFactory on a miss. The cache is only locked for
+    // the lookup and the insert, so two threads may create the same file
+    // system concurrently; the first insert wins.
+    Result<std::pair<doris::io::FileSystemSPtr, std::string>> resolve_path(
+            const std::string& path) const {
+        auto uri = parse_uri(path);
+        doris::TFileType::type type = map_scheme_to_file_type(uri.scheme);
+        std::string normalized_path = normalize_path_for_type(path, uri.scheme, type);
+        if (type == doris::TFileType::FILE_LOCAL) {
+            doris::io::FileSystemSPtr fs = doris::io::global_local_filesystem();
+            return std::make_pair(std::move(fs), normalized_path);
+        }
+        std::string fs_key = build_fs_cache_key(type, uri, default_fs_name_);
+        {
+            std::lock_guard lock(fs_lock_);
+            auto it = fs_cache_.find(fs_key);
+            if (it != fs_cache_.end()) {
+                return std::make_pair(it->second, normalized_path);
+            }
+        }
+        doris::io::FSPropertiesRef fs_properties(type);
+        const std::map<std::string, std::string>* properties = &options_;
+        std::map<std::string, std::string> properties_override;
+        // HTTP file systems get a "uri" property; derive it from the path
+        // unless the caller supplied one in the options.
+        if (type == doris::TFileType::FILE_HTTP && !options_.contains("uri") &&
+            !uri.scheme.empty()) {
+            properties_override = options_;
+            properties_override["uri"] = uri.scheme + "://" + uri.authority;
+            properties = &properties_override;
+        }
+        fs_properties.properties = properties;
+        if (!broker_addresses_.empty()) {
+            fs_properties.broker_addresses = &broker_addresses_;
+        }
+        doris::io::FileDescription file_description = {
+                .path = normalized_path, .file_size = -1, .mtime = 0, .fs_name = default_fs_name_};
+        auto fs_result = doris::FileFactory::create_fs(fs_properties, file_description);
+        if (!fs_result.has_value()) {
+            return to_paimon_status(fs_result.error());
+        }
+        doris::io::FileSystemSPtr fs = std::move(fs_result).value();
+        {
+            std::lock_guard lock(fs_lock_);
+            fs_cache_.emplace(std::move(fs_key), fs);
+        }
+        return std::make_pair(std::move(fs), std::move(normalized_path));
+    }
+
+    std::map<std::string, std::string> options_;
+    // Never filled in this file, so no broker addresses are passed on.
+    std::vector<doris::TNetworkAddress> broker_addresses_;
+    std::string default_fs_name_;
+    mutable std::mutex fs_lock_; // guards fs_cache_
+    mutable std::unordered_map<std::string, doris::io::FileSystemSPtr> fs_cache_;
+};
+
+// Factory registered with paimon under the identifier "doris".
+class DorisFileSystemFactory : public FileSystemFactory {
+public:
+    static const char IDENTIFIER[];
+
+    const char* Identifier() const override { return IDENTIFIER; }
+
+    // `path` is not used; the file system is chosen per call from each path.
+    Result<std::unique_ptr<FileSystem>> Create(
+            const std::string& path,
+            const std::map<std::string, std::string>& options) const override {
+        return std::make_unique<DorisFileSystem>(options);
+    }
+};
+
+const char DorisFileSystemFactory::IDENTIFIER[] = "doris";
+
+REGISTER_PAIMON_FACTORY(DorisFileSystemFactory);
+
+} // namespace paimon
+
+namespace doris::vectorized {
+
+// Intentionally empty: it exists only so that other code can reference a
+// symbol defined in this translation unit.
+void register_paimon_doris_file_system() {}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/exec/format/table/paimon_doris_file_system.h b/be/src/vec/exec/format/table/paimon_doris_file_system.h
new file mode 100644
index 00000000000000..bc47270788ece2
--- /dev/null
+++ b/be/src/vec/exec/format/table/paimon_doris_file_system.h
@@ -0,0 +1,25 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.
See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +namespace doris::vectorized { + +// Force-link helper so the paimon-cpp file system factory registration is kept. +void register_paimon_doris_file_system(); + +} // namespace doris::vectorized diff --git a/be/src/vec/exec/scan/file_scanner.cpp b/be/src/vec/exec/scan/file_scanner.cpp index 414620b0cbe15b..a1866fba19c64d 100644 --- a/be/src/vec/exec/scan/file_scanner.cpp +++ b/be/src/vec/exec/scan/file_scanner.cpp @@ -67,6 +67,7 @@ #include "vec/exec/format/table/iceberg_reader.h" #include "vec/exec/format/table/lakesoul_jni_reader.h" #include "vec/exec/format/table/max_compute_jni_reader.h" +#include "vec/exec/format/table/paimon_cpp_reader.h" #include "vec/exec/format/table/paimon_jni_reader.h" #include "vec/exec/format/table/paimon_reader.h" #include "vec/exec/format/table/remote_doris_reader.h" @@ -997,9 +998,15 @@ Status FileScanner::_get_next_reader() { _cur_reader = std::move(mc_reader); } else if (range.__isset.table_format_params && range.table_format_params.table_format_type == "paimon") { - _cur_reader = PaimonJniReader::create_unique(_file_slot_descs, _state, _profile, - range, _params); - init_status = ((PaimonJniReader*)(_cur_reader.get()))->init_reader(); + if (_state->query_options().enable_paimon_cpp_reader) { + _cur_reader = PaimonCppReader::create_unique(_file_slot_descs, _state, _profile, + range, _params); + init_status = ((PaimonCppReader*)(_cur_reader.get()))->init_reader(); + } else { + _cur_reader = PaimonJniReader::create_unique(_file_slot_descs, _state, _profile, + range, _params); + init_status = ((PaimonJniReader*)(_cur_reader.get()))->init_reader(); + } } else if (range.__isset.table_format_params && range.table_format_params.table_format_type == "hudi") { _cur_reader = HudiJniReader::create_unique(*_params, @@ -1020,8 +1027,9 @@ Status FileScanner::_get_next_reader() { } // Set col_name_to_block_idx for JNI 
readers to avoid repeated map creation if (_cur_reader) { - static_cast(_cur_reader.get()) - ->set_col_name_to_block_idx(&_src_block_name_to_idx); + if (auto* jni_reader = dynamic_cast(_cur_reader.get())) { + jni_reader->set_col_name_to_block_idx(&_src_block_name_to_idx); + } } break; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index 11df0e53eb109c..2254a7d1b6064b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -718,6 +718,7 @@ public class SessionVariable implements Serializable, Writable { public static final String DESCRIBE_EXTEND_VARIANT_COLUMN = "describe_extend_variant_column"; public static final String FORCE_JNI_SCANNER = "force_jni_scanner"; + public static final String ENABLE_PAIMON_CPP_READER = "enable_paimon_cpp_reader"; public static final String ENABLE_COUNT_PUSH_DOWN_FOR_EXTERNAL_TABLE = "enable_count_push_down_for_external_table"; @@ -1254,13 +1255,14 @@ public void checkQuerySlotCount(String slotCnt) { public enum IgnoreSplitType { NONE, IGNORE_JNI, - IGNORE_NATIVE + IGNORE_NATIVE, + IGNORE_PAIMON_CPP } public static final String IGNORE_SPLIT_TYPE = "ignore_split_type"; @VariableMgr.VarAttr(name = IGNORE_SPLIT_TYPE, checker = "checkIgnoreSplitType", - options = {"NONE", "IGNORE_JNI", "IGNORE_NATIVE"}, + options = {"NONE", "IGNORE_JNI", "IGNORE_NATIVE", "IGNORE_PAIMON_CPP"}, description = {"忽略指定类型的 split", "Ignore splits of the specified type"}) public String ignoreSplitType = IgnoreSplitType.NONE.toString(); @@ -2633,6 +2635,11 @@ public boolean isEnableHboNonStrictMatchingMode() { description = {"强制使用 jni 方式读取外表", "Force the use of jni mode to read external table"}) private boolean forceJniScanner = false; + @VariableMgr.VarAttr(name = ENABLE_PAIMON_CPP_READER, + fuzzy = true, + description = {"Paimon 非原生文件读取使用 paimon-cpp", "Use paimon-cpp 
for non-native Paimon reads"}) + private boolean enablePaimonCppReader = false; + @VariableMgr.VarAttr(name = ENABLE_COUNT_PUSH_DOWN_FOR_EXTERNAL_TABLE, fuzzy = true, description = {"对外表启用 count(*) 下推优化", "enable count(*) pushdown optimization for external table"}) @@ -3373,6 +3380,7 @@ private void setFuzzyForCatalog(Random random) { // jni this.forceJniScanner = random.nextBoolean(); + this.enablePaimonCppReader = random.nextBoolean(); // statistics this.fetchHiveRowCountSync = random.nextBoolean(); @@ -4892,6 +4900,7 @@ public TQueryOptions toThrift() { tResult.setEnableParquetFilterByMinMax(enableParquetFilterByMinMax); tResult.setEnableParquetFilterByBloomFilter(enableParquetFilterByBloomFilter); tResult.setEnableOrcFilterByMinMax(enableOrcFilterByMinMax); + tResult.setEnablePaimonCppReader(enablePaimonCppReader); tResult.setCheckOrcInitSargsSuccess(checkOrcInitSargsSuccess); tResult.setTruncateCharOrVarcharColumns(truncateCharOrVarcharColumns); @@ -5616,6 +5625,10 @@ public boolean isForceJniScanner() { return forceJniScanner; } + public boolean isEnablePaimonCppReader() { + return enablePaimonCppReader; + } + public String getIgnoreSplitType() { return ignoreSplitType; } @@ -5624,7 +5637,8 @@ public void checkIgnoreSplitType(String value) { try { IgnoreSplitType.valueOf(value); } catch (Exception e) { - throw new UnsupportedOperationException("We only support `NONE`, `IGNORE_JNI` and `IGNORE_NATIVE`"); + throw new UnsupportedOperationException( + "We only support `NONE`, `IGNORE_JNI`, `IGNORE_NATIVE` and `IGNORE_PAIMON_CPP`"); } } @@ -5636,6 +5650,10 @@ public void setForceJniScanner(boolean force) { forceJniScanner = force; } + public void setEnablePaimonCppReader(boolean enable) { + enablePaimonCppReader = enable; + } + public boolean isEnableCountPushDownForExternalTable() { return enableCountPushDownForExternalTable; } diff --git a/gensrc/thrift/PaloInternalService.thrift b/gensrc/thrift/PaloInternalService.thrift index 5622929166ef5f..a06b8872811acc 
100644 --- a/gensrc/thrift/PaloInternalService.thrift +++ b/gensrc/thrift/PaloInternalService.thrift @@ -425,6 +425,8 @@ struct TQueryOptions { // Enable hybrid sorting: dynamically selects between PdqSort and TimSort based on // runtime profiling to choose the most efficient algorithm for the data pattern 183: optional bool enable_use_hybrid_sort = false; + // Use paimon-cpp to read Paimon splits on BE + 184: optional bool enable_paimon_cpp_reader = false; // For cloud, to control if the content would be written into file cache // In write path, to control if the content would be written into file cache. diff --git a/thirdparty/build-thirdparty.sh b/thirdparty/build-thirdparty.sh index 948415feaf393e..ca3b1a92c45515 100755 --- a/thirdparty/build-thirdparty.sh +++ b/thirdparty/build-thirdparty.sh @@ -1975,6 +1975,38 @@ build_pugixml() { cp "${TP_SOURCE_DIR}/${PUGIXML_SOURCE}/src/pugiconfig.hpp" "${TP_INSTALL_DIR}/include/" } +# paimon-cpp +build_paimon_cpp() { + check_if_source_exist "${PAIMON_CPP_SOURCE}" + cd "${TP_SOURCE_DIR}/${PAIMON_CPP_SOURCE}" + + rm -rf "${BUILD_DIR}" + mkdir -p "${BUILD_DIR}" + cd "${BUILD_DIR}" + + # Add link directories and libraries for brotli, libunwind and lzma + LDFLAGS="-L${TP_LIB_DIR}" \ + CXXFLAGS="-Wno-nontrivial-memcall" \ + "${CMAKE_CMD}" -G "${GENERATOR}" -DCMAKE_POLICY_VERSION_MINIMUM=3.5 \ + -DCMAKE_INSTALL_PREFIX="${TP_INSTALL_DIR}" \ + -DCMAKE_PREFIX_PATH="${TP_INSTALL_DIR}" \ + -DCMAKE_LIBRARY_PATH="${TP_LIB_DIR}" \ + -DPAIMON_BUILD_STATIC=ON \ + -DPAIMON_ENABLE_ORC=OFF \ + -DPAIMON_ENABLE_AVRO=OFF \ + -DPAIMON_ENABLE_LANCE=OFF \ + -DPAIMON_ENABLE_JINDO=OFF \ + -DPAIMON_ENABLE_LUMINA=OFF \ + -DCMAKE_C_COMPILER=gcc \ + -DCMAKE_CXX_COMPILER=g++ \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_EXE_LINKER_FLAGS="-L${TP_LIB_DIR} -lbrotlienc -lbrotlidec -lbrotlicommon -lunwind -llzma" \ + -DCMAKE_SHARED_LINKER_FLAGS="-L${TP_LIB_DIR} -lbrotlienc -lbrotlidec -lbrotlicommon -lunwind -llzma" \ + ..
+ "${BUILD_SYSTEM}" -j "${PARALLEL}" + "${BUILD_SYSTEM}" install +} + if [[ "${#packages[@]}" -eq 0 ]]; then packages=( jindofs @@ -2048,6 +2080,7 @@ if [[ "${#packages[@]}" -eq 0 ]]; then brotli icu pugixml + paimon_cpp ) if [[ "$(uname -s)" == 'Darwin' ]]; then read -r -a packages <<<"binutils gettext ${packages[*]}" diff --git a/thirdparty/download-thirdparty.sh b/thirdparty/download-thirdparty.sh index 2334b1b9b23011..21caee4eef1190 100755 --- a/thirdparty/download-thirdparty.sh +++ b/thirdparty/download-thirdparty.sh @@ -101,6 +101,18 @@ md5sum_func() { return 0 } +is_git_package() { + local TP_ARCH="$1" + local GIT_URL_VAR="${TP_ARCH}_GIT_URL" + [[ -n "${!GIT_URL_VAR}" ]] +} + +git_url_for() { + local TP_ARCH="$1" + local GIT_URL_VAR="${TP_ARCH}_GIT_URL" + echo "${!GIT_URL_VAR}" +} + # return 0 if download succeed. # return 1 if not. download_func() { @@ -157,6 +169,10 @@ download_func() { # download thirdparty archives echo "===== Downloading thirdparty archives..." for TP_ARCH in "${TP_ARCHIVES[@]}"; do + if is_git_package "${TP_ARCH}"; then + echo "Skip downloading ${TP_ARCH} (git repo: $(git_url_for "${TP_ARCH}"))" + continue + fi NAME="${TP_ARCH}_NAME" MD5SUM="${TP_ARCH}_MD5SUM" if [[ -z "${REPOSITORY_URL}" ]]; then @@ -182,6 +198,9 @@ echo "===== Downloading thirdparty archives...done" # check if all tp archives exists echo "===== Checking all thirdpart archives..." for TP_ARCH in "${TP_ARCHIVES[@]}"; do + if is_git_package "${TP_ARCH}"; then + continue + fi NAME="${TP_ARCH}_NAME" if [[ ! 
-r "${TP_SOURCE_DIR}/${!NAME}" ]]; then echo "Failed to fetch ${!NAME}" @@ -199,6 +218,9 @@ SUFFIX_XZ="\.tar\.xz$" SUFFIX_ZIP="\.zip$" SUFFIX_BZ2="\.tar\.bz2$" for TP_ARCH in "${TP_ARCHIVES[@]}"; do + if is_git_package "${TP_ARCH}"; then + continue + fi NAME="${TP_ARCH}_NAME" SOURCE="${TP_ARCH}_SOURCE" @@ -238,6 +260,57 @@ for TP_ARCH in "${TP_ARCHIVES[@]}"; do done echo "===== Unpacking all thirdparty archives...done" +# Clone and checkout git repositories +echo "===== Cloning git repositories..." +for TP_ARCH in "${TP_ARCHIVES[@]}"; do + if ! is_git_package "${TP_ARCH}"; then + continue + fi + + GIT_URL_VAR="${TP_ARCH}_GIT_URL" + GIT_TAG_VAR="${TP_ARCH}_GIT_TAG" + SOURCE_VAR="${TP_ARCH}_SOURCE" + + GIT_URL="${!GIT_URL_VAR}" + GIT_TAG="${!GIT_TAG_VAR}" + SOURCE_DIR="${TP_SOURCE_DIR}/${!SOURCE_VAR}" + + if [[ -z "${GIT_URL}" ]] || [[ -z "${GIT_TAG}" ]] || [[ -z "${!SOURCE_VAR}" ]]; then + echo "Warning: ${TP_ARCH} git configuration incomplete, skipping" + continue + fi + + if [[ ! -d "${SOURCE_DIR}" ]]; then + echo "Cloning ${TP_ARCH} from ${GIT_URL}..." + cd "${TP_SOURCE_DIR}" + if ! git clone "${GIT_URL}" "${!SOURCE_VAR}"; then + echo "Failed to clone ${TP_ARCH}" + exit 1 + fi + else + echo "${TP_ARCH} repository already exists, updating..." + cd "${SOURCE_DIR}" + git fetch origin || true + fi + + cd "${SOURCE_DIR}" + if ! git checkout "${GIT_TAG}" 2>/dev/null; then + echo "Tag ${GIT_TAG} not found, trying to fetch..." + is_shallow="$(git rev-parse --is-shallow-repository 2>/dev/null || echo false)" + if [[ "${is_shallow}" == "true" ]]; then + git fetch --unshallow origin || git fetch --depth=2147483647 origin + else + git fetch origin + fi + if ! git checkout "${GIT_TAG}"; then + echo "Failed to checkout ${GIT_TAG} for ${TP_ARCH}" + exit 1 + fi + fi + echo "Successfully checked out ${GIT_TAG} for ${TP_ARCH}" +done +echo "===== Cloning git repositories...done" + echo "===== Patching thirdparty archives..." 
################################################################################### diff --git a/thirdparty/vars.sh b/thirdparty/vars.sh index 980a2c5fc92002..3084270edb1b47 100644 --- a/thirdparty/vars.sh +++ b/thirdparty/vars.sh @@ -559,6 +559,15 @@ PUGIXML_NAME=pugixml-1.15.tar.gz PUGIXML_SOURCE=pugixml-1.15 PUGIXML_MD5SUM="3b894c29455eb33a40b165c6e2de5895" +# paimon-cpp +# Fetched with git clone because there is no official release archive yet +# Pinned to a specific commit SHA (PAIMON_CPP_GIT_TAG) for reproducibility +PAIMON_CPP_GIT_URL="https://github.com/alibaba/paimon-cpp.git" +PAIMON_CPP_GIT_TAG="43d9d0271b7416fa85f1674586efa94f384a6b5c" +PAIMON_CPP_NAME=paimon-cpp +PAIMON_CPP_SOURCE=paimon-cpp +PAIMON_CPP_MD5SUM="" # Not applicable for git repos + # all thirdparties which need to be downloaded is set in array TP_ARCHIVES export TP_ARCHIVES=( 'LIBEVENT' @@ -642,6 +651,7 @@ export TP_ARCHIVES=( 'ICU' 'JINDOFS' 'PUGIXML' + 'PAIMON_CPP' ) if [[ "$(uname -s)" == 'Darwin' ]]; then