Skip to content

Commit 3b006e9

Browse files
authored
Merge branch 'alibaba:main' into main
2 parents 5dbc107 + 670a294 commit 3b006e9

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

49 files changed

+583
-299
lines changed
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
# Copyright 2025-present Alibaba Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
name: Build Release
16+
17+
on:
18+
push:
19+
branches:
20+
- '**'
21+
tags:
22+
- '**'
23+
pull_request:
24+
25+
concurrency:
26+
group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
27+
cancel-in-progress: true
28+
29+
permissions:
30+
contents: read
31+
32+
jobs:
33+
clang-release:
34+
runs-on: ubuntu-24.04
35+
timeout-minutes: 120
36+
strategy:
37+
fail-fast: false
38+
steps:
39+
- name: Checkout paimon-cpp
40+
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
41+
with:
42+
lfs: true
43+
- name: Build Paimon
44+
shell: bash
45+
env:
46+
CC: clang
47+
CXX: clang++
48+
run: ci/scripts/build_paimon.sh $(pwd) false false Release
49+
gcc-release:
50+
runs-on: ubuntu-24.04
51+
timeout-minutes: 120
52+
strategy:
53+
fail-fast: false
54+
steps:
55+
- name: Checkout paimon-cpp
56+
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
57+
with:
58+
lfs: true
59+
- name: Build Paimon
60+
shell: bash
61+
env:
62+
CC: gcc-14
63+
CXX: g++-14
64+
run: ci/scripts/build_paimon.sh $(pwd) false false Release

CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -319,7 +319,6 @@ if(PAIMON_BUILD_TESTS)
319319
-L
320320
unittest
321321
--output-on-failure)
322-
add_compile_options(-fno-access-control)
323322
add_dependencies(unittest paimon-tests)
324323

325324
include_directories(SYSTEM ${GTEST_INCLUDE_DIR})

ci/scripts/build_paimon.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,15 @@ set -eux
1919
source_dir=${1}
2020
enable_sanitizer=${2:-false}
2121
check_clang_tidy=${3:-false}
22+
build_type=${4:-Debug}
2223
build_dir=${1}/build
2324

2425
mkdir ${build_dir}
2526
pushd ${build_dir}
2627

2728
CMAKE_ARGS=(
2829
"-G Ninja"
29-
"-DCMAKE_BUILD_TYPE=Debug"
30+
"-DCMAKE_BUILD_TYPE=${build_type}"
3031
"-DPAIMON_BUILD_TESTS=ON"
3132
"-DPAIMON_ENABLE_LANCE=ON"
3233
"-DPAIMON_ENABLE_JINDO=ON"

cmake_modules/BuildUtils.cmake

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,7 @@ function(add_test_case REL_TEST_NAME)
256256
if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
257257
target_compile_options(${TEST_NAME} PRIVATE -Wno-global-constructors)
258258
endif()
259+
target_compile_options(${TEST_NAME} PRIVATE -fno-access-control)
259260

260261
add_test(${TEST_NAME}
261262
${BUILD_SUPPORT_DIR}/run-test.sh

cmake_modules/ThirdpartyToolchain.cmake

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -548,6 +548,10 @@ macro(build_avro)
548548

549549
get_target_property(AVRO_ZSTD_INCLUDE_DIR zstd INTERFACE_INCLUDE_DIRECTORIES)
550550
get_filename_component(AVRO_ZSTD_ROOT "${AVRO_ZSTD_INCLUDE_DIR}" DIRECTORY)
551+
552+
get_target_property(AVRO_ZLIB_INCLUDE_DIR zlib INTERFACE_INCLUDE_DIRECTORIES)
553+
get_filename_component(AVRO_ZLIB_ROOT "${AVRO_ZLIB_INCLUDE_DIR}" DIRECTORY)
554+
551555
set(AVRO_CMAKE_CXX_FLAGS "${EP_CXX_FLAGS} -Wno-error")
552556
set(AVRO_CMAKE_C_FLAGS "${EP_C_FLAGS} -Wno-error")
553557

@@ -558,7 +562,7 @@ macro(build_avro)
558562
"-DCMAKE_C_FLAGS=${AVRO_CMAKE_C_FLAGS}"
559563
"-DAVRO_BUILD_TESTS=OFF"
560564
"-DAVRO_BUILD_EXECUTABLES=OFF"
561-
"-DZLIB_ROOT=${THIRDPARTY_ZLIB_ROOT}"
565+
"-DZLIB_ROOT=${AVRO_ZLIB_ROOT}"
562566
"-Dzstd_ROOT=${AVRO_ZSTD_ROOT}"
563567
"-DSnappy_ROOT=${AVRO_SNAPPY_ROOT}")
564568
externalproject_add(avro_ep

include/paimon/global_index/bitmap_global_index_result.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
#include "paimon/visibility.h"
2727

2828
namespace paimon {
29-
/// Represents a global index query result that **lazily materializes** its matching row IDs as a
29+
/// Represents a global index query result that **lazily materializes** its matching row ids as a
3030
/// Roaring bitmap. The underlying 64-bit Roaring bitmap is **not constructed during object
3131
/// creation**; instead, it is built on-demand the first time GetBitmap() is called. This design
3232
/// avoids unnecessary computation and memory allocation when the bitmap is not needed (e.g., during
@@ -67,6 +67,8 @@ class PAIMON_EXPORT BitmapGlobalIndexResult : public GlobalIndexResult {
6767

6868
Result<bool> IsEmpty() const override;
6969

70+
Result<std::shared_ptr<GlobalIndexResult>> AddOffset(int64_t offset) override;
71+
7072
std::string ToString() const override;
7173

7274
/// @return A non-owning, const pointer to the bitmap. The returned pointer is valid as long as

include/paimon/global_index/bitmap_topk_global_index_result.h

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,12 @@
2626
#include "paimon/visibility.h"
2727

2828
namespace paimon {
29-
/// Represents a Top-K global index result that combines a Roaring bitmap of candidate row IDs
29+
/// Represents a Top-K global index result that combines a Roaring bitmap of candidate row ids
3030
/// with an array of associated relevance scores.
3131
///
3232
/// **Important Ordering Note**: Despite inheriting from TopKGlobalIndexResult, the results are
3333
/// **NOT sorted by score**. Instead, both the bitmap and the score vector are ordered by
34-
/// **ascending row ID**. This design enables efficient merging and set operations while preserving
34+
/// **ascending row id**. This design enables efficient merging and set operations while preserving
3535
/// row id-to-score mapping.
3636
class PAIMON_EXPORT BitmapTopKGlobalIndexResult : public TopKGlobalIndexResult {
3737
public:
@@ -74,16 +74,18 @@ class PAIMON_EXPORT BitmapTopKGlobalIndexResult : public TopKGlobalIndexResult {
7474
Result<std::shared_ptr<GlobalIndexResult>> Or(
7575
const std::shared_ptr<GlobalIndexResult>& other) override;
7676

77+
Result<std::shared_ptr<GlobalIndexResult>> AddOffset(int64_t offset) override;
78+
7779
Result<bool> IsEmpty() const override;
7880

7981
std::string ToString() const override;
8082

81-
/// @return A non-owning, const pointer to the bitmap. The row IDs in the bitmap are stored in
83+
/// @return A non-owning, const pointer to the bitmap. The row ids in the bitmap are stored in
8284
/// ascending order (as guaranteed by Roaring64 iteration).
8385
Result<const RoaringBitmap64*> GetBitmap() const;
8486

8587
/// @return A const reference to a vector of float scores, where the i-th element corresponds to
86-
/// the i-th row ID when iterating the bitmap in **ascending row ID order**.
88+
/// the i-th row id when iterating the bitmap in **ascending row id order**.
8789
const std::vector<float>& GetScores() const;
8890

8991
private:

include/paimon/global_index/global_index_io_meta.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,17 +25,17 @@
2525
namespace paimon {
2626
/// Metadata describing a single file entry in a global index.
2727
struct PAIMON_EXPORT GlobalIndexIOMeta {
28-
GlobalIndexIOMeta(const std::string& _file_name, int64_t _file_size, const Range& _row_id_range,
28+
GlobalIndexIOMeta(const std::string& _file_name, int64_t _file_size, int64_t _range_end,
2929
const std::shared_ptr<Bytes>& _metadata)
3030
: file_name(_file_name),
3131
file_size(_file_size),
32-
row_id_range(_row_id_range),
32+
range_end(_range_end),
3333
metadata(_metadata) {}
3434

3535
std::string file_name;
3636
int64_t file_size;
37-
/// The inclusive range of row IDs covered by this file (i.e., [from, to]).
38-
Range row_id_range;
37+
/// The inclusive range end covered by this file (i.e., the last local row id).
38+
int64_t range_end;
3939
/// Optional binary metadata associated with the file, such as serialized
4040
/// secondary index structures or inline index bytes.
4141
/// May be null if no additional metadata is available.

include/paimon/global_index/global_index_reader.h

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -26,18 +26,20 @@
2626

2727
namespace paimon {
2828
/// Reads and evaluates filter predicates against a global file index.
29-
/// `GlobalIndexReader` is an implementation of the `FunctionVisitor` interface
30-
/// specialized to produce `std::shared_ptr<GlobalIndexResult>` objects.
3129
///
3230
/// Derived classes are expected to implement the visitor methods (e.g., `VisitEqual`,
3331
/// `VisitIsNull`, etc.) to return index-based results that indicate which
3432
/// rows satisfy the given predicate.
33+
///
34+
/// @note All `GlobalIndexResult` objects returned by implementations of this class use **local row
35+
/// ids** that start from 0 — not global row ids in the entire table.
36+
/// The `GlobalIndexResult` can be converted to global row ids by calling `AddOffset()`.
3537
class PAIMON_EXPORT GlobalIndexReader : public FunctionVisitor<std::shared_ptr<GlobalIndexResult>> {
3638
public:
3739
/// TopKPreFilter: A lightweight pre-filtering function applied **before** similarity scoring.
38-
/// It operates solely on row_id and is typically driven by other global index, such as bitmap,
39-
/// or range index. This filter enables early pruning of irrelevant candidates (e.g., "only
40-
/// consider rows with label X"), significantly reducing the search space. Returns true to
40+
/// It operates solely on **local row ids** and is typically driven by another global index,
41+
/// such as a bitmap or range index. This filter enables early pruning of irrelevant candidates
42+
/// (e.g., "only consider rows with label X"), significantly reducing the search space. Returns
4143
/// true to include the row in Top-K computation; false to exclude it.
4244
///
4345
/// @note Must be thread-safe.
@@ -47,7 +49,8 @@ class PAIMON_EXPORT GlobalIndexReader : public FunctionVisitor<std::shared_ptr<G
4749
///
4850
/// @param k Number of top results to return.
4951
/// @param query The query vector (must match the dimensionality of the indexed vectors).
50-
/// @param filter A pre-filter based on row_id, implemented by leveraging other global index
52+
/// @param filter A pre-filter based on **local row ids**, implemented by leveraging other
53+
/// global index
5154
/// structures (e.g., bitmap index) for efficient candidate pruning.
5255
/// @param predicate A runtime filtering condition that may involve graph traversal of
5356
/// structured attributes. **Using this parameter often yields better
@@ -58,7 +61,8 @@ class PAIMON_EXPORT GlobalIndexReader : public FunctionVisitor<std::shared_ptr<G
5861
/// context-aware filtering at query time.
5962
/// @note All fields referenced in the predicate must have been materialized
6063
/// in the index during build to ensure availability.
61-
/// @note `VisitTopK` is thread-safe while other `VisitXXX` is not.
64+
/// @note `VisitTopK` is thread-safe (not coroutine-safe); other `VisitXXX` methods are not
65+
/// thread-safe.
6266
virtual Result<std::shared_ptr<TopKGlobalIndexResult>> VisitTopK(
6367
int32_t k, const std::vector<float>& query, TopKPreFilter filter,
6468
const std::shared_ptr<Predicate>& predicate) = 0;

include/paimon/global_index/global_index_result.h

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ class PAIMON_EXPORT GlobalIndexResult : public std::enable_shared_from_this<Glob
4444
virtual int64_t Next() = 0;
4545
};
4646

47-
/// Checks whether the global index result contains no matching row IDs.
47+
/// Checks whether the global index result contains no matching row ids.
4848
///
4949
/// @return A `Result<bool>` where:
5050
/// - `true` indicates the result is empty (no matching rows),
@@ -67,6 +67,10 @@ class PAIMON_EXPORT GlobalIndexResult : public std::enable_shared_from_this<Glob
6767
virtual Result<std::shared_ptr<GlobalIndexResult>> Or(
6868
const std::shared_ptr<GlobalIndexResult>& other);
6969

70+
/// Adds the given offset to each row id in the current result and returns the new global index
71+
/// result.
72+
virtual Result<std::shared_ptr<GlobalIndexResult>> AddOffset(int64_t offset) = 0;
73+
7074
virtual std::string ToString() const = 0;
7175

7276
/// Serializes a GlobalIndexResult object into a byte array.
@@ -103,7 +107,7 @@ class PAIMON_EXPORT GlobalIndexResult : public std::enable_shared_from_this<Glob
103107
};
104108

105109
/// Represents the result of a Top-K query against a global index.
106-
/// This class encapsulates a set of top-K candidates (row ID + score pairs) and provides
110+
/// This class encapsulates a set of top-K candidates (row id + score pairs) and provides
107111
/// an iterator interface to traverse them.
108112
class PAIMON_EXPORT TopKGlobalIndexResult : public GlobalIndexResult {
109113
public:
@@ -115,7 +119,7 @@ class PAIMON_EXPORT TopKGlobalIndexResult : public GlobalIndexResult {
115119
public:
116120
virtual ~TopKIterator() = default;
117121

118-
/// Checks whether more row IDs are available.
122+
/// Checks whether more row ids are available.
119123
virtual bool HasNext() const = 0;
120124

121125
/// Retrieves the next (row_id, score) pair and advances the iterator.

0 commit comments

Comments
 (0)