Skip to content

Commit 00bc545

Browse files
authored
feat(c++): support passing filter and projection expression in scan (#4224)
I found that direct binding doesn't seem too burdensome compared to using protobuf serde. Performance-wise, binding avoids the serde cost but requires putting the Rust data structures on the heap using `Box::new`; I am not sure whether this will be a problem for complex nested expressions. On the other hand, things would be easier if vortex-expr could consume Substrait. Scalar, DType, and Expr support is not complete (e.g., decimal/date/time are not included), but you can create a DType using `from_arrow` and cast a Scalar from a primitive type to another type. --------- Signed-off-by: Xinyu Zeng <[email protected]>
1 parent d313582 commit 00bc545

File tree

25 files changed

+971
-299
lines changed

25 files changed

+971
-299
lines changed

.github/workflows/ci.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -461,7 +461,7 @@ jobs:
461461
run: |
462462
mkdir -p build
463463
cd build
464-
cmake -DVORTEX_ENABLE_TESTING=ON ..
464+
cmake -DVORTEX_ENABLE_TESTING=ON -DVORTEX_ENABLE_ASAN=ON ..
465465
cmake --build . --parallel $(nproc)
466466
ctest -V
467467
- name: Build and run the example in release mode

Cargo.lock

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vortex-cxx/CMakeLists.txt

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ endif()
1818
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Wall -Wextra -Wpedantic -Wno-dollar-in-identifier-extension")
1919

2020
option(VORTEX_ENABLE_TESTING "Enable building test binary for vortex-cxx" OFF)
21+
option(VORTEX_ENABLE_ASAN "Enable address sanitizer" OFF)
2122

2223
include(FetchContent)
2324
FetchContent_Declare(
@@ -27,7 +28,7 @@ FetchContent_Declare(
2728
)
2829
FetchContent_MakeAvailable(Corrosion)
2930

30-
set(RUST_SOURCE_FILE lib.rs read.rs write.rs)
31+
set(RUST_SOURCE_FILE lib.rs)
3132

3233
# Import Rust crate using Corrosion
3334
corrosion_import_crate(
@@ -43,11 +44,8 @@ FetchContent_Declare(nanoarrow
4344
URL "https://www.apache.org/dyn/closer.lua?action=download&filename=arrow/apache-arrow-nanoarrow-0.7.0/apache-arrow-nanoarrow-0.7.0.tar.gz")
4445
FetchContent_MakeAvailable(nanoarrow)
4546

46-
# C++ source files
47-
set(CPP_SOURCE_FILE
48-
cpp/src/write_options.cpp
49-
cpp/src/file.cpp
50-
cpp/src/scan.cpp
47+
file(GLOB_RECURSE CPP_SOURCE_FILE CONFIGURE_DEPENDS
48+
"${CMAKE_CURRENT_SOURCE_DIR}/cpp/src/*.cpp"
5149
)
5250

5351
# Public headers
@@ -62,17 +60,21 @@ set(CPP_PRIVATE_INCLUDE_DIRS
6260

6361
# Create the main library combining C++ and Rust code
6462
add_library(vortex STATIC ${CPP_SOURCE_FILE})
65-
target_include_directories(vortex PUBLIC ${CPP_INCLUDE_DIRS})
63+
target_include_directories(vortex PUBLIC ${CPP_INCLUDE_DIRS}
64+
${CMAKE_CURRENT_BINARY_DIR}/corrosion_generated/cxxbridge/vortex_cxx_bridge/include)
6665
target_include_directories(vortex PRIVATE
6766
${CPP_PRIVATE_INCLUDE_DIRS}
68-
${CMAKE_CURRENT_BINARY_DIR}/corrosion_generated/cxxbridge/vortex_cxx_bridge/include
69-
${CMAKE_CURRENT_BINARY_DIR}/corrosion_generated/cxxbridge/vortex_cxx_bridge/src
7067
)
7168
target_link_libraries(vortex
7269
PUBLIC nanoarrow_static
7370
PRIVATE vortex_cxx_bridge
7471
)
7572

73+
if (VORTEX_ENABLE_ASAN)
74+
target_compile_options(vortex PRIVATE -fsanitize=leak,address,undefined -fno-omit-frame-pointer -fno-common -O1)
75+
target_link_options(vortex PRIVATE -fsanitize=leak,address,undefined)
76+
endif()
77+
7678
# Tests
7779
if (VORTEX_ENABLE_TESTING)
7880
FetchContent_Declare(
@@ -96,6 +98,10 @@ if (VORTEX_ENABLE_TESTING)
9698
set(APPLE_LINK_FLAGS "-framework CoreFoundation -framework Security")
9799
endif()
98100
target_link_libraries(vortex_cxx_test PRIVATE vortex_cxx_bridge ${APPLE_LINK_FLAGS})
101+
if (VORTEX_ENABLE_ASAN)
102+
target_compile_options(vortex_cxx_test PRIVATE -fsanitize=leak,address,undefined -fno-omit-frame-pointer -fno-common -O1)
103+
target_link_options(vortex_cxx_test PRIVATE -fsanitize=leak,address,undefined)
104+
endif()
99105
include(GoogleTest)
100106
gtest_discover_tests(vortex_cxx_test)
101107
endif()

vortex-cxx/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,12 @@ workspace = true
2121
crate-type = ["staticlib"]
2222

2323
[dependencies]
24+
anyhow = { workspace = true }
2425
arrow-array = { workspace = true, features = ["ffi"] }
2526
arrow-schema = { workspace = true }
2627
cxx = "1.0"
2728
futures = { workspace = true, features = ["thread-pool"] }
29+
paste = { workspace = true }
2830
prost = { workspace = true }
2931
take_mut = { workspace = true }
3032
tokio = { workspace = true, features = ["rt", "rt-multi-thread", "macros"] }

vortex-cxx/cpp/include/vortex.hpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,6 @@
66
#include "vortex/file.hpp"
77
#include "vortex/scan.hpp"
88
#include "vortex/write_options.hpp"
9-
#include "vortex/exception.hpp"
9+
#include "vortex/exception.hpp"
10+
#include "vortex/expr.hpp"
11+
#include "vortex/scalar.hpp"
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
#pragma once
5+
6+
#include <string>
7+
#include <nanoarrow/common/inline_types.h>
8+
#include "vortex_cxx_bridge/lib.h"
9+
10+
namespace vortex {
11+
12+
enum class PType : uint8_t {
13+
U8 = 0,
14+
U16,
15+
U32,
16+
U64,
17+
I8,
18+
I16,
19+
I32,
20+
I64,
21+
F16,
22+
F32,
23+
F64,
24+
};
25+
26+
namespace dtype {
27+
class DType {
28+
public:
29+
DType() = delete;
30+
explicit DType(rust::Box<ffi::DType> impl) : impl_(std::move(impl)) {
31+
}
32+
DType(DType &&other) noexcept = default;
33+
DType &operator=(DType &&other) = default;
34+
~DType() = default;
35+
36+
DType(const DType &) = delete;
37+
DType &operator=(const DType &) = delete;
38+
39+
std::string ToString() const;
40+
41+
const rust::Box<ffi::DType> &GetImpl() {
42+
return impl_;
43+
}
44+
45+
private:
46+
rust::Box<ffi::DType> impl_;
47+
};
48+
49+
// Factory functions
50+
DType null();
51+
DType bool_(bool nullable = false);
52+
DType primitive(PType ptype, bool nullable = false);
53+
DType int8(bool nullable = false);
54+
DType int16(bool nullable = false);
55+
DType int32(bool nullable = false);
56+
DType int64(bool nullable = false);
57+
DType uint8(bool nullable = false);
58+
DType uint16(bool nullable = false);
59+
DType uint32(bool nullable = false);
60+
DType uint64(bool nullable = false);
61+
DType float16(bool nullable = false);
62+
DType float32(bool nullable = false);
63+
DType float64(bool nullable = false);
64+
DType decimal(uint8_t precision = 10, int8_t scale = 0, bool nullable = false);
65+
DType utf8(bool nullable = false);
66+
DType binary(bool nullable = false);
67+
/// TODO: Other DTypes are only supported by creating from Arrow for now.
68+
DType from_arrow(struct ArrowSchema &schema, bool non_nullable = false);
69+
} // namespace dtype
70+
71+
} // namespace vortex
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
#pragma once
5+
6+
#include "vortex/scalar.hpp"
7+
#include "vortex_cxx_bridge/lib.h"
8+
#include <vector>
9+
10+
namespace vortex::expr {
11+
using scalar::Scalar;
12+
class Expr {
13+
public:
14+
Expr() = delete;
15+
explicit Expr(rust::Box<ffi::Expr> impl) : impl_(std::move(impl)) {
16+
}
17+
Expr(Expr &&other) noexcept = default;
18+
Expr &operator=(Expr &&other) noexcept = default;
19+
~Expr() = default;
20+
21+
Expr(const Expr &) = delete;
22+
Expr &operator=(const Expr &) = delete;
23+
24+
rust::Box<ffi::Expr> IntoImpl() && {
25+
return std::move(impl_);
26+
}
27+
28+
private:
29+
rust::Box<ffi::Expr> impl_;
30+
};
31+
32+
Expr literal(Scalar scalar);
33+
Expr root();
34+
Expr column(std::string_view name);
35+
Expr get_item(std::string_view field, Expr expr);
36+
Expr not_(Expr expr);
37+
Expr is_null(Expr expr);
38+
Expr eq(Expr lhs, Expr rhs);
39+
Expr not_eq_(Expr lhs, Expr rhs);
40+
Expr gt(Expr lhs, Expr rhs);
41+
Expr gt_eq(Expr lhs, Expr rhs);
42+
Expr lt(Expr lhs, Expr rhs);
43+
Expr lt_eq(Expr lhs, Expr rhs);
44+
Expr and_(Expr lhs, Expr rhs);
45+
Expr or_(Expr lhs, Expr rhs);
46+
Expr checked_add(Expr lhs, Expr rhs);
47+
Expr select(const std::vector<std::string_view> &fields, Expr child);
48+
} // namespace vortex::expr

vortex-cxx/cpp/include/vortex/file.hpp

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,7 @@
44
#pragma once
55

66
#include <nanoarrow/common/inline_types.h>
7-
8-
#include <memory>
7+
#include "vortex_cxx_bridge/lib.h"
98

109
namespace vortex {
1110
class ScanBuilder;
@@ -14,9 +13,9 @@ class VortexFile {
1413
public:
1514
static VortexFile Open(const std::string &path);
1615

17-
VortexFile(VortexFile &&other) noexcept;
18-
VortexFile &operator=(VortexFile &&other) noexcept;
19-
~VortexFile();
16+
VortexFile(VortexFile &&other) noexcept = default;
17+
VortexFile &operator=(VortexFile &&other) noexcept = default;
18+
~VortexFile() = default;
2019

2120
VortexFile(const VortexFile &) = delete;
2221
VortexFile &operator=(const VortexFile &) = delete;
@@ -29,10 +28,10 @@ class VortexFile {
2928
ScanBuilder CreateScanBuilder() const;
3029

3130
private:
32-
struct Impl;
33-
explicit VortexFile(std::unique_ptr<Impl> impl);
31+
explicit VortexFile(rust::Box<ffi::VortexFile> impl) : impl_(std::move(impl)) {
32+
}
3433

35-
std::unique_ptr<Impl> impl_;
34+
rust::Box<ffi::VortexFile> impl_;
3635
};
3736

3837
} // namespace vortex
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
#pragma once
5+
6+
#include <string_view>
7+
#include "dtype.hpp"
8+
#include "vortex_cxx_bridge/lib.h"
9+
10+
namespace vortex::scalar {
11+
using dtype::DType;
12+
class Scalar {
13+
public:
14+
Scalar() = delete;
15+
explicit Scalar(rust::Box<ffi::Scalar> impl) : impl_(std::move(impl)) {
16+
}
17+
Scalar(Scalar &&other) noexcept = default;
18+
Scalar &operator=(Scalar &&other) noexcept = default;
19+
~Scalar() = default;
20+
21+
Scalar(const Scalar &) = delete;
22+
Scalar &operator=(const Scalar &) = delete;
23+
24+
rust::Box<ffi::Scalar> IntoImpl() && {
25+
return std::move(impl_);
26+
}
27+
28+
private:
29+
rust::Box<ffi::Scalar> impl_;
30+
};
31+
32+
// Factory functions for creating scalar values
33+
Scalar bool_(bool value);
34+
Scalar int8(int8_t value);
35+
Scalar int16(int16_t value);
36+
Scalar int32(int32_t value);
37+
Scalar int64(int64_t value);
38+
Scalar uint8(uint8_t value);
39+
Scalar uint16(uint16_t value);
40+
Scalar uint32(uint32_t value);
41+
Scalar uint64(uint64_t value);
42+
Scalar float32(float value);
43+
Scalar float64(double value);
44+
Scalar string(std::string_view value);
45+
Scalar binary(const uint8_t *data, size_t length);
46+
/// TODO: Other Scalars are only supported by casting for now.
47+
Scalar cast(Scalar scalar, DType dtype);
48+
} // namespace vortex::scalar

0 commit comments

Comments
 (0)