|
1 | 1 | #pragma once |
| 2 | +#define ENABLE_DUCKDB_FFI |
2 | 3 |
|
3 | 4 | #include "duckdb.hpp" |
| 5 | +#include "duckdb/common/unique_ptr.hpp" |
| 6 | + |
4 | 7 | #include "vortex.hpp" |
5 | 8 | #include "vortex_error.hpp" |
| 9 | +#include "vortex_session.hpp" |
| 10 | + |
| 11 | +namespace vortex { |
| 12 | + |
| 13 | +struct DType { |
| 14 | + explicit DType(vx_dtype *dtype) : dtype(dtype) { |
| 15 | + } |
| 16 | + |
| 17 | + static duckdb::unique_ptr<DType> FromDuckDBTable(const std::vector<duckdb_logical_type> &column_types, |
| 18 | + const std::vector<unsigned char> &column_nullable, |
| 19 | + const std::vector<const char *> &column_names) { |
| 20 | + D_ASSERT(column_names.size() == column_nullable.size()); |
| 21 | + D_ASSERT(column_names.size() == column_types.size()); |
6 | 22 |
|
7 | | -#include <duckdb/common/unique_ptr.hpp> |
| 23 | + auto dtype = Try([&](auto err) { |
| 24 | + return vx_duckdb_logical_type_to_dtype(column_types.data(), column_nullable.data(), column_names.data(), |
| 25 | + column_names.size(), err); |
| 26 | + }); |
8 | 27 |
|
9 | | -struct VortexConversionCache { |
10 | | - explicit VortexConversionCache(const unsigned long cache_id) : cache(vx_conversion_cache_create(cache_id)) { |
| 28 | + return duckdb::make_uniq<DType>(dtype); |
11 | 29 | } |
12 | 30 |
|
13 | | - ~VortexConversionCache() { |
| 31 | + ~DType() { |
| 32 | + if (dtype != nullptr) { |
| 33 | + vx_dtype_free(dtype); |
| 34 | + } |
| 35 | + } |
| 36 | + |
| 37 | + vx_dtype *dtype; |
| 38 | +}; |
| 39 | + |
| 40 | +struct ConversionCache { |
| 41 | + explicit ConversionCache(const unsigned long cache_id) : cache(vx_conversion_cache_create(cache_id)) { |
| 42 | + } |
| 43 | + |
| 44 | + ~ConversionCache() { |
14 | 45 | vx_conversion_cache_free(cache); |
15 | 46 | } |
16 | 47 |
|
17 | 48 | vx_conversion_cache *cache; |
18 | 49 | }; |
19 | 50 |
|
20 | | -struct VortexFileReader { |
21 | | - explicit VortexFileReader(vx_file_reader *file) : file(file) { |
| 51 | +struct FileReader { |
| 52 | + explicit FileReader(vx_file_reader *file) : file(file) { |
22 | 53 | } |
23 | 54 |
|
24 | | - ~VortexFileReader() { |
| 55 | + ~FileReader() { |
25 | 56 | vx_file_reader_free(file); |
26 | 57 | } |
27 | 58 |
|
28 | | - static duckdb::unique_ptr<VortexFileReader> Open(const vx_file_open_options *options) { |
29 | | - vx_error *error; |
30 | | - auto file = vx_file_open_reader(options, &error); |
31 | | - HandleError(error); |
32 | | - return duckdb::make_uniq<VortexFileReader>(file); |
| 59 | + static duckdb::unique_ptr<FileReader> Open(const vx_file_open_options *options, VortexSession &session) { |
| 60 | + auto file = Try([&](auto err) { return vx_file_open_reader(options, session.session, err); }); |
| 61 | + return duckdb::make_uniq<FileReader>(file); |
| 62 | + } |
| 63 | + |
| 64 | + vx_array_iterator *Scan(const vx_file_scan_options *options) { |
| 65 | + return Try([&](auto err) { return vx_file_reader_scan(this->file, options, err); }); |
| 66 | + } |
| 67 | + |
| 68 | + bool CanPrune(const char *filter_expression, unsigned int filter_expression_len) { |
| 69 | + return Try([&](auto err) { |
| 70 | + return vx_file_reader_can_prune(this->file, filter_expression, filter_expression_len, err); |
| 71 | + }); |
| 72 | + } |
| 73 | + |
| 74 | + uint64_t FileRowCount() { |
| 75 | + return Try([&](auto err) { return vx_file_row_count(file, err); }); |
| 76 | + } |
| 77 | + |
| 78 | + struct DType DType() { |
| 79 | + return vortex::DType(vx_file_dtype(file)); |
33 | 80 | } |
34 | 81 |
|
35 | 82 | vx_file_reader *file; |
36 | 83 | }; |
37 | 84 |
|
38 | | -struct VortexArray { |
39 | | - explicit VortexArray(vx_array *array) : array(array) { |
| 85 | +struct Array { |
| 86 | + explicit Array(vx_array *array) : array(array) { |
40 | 87 | } |
41 | 88 |
|
42 | | - ~VortexArray() { |
| 89 | + ~Array() { |
43 | 90 | if (array != nullptr) { |
44 | 91 | vx_array_free(array); |
45 | 92 | } |
46 | 93 | } |
47 | 94 |
|
48 | | - idx_t ToDuckDBVector(idx_t current_row, duckdb_data_chunk output, const VortexConversionCache *cache) const { |
49 | | - vx_error *error; |
50 | | - auto idx = vx_array_to_duckdb_chunk(array, current_row, output, cache->cache, &error); |
51 | | - HandleError(error); |
52 | | - return idx; |
| 95 | + static duckdb::unique_ptr<Array> FromDuckDBChunk(DType &dtype, duckdb::DataChunk &chunk) { |
| 96 | + auto array = Try([&](auto err) { |
| 97 | + return vx_duckdb_chunk_to_array(reinterpret_cast<duckdb_data_chunk>(&chunk), dtype.dtype, err); |
| 98 | + }); |
| 99 | + |
| 100 | + return duckdb::make_uniq<Array>(array); |
| 101 | + } |
| 102 | + |
| 103 | + idx_t ToDuckDBVector(idx_t current_row, duckdb_data_chunk output, const ConversionCache *cache) const { |
| 104 | + return Try([&](auto err) { return vx_array_to_duckdb_chunk(array, current_row, output, cache->cache, err); }); |
53 | 105 | } |
54 | 106 |
|
55 | 107 | vx_array *array; |
56 | 108 | }; |
57 | 109 |
|
58 | | -struct VortexArrayStream { |
59 | | - explicit VortexArrayStream(vx_array_stream *array_stream) : array_stream(array_stream) { |
| 110 | +struct ArrayIterator { |
| 111 | + explicit ArrayIterator(vx_array_iterator *array_iter) : array_iter(array_iter) { |
60 | 112 | } |
61 | 113 |
|
62 | | - ~VortexArrayStream() { |
63 | | - vx_array_stream_free(array_stream); |
| 114 | + ~ArrayIterator() { |
| 115 | + vx_array_iter_free(array_iter); |
64 | 116 | } |
65 | 117 |
|
66 | | - duckdb::unique_ptr<VortexArray> NextArray() const { |
67 | | - vx_error *error; |
68 | | - auto array = vx_array_stream_next(array_stream, &error); |
69 | | - HandleError(error); |
| 118 | + duckdb::unique_ptr<Array> NextArray() const { |
| 119 | + auto array = Try([&](auto err) { return vx_array_iter_next(array_iter, err); }); |
| 120 | + |
70 | 121 | if (array == nullptr) { |
71 | 122 | return nullptr; |
72 | 123 | } |
73 | | - return duckdb::make_uniq<VortexArray>(array); |
| 124 | + |
| 125 | + return duckdb::make_uniq<Array>(array); |
74 | 126 | } |
75 | 127 |
|
76 | | - vx_array_stream *array_stream; |
| 128 | + vx_array_iterator *array_iter; |
77 | 129 | }; |
| 130 | + |
| 131 | +struct ArrayStreamSink { |
| 132 | + explicit ArrayStreamSink(vx_array_sink *sink, duckdb::unique_ptr<DType> dtype) |
| 133 | + : sink(sink), dtype(std::move(dtype)) { |
| 134 | + } |
| 135 | + |
| 136 | + static duckdb::unique_ptr<ArrayStreamSink> Create(std::string file_path, duckdb::unique_ptr<DType> &&dtype) { |
| 137 | + auto sink = Try([&](auto err) { return vx_array_sink_open_file(file_path.c_str(), dtype->dtype, err); }); |
| 138 | + return duckdb::make_uniq<ArrayStreamSink>(sink, std::move(dtype)); |
| 139 | + } |
| 140 | + |
| 141 | + void PushChunk(duckdb::DataChunk &chunk) { |
| 142 | + auto array = Array::FromDuckDBChunk(*dtype, chunk); |
| 143 | + Try([&](auto err) { vx_array_sink_push(sink, array->array, err); }); |
| 144 | + } |
| 145 | + |
| 146 | + void Close() { |
| 147 | + Try([&](auto err) { vx_array_sink_close(sink, err); }); |
| 148 | + this->sink = nullptr; |
| 149 | + } |
| 150 | + |
| 151 | + ~ArrayStreamSink() { |
| 152 | + // "should dctor a sink, before closing it |
| 153 | + // If you throw during writes then the stack will be unwound and the destructor is going to be called before the |
| 154 | + // close method is invoked thus triggering following assertion failure and will clobber the exception printing |
| 155 | + // D_ASSERT(sink == nullptr); |
| 156 | + } |
| 157 | + |
| 158 | + vx_array_sink *sink; |
| 159 | + duckdb::unique_ptr<DType> dtype; |
| 160 | +}; |
| 161 | + |
| 162 | +} // namespace vortex |
0 commit comments