2424#include < duckdb.hpp>
2525#ifndef DUCKDB_AMALGAMATION
2626# include < duckdb.h>
27- # include < duckdb/common/arrow_wrapper.hpp>
27+ # include < duckdb/common/arrow/arrow_wrapper.hpp>
28+ # include < duckdb/function/table/arrow.hpp>
2829# include < duckdb/function/table_function.hpp>
2930# include < duckdb/main/connection.hpp>
3031# include < duckdb/planner/filter/conjunction_filter.hpp>
@@ -178,28 +179,26 @@ namespace {
178179
179180 arrow::Result<std::unique_ptr<duckdb::ArrowArrayStreamWrapper>>
180181 arrow_table_produce_internal (uintptr_t data,
181- std::pair<
182- std::unordered_map<idx_t , std::string>,
183- std::vector<std::string>
184- > &project_columns,
185- duckdb::TableFilterCollection *filters)
182+ duckdb::ArrowStreamParameters ¶meters)
186183 {
187184 auto garrow_table = GARROW_TABLE (reinterpret_cast <gpointer>(data));
188185 auto arrow_table = garrow_table_get_raw (garrow_table);
189186 auto dataset =
190187 std::make_shared<arrow::dataset::InMemoryDataset>(arrow_table);
191188 ARROW_ASSIGN_OR_RAISE (auto scanner_builder, dataset->NewScan ());
192189 bool have_filter =
193- filters &&
194- filters->table_filters &&
195- !filters->table_filters ->filters .empty ();
190+ parameters.filters &&
191+ !parameters.filters ->filters .empty ();
196192 if (have_filter) {
197193 ARROW_RETURN_NOT_OK (
198- scanner_builder->Filter (convert_filters (filters->table_filters ->filters ,
199- project_columns.first )));
194+ scanner_builder->Filter (
195+ convert_filters (parameters.filters ->filters ,
196+ parameters.projected_columns .projection_map )));
200197 }
201- if (!project_columns.second .empty ()) {
202- ARROW_RETURN_NOT_OK (scanner_builder->Project (project_columns.second ));
198+ if (!parameters.projected_columns .columns .empty ()) {
199+ ARROW_RETURN_NOT_OK (
200+ scanner_builder->Project (
201+ parameters.projected_columns .columns ));
203202 }
204203 ARROW_ASSIGN_OR_RAISE (auto scanner, scanner_builder->Finish ());
205204 ARROW_ASSIGN_OR_RAISE (auto reader, scanner->ToRecordBatchReader ());
@@ -212,21 +211,25 @@ namespace {
212211
213212 std::unique_ptr<duckdb::ArrowArrayStreamWrapper>
214213 arrow_table_produce (uintptr_t data,
215- std::pair<
216- std::unordered_map<idx_t , std::string>,
217- std::vector<std::string>
218- > &project_columns,
219- duckdb::TableFilterCollection *filters)
214+ duckdb::ArrowStreamParameters ¶meters)
220215 {
221- auto stream_wrapper_result =
222- arrow_table_produce_internal (data, project_columns, filters);
216+ auto stream_wrapper_result = arrow_table_produce_internal (data, parameters);
223217 if (!stream_wrapper_result.ok ()) {
224218 throw std::runtime_error (
225219 std::string (" [arrow][produce] failed to produce: " ) +
226220 stream_wrapper_result.status ().ToString ());
227221 }
228222 return std::move (*stream_wrapper_result);
229223 }
224+
225+ void
226+ arrow_table_get_schema (uintptr_t data, duckdb::ArrowSchemaWrapper &schema)
227+ {
228+ auto garrow_table = GARROW_TABLE (reinterpret_cast <gpointer>(data));
229+ auto arrow_table = garrow_table_get_raw (garrow_table);
230+ arrow::ExportSchema (*(arrow_table->schema ()),
231+ reinterpret_cast <ArrowSchema *>(&schema));
232+ }
230233}
231234
232235namespace arrow_duckdb {
@@ -245,14 +248,13 @@ namespace arrow_duckdb {
245248 {
246249 auto c_name = StringValueCStr (name);
247250 auto garrow_table = RVAL2GOBJ (arrow_table);
248- const idx_t rows_per_tuple = 1000000 ;
249251 reinterpret_cast <duckdb::Connection *>(connection)
250252 ->TableFunction (
251253 " arrow_scan" ,
252254 {
253255 duckdb::Value::POINTER (reinterpret_cast <uintptr_t >(garrow_table)),
254256 duckdb::Value::POINTER (reinterpret_cast <uintptr_t >(arrow_table_produce)),
255- duckdb::Value::UBIGINT (rows_per_tuple)
257+ duckdb::Value::POINTER ( reinterpret_cast < uintptr_t >(arrow_table_get_schema)),
256258 })
257259 ->CreateView (c_name, true , true );
258260 }
0 commit comments