Skip to content
This repository was archived by the owner on May 9, 2024. It is now read-only.

Commit 1a5aace

Browse files
ienkovich authored and alexbaden committed
Introduce ResultSetTableToken::toArrow and use it in PyHDK.
Signed-off-by: ienkovich <[email protected]>
1 parent 5858692 commit 1a5aace

File tree

8 files changed

+75
-12
lines changed

8 files changed

+75
-12
lines changed

omniscidb/ResultSet/ResultSet.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -468,6 +468,10 @@ bool ResultSet::hasColNames() const {
468468
return targets_.size() == fields_.size();
469469
}
470470

471+
const std::vector<std::string>& ResultSet::getColNames() const {
472+
return fields_;
473+
}
474+
471475
std::string ResultSet::colName(size_t col_idx) const {
472476
if (fields_.empty()) {
473477
return "col" + std::to_string(col_idx);

omniscidb/ResultSet/ResultSet.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,7 @@ class ResultSet {
289289

290290
void setColNames(std::vector<std::string> fields);
291291
bool hasColNames() const;
292+
const std::vector<std::string>& getColNames() const;
292293
std::string colName(size_t col_idx) const;
293294

294295
/**

omniscidb/ResultSetRegistry/ResultSetRegistry.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,10 @@ ResultSetTableTokenPtr ResultSetRegistry::head(const ResultSetTableToken& token,
212212
}
213213
}
214214

215+
// Copy column names to the resulting table.
216+
auto* first_rs = table->fragments.front().rs.get();
217+
new_results.front()->setColNames(first_rs->getColNames());
218+
215219
data_lock.unlock();
216220
table_lock.unlock();
217221

@@ -256,6 +260,10 @@ ResultSetTableTokenPtr ResultSetRegistry::tail(const ResultSetTableToken& token,
256260
}
257261
}
258262

263+
// Copy column names to the resulting table.
264+
auto* first_rs = table->fragments.front().rs.get();
265+
new_results.front()->setColNames(first_rs->getColNames());
266+
259267
data_lock.unlock();
260268
table_lock.unlock();
261269

omniscidb/ResultSetRegistry/ResultSetTableToken.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@
77
#include "ResultSetTableToken.h"
88
#include "ResultSetRegistry.h"
99

10+
#include "ResultSet/ArrowResultSet.h"
11+
#include "Shared/ArrowUtil.h"
12+
1013
namespace hdk {
1114

1215
ResultSetTableToken::ResultSetTableToken(TableInfoPtr tinfo,
@@ -43,4 +46,25 @@ ResultSetTableTokenPtr ResultSetTableToken::tail(size_t n) const {
4346
return registry_->tail(*this, n);
4447
}
4548

49+
std::shared_ptr<arrow::Table> ResultSetTableToken::toArrow() const {
50+
auto first_rs = resultSet(0);
51+
std::vector<std::string> col_names;
52+
for (size_t col_idx = 0; col_idx < first_rs->colCount(); ++col_idx) {
53+
col_names.push_back(first_rs->colName(col_idx));
54+
}
55+
56+
std::vector<std::shared_ptr<arrow::Table>> converted_tables;
57+
for (size_t rs_idx = 0; rs_idx < resultSetCount(); ++rs_idx) {
58+
ArrowResultSetConverter converter(resultSet(rs_idx), col_names, -1);
59+
converted_tables.push_back(converter.convertToArrowTable());
60+
}
61+
62+
if (converted_tables.size() == (size_t)1) {
63+
return converted_tables.front();
64+
}
65+
66+
ARROW_ASSIGN_OR_THROW(auto res, arrow::ConcatenateTables(converted_tables));
67+
return res;
68+
}
69+
4670
} // namespace hdk

omniscidb/ResultSetRegistry/ResultSetTableToken.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
#include "DataMgr/ChunkMetadata.h"
1212
#include "SchemaMgr/TableInfo.h"
1313

14+
#include "arrow/api.h"
15+
1416
namespace hdk {
1517

1618
class ResultSetRegistry;
@@ -55,6 +57,8 @@ class ResultSetTableToken : public std::enable_shared_from_this<ResultSetTableTo
5557
ResultSetTableTokenPtr head(size_t n) const;
5658
ResultSetTableTokenPtr tail(size_t n) const;
5759

60+
std::shared_ptr<arrow::Table> toArrow() const;
61+
5862
std::string toString() const {
5963
return "ResultSetTableToken(" + std::to_string(dbId()) + ":" +
6064
std::to_string(tableId()) + ")";

omniscidb/Tests/ResultSetArrowConversion.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -950,6 +950,26 @@ TEST(ArrowTable, FixedLenArrays) {
950950
}
951951
}
952952

953+
// Runs a query with multi-fragment execution results enabled and checks that
// the token's Arrow conversion keeps one chunk per result fragment.
TEST(ArrowTable, MultifragResult) {
  // Remember the current setting; the guard puts it back when the test exits.
  const bool saved_multifrag_flag = config().exec.enable_multifrag_execution_result;
  ScopeGuard restore_flag = [saved_multifrag_flag] {
    config().exec.enable_multifrag_execution_result = saved_multifrag_flag;
  };
  config().exec.enable_multifrag_execution_result = true;

  auto result =
      runSqlQuery("select * from test_chunked;", ExecutorDeviceType::CPU, false);
  auto token = result.getToken();
  // Expect two fragments in the result and two batches in converted table.
  CHECK_EQ(token->resultSetCount(), static_cast<size_t>(2));
  auto arrow_table = token->toArrow();
  CHECK_EQ(arrow_table->column(1)->num_chunks(), 2);
  compare_columns(table6x4_col_i64, arrow_table->column(1));
  compare_columns(table6x4_col_bi, arrow_table->column(2));
  compare_columns(table6x4_col_d, arrow_table->column(3));
}
972+
953973
int main(int argc, char* argv[]) {
954974
testing::InitGoogleTest(&argc, argv);
955975
TestHelpers::init_logger_stderr_only(argc, argv);

python/pyhdk/_sql.pxd

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ from libcpp.memory cimport shared_ptr, unique_ptr
88
from libcpp.string cimport string
99
from libcpp.vector cimport vector
1010

11+
from pyarrow.lib cimport CTable as CArrowTable
12+
1113
from pyhdk._common cimport CConfig, CType
1214
from pyhdk._storage cimport CSchemaProvider, CSchemaProviderPtr, CDataProvider, CDataMgr, CBufferProvider
1315
from pyhdk._execute cimport CExecutor, CResultSetPtr, CCompilationOptions, CExecutionOptions, CTargetMetaInfo
@@ -50,6 +52,13 @@ cdef extern from "omniscidb/QueryEngine/RelAlgDagBuilder.h":
5052
cdef cppclass CRelAlgDagBuilder "RelAlgDagBuilder"(CQueryDag):
5153
CRelAlgDagBuilder(const string&, int, CSchemaProviderPtr, shared_ptr[CConfig]) except +
5254

55+
# Declares the C++ class hdk::ResultSetTableToken to Cython under the name
# CResultSetTableToken, exposing only rowCount() and toArrow().
cdef extern from "omniscidb/ResultSetRegistry/ResultSetTableToken.h":
56+
cdef cppclass CResultSetTableToken "hdk::ResultSetTableToken":
57+
size_t rowCount()
58+
# `except +` makes Cython turn a C++ exception thrown here into a Python exception.
shared_ptr[CArrowTable] toArrow() except +
59+
60+
# Shared pointer to a const token.
ctypedef shared_ptr[const CResultSetTableToken] CResultSetTableTokenPtr
61+
5362
cdef extern from "omniscidb/QueryEngine/Descriptors/RelAlgExecutionDescriptor.h":
5463
cdef cppclass CExecutionResult "ExecutionResult":
5564
CExecutionResult()
@@ -60,6 +69,7 @@ cdef extern from "omniscidb/QueryEngine/Descriptors/RelAlgExecutionDescriptor.h"
6069
const vector[CTargetMetaInfo]& getTargetsMeta()
6170
string getExplanation()
6271
const string& tableName()
72+
CResultSetTableTokenPtr getToken()
6373

6474
CExecutionResult head(size_t) except +
6575
CExecutionResult tail(size_t) except +

python/pyhdk/_sql.pyx

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -74,20 +74,12 @@ cdef extract_array_value(const CArrayTargetValue *array, const CType *c_type):
7474

7575
cdef class ExecutionResult:
7676
def row_count(self):
77-
cdef shared_ptr[CResultSet] c_res
78-
c_res = self.c_result.getRows()
79-
return int(c_res.get().rowCount())
77+
cdef CResultSetTableTokenPtr c_token = self.c_result.getToken()
78+
return int(c_token.get().rowCount())
8079

8180
def to_arrow(self):
82-
cdef vector[string] col_names
83-
cdef vector[CTargetMetaInfo].const_iterator it = self.c_result.getTargetsMeta().const_begin()
84-
85-
while it != self.c_result.getTargetsMeta().const_end():
86-
col_names.push_back(dereference(it).get_resname())
87-
preincrement(it)
88-
89-
cdef unique_ptr[CArrowResultSetConverter] converter = make_unique[CArrowResultSetConverter](self.c_result.getRows(), col_names, -1)
90-
cdef shared_ptr[CArrowTable] at = converter.get().convertToArrowTable()
81+
cdef CResultSetTableTokenPtr c_token = self.c_result.getToken()
82+
cdef shared_ptr[CArrowTable] at = c_token.get().toArrow()
9183
return pyarrow_wrap_table(at)
9284

9385
def to_explain_str(self):

0 commit comments

Comments
 (0)