Skip to content

Commit 4ed393d

Browse files
committed
a working version with memory leak
1 parent 2edef14 commit 4ed393d

File tree

7 files changed

+153
-79
lines changed

7 files changed

+153
-79
lines changed

src/snowflake/connector/cpp/ArrowIterator/CArrowChunkIterator.cpp

Lines changed: 91 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,48 @@
2020
#include <vector>
2121
#include <iostream>
2222

23+
static const char* NANOARROW_TYPE_ENUM_STRING[] = {
24+
"NANOARROW_TYPE_UNINITIALIZED",
25+
"NANOARROW_TYPE_NA",
26+
"NANOARROW_TYPE_BOOL",
27+
"NANOARROW_TYPE_UINT8",
28+
"NANOARROW_TYPE_INT8",
29+
"NANOARROW_TYPE_UINT16",
30+
"NANOARROW_TYPE_INT16",
31+
"NANOARROW_TYPE_UINT32",
32+
"NANOARROW_TYPE_INT32",
33+
"NANOARROW_TYPE_UINT64",
34+
"NANOARROW_TYPE_INT64",
35+
"NANOARROW_TYPE_HALF_FLOAT",
36+
"NANOARROW_TYPE_FLOAT",
37+
"NANOARROW_TYPE_DOUBLE",
38+
"NANOARROW_TYPE_STRING",
39+
"NANOARROW_TYPE_BINARY",
40+
"NANOARROW_TYPE_FIXED_SIZE_BINARY",
41+
"NANOARROW_TYPE_DATE32",
42+
"NANOARROW_TYPE_DATE64",
43+
"NANOARROW_TYPE_TIMESTAMP",
44+
"NANOARROW_TYPE_TIME32",
45+
"NANOARROW_TYPE_TIME64",
46+
"NANOARROW_TYPE_INTERVAL_MONTHS",
47+
"NANOARROW_TYPE_INTERVAL_DAY_TIME",
48+
"NANOARROW_TYPE_DECIMAL128",
49+
"NANOARROW_TYPE_DECIMAL256",
50+
"NANOARROW_TYPE_LIST",
51+
"NANOARROW_TYPE_STRUCT",
52+
"NANOARROW_TYPE_SPARSE_UNION",
53+
"NANOARROW_TYPE_DENSE_UNION",
54+
"NANOARROW_TYPE_DICTIONARY",
55+
"NANOARROW_TYPE_MAP",
56+
"NANOARROW_TYPE_EXTENSION",
57+
"NANOARROW_TYPE_FIXED_SIZE_LIST",
58+
"NANOARROW_TYPE_DURATION",
59+
"NANOARROW_TYPE_LARGE_STRING",
60+
"NANOARROW_TYPE_LARGE_BINARY",
61+
"NANOARROW_TYPE_LARGE_LIST",
62+
"NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO"
63+
};
64+
2365
#define SF_CHECK_PYTHON_ERR() \
2466
if (py::checkPyError())\
2567
{\
@@ -109,41 +151,51 @@ void CArrowChunkIterator::initColumnConverters()
109151
(*m_cRecordBatches)[m_currentBatchIndex];
110152
m_currentSchema = currentBatch->schema();
111153

112-
ArrowSchema nanoarrowSchema;
154+
nanoarrow::UniqueSchema nanoarrowSchema;
113155
// TODO: Export is not needed when using nanoarrow IPC to read schema
114-
arrow::ExportSchema(*m_currentSchema, &nanoarrowSchema);
156+
arrow::ExportSchema(*m_currentSchema, nanoarrowSchema.get());
115157

116158
for (int i = 0; i < currentBatch->num_columns(); i++)
117159
{
118160
std::shared_ptr<arrow::Array> columnArray = currentBatch->column(i);
119-
std::shared_ptr<arrow::DataType> dt = m_currentSchema->field(i)->type();
120-
std::shared_ptr<const arrow::KeyValueMetadata> metaData =
121-
m_currentSchema->field(i)->metadata();
122161

123-
ArrowSchema* nanoarrowColumnSchema = nanoarrowSchema.children[i];
124-
std::shared_ptr<ArrowSchemaView> nanoarrowColumnSchemaView = std::make_shared<ArrowSchemaView>();
162+
nanoarrow::UniqueSchema nanoarrowColumnSchemaUnique(nanoarrowSchema->children[i]);
163+
ArrowSchemaView nanoarrowColumnSchemaView;
125164
ArrowError error;
126-
ArrowSchemaViewInit(nanoarrowColumnSchemaView.get(), nanoarrowColumnSchema, &error);
165+
ArrowSchemaViewInit(&nanoarrowColumnSchemaView, nanoarrowColumnSchemaUnique.get(), &error);
166+
167+
std::shared_ptr<ArrowArray> nanoarrowUniqueColumnArrowArray = std::make_shared<ArrowArray>();
168+
std::shared_ptr<ArrowArrayView> nanoarrowUniqueColumnArrowArrayView = std::make_shared<ArrowArrayView>();
169+
170+
//nanoarrow::UniqueArray nanoarrowUniqueColumnArrowArray;
171+
//nanoarrow::UniqueArrayView nanoarrowUniqueColumnArrowArrayView;
127172

128-
std::shared_ptr<ArrowArray> nanoarrowColumnArrowArray = std::make_shared<ArrowArray>();
129-
std::shared_ptr<ArrowArrayView> nanoarrowColumnArrowArrayViewInstance = std::make_shared<ArrowArrayView>();
130-
arrow::ExportArray(*columnArray, nanoarrowColumnArrowArray.get());
173+
arrow::ExportArray(*columnArray, nanoarrowUniqueColumnArrowArray.get());
131174

132175
int res = 0;
133-
res = ArrowArrayViewInitFromSchema(nanoarrowColumnArrowArrayViewInstance.get(), nanoarrowColumnSchema, &error);
176+
res = ArrowArrayViewInitFromSchema(nanoarrowUniqueColumnArrowArrayView.get(), nanoarrowColumnSchemaUnique.get(), &error);
134177
if(res != NANOARROW_OK) {
135178
std::string errorInfo = Logger::formatString("ArrowArrayViewInitFromSchema failure");
136179
logger->error(__FILE__, __func__, __LINE__, errorInfo.c_str());
137180
PyErr_SetString(PyExc_Exception, errorInfo.c_str());
138181
}
139-
res = ArrowArrayViewSetArray(nanoarrowColumnArrowArrayViewInstance.get(), nanoarrowColumnArrowArray.get(), &error);
182+
183+
res = ArrowArrayViewSetArray(nanoarrowUniqueColumnArrowArrayView.get(), nanoarrowUniqueColumnArrowArray.get(), &error);
140184
if(res != NANOARROW_OK) {
141185
std::string errorInfo = Logger::formatString("ArrowArrayViewSetArray failure");
142186
logger->error(__FILE__, __func__, __LINE__, errorInfo.c_str());
143187
PyErr_SetString(PyExc_Exception, errorInfo.c_str());
144188
}
189+
190+
// std::shared_ptr<ArrowArrayView> nanoarrowColumnArrowArrayViewInstance = std::shared_ptr<ArrowArrayView>(nanoarrowUniqueColumnArrowArrayView.get());
191+
std::shared_ptr<ArrowArrayView> nanoarrowColumnArrowArrayViewInstance = nanoarrowUniqueColumnArrowArrayView;
192+
// m_uniqueColumnArrowArrays.push_back(nanoarrowUniqueColumnArrowArray.get());
193+
// m_uniqueColumnArrowArrayViews.push_back(nanoarrowUniqueColumnArrowArrayView.get());
194+
m_arrays.push_back(nanoarrowUniqueColumnArrowArray);
195+
m_arrayViews.push_back(nanoarrowUniqueColumnArrowArrayView);
196+
145197
ArrowStringView snowflakeLogicalType;
146-
const char* metadata = nanoarrowSchema.children[i]->metadata;
198+
const char* metadata = nanoarrowSchema->children[i]->metadata;
147199
ArrowMetadataGetValue(metadata, ArrowCharView("logicalType"), &snowflakeLogicalType);
148200
SnowflakeType::Type st = SnowflakeType::snowflakeTypeFromString(
149201
std::string(snowflakeLogicalType.data, snowflakeLogicalType.size_bytes)
@@ -164,7 +216,7 @@ void CArrowChunkIterator::initColumnConverters()
164216
precision = std::stoi(precisionString.data);
165217
}
166218

167-
switch(nanoarrowColumnSchemaView->type)
219+
switch(nanoarrowColumnSchemaView.type)
168220
{
169221
#define _SF_INIT_FIXED_CONVERTER(ARROW_TYPE) \
170222
case ArrowType::ARROW_TYPE: \
@@ -222,7 +274,7 @@ void CArrowChunkIterator::initColumnConverters()
222274
std::string errorInfo = Logger::formatString(
223275
"[Snowflake Exception] unknown arrow internal data type(%d) "
224276
"for FIXED data",
225-
dt->id());
277+
NANOARROW_TYPE_ENUM_STRING[nanoarrowColumnSchemaView.type]);
226278
logger->error(__FILE__, __func__, __LINE__, errorInfo.c_str());
227279
PyErr_SetString(PyExc_Exception, errorInfo.c_str());
228280
return;
@@ -295,7 +347,7 @@ void CArrowChunkIterator::initColumnConverters()
295347
ArrowMetadataGetValue(metadata, ArrowCharView("scale"), &scaleString);
296348
scale = std::stoi(scaleString.data);
297349
}
298-
switch (nanoarrowColumnSchemaView->type)
350+
switch (nanoarrowColumnSchemaView.type)
299351
{
300352
case NANOARROW_TYPE_INT32:
301353
case NANOARROW_TYPE_INT64:
@@ -311,7 +363,7 @@ void CArrowChunkIterator::initColumnConverters()
311363
std::string errorInfo = Logger::formatString(
312364
"[Snowflake Exception] unknown arrow internal data type(%d) "
313365
"for TIME data",
314-
dt->id());
366+
NANOARROW_TYPE_ENUM_STRING[nanoarrowColumnSchemaView.type]);
315367
logger->error(__FILE__, __func__, __LINE__, errorInfo.c_str());
316368
PyErr_SetString(PyExc_Exception, errorInfo.c_str());
317369
return;
@@ -328,7 +380,7 @@ void CArrowChunkIterator::initColumnConverters()
328380
ArrowMetadataGetValue(metadata, ArrowCharView("scale"), &scaleString);
329381
scale = std::stoi(scaleString.data);
330382
}
331-
switch (nanoarrowColumnSchemaView->type)
383+
switch (nanoarrowColumnSchemaView.type)
332384
{
333385
case NANOARROW_TYPE_INT64:
334386
{
@@ -353,13 +405,13 @@ void CArrowChunkIterator::initColumnConverters()
353405
{
354406
m_currentBatchConverters.push_back(
355407
std::make_shared<sf::NumpyTwoFieldTimeStampNTZConverter>(
356-
nanoarrowColumnArrowArrayViewInstance, nanoarrowColumnSchemaView, scale, m_context));
408+
nanoarrowColumnArrowArrayViewInstance, &nanoarrowColumnSchemaView, scale, m_context));
357409
}
358410
else
359411
{
360412
m_currentBatchConverters.push_back(
361413
std::make_shared<sf::TwoFieldTimeStampNTZConverter>(
362-
nanoarrowColumnArrowArrayViewInstance, nanoarrowColumnSchemaView, scale, m_context));
414+
nanoarrowColumnArrowArrayViewInstance, &nanoarrowColumnSchemaView, scale, m_context));
363415
}
364416
break;
365417
}
@@ -369,7 +421,7 @@ void CArrowChunkIterator::initColumnConverters()
369421
std::string errorInfo = Logger::formatString(
370422
"[Snowflake Exception] unknown arrow internal data type(%d) "
371423
"for TIMESTAMP_NTZ data",
372-
dt->id());
424+
NANOARROW_TYPE_ENUM_STRING[nanoarrowColumnSchemaView.type]);
373425
logger->error(__FILE__, __func__, __LINE__, errorInfo.c_str());
374426
PyErr_SetString(PyExc_Exception, errorInfo.c_str());
375427
return;
@@ -386,7 +438,7 @@ void CArrowChunkIterator::initColumnConverters()
386438
ArrowMetadataGetValue(metadata, ArrowCharView("scale"), &scaleString);
387439
scale = std::stoi(scaleString.data);
388440
}
389-
switch (nanoarrowColumnSchemaView->type)
441+
switch (nanoarrowColumnSchemaView.type)
390442
{
391443
case NANOARROW_TYPE_INT64:
392444
{
@@ -400,7 +452,7 @@ void CArrowChunkIterator::initColumnConverters()
400452
{
401453
m_currentBatchConverters.push_back(
402454
std::make_shared<sf::TwoFieldTimeStampLTZConverter>(
403-
nanoarrowColumnArrowArrayViewInstance, nanoarrowColumnSchemaView, scale, m_context));
455+
nanoarrowColumnArrowArrayViewInstance, &nanoarrowColumnSchemaView, scale, m_context));
404456
break;
405457
}
406458

@@ -409,7 +461,7 @@ void CArrowChunkIterator::initColumnConverters()
409461
std::string errorInfo = Logger::formatString(
410462
"[Snowflake Exception] unknown arrow internal data type(%d) "
411463
"for TIMESTAMP_LTZ data",
412-
dt->id());
464+
NANOARROW_TYPE_ENUM_STRING[nanoarrowColumnSchemaView.type]);
413465
logger->error(__FILE__, __func__, __LINE__, errorInfo.c_str());
414466
PyErr_SetString(PyExc_Exception, errorInfo.c_str());
415467
return;
@@ -420,28 +472,31 @@ void CArrowChunkIterator::initColumnConverters()
420472

421473
case SnowflakeType::Type::TIMESTAMP_TZ:
422474
{
423-
int scale = metaData
424-
? std::stoi(metaData->value(metaData->FindKey("scale")))
425-
: 9;
426-
int byteLength =
427-
metaData
428-
? std::stoi(metaData->value(metaData->FindKey("byteLength")))
429-
: 16;
475+
ArrowStringView scaleString;
476+
ArrowStringView byteLengthString;
477+
int scale = 9;
478+
int byteLength = 16;
479+
if(metadata != nullptr) {
480+
ArrowMetadataGetValue(metadata, ArrowCharView("scale"), &scaleString);
481+
ArrowMetadataGetValue(metadata, ArrowCharView("byteLength"), &byteLengthString);
482+
scale = std::stoi(scaleString.data);
483+
byteLength = std::stoi(byteLengthString.data);
484+
}
430485
switch (byteLength)
431486
{
432487
case 8:
433488
{
434489
m_currentBatchConverters.push_back(
435490
std::make_shared<sf::TwoFieldTimeStampTZConverter>(
436-
nanoarrowColumnArrowArrayViewInstance, nanoarrowColumnSchemaView, scale, m_context));
491+
nanoarrowColumnArrowArrayViewInstance, &nanoarrowColumnSchemaView, scale, m_context));
437492
break;
438493
}
439494

440495
case 16:
441496
{
442497
m_currentBatchConverters.push_back(
443498
std::make_shared<sf::ThreeFieldTimeStampTZConverter>(
444-
nanoarrowColumnArrowArrayViewInstance, nanoarrowColumnSchemaView, scale, m_context));
499+
nanoarrowColumnArrowArrayViewInstance, &nanoarrowColumnSchemaView, scale, m_context));
445500
break;
446501
}
447502

@@ -450,7 +505,7 @@ void CArrowChunkIterator::initColumnConverters()
450505
std::string errorInfo = Logger::formatString(
451506
"[Snowflake Exception] unknown arrow internal data type(%d) "
452507
"for TIMESTAMP_TZ data",
453-
dt->id());
508+
NANOARROW_TYPE_ENUM_STRING[nanoarrowColumnSchemaView.type]);
454509
logger->error(__FILE__, __func__, __LINE__, errorInfo.c_str());
455510
PyErr_SetString(PyExc_Exception, errorInfo.c_str());
456511
return;
@@ -464,7 +519,7 @@ void CArrowChunkIterator::initColumnConverters()
464519
{
465520
std::string errorInfo = Logger::formatString(
466521
"[Snowflake Exception] unknown snowflake data type : %s",
467-
metaData->value(metaData->FindKey("logicalType")).c_str());
522+
snowflakeLogicalType.data);
468523
logger->error(__FILE__, __func__, __LINE__, errorInfo.c_str());
469524
PyErr_SetString(PyExc_Exception, errorInfo.c_str());
470525
return;

src/snowflake/connector/cpp/ArrowIterator/CArrowChunkIterator.hpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include <memory>
1212
#include <vector>
1313
#include "nanoarrow.h"
14+
#include "nanoarrow.hpp"
1415

1516
namespace sf
1617
{
@@ -49,6 +50,10 @@ class CArrowChunkIterator : public CArrowIterator
4950

5051
/** list of column converters*/
5152
std::vector<std::shared_ptr<sf::IColumnConverter>> m_currentBatchConverters;
53+
std::vector<nanoarrow::UniqueArray> m_uniqueColumnArrowArrays;
54+
std::vector<nanoarrow::UniqueArrayView> m_uniqueColumnArrowArrayViews;
55+
std::vector<std::shared_ptr<ArrowArray>> m_arrays;
56+
std::vector<std::shared_ptr<ArrowArrayView>> m_arrayViews;
5257
/** row index inside current record batch (start from 0) */
5358
int m_rowIndexInBatch;
5459

src/snowflake/connector/cpp/ArrowIterator/IntConverter.hpp

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
#include "IColumnConverter.hpp"
99
#include "nanoarrow.h"
10+
#include "nanoarrow.hpp"
1011
#include <memory>
1112

1213
namespace sf
@@ -16,16 +17,21 @@ template <typename T>
1617
class IntConverter : public IColumnConverter
1718
{
1819
public:
19-
// explicit IntConverter(std::shared_ptr<arrow::Array> array)
20-
// : m_array(std::dynamic_pointer_cast<T>(array))
21-
// {
22-
// }
20+
explicit IntConverter(std::shared_ptr<arrow::Array> array)
21+
: m_array(std::dynamic_pointer_cast<T>(array))
22+
{
23+
}
2324

2425
explicit IntConverter(std::shared_ptr<ArrowArrayView> array)
2526
: m_nanoarrowArrayView(array)
2627
{
2728
}
2829

30+
explicit IntConverter(nanoarrow::UniqueArrayView array)
31+
: m_uniqueArray(array.get())
32+
{
33+
}
34+
2935
PyObject* pyLongForward(int64_t value) const
3036
{
3137
return PyLong_FromLongLong(value);
@@ -39,6 +45,7 @@ class IntConverter : public IColumnConverter
3945
PyObject* toPyObject(int64_t rowIndex) const override;
4046

4147
private:
48+
nanoarrow::UniqueArrayView m_uniqueArray;
4249
std::shared_ptr<T> m_array;
4350
std::shared_ptr<ArrowArrayView> m_nanoarrowArrayView;
4451
};
@@ -69,11 +76,18 @@ class NumpyIntConverter : public IColumnConverter
6976
{
7077
}
7178

79+
explicit NumpyIntConverter(nanoarrow::UniqueArrayView array, PyObject * context)
80+
: m_uniqueArray(array.get()),
81+
m_context(context)
82+
{
83+
}
84+
7285
PyObject* toPyObject(int64_t rowIndex) const override;
7386

7487
private:
7588
std::shared_ptr<T> m_array;
7689
std::shared_ptr<ArrowArrayView> m_nanoarrowArrayView;
90+
nanoarrow::UniqueArrayView m_uniqueArray;
7791

7892
PyObject * m_context;
7993
};

src/snowflake/connector/cpp/ArrowIterator/StringConverter.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,11 @@ StringConverter::StringConverter(std::shared_ptr<ArrowArrayView> array)
1414
{
1515
}
1616

17+
StringConverter::StringConverter(ArrowArrayView* array)
18+
: m_uniqueArray(array)
19+
{
20+
}
21+
1722
PyObject* StringConverter::toPyObject(int64_t rowIndex) const
1823
{
1924
if(ArrowArrayViewIsNull(m_nanoarrowArrayView.get(), rowIndex)) {

src/snowflake/connector/cpp/ArrowIterator/StringConverter.hpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include "logging.hpp"
1010
#include <memory>
1111
#include "nanoarrow.h"
12+
#include "nanoarrow.hpp"
1213

1314
namespace sf
1415
{
@@ -17,11 +18,14 @@ class StringConverter : public IColumnConverter
1718
{
1819
public:
1920
explicit StringConverter(std::shared_ptr<ArrowArrayView> array);
21+
explicit StringConverter(ArrowArrayView* array);
22+
explicit StringConverter(nanoarrow::UniqueArrayView array);
2023

2124
PyObject* toPyObject(int64_t rowIndex) const override;
2225

2326
private:
2427
std::shared_ptr<ArrowArrayView> m_nanoarrowArrayView;
28+
ArrowArrayView* m_uniqueArray;
2529

2630
static Logger* logger;
2731
};

0 commit comments

Comments
 (0)