Skip to content

Commit 483296c

Browse files
sfc-gh-stakedaankit-bhatnagar167
authored andcommitted
SNOW-86947: Implement all converter
1 parent c3db95c commit 483296c

15 files changed

+335
-148
lines changed

arrow_iterator.pyx

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,11 @@
44

55
# distutils: language = c++
66

7+
from logging import getLogger
78
from cpython.ref cimport PyObject
89

10+
logger = getLogger(__name__)
11+
912
cdef extern from "cpp/ArrowIterator/CArrowChunkIterator.hpp" namespace "sf":
1013
cdef cppclass CArrowChunkIterator:
1114
CArrowChunkIterator(PyObject* context)
@@ -18,19 +21,26 @@ cdef extern from "cpp/ArrowIterator/CArrowChunkIterator.hpp" namespace "sf":
1821

1922

2023
cdef class PyArrowChunkIterator:
21-
cdef CArrowChunkIterator * cIterator
24+
cdef CArrowChunkIterator* cIterator
25+
cdef PyObject* cret
2226

2327
def __cinit__(PyArrowChunkIterator self, object arrow_stream_reader, object arrow_context):
2428
self.cIterator = new CArrowChunkIterator(<PyObject*>arrow_context)
2529
for rb in arrow_stream_reader:
26-
self.cIterator.addRecordBatch(<PyObject *>rb)
30+
self.cIterator.addRecordBatch(<PyObject*>rb)
2731
self.cIterator.reset()
2832

2933
def __dealloc__(PyArrowChunkIterator self):
3034
del self.cIterator
3135

3236
def __next__(PyArrowChunkIterator self):
33-
ret = <object>self.cIterator.nextRow()
37+
cret = self.cIterator.nextRow()
38+
if not cret:
39+
logger.error("Internal error from CArrowChunkIterator\n")
40+
# it looks like this line can help us get into python and detect the global variable immediately
41+
# however, this log will not show up for unclear reason
42+
ret = <object>cret
43+
3444
if ret is None:
3545
raise StopIteration
3646
else:

cpp/ArrowIterator/CArrowChunkIterator.cpp

Lines changed: 55 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
#include "DateConverter.hpp"
1313
#include "TimeStampConverter.hpp"
1414
#include "TimeConverter.hpp"
15-
#include "logging.hpp"
15+
#include <string>
1616

1717
namespace sf
1818
{
@@ -50,6 +50,10 @@ PyObject* CArrowChunkIterator::nextRow()
5050
if (m_rowIndexInBatch < m_rowCountInBatch)
5151
{
5252
this->currentRowAsTuple();
53+
if (py::checkPyError())
54+
{
55+
return nullptr;
56+
}
5357
return m_latestReturnedRow.get();
5458
}
5559
else
@@ -60,11 +64,19 @@ PyObject* CArrowChunkIterator::nextRow()
6064
m_rowIndexInBatch = 0;
6165
m_rowCountInBatch = m_cRecordBatches[m_currentBatchIndex]->num_rows();
6266
this->initColumnConverters();
67+
if (py::checkPyError())
68+
{
69+
return nullptr;
70+
}
6371

6472
logger.info("Current batch index: %d, rows in current batch: %d",
6573
m_currentBatchIndex, m_rowCountInBatch);
6674

6775
this->currentRowAsTuple();
76+
if (py::checkPyError())
77+
{
78+
return nullptr;
79+
}
6880
return m_latestReturnedRow.get();
6981
}
7082
}
@@ -189,15 +201,22 @@ void CArrowChunkIterator::initColumnConverters()
189201

190202
default:
191203
{
192-
/** TODO: how to throw an exception will be decided later */
193-
logger.error("unknown arrow internal data type(%d) for FIXED data",
194-
dt->id());
195-
break;
204+
std::string errorInfo = Logger::formatString(
205+
"[Snowflake Exception] unknown arrow internal data type(%d) "
206+
"for FIXED data",
207+
dt->id());
208+
logger.error(errorInfo.c_str());
209+
PyErr_SetString(PyExc_Exception, errorInfo.c_str());
210+
return;
196211
}
197212
}
198213
break;
199214
}
200215

216+
case SnowflakeType::Type::ANY:
217+
case SnowflakeType::Type::CHAR:
218+
case SnowflakeType::Type::OBJECT:
219+
case SnowflakeType::Type::VARIANT:
201220
case SnowflakeType::Type::TEXT:
202221
{
203222
m_currentBatchConverters.push_back(
@@ -258,10 +277,13 @@ void CArrowChunkIterator::initColumnConverters()
258277

259278
default:
260279
{
261-
/** TODO: how to throw an exception will be decided later */
262-
logger.error("unknown arrow internal data type(%d) for TIME data",
263-
dt->id());
264-
break;
280+
std::string errorInfo = Logger::formatString(
281+
"[Snowflake Exception] unknown arrow internal data type(%d) "
282+
"for TIME data",
283+
dt->id());
284+
logger.error(errorInfo.c_str());
285+
PyErr_SetString(PyExc_Exception, errorInfo.c_str());
286+
return;
265287
}
266288
}
267289
break;
@@ -292,11 +314,13 @@ void CArrowChunkIterator::initColumnConverters()
292314

293315
default:
294316
{
295-
/** TODO: how to throw an exception will be decided later */
296-
logger.error(
297-
"unknown arrow internal data type(%d) for TIMESTAMP_NTZ data",
317+
std::string errorInfo = Logger::formatString(
318+
"[Snowflake Exception] unknown arrow internal data type(%d) "
319+
"for TIMESTAMP_NTZ data",
298320
dt->id());
299-
break;
321+
logger.error(errorInfo.c_str());
322+
PyErr_SetString(PyExc_Exception, errorInfo.c_str());
323+
return;
300324
}
301325
}
302326
break;
@@ -327,11 +351,13 @@ void CArrowChunkIterator::initColumnConverters()
327351

328352
default:
329353
{
330-
/** TODO: how to throw an exception will be decided later */
331-
logger.error(
332-
"unknown arrow internal data type(%d) for TIMESTAMP_LTZ data",
354+
std::string errorInfo = Logger::formatString(
355+
"[Snowflake Exception] unknown arrow internal data type(%d) "
356+
"for TIMESTAMP_LTZ data",
333357
dt->id());
334-
break;
358+
logger.error(errorInfo.c_str());
359+
PyErr_SetString(PyExc_Exception, errorInfo.c_str());
360+
return;
335361
}
336362
}
337363
break;
@@ -366,11 +392,13 @@ void CArrowChunkIterator::initColumnConverters()
366392

367393
default:
368394
{
369-
/** TODO: how to throw an exception will be decided later */
370-
logger.error(
371-
"unknown arrow internal data type(%d) for TIMESTAMP_TZ data",
395+
std::string errorInfo = Logger::formatString(
396+
"[Snowflake Exception] unknown arrow internal data type(%d) "
397+
"for TIMESTAMP_TZ data",
372398
dt->id());
373-
break;
399+
logger.error(errorInfo.c_str());
400+
PyErr_SetString(PyExc_Exception, errorInfo.c_str());
401+
return;
374402
}
375403
}
376404

@@ -379,10 +407,12 @@ void CArrowChunkIterator::initColumnConverters()
379407

380408
default:
381409
{
382-
/** TODO: how to throw an exception will be decided later */
383-
logger.error("unknown snowflake data type : %d",
384-
metaData->value(metaData->FindKey("logicalType")));
385-
break;
410+
std::string errorInfo = Logger::formatString(
411+
"[Snowflake Exception] unknown snowflake data type : %d",
412+
metaData->value(metaData->FindKey("logicalType")));
413+
logger.error(errorInfo.c_str());
414+
PyErr_SetString(PyExc_Exception, errorInfo.c_str());
415+
return;
386416
}
387417
}
388418
}

cpp/ArrowIterator/DateConverter.cpp

Lines changed: 7 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -8,31 +8,22 @@ namespace sf
88
{
99
Logger DateConverter::logger("snowflake.connector.DateConverter");
1010

11-
py::UniqueRef& DateConverter::m_pyDatetimeDate()
11+
py::UniqueRef& DateConverter::initPyDatetimeDate()
1212
{
1313
static py::UniqueRef pyDatetimeDate;
1414
if (pyDatetimeDate.empty())
1515
{
16-
py::PyUniqueLock lock;
1716
py::UniqueRef pyDatetimeModule;
18-
arrow::Status status = py::importPythonModule("datetime", pyDatetimeModule);
19-
if (!status.ok())
20-
{
21-
/** TODO : How to throw an exception will be decided later */
22-
logger.error("import python module 'datetime' failed");
23-
}
24-
status = py::importFromModule(pyDatetimeModule, "date", pyDatetimeDate);
25-
if (!status.ok())
26-
{
27-
/** TODO : How to throw an exception will be decided later */
28-
logger.error("import python module 'datetime.date' failed");
29-
}
17+
py::importPythonModule("datetime", pyDatetimeModule);
18+
py::importFromModule(pyDatetimeModule, "date", pyDatetimeDate);
19+
Py_XINCREF(pyDatetimeDate.get());
3020
}
3121
return pyDatetimeDate;
3222
}
3323

3424
DateConverter::DateConverter(std::shared_ptr<arrow::Array> array)
35-
: m_array(std::dynamic_pointer_cast<arrow::Date32Array>(array))
25+
: m_array(std::dynamic_pointer_cast<arrow::Date32Array>(array)),
26+
m_pyDatetimeDate(initPyDatetimeDate())
3627
{
3728
}
3829

@@ -41,8 +32,7 @@ PyObject* DateConverter::toPyObject(int64_t rowIndex) const
4132
if (m_array->IsValid(rowIndex))
4233
{
4334
int32_t deltaDays = m_array->Value(rowIndex);
44-
py::PyUniqueLock lock;
45-
return PyObject_CallMethod(m_pyDatetimeDate().get(), "fromordinal", "i",
35+
return PyObject_CallMethod(m_pyDatetimeDate.get(), "fromordinal", "i",
4636
epochDay + deltaDays);
4737
}
4838
else

cpp/ArrowIterator/DateConverter.hpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,16 @@ class DateConverter : public IColumnConverter
1919
PyObject* toPyObject(int64_t rowIndex) const override;
2020

2121
private:
22+
static py::UniqueRef& initPyDatetimeDate();
23+
2224
std::shared_ptr<arrow::Date32Array> m_array;
2325

2426
/** from Python Ordinal to 1970-01-01 */
2527
static constexpr int epochDay = 719163;
2628

27-
static py::UniqueRef& m_pyDatetimeDate();
28-
2929
static Logger logger;
30+
31+
py::UniqueRef& m_pyDatetimeDate;
3032
};
3133

3234
} // namespace sf

cpp/ArrowIterator/DecimalConverter.cpp

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,19 +9,23 @@
99
namespace sf
1010
{
1111

12-
py::UniqueRef& DecimalBaseConverter::m_decimalConstructor()
12+
DecimalBaseConverter::DecimalBaseConverter()
13+
: m_pyDecimalConstructor(initPyDecimalConstructor())
1314
{
14-
static py::UniqueRef decimalConstructor;
15-
if (decimalConstructor.empty())
15+
}
16+
17+
py::UniqueRef& DecimalBaseConverter::initPyDecimalConstructor()
18+
{
19+
static py::UniqueRef pyDecimalConstructor;
20+
if (pyDecimalConstructor.empty())
1621
{
17-
py::PyUniqueLock lock;
1822
py::UniqueRef decimalModule;
19-
arrow::Status status = py::importPythonModule("decimal", decimalModule);
20-
21-
status = py::importFromModule(decimalModule, "Decimal", decimalConstructor);
23+
py::importPythonModule("decimal", decimalModule);
24+
py::importFromModule(decimalModule, "Decimal", pyDecimalConstructor);
25+
Py_XINCREF(pyDecimalConstructor.get());
2226
}
2327

24-
return decimalConstructor;
28+
return pyDecimalConstructor;
2529
}
2630

2731
DecimalFromDecimalConverter::DecimalFromDecimalConverter(
@@ -43,8 +47,7 @@ PyObject* DecimalFromDecimalConverter::toPyObject(int64_t rowIndex) const
4347

4448
/** the reason we use c_str() instead of std::string here is that we may
4549
* meet some encoding problem with std::string */
46-
py::PyUniqueLock lock;
47-
return PyObject_CallFunction(m_decimalConstructor().get(), "s#",
50+
return PyObject_CallFunction(m_pyDecimalConstructor.get(), "s#",
4851
formatDecimalString.c_str(),
4952
formatDecimalString.size());
5053
}

cpp/ArrowIterator/DecimalConverter.hpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,14 @@ namespace sf
1313
class DecimalBaseConverter : public IColumnConverter
1414
{
1515
public:
16-
DecimalBaseConverter() = default;
16+
DecimalBaseConverter();
1717
virtual ~DecimalBaseConverter() = default;
1818

1919
protected:
20-
static py::UniqueRef& m_decimalConstructor();
20+
py::UniqueRef& m_pyDecimalConstructor;
21+
22+
private:
23+
static py::UniqueRef& initPyDecimalConstructor();
2124
};
2225

2326
class DecimalFromDecimalConverter : public DecimalBaseConverter
@@ -66,7 +69,7 @@ PyObject* DecimalFromIntConverter<T>::toPyObject(int64_t rowIndex) const
6669
int64_t val = m_array->Value(rowIndex);
6770

6871
py::UniqueRef decimal(
69-
PyObject_CallFunction(m_decimalConstructor().get(), "L", val));
72+
PyObject_CallFunction(m_pyDecimalConstructor.get(), "L", val));
7073
return PyObject_CallMethod(decimal.get(), "scaleb", "i", -m_scale);
7174
}
7275
else

cpp/ArrowIterator/Python/Common.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@
55

66
namespace sf
77
{
8-
/** this file is needed in the future, when the main functionality is done
9-
* e.g. our own Status data structure */
8+
9+
namespace py
10+
{
11+
// this file will be deleted if it is not used in the future
12+
}
1013
} // namespace sf

cpp/ArrowIterator/Python/Common.hpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,17 @@
55
#define PC_PYTHON_COMMON_HPP
66

77
#include <Python.h>
8+
#include "Util/macros.hpp"
89

910
namespace sf
1011
{
1112

1213
namespace py
1314
{
15+
inline bool checkPyError()
16+
{
17+
return UNLIKELY(PyErr_Occurred());
18+
}
1419

1520
/**
1621
* A RAII class to wrap the PyObject*. The semantics are like std::unique_ptr.

0 commit comments

Comments
 (0)