Skip to content

Commit 85d621c

Browse files
SNOW-991012 Cleanup some C++ code warnings and performance issues (#1828)
Co-authored-by: Mark Keller <[email protected]>
1 parent 990df44 commit 85d621c

File tree

10 files changed

+97
-79
lines changed

10 files changed

+97
-79
lines changed

DESCRIPTION.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ Source code is also available at: https://github.com/snowflakedb/snowflake-conne
1111
- v3.7.0(TBD)
1212

1313
- Added a new boolean parameter `force_return_table` to `SnowflakeCursor.fetch_arrow_all` to force returning `pyarrow.Table` in case of zero rows.
14+
- Cleanup some C++ code warnings and performance issues.
1415

1516
- v3.6.0(December 09,2023)
1617

setup.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,8 @@ def build_extension(self, ext):
7575
if options["debug"]:
7676
ext.extra_compile_args.append("-g")
7777
ext.extra_link_args.append("-g")
78+
ext.extra_compile_args.append("-O0")
79+
ext.extra_link_args.append("-O0")
7880
current_dir = os.getcwd()
7981

8082
if ext.name == "snowflake.connector.nanoarrow_arrow_iterator":

src/snowflake/connector/nanoarrow_cpp/ArrowIterator/CArrowChunkIterator.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -40,15 +40,15 @@ CArrowChunkIterator::CArrowChunkIterator(PyObject* context, char* arrow_bytes, i
4040
m_columnCount, m_useNumpy);
4141
}
4242

43-
std::shared_ptr<ReturnVal> CArrowChunkIterator::next()
43+
ReturnVal CArrowChunkIterator::next()
4444
{
4545
m_rowIndexInBatch++;
4646

4747
if (m_rowIndexInBatch < m_rowCountInBatch)
4848
{
4949
this->createRowPyObject();
5050
SF_CHECK_PYTHON_ERR()
51-
return std::make_shared<ReturnVal>(m_latestReturnedRow.get(), nullptr);
51+
return ReturnVal(m_latestReturnedRow.get(), nullptr);
5252
}
5353
else
5454
{
@@ -69,13 +69,13 @@ std::shared_ptr<ReturnVal> CArrowChunkIterator::next()
6969
this->createRowPyObject();
7070
SF_CHECK_PYTHON_ERR()
7171

72-
return std::make_shared<ReturnVal>(m_latestReturnedRow.get(), nullptr);
72+
return ReturnVal(m_latestReturnedRow.get(), nullptr);
7373
}
7474
}
7575

7676
/** It looks like no one will decrease the ref of this Py_None, so we don't
7777
* increment the ref count here */
78-
return std::make_shared<ReturnVal>(Py_None, nullptr);
78+
return ReturnVal(Py_None, nullptr);
7979
}
8080

8181
void CArrowChunkIterator::createRowPyObject()

src/snowflake/connector/nanoarrow_cpp/ArrowIterator/CArrowChunkIterator.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ class CArrowChunkIterator : public CArrowIterator
3737
/**
3838
* @return a python tuple object which contains all data in current row
3939
*/
40-
std::shared_ptr<ReturnVal> next() override;
40+
ReturnVal next() override;
4141

4242
protected:
4343
/**

src/snowflake/connector/nanoarrow_cpp/ArrowIterator/CArrowIterator.cpp

Lines changed: 44 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,48 @@
1010
namespace sf
1111
{
1212

13+
const char* const NANOARROW_TYPE_ENUM_STRING[] = {
14+
"NANOARROW_TYPE_UNINITIALIZED",
15+
"NANOARROW_TYPE_NA",
16+
"NANOARROW_TYPE_BOOL",
17+
"NANOARROW_TYPE_UINT8",
18+
"NANOARROW_TYPE_INT8",
19+
"NANOARROW_TYPE_UINT16",
20+
"NANOARROW_TYPE_INT16",
21+
"NANOARROW_TYPE_UINT32",
22+
"NANOARROW_TYPE_INT32",
23+
"NANOARROW_TYPE_UINT64",
24+
"NANOARROW_TYPE_INT64",
25+
"NANOARROW_TYPE_HALF_FLOAT",
26+
"NANOARROW_TYPE_FLOAT",
27+
"NANOARROW_TYPE_DOUBLE",
28+
"NANOARROW_TYPE_STRING",
29+
"NANOARROW_TYPE_BINARY",
30+
"NANOARROW_TYPE_FIXED_SIZE_BINARY",
31+
"NANOARROW_TYPE_DATE32",
32+
"NANOARROW_TYPE_DATE64",
33+
"NANOARROW_TYPE_TIMESTAMP",
34+
"NANOARROW_TYPE_TIME32",
35+
"NANOARROW_TYPE_TIME64",
36+
"NANOARROW_TYPE_INTERVAL_MONTHS",
37+
"NANOARROW_TYPE_INTERVAL_DAY_TIME",
38+
"NANOARROW_TYPE_DECIMAL128",
39+
"NANOARROW_TYPE_DECIMAL256",
40+
"NANOARROW_TYPE_LIST",
41+
"NANOARROW_TYPE_STRUCT",
42+
"NANOARROW_TYPE_SPARSE_UNION",
43+
"NANOARROW_TYPE_DENSE_UNION",
44+
"NANOARROW_TYPE_DICTIONARY",
45+
"NANOARROW_TYPE_MAP",
46+
"NANOARROW_TYPE_EXTENSION",
47+
"NANOARROW_TYPE_FIXED_SIZE_LIST",
48+
"NANOARROW_TYPE_DURATION",
49+
"NANOARROW_TYPE_LARGE_STRING",
50+
"NANOARROW_TYPE_LARGE_BINARY",
51+
"NANOARROW_TYPE_LARGE_LIST",
52+
"NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO"
53+
};
54+
1355
Logger* CArrowIterator::logger = new Logger("snowflake.connector.CArrowIterator");
1456

1557
CArrowIterator::CArrowIterator(char* arrow_bytes, int64_t arrow_bytes_size)
@@ -53,10 +95,10 @@ CArrowIterator::CArrowIterator(char* arrow_bytes, int64_t arrow_bytes_size)
5395
logger->debug(__FILE__, __func__, __LINE__, "Arrow BatchSize: %d", m_ipcArrowArrayVec.size());
5496
}
5597

56-
std::shared_ptr<ReturnVal> CArrowIterator::checkInitializationStatus()
98+
ReturnVal CArrowIterator::checkInitializationStatus()
5799
{
58100
SF_CHECK_PYTHON_ERR()
59-
return std::make_shared<ReturnVal>(nullptr, nullptr);
101+
return ReturnVal(nullptr, nullptr);
60102
}
61103

62104
}

src/snowflake/connector/nanoarrow_cpp/ArrowIterator/CArrowIterator.hpp

Lines changed: 13 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -13,49 +13,6 @@
1313
#include <string>
1414
#include <vector>
1515

16-
17-
static const char* NANOARROW_TYPE_ENUM_STRING[] = {
18-
"NANOARROW_TYPE_UNINITIALIZED",
19-
"NANOARROW_TYPE_NA",
20-
"NANOARROW_TYPE_BOOL",
21-
"NANOARROW_TYPE_UINT8",
22-
"NANOARROW_TYPE_INT8",
23-
"NANOARROW_TYPE_UINT16",
24-
"NANOARROW_TYPE_INT16",
25-
"NANOARROW_TYPE_UINT32",
26-
"NANOARROW_TYPE_INT32",
27-
"NANOARROW_TYPE_UINT64",
28-
"NANOARROW_TYPE_INT64",
29-
"NANOARROW_TYPE_HALF_FLOAT",
30-
"NANOARROW_TYPE_FLOAT",
31-
"NANOARROW_TYPE_DOUBLE",
32-
"NANOARROW_TYPE_STRING",
33-
"NANOARROW_TYPE_BINARY",
34-
"NANOARROW_TYPE_FIXED_SIZE_BINARY",
35-
"NANOARROW_TYPE_DATE32",
36-
"NANOARROW_TYPE_DATE64",
37-
"NANOARROW_TYPE_TIMESTAMP",
38-
"NANOARROW_TYPE_TIME32",
39-
"NANOARROW_TYPE_TIME64",
40-
"NANOARROW_TYPE_INTERVAL_MONTHS",
41-
"NANOARROW_TYPE_INTERVAL_DAY_TIME",
42-
"NANOARROW_TYPE_DECIMAL128",
43-
"NANOARROW_TYPE_DECIMAL256",
44-
"NANOARROW_TYPE_LIST",
45-
"NANOARROW_TYPE_STRUCT",
46-
"NANOARROW_TYPE_SPARSE_UNION",
47-
"NANOARROW_TYPE_DENSE_UNION",
48-
"NANOARROW_TYPE_DICTIONARY",
49-
"NANOARROW_TYPE_MAP",
50-
"NANOARROW_TYPE_EXTENSION",
51-
"NANOARROW_TYPE_FIXED_SIZE_LIST",
52-
"NANOARROW_TYPE_DURATION",
53-
"NANOARROW_TYPE_LARGE_STRING",
54-
"NANOARROW_TYPE_LARGE_BINARY",
55-
"NANOARROW_TYPE_LARGE_LIST",
56-
"NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO"
57-
};
58-
5916
#define SF_CHECK_ARROW_RC(arrow_status, format_string, ...) \
6017
if (arrow_status != NANOARROW_OK) \
6118
{ \
@@ -96,19 +53,29 @@ static const char* NANOARROW_TYPE_ENUM_STRING[] = {
9653
Py_XDECREF(type);\
9754
Py_XDECREF(traceback);\
9855
\
99-
return std::make_shared<ReturnVal>(nullptr, m_currentPyException.get());\
56+
return ReturnVal(nullptr, m_currentPyException.get());\
10057
}
10158

10259
namespace sf
10360
{
10461

62+
extern const char* const NANOARROW_TYPE_ENUM_STRING[];
63+
10564
/**
10665
* A simple struct to contain return data back cython.
10766
* PyObject would be nullptr if failed and cause string will be populated
67+
*
68+
* Note that `ReturnVal` does not own these pointers, so they should
69+
* not be decref'ed by the receiver.
10870
*/
10971
class ReturnVal
11072
{
11173
public:
74+
ReturnVal() :
75+
successObj(nullptr), exception(nullptr)
76+
{
77+
}
78+
11279
ReturnVal(PyObject * obj, PyObject *except) :
11380
successObj(obj), exception(except)
11481
{
@@ -133,12 +100,12 @@ class CArrowIterator
133100
/**
134101
* @return a python object which might be current row or an Arrow Table
135102
*/
136-
virtual std::shared_ptr<ReturnVal> next() = 0;
103+
virtual ReturnVal next() = 0;
137104
virtual std::vector<uintptr_t> getArrowArrayPtrs() { return {}; };
138105
virtual std::vector<uintptr_t> getArrowSchemaPtrs() { return {}; };
139106

140107
/** check whether initialization succeeded or encountered error */
141-
std::shared_ptr<ReturnVal> checkInitializationStatus();
108+
ReturnVal checkInitializationStatus();
142109

143110
protected:
144111
static Logger* logger;

src/snowflake/connector/nanoarrow_cpp/ArrowIterator/CArrowTableIterator.cpp

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
#include "SnowflakeType.hpp"
77
#include "Python/Common.hpp"
88
#include "Util/time.hpp"
9-
#include <memory>
109
#include <string>
1110
#include <cstring>
1211
#include <vector>
@@ -198,16 +197,16 @@ m_convert_number_to_decimal(number_to_decimal)
198197
PyArg_Parse(tz.get(), "s", &m_timezone);
199198
}
200199

201-
std::shared_ptr<ReturnVal> CArrowTableIterator::next()
200+
ReturnVal CArrowTableIterator::next()
202201
{
203202
bool firstDone = this->convertRecordBatchesToTable_nanoarrow();
204203
if (firstDone && !m_ipcArrowArrayVec.empty())
205204
{
206-
return std::make_shared<ReturnVal>(Py_True, nullptr);
205+
return ReturnVal(Py_True, nullptr);
207206
}
208207
else
209208
{
210-
return std::make_shared<ReturnVal>(Py_None, nullptr);
209+
return ReturnVal(Py_None, nullptr);
211210
}
212211
}
213212

@@ -305,10 +304,10 @@ void CArrowTableIterator::convertScaledFixedNumberColumnToDecimalColumn_nanoarro
305304
SF_CHECK_ARROW_RC(returnCode, "[Snowflake Exception] error appending null to arrow array, error code: %d", returnCode);
306305
} else {
307306
auto originalVal = ArrowArrayViewGetIntUnsafe(columnArray, rowIdx);
308-
std::shared_ptr<ArrowDecimal> arrowDecimal = std::make_shared<ArrowDecimal>();
309-
ArrowDecimalInit(arrowDecimal.get(), 128, 38, scale);
310-
ArrowDecimalSetInt(arrowDecimal.get(), originalVal);
311-
returnCode = ArrowArrayAppendDecimal(newArray, arrowDecimal.get());
307+
ArrowDecimal arrowDecimal;
308+
ArrowDecimalInit(&arrowDecimal, 128, 38, scale);
309+
ArrowDecimalSetInt(&arrowDecimal, originalVal);
310+
returnCode = ArrowArrayAppendDecimal(newArray, &arrowDecimal);
312311
SF_CHECK_ARROW_RC(returnCode, "[Snowflake Exception] error appending decimal to arrow array, error code: %d", returnCode);
313312
}
314313
}

src/snowflake/connector/nanoarrow_cpp/ArrowIterator/CArrowTableIterator.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ class CArrowTableIterator : public CArrowIterator
4242
/**
4343
* @return an arrow table containing all data in all record batches
4444
*/
45-
std::shared_ptr<ReturnVal> next() override;
45+
ReturnVal next() override;
4646
std::vector<uintptr_t> getArrowArrayPtrs() override;
4747
std::vector<uintptr_t> getArrowSchemaPtrs() override;
4848

src/snowflake/connector/nanoarrow_cpp/ArrowIterator/Python/Common.hpp

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ class UniqueRef
4141
{
4242
}
4343

44-
explicit UniqueRef(UniqueRef&& other) : UniqueRef(other.release())
44+
UniqueRef(UniqueRef&& other) : UniqueRef(other.release())
4545
{
4646
}
4747

@@ -56,10 +56,19 @@ class UniqueRef
5656
reset();
5757
}
5858

59+
// Make this `UniqueRef` take ownership of the reference
60+
// `pyObj`, if it is not null. If this `UniqueRef` previously
61+
// owned a reference, it is released (via decrementing the
62+
// refcount).
63+
//
64+
// This provides a similar guarantee as `Py_SETREF()` or
65+
// `Py_CLEAR()`, by setting the new reference before
66+
// decrementing the old reference.
5967
void reset(PyObject* pyObj = nullptr)
6068
{
61-
Py_XDECREF(m_pyObj);
69+
PyObject *toDelete = m_pyObj;
6270
m_pyObj = pyObj;
71+
Py_XDECREF(toDelete);
6372
}
6473

6574
PyObject* release() noexcept

src/snowflake/connector/nanoarrow_cpp/ArrowIterator/nanoarrow_arrow_iterator.pyx

Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
from cpython.ref cimport PyObject
1010
from cython.operator cimport dereference
1111
from libc.stdint cimport int64_t, uint8_t, uintptr_t
12-
from libcpp.memory cimport shared_ptr
1312
from libcpp.vector cimport vector
1413

1514
INSTALLED_PYARROW = False
@@ -38,8 +37,8 @@ cdef extern from "CArrowIterator.hpp" namespace "sf":
3837
PyObject * exception;
3938

4039
cdef cppclass CArrowIterator:
41-
shared_ptr[ReturnVal] next() except +;
42-
shared_ptr[ReturnVal] checkInitializationStatus() except +;
40+
ReturnVal next() except +;
41+
ReturnVal checkInitializationStatus() except +;
4342
vector[uintptr_t] getArrowArrayPtrs();
4443
vector[uintptr_t] getArrowSchemaPtrs();
4544

@@ -87,7 +86,6 @@ cdef class PyArrowIterator(EmptyPyArrowIterator):
8786
cdef object context
8887
cdef CArrowIterator* cIterator
8988
cdef str unit
90-
cdef shared_ptr[ReturnVal] cret
9189
cdef object use_dict_result
9290
cdef object cursor
9391
cdef vector[uintptr_t] nanoarrow_Table
@@ -159,33 +157,33 @@ cdef class PyArrowRowIterator(PyArrowIterator):
159157
self.arrow_bytes_size,
160158
<PyObject *> self.use_numpy
161159
)
162-
self.cret = self.cIterator.checkInitializationStatus()
163-
if self.cret.get().exception:
160+
cdef ReturnVal cret = self.cIterator.checkInitializationStatus()
161+
if cret.exception:
164162
Error.errorhandler_wrapper(
165163
self.cursor.connection if self.cursor is not None else None,
166164
self.cursor,
167165
OperationalError,
168166
{
169-
'msg': f'Failed to open arrow stream: {str(<object>self.cret.get().exception)}',
167+
'msg': f'Failed to open arrow stream: {str(<object>cret.exception)}',
170168
'errno': ER_FAILED_TO_READ_ARROW_STREAM
171169
})
172170
snow_logger.debug(msg=f"Batches read: {self.cIterator.getArrowArrayPtrs().size()}", path_name=__file__, func_name="__cinit__")
173171

174172
def __next__(self):
175-
self.cret = self.cIterator.next()
176-
if not self.cret.get().successObj:
173+
cdef ReturnVal cret = self.cIterator.next()
174+
if not cret.successObj:
177175
Error.errorhandler_wrapper(
178176
self.cursor.connection if self.cursor is not None else None,
179177
self.cursor,
180178
InterfaceError,
181179
{
182-
'msg': f'Failed to convert current row, cause: {<object>self.cret.get().exception}',
180+
'msg': f'Failed to convert current row, cause: {<object>cret.exception}',
183181
'errno': ER_FAILED_TO_CONVERT_ROW_TO_PYTHON_TYPE
184182
}
185183
)
186184
# it looks like this line can help us get into python and detect the global variable immediately
187185
# however, this log will not show up for unclear reason
188-
ret = <object>self.cret.get().successObj
186+
ret = <object>cret.successObj
189187

190188
if ret is None:
191189
raise StopIteration
@@ -225,17 +223,17 @@ cdef class PyArrowTableIterator(PyArrowIterator):
225223
self.arrow_bytes_size,
226224
self.number_to_decimal,
227225
)
228-
self.cret = self.cIterator.checkInitializationStatus()
229-
if self.cret.get().exception:
226+
cdef ReturnVal cret = self.cIterator.checkInitializationStatus()
227+
if cret.exception:
230228
Error.errorhandler_wrapper(
231229
self.cursor.connection if self.cursor is not None else None,
232230
self.cursor,
233231
OperationalError,
234232
{
235-
'msg': f'Failed to open arrow stream: {str(<object>self.cret.get().exception)}',
233+
'msg': f'Failed to open arrow stream: {str(<object>cret.exception)}',
236234
'errno': ER_FAILED_TO_READ_ARROW_STREAM
237235
})
238-
self.cret = self.cIterator.next()
236+
cdef ReturnVal cret2 = self.cIterator.next()
239237
self.nanoarrow_Table = self.cIterator.getArrowArrayPtrs()
240238
self.nanoarrow_Schema = self.cIterator.getArrowSchemaPtrs()
241239
self.pyarrow_table = pyarrow.Table.from_batches(

0 commit comments

Comments
 (0)