2020#include < vector>
2121#include < iostream>
2222
23+ static const char * NANOARROW_TYPE_ENUM_STRING[] = {
24+ " NANOARROW_TYPE_UNINITIALIZED" ,
25+ " NANOARROW_TYPE_NA" ,
26+ " NANOARROW_TYPE_BOOL" ,
27+ " NANOARROW_TYPE_UINT8" ,
28+ " NANOARROW_TYPE_INT8" ,
29+ " NANOARROW_TYPE_UINT16" ,
30+ " NANOARROW_TYPE_INT16" ,
31+ " NANOARROW_TYPE_UINT32" ,
32+ " NANOARROW_TYPE_INT32" ,
33+ " NANOARROW_TYPE_UINT64" ,
34+ " NANOARROW_TYPE_INT64" ,
35+ " NANOARROW_TYPE_HALF_FLOAT" ,
36+ " NANOARROW_TYPE_FLOAT" ,
37+ " NANOARROW_TYPE_DOUBLE" ,
38+ " NANOARROW_TYPE_STRING" ,
39+ " NANOARROW_TYPE_BINARY" ,
40+ " NANOARROW_TYPE_FIXED_SIZE_BINARY" ,
41+ " NANOARROW_TYPE_DATE32" ,
42+ " NANOARROW_TYPE_DATE64" ,
43+ " NANOARROW_TYPE_TIMESTAMP" ,
44+ " NANOARROW_TYPE_TIME32" ,
45+ " NANOARROW_TYPE_TIME64" ,
46+ " NANOARROW_TYPE_INTERVAL_MONTHS" ,
47+ " NANOARROW_TYPE_INTERVAL_DAY_TIME" ,
48+ " NANOARROW_TYPE_DECIMAL128" ,
49+ " NANOARROW_TYPE_DECIMAL256" ,
50+ " NANOARROW_TYPE_LIST" ,
51+ " NANOARROW_TYPE_STRUCT" ,
52+ " NANOARROW_TYPE_SPARSE_UNION" ,
53+ " NANOARROW_TYPE_DENSE_UNION" ,
54+ " NANOARROW_TYPE_DICTIONARY" ,
55+ " NANOARROW_TYPE_MAP" ,
56+ " NANOARROW_TYPE_EXTENSION" ,
57+ " NANOARROW_TYPE_FIXED_SIZE_LIST" ,
58+ " NANOARROW_TYPE_DURATION" ,
59+ " NANOARROW_TYPE_LARGE_STRING" ,
60+ " NANOARROW_TYPE_LARGE_BINARY" ,
61+ " NANOARROW_TYPE_LARGE_LIST" ,
62+ " NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO"
63+ };
64+
2365#define SF_CHECK_PYTHON_ERR () \
2466 if (py::checkPyError())\
2567 {\
@@ -109,41 +151,51 @@ void CArrowChunkIterator::initColumnConverters()
109151 (*m_cRecordBatches)[m_currentBatchIndex];
110152 m_currentSchema = currentBatch->schema ();
111153
112- ArrowSchema nanoarrowSchema;
154+ nanoarrow::UniqueSchema nanoarrowSchema;
113155 // TODO: Export is not needed when using nanoarrow IPC to read schema
114- arrow::ExportSchema (*m_currentSchema, & nanoarrowSchema);
156+ arrow::ExportSchema (*m_currentSchema, nanoarrowSchema. get () );
115157
116158 for (int i = 0 ; i < currentBatch->num_columns (); i++)
117159 {
118160 std::shared_ptr<arrow::Array> columnArray = currentBatch->column (i);
119- std::shared_ptr<arrow::DataType> dt = m_currentSchema->field (i)->type ();
120- std::shared_ptr<const arrow::KeyValueMetadata> metaData =
121- m_currentSchema->field (i)->metadata ();
122161
123- ArrowSchema* nanoarrowColumnSchema = nanoarrowSchema. children [i];
124- std::shared_ptr< ArrowSchemaView> nanoarrowColumnSchemaView = std::make_shared<ArrowSchemaView>() ;
162+ nanoarrow::UniqueSchema nanoarrowColumnSchemaUnique ( nanoarrowSchema-> children [i]) ;
163+ ArrowSchemaView nanoarrowColumnSchemaView;
125164 ArrowError error;
126- ArrowSchemaViewInit (nanoarrowColumnSchemaView.get (), nanoarrowColumnSchema, &error);
165+ ArrowSchemaViewInit (&nanoarrowColumnSchemaView, nanoarrowColumnSchemaUnique.get (), &error);
166+
167+ std::shared_ptr<ArrowArray> nanoarrowUniqueColumnArrowArray = std::make_shared<ArrowArray>();
168+ std::shared_ptr<ArrowArrayView> nanoarrowUniqueColumnArrowArrayView = std::make_shared<ArrowArrayView>();
169+
170+ // nanoarrow::UniqueArray nanoarrowUniqueColumnArrowArray;
171+ // nanoarrow::UniqueArrayView nanoarrowUniqueColumnArrowArrayView;
127172
128- std::shared_ptr<ArrowArray> nanoarrowColumnArrowArray = std::make_shared<ArrowArray>();
129- std::shared_ptr<ArrowArrayView> nanoarrowColumnArrowArrayViewInstance = std::make_shared<ArrowArrayView>();
130- arrow::ExportArray (*columnArray, nanoarrowColumnArrowArray.get ());
173+ arrow::ExportArray (*columnArray, nanoarrowUniqueColumnArrowArray.get ());
131174
132175 int res = 0 ;
133- res = ArrowArrayViewInitFromSchema (nanoarrowColumnArrowArrayViewInstance .get (), nanoarrowColumnSchema , &error);
176+ res = ArrowArrayViewInitFromSchema (nanoarrowUniqueColumnArrowArrayView .get (), nanoarrowColumnSchemaUnique. get () , &error);
134177 if (res != NANOARROW_OK) {
135178 std::string errorInfo = Logger::formatString (" ArrowArrayViewInitFromSchema failure" );
136179 logger->error (__FILE__, __func__, __LINE__, errorInfo.c_str ());
137180 PyErr_SetString (PyExc_Exception, errorInfo.c_str ());
138181 }
139- res = ArrowArrayViewSetArray (nanoarrowColumnArrowArrayViewInstance.get (), nanoarrowColumnArrowArray.get (), &error);
182+
183+ res = ArrowArrayViewSetArray (nanoarrowUniqueColumnArrowArrayView.get (), nanoarrowUniqueColumnArrowArray.get (), &error);
140184 if (res != NANOARROW_OK) {
141185 std::string errorInfo = Logger::formatString (" ArrowArrayViewSetArray failure" );
142186 logger->error (__FILE__, __func__, __LINE__, errorInfo.c_str ());
143187 PyErr_SetString (PyExc_Exception, errorInfo.c_str ());
144188 }
189+
190+ // std::shared_ptr<ArrowArrayView> nanoarrowColumnArrowArrayViewInstance = std::shared_ptr<ArrowArrayView>(nanoarrowUniqueColumnArrowArrayView.get());
191+ std::shared_ptr<ArrowArrayView> nanoarrowColumnArrowArrayViewInstance = nanoarrowUniqueColumnArrowArrayView;
192+ // m_uniqueColumnArrowArrays.push_back(nanoarrowUniqueColumnArrowArray.get());
193+ // m_uniqueColumnArrowArrayViews.push_back(nanoarrowUniqueColumnArrowArrayView.get());
194+ m_arrays.push_back (nanoarrowUniqueColumnArrowArray);
195+ m_arrayViews.push_back (nanoarrowUniqueColumnArrowArrayView);
196+
145197 ArrowStringView snowflakeLogicalType;
146- const char * metadata = nanoarrowSchema. children [i]->metadata ;
198+ const char * metadata = nanoarrowSchema-> children [i]->metadata ;
147199 ArrowMetadataGetValue (metadata, ArrowCharView (" logicalType" ), &snowflakeLogicalType);
148200 SnowflakeType::Type st = SnowflakeType::snowflakeTypeFromString (
149201 std::string (snowflakeLogicalType.data , snowflakeLogicalType.size_bytes )
@@ -164,7 +216,7 @@ void CArrowChunkIterator::initColumnConverters()
164216 precision = std::stoi (precisionString.data );
165217 }
166218
167- switch (nanoarrowColumnSchemaView-> type )
219+ switch (nanoarrowColumnSchemaView. type )
168220 {
169221#define _SF_INIT_FIXED_CONVERTER (ARROW_TYPE ) \
170222 case ArrowType::ARROW_TYPE: \
@@ -222,7 +274,7 @@ void CArrowChunkIterator::initColumnConverters()
222274 std::string errorInfo = Logger::formatString (
223275 " [Snowflake Exception] unknown arrow internal data type(%d) "
224276 " for FIXED data" ,
225- dt-> id () );
277+ NANOARROW_TYPE_ENUM_STRING[nanoarrowColumnSchemaView. type ] );
226278 logger->error (__FILE__, __func__, __LINE__, errorInfo.c_str ());
227279 PyErr_SetString (PyExc_Exception, errorInfo.c_str ());
228280 return ;
@@ -295,7 +347,7 @@ void CArrowChunkIterator::initColumnConverters()
295347 ArrowMetadataGetValue (metadata, ArrowCharView (" scale" ), &scaleString);
296348 scale = std::stoi (scaleString.data );
297349 }
298- switch (nanoarrowColumnSchemaView-> type )
350+ switch (nanoarrowColumnSchemaView. type )
299351 {
300352 case NANOARROW_TYPE_INT32:
301353 case NANOARROW_TYPE_INT64:
@@ -311,7 +363,7 @@ void CArrowChunkIterator::initColumnConverters()
311363 std::string errorInfo = Logger::formatString (
312364 " [Snowflake Exception] unknown arrow internal data type(%d) "
313365 " for TIME data" ,
314- dt-> id () );
366+ NANOARROW_TYPE_ENUM_STRING[nanoarrowColumnSchemaView. type ] );
315367 logger->error (__FILE__, __func__, __LINE__, errorInfo.c_str ());
316368 PyErr_SetString (PyExc_Exception, errorInfo.c_str ());
317369 return ;
@@ -328,7 +380,7 @@ void CArrowChunkIterator::initColumnConverters()
328380 ArrowMetadataGetValue (metadata, ArrowCharView (" scale" ), &scaleString);
329381 scale = std::stoi (scaleString.data );
330382 }
331- switch (nanoarrowColumnSchemaView-> type )
383+ switch (nanoarrowColumnSchemaView. type )
332384 {
333385 case NANOARROW_TYPE_INT64:
334386 {
@@ -353,13 +405,13 @@ void CArrowChunkIterator::initColumnConverters()
353405 {
354406 m_currentBatchConverters.push_back (
355407 std::make_shared<sf::NumpyTwoFieldTimeStampNTZConverter>(
356- nanoarrowColumnArrowArrayViewInstance, nanoarrowColumnSchemaView, scale, m_context));
408+ nanoarrowColumnArrowArrayViewInstance, & nanoarrowColumnSchemaView, scale, m_context));
357409 }
358410 else
359411 {
360412 m_currentBatchConverters.push_back (
361413 std::make_shared<sf::TwoFieldTimeStampNTZConverter>(
362- nanoarrowColumnArrowArrayViewInstance, nanoarrowColumnSchemaView, scale, m_context));
414+ nanoarrowColumnArrowArrayViewInstance, & nanoarrowColumnSchemaView, scale, m_context));
363415 }
364416 break ;
365417 }
@@ -369,7 +421,7 @@ void CArrowChunkIterator::initColumnConverters()
369421 std::string errorInfo = Logger::formatString (
370422 " [Snowflake Exception] unknown arrow internal data type(%d) "
371423 " for TIMESTAMP_NTZ data" ,
372- dt-> id () );
424+ NANOARROW_TYPE_ENUM_STRING[nanoarrowColumnSchemaView. type ] );
373425 logger->error (__FILE__, __func__, __LINE__, errorInfo.c_str ());
374426 PyErr_SetString (PyExc_Exception, errorInfo.c_str ());
375427 return ;
@@ -386,7 +438,7 @@ void CArrowChunkIterator::initColumnConverters()
386438 ArrowMetadataGetValue (metadata, ArrowCharView (" scale" ), &scaleString);
387439 scale = std::stoi (scaleString.data );
388440 }
389- switch (nanoarrowColumnSchemaView-> type )
441+ switch (nanoarrowColumnSchemaView. type )
390442 {
391443 case NANOARROW_TYPE_INT64:
392444 {
@@ -400,7 +452,7 @@ void CArrowChunkIterator::initColumnConverters()
400452 {
401453 m_currentBatchConverters.push_back (
402454 std::make_shared<sf::TwoFieldTimeStampLTZConverter>(
403- nanoarrowColumnArrowArrayViewInstance, nanoarrowColumnSchemaView, scale, m_context));
455+ nanoarrowColumnArrowArrayViewInstance, & nanoarrowColumnSchemaView, scale, m_context));
404456 break ;
405457 }
406458
@@ -409,7 +461,7 @@ void CArrowChunkIterator::initColumnConverters()
409461 std::string errorInfo = Logger::formatString (
410462 " [Snowflake Exception] unknown arrow internal data type(%d) "
411463 " for TIMESTAMP_LTZ data" ,
412- dt-> id () );
464+ NANOARROW_TYPE_ENUM_STRING[nanoarrowColumnSchemaView. type ] );
413465 logger->error (__FILE__, __func__, __LINE__, errorInfo.c_str ());
414466 PyErr_SetString (PyExc_Exception, errorInfo.c_str ());
415467 return ;
@@ -420,28 +472,31 @@ void CArrowChunkIterator::initColumnConverters()
420472
421473 case SnowflakeType::Type::TIMESTAMP_TZ:
422474 {
423- int scale = metaData
424- ? std::stoi (metaData->value (metaData->FindKey (" scale" )))
425- : 9 ;
426- int byteLength =
427- metaData
428- ? std::stoi (metaData->value (metaData->FindKey (" byteLength" )))
429- : 16 ;
475+ ArrowStringView scaleString;
476+ ArrowStringView byteLengthString;
477+ int scale = 9 ;
478+ int byteLength = 16 ;
479+ if (metadata != nullptr ) {
480+ ArrowMetadataGetValue (metadata, ArrowCharView (" scale" ), &scaleString);
481+ ArrowMetadataGetValue (metadata, ArrowCharView (" byteLength" ), &byteLengthString);
482+ scale = std::stoi (scaleString.data );
483+ byteLength = std::stoi (byteLengthString.data );
484+ }
430485 switch (byteLength)
431486 {
432487 case 8 :
433488 {
434489 m_currentBatchConverters.push_back (
435490 std::make_shared<sf::TwoFieldTimeStampTZConverter>(
436- nanoarrowColumnArrowArrayViewInstance, nanoarrowColumnSchemaView, scale, m_context));
491+ nanoarrowColumnArrowArrayViewInstance, & nanoarrowColumnSchemaView, scale, m_context));
437492 break ;
438493 }
439494
440495 case 16 :
441496 {
442497 m_currentBatchConverters.push_back (
443498 std::make_shared<sf::ThreeFieldTimeStampTZConverter>(
444- nanoarrowColumnArrowArrayViewInstance, nanoarrowColumnSchemaView, scale, m_context));
499+ nanoarrowColumnArrowArrayViewInstance, & nanoarrowColumnSchemaView, scale, m_context));
445500 break ;
446501 }
447502
@@ -450,7 +505,7 @@ void CArrowChunkIterator::initColumnConverters()
450505 std::string errorInfo = Logger::formatString (
451506 " [Snowflake Exception] unknown arrow internal data type(%d) "
452507 " for TIMESTAMP_TZ data" ,
453- dt-> id () );
508+ NANOARROW_TYPE_ENUM_STRING[nanoarrowColumnSchemaView. type ] );
454509 logger->error (__FILE__, __func__, __LINE__, errorInfo.c_str ());
455510 PyErr_SetString (PyExc_Exception, errorInfo.c_str ());
456511 return ;
@@ -464,7 +519,7 @@ void CArrowChunkIterator::initColumnConverters()
464519 {
465520 std::string errorInfo = Logger::formatString (
466521 " [Snowflake Exception] unknown snowflake data type : %s" ,
467- metaData-> value (metaData-> FindKey ( " logicalType " )). c_str () );
522+ snowflakeLogicalType. data );
468523 logger->error (__FILE__, __func__, __LINE__, errorInfo.c_str ());
469524 PyErr_SetString (PyExc_Exception, errorInfo.c_str ());
470525 return ;
0 commit comments