Skip to content

Commit b3798c3

Browse files
SNOW-2187756 - Next Gen Numbers - DECFLOAT support for Libsfc (#958)
1 parent eda6cfc commit b3798c3

File tree

8 files changed

+810
-86
lines changed

8 files changed

+810
-86
lines changed

cpp/lib/ArrowChunkIterator.cpp

Lines changed: 156 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22
#include <cstdlib>
33
#include <ctime>
44
#include <string>
5+
#include <sstream>
6+
#include <iomanip>
7+
#include <vector>
58

69
#include "../logger/SFLogger.hpp"
710
#include "snowflake/platform.h"
@@ -154,7 +157,7 @@ bool ArrowChunkIterator::isCellNull(int32 col)
154157
}
155158
case (arrow::Type::type::STRUCT):
156159
{
157-
return (m_columns[col].arrowTimestamp->sse->IsNull(m_currRowIndexInBatch));
160+
return m_columns[col].arrowStructArray->IsNull(m_currRowIndexInBatch);
158161
break;
159162
}
160163
default:
@@ -346,26 +349,20 @@ ArrowChunkIterator::getCellAsInt64(size_t colIdx, int64 * out_data, bool rawData
346349
return SF_STATUS_SUCCESS;
347350
}
348351

349-
if ((!rawData) && (SF_DB_TYPE_FIXED == m_metadata[colIdx].type) && (m_metadata[colIdx].scale != 0))
352+
if (!rawData && ((SF_DB_TYPE_FIXED == m_metadata[colIdx].type && m_metadata[colIdx].scale != 0)
353+
|| SF_DB_TYPE_DECFLOAT == m_metadata[colIdx].type))
350354
{
351-
float64 floatData;
352-
SF_STATUS status = getCellAsFloat64(colIdx, &floatData);
355+
std::string val;
356+
SF_STATUS status = getCellAsString(colIdx, val);
353357
if (SF_STATUS_SUCCESS != status)
354358
{
355359
m_parent->setError(SF_STATUS_ERROR_CONVERSION_FAILURE,
356360
"Cannot convert value to int64.");
357361
return status;
358362
}
359363

360-
if (floatData > static_cast<float64>(SF_INT64_MAX) || floatData < static_cast<float64>(SF_INT64_MIN))
361-
{
362-
m_parent->setError(SF_STATUS_ERROR_OUT_OF_RANGE,
363-
"Value out of range for int64.");
364-
return SF_STATUS_ERROR_OUT_OF_RANGE;
365-
}
366-
367-
*out_data = (int64)floatData;
368-
return SF_STATUS_SUCCESS;
364+
// No decimal-point handling here; convert the string directly to int64.
365+
return Conversion::Arrow::StringToInt64(val, out_data);
369366
}
370367

371368
int64 data;
@@ -483,6 +480,7 @@ ArrowChunkIterator::getCellAsUint32(size_t colIdx, uint32 * out_data)
483480
{
484481
m_parent->setError(SF_STATUS_ERROR_OUT_OF_RANGE,
485482
"Value out of range for uint32.");
483+
return SF_STATUS_ERROR_OUT_OF_RANGE;
486484
}
487485

488486
*out_data = static_cast<int32>(rawData);
@@ -553,6 +551,24 @@ ArrowChunkIterator::getCellAsUint64(size_t colIdx, uint64 * out_data)
553551
status = Conversion::Arrow::StringToUint64(strData, out_data);
554552
break;
555553
}
554+
case arrow::Type::type::STRUCT:
555+
{
556+
if (SF_DB_TYPE_DECFLOAT == m_metadata[colIdx].type)
557+
{
558+
std::string strData;
559+
getCellAsString(colIdx, strData);
560+
status = Conversion::Arrow::StringToUint64(strData, out_data);
561+
}
562+
else
563+
{
564+
CXX_LOG_ERROR("Unsupported conversion from %d to UINT64.", m_arrowColumnDataTypes[colIdx]);
565+
m_parent->setError(SF_STATUS_ERROR_CONVERSION_FAILURE,
566+
"No valid conversion to UINT64 from data type.");
567+
return SF_STATUS_ERROR_CONVERSION_FAILURE;
568+
}
569+
break;
570+
}
571+
556572
case arrow::Type::type::STRING:
557573
{
558574
std::string strData = m_columns[colIdx].arrowString->GetString(m_currRowIndexInBatch);
@@ -636,6 +652,16 @@ ArrowChunkIterator::getCellAsFloat32(size_t colIdx, float32 * out_data)
636652
return status;
637653
}
638654
*out_data = (float32)data;
655+
if (*out_data == INFINITY || *out_data == -INFINITY)
656+
{
657+
return SF_STATUS_ERROR_OUT_OF_RANGE;
658+
}
659+
660+
if (*out_data == 0.0f && data != 0.0)
661+
{
662+
return SF_STATUS_ERROR_CONVERSION_FAILURE;
663+
}
664+
639665
return SF_STATUS_SUCCESS;
640666
}
641667
case arrow::Type::type::DECIMAL:
@@ -644,6 +670,23 @@ ArrowChunkIterator::getCellAsFloat32(size_t colIdx, float32 * out_data)
644670
status = Conversion::Arrow::StringToFloat(strData, out_data);
645671
break;
646672
}
673+
case arrow::Type::type::STRUCT:
674+
{
675+
if (SF_DB_TYPE_DECFLOAT == m_metadata[colIdx].type)
676+
{
677+
std::string strData;
678+
getCellAsString(colIdx, strData);
679+
status = Conversion::Arrow::StringToFloat(strData, out_data);
680+
}
681+
else
682+
{
683+
CXX_LOG_ERROR("Unsupported conversion from %d to FLOAT32.", m_arrowColumnDataTypes[colIdx]);
684+
m_parent->setError(SF_STATUS_ERROR_CONVERSION_FAILURE,
685+
"No valid conversion to float32 from data type.");
686+
return SF_STATUS_ERROR_CONVERSION_FAILURE;
687+
}
688+
break;
689+
}
647690
case arrow::Type::type::STRING:
648691
{
649692
std::string strData = m_columns[colIdx].arrowString->GetString(m_currRowIndexInBatch);
@@ -741,6 +784,24 @@ ArrowChunkIterator::getCellAsFloat64(size_t colIdx, float64 * out_data)
741784
status = Conversion::Arrow::StringToDouble(strData, out_data);
742785
break;
743786
}
787+
case arrow::Type::type::STRUCT:
788+
{
789+
if (SF_DB_TYPE_DECFLOAT == m_metadata[colIdx].type)
790+
{
791+
std::string strData;
792+
getCellAsString(colIdx, strData);
793+
status = Conversion::Arrow::StringToDouble(strData, out_data);
794+
}
795+
else
796+
{
797+
CXX_LOG_ERROR("Unsupported conversion from %d to FLOAT64.", m_arrowColumnDataTypes[colIdx]);
798+
m_parent->setError(SF_STATUS_ERROR_CONVERSION_FAILURE,
799+
"No valid conversion to float64 from data type.");
800+
return SF_STATUS_ERROR_CONVERSION_FAILURE;
801+
}
802+
break;
803+
804+
}
744805
case arrow::Type::type::STRING:
745806
{
746807
std::string strData = m_columns[colIdx].arrowString->GetString(m_currRowIndexInBatch);
@@ -821,6 +882,76 @@ SF_STATUS STDCALL ArrowChunkIterator::getCellAsString(
821882
return SF_STATUS_SUCCESS;
822883
}
823884

885+
if (SF_DB_TYPE_DECFLOAT == snowType)
886+
{
887+
if (m_columns[colIdx].arrowStructArray->num_fields() == 2)
888+
{
889+
int16_t exponent = std::static_pointer_cast<arrow::Int16Array>(
890+
m_columns[colIdx].arrowStructArray->field(0))->Value(m_currRowIndexInBatch);
891+
int len = 0;
892+
const uint8_t* value = std::static_pointer_cast<arrow::BinaryArray>(
893+
m_columns[colIdx].arrowStructArray->field(1))->GetValue(m_currRowIndexInBatch, &len);
894+
if ((len < 0) || (len > 16))
895+
{
896+
CXX_LOG_ERROR("sf::arrowChunkIterator::getDecimal::Possible invalid data, row index in batch: %d, col: %d, length: %d", m_currRowIndexInBatch, (int)colIdx, len);
897+
return SF_STATUS_ERROR_OUT_OF_BOUNDS;
898+
}
899+
900+
std::vector<uint8_t> rawBytes(value, value + len);
901+
arrow::Result<arrow::Decimal128> res = arrow::Decimal128::FromBigEndian(rawBytes.data(), len);
902+
if (!res.ok()) {
903+
CXX_LOG_ERROR("sf::arrowChunkIterator::getDecimal::Failed to convert from big endian to Decimal128, row index in batch: %d, col: %d", m_currRowIndexInBatch, (int)colIdx);
904+
return SF_STATUS_ERROR_CONVERSION_FAILURE;
905+
};
906+
907+
arrow::Decimal128 dec = *res;
908+
std::string digits = dec.ToString(0);
909+
bool isPositive = true;
910+
if (digits[0] == '-') {
911+
isPositive = false;
912+
digits = digits.substr(1);
913+
}
914+
915+
int mantissaDigits = static_cast<int>(digits.size());
916+
917+
// value = (digits) * 10^exponent => normalized exponent = digits-1 + exponent
918+
int sciExp = (mantissaDigits - 1) + exponent;
919+
if (sciExp >= 38 || (exponent < 0 && (exponent) <= -38)) {
920+
// The number is too large or too small for plain notation; use scientific notation.
921+
std::string m = digits.size() > 1
922+
? std::string(1, digits[0]) + '.' + digits.substr(1)
923+
: std::string(1, digits[0]);
924+
925+
outString = (isPositive ? "" : "-") + m + (sciExp ? "e" + std::to_string(sciExp) : "");
926+
}
927+
else
928+
{
929+
if (exponent > 0) {
930+
digits.append(exponent, '0');
931+
}
932+
else
933+
{
934+
int pointPos = mantissaDigits + exponent;
935+
if (pointPos != mantissaDigits)
936+
{
937+
if (pointPos > 0)
938+
{
939+
digits.insert(pointPos, 1, '.');
940+
}
941+
else
942+
{
943+
std::string leadingZeros(-pointPos, '0');
944+
digits = "0." + leadingZeros + digits;
945+
}
946+
}
947+
}
948+
outString = (isPositive ? "" : "-") + digits;
949+
}
950+
return SF_STATUS_SUCCESS;
951+
}
952+
return SF_STATUS_ERROR_CONVERSION_FAILURE;
953+
}
954+
824955
switch (m_arrowColumnDataTypes[colIdx])
825956
{
826957
case arrow::Type::type::STRING:
@@ -1095,14 +1226,18 @@ void ArrowChunkIterator::initColumnChunks()
10951226
switch (dt->id())
10961227
{
10971228
case arrow::Type::STRUCT: {
1098-
auto values = std::static_pointer_cast<arrow::StructArray>(columnArray);
1099-
std::shared_ptr<ArrowTimestampArray> ts(new ArrowTimestampArray);
1100-
ts->sse = std::static_pointer_cast<arrow::Int64Array>(values->field(0)).get();
1101-
if (values->num_fields() > 1)
1102-
ts->fs = std::static_pointer_cast<arrow::Int32Array>(values->field(1)).get();
1103-
if (values->num_fields() > 2)
1104-
ts->tz = std::static_pointer_cast<arrow::Int32Array>(values->field(2)).get();
1105-
arrowcol.arrowTimestamp = ts;
1229+
arrowcol.arrowStructArray = std::static_pointer_cast<arrow::StructArray>(columnArray).get();
1230+
if (m_metadata[i].type != SF_DB_TYPE_DECFLOAT)
1231+
{
1232+
auto values = std::static_pointer_cast<arrow::StructArray>(columnArray);
1233+
std::shared_ptr<ArrowTimestampArray> ts(new ArrowTimestampArray);
1234+
ts->sse = std::static_pointer_cast<arrow::Int64Array>(values->field(0)).get();
1235+
if (values->num_fields() > 1)
1236+
ts->fs = std::static_pointer_cast<arrow::Int32Array>(values->field(1)).get();
1237+
if (values->num_fields() > 2)
1238+
ts->tz = std::static_pointer_cast<arrow::Int32Array>(values->field(2)).get();
1239+
arrowcol.arrowTimestamp = ts;
1240+
}
11061241
m_columns.emplace_back(arrowcol);
11071242
break;
11081243
}

cpp/lib/ArrowChunkIterator.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ namespace Client
5959
struct ArrowColumn
6060
{
6161
// The array data of the columns. Each instance of ArrowColumn should only have one populated.
62+
arrow::StructArray * arrowStructArray;
6263
arrow::BinaryArray * arrowBinary;
6364
arrow::BooleanArray * arrowBoolean;
6465
arrow::Date32Array * arrowDate32;

cpp/lib/DataConversion.cpp

Lines changed: 76 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -92,31 +92,96 @@ SF_STATUS STDCALL StringToInteger(
9292
return SF_STATUS_SUCCESS;
9393
}
9494

95-
SF_STATUS STDCALL StringToUint64(
95+
SF_STATUS STDCALL StringToInt64(
9696
const std::string& str_data,
97-
uint64 * out_data
97+
int64* out_data
9898
)
9999
{
100100
size_t charsProcessed;
101-
uint64 convData;
102-
try
103-
{
104-
convData = static_cast<int64>(std::stoull(str_data, &charsProcessed, 10));
101+
int64 convData = 0;
102+
if (str_data.find('e') != std::string::npos) {
103+
float64 v;
104+
SF_STATUS status = StringToDouble(str_data, &v);
105+
if (status != SF_STATUS_SUCCESS) {
106+
return status;
107+
}
108+
109+
if (v > static_cast<float64>(SF_INT64_MAX) || v < static_cast<float64>(SF_INT64_MIN))
110+
{
111+
CXX_LOG_ERROR("Conversion from STRING to INT64 failed %s.", str_data.c_str());
112+
return SF_STATUS_ERROR_OUT_OF_RANGE;
113+
}
105114
}
106-
catch (const std::out_of_range& e)
115+
else
107116
{
108-
CXX_LOG_ERROR("Conversion from STRING to UINT64 failed %s.", str_data.c_str());
109-
return SF_STATUS_ERROR_OUT_OF_RANGE;
117+
try
118+
{
119+
convData = static_cast<int64>(std::stoll(str_data, &charsProcessed, 10));
120+
}
121+
catch (const std::out_of_range& e)
122+
{
123+
CXX_LOG_ERROR("Conversion from STRING to INT64 failed %s.", str_data.c_str());
124+
return SF_STATUS_ERROR_OUT_OF_RANGE;
125+
}
126+
catch (...)
127+
{
128+
CXX_LOG_ERROR("Conversion from STRING to INT64 failed %s.", str_data.c_str());
129+
return SF_STATUS_ERROR_CONVERSION_FAILURE;
130+
}
110131
}
111-
catch (...)
132+
// All checks passed. Proceed to write to buffer.
133+
*out_data = convData;
134+
if ((*out_data == 0 && str_data != "0"))
112135
{
113-
CXX_LOG_ERROR("Conversion from STRING to UINT64 failed %s.", str_data.c_str());
114136
return SF_STATUS_ERROR_CONVERSION_FAILURE;
115137
}
138+
return SF_STATUS_SUCCESS;
139+
}
116140

141+
SF_STATUS STDCALL StringToUint64(
142+
const std::string& str_data,
143+
uint64* out_data
144+
)
145+
{
146+
size_t charsProcessed;
147+
uint64 convData = 0;
148+
if (str_data.find('e') != std::string::npos) {
149+
float64 v;
150+
SF_STATUS status = StringToDouble(str_data, &v);
151+
if (status != SF_STATUS_SUCCESS) {
152+
return status;
153+
}
154+
if (v > static_cast<float64>(SF_UINT64_MAX) || v < 0)
155+
{
156+
CXX_LOG_ERROR("Conversion from STRING to UINT64 failed %s.", str_data.c_str());
157+
return SF_STATUS_ERROR_OUT_OF_RANGE;
158+
}
159+
}
160+
else
161+
{
162+
try
163+
{
164+
convData = static_cast<uint64>(std::stoull(str_data, &charsProcessed, 10));
165+
}
166+
catch (const std::out_of_range& e)
167+
{
168+
CXX_LOG_ERROR("Conversion from STRING to UINT64 failed %s.", str_data.c_str());
169+
return SF_STATUS_ERROR_OUT_OF_RANGE;
170+
}
171+
catch (...)
172+
{
173+
CXX_LOG_ERROR("Conversion from STRING to UINT64 failed %s.", str_data.c_str());
174+
return SF_STATUS_ERROR_CONVERSION_FAILURE;
175+
}
176+
}
117177
// All checks passed. Proceed to write to buffer.
118178
*out_data = convData;
179+
if ((*out_data == 0 && str_data != "0"))
180+
{
181+
return SF_STATUS_ERROR_CONVERSION_FAILURE;
182+
}
119183
return SF_STATUS_SUCCESS;
184+
120185
}
121186

122187
SF_STATUS STDCALL StringToDouble(

cpp/lib/DataConversion.hpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,19 @@ namespace Arrow
121121
const std::string& str_data,
122122
uint64 * out_data);
123123

124+
/**
125+
* Function to convert a string value into an int64 value.
126+
*
127+
* @param str_data The string needs to be converted
128+
* @param out_data The buffer to which to write the converted value.
129+
*
130+
* @return 0 if successful, otherwise an error is returned.
131+
*/
132+
SF_STATUS STDCALL StringToInt64(
133+
const std::string& str_data,
134+
int64* out_data);
135+
136+
124137
/**
125138
* Function to convert a string value into a float64 value.
126139
*

0 commit comments

Comments
 (0)