1919
2020#include " iceberg/util/conversions.h"
2121
22- #include < cctype >
22+ #include < array >
2323#include < cstring>
24- #include < ranges>
24+ #include < span>
25+ #include < string>
2526
26- #include " iceberg/exception.h"
27- #include " iceberg/type.h"
2827#include " iceberg/util/endian.h"
2928#include " iceberg/util/macros.h"
3029
3130namespace iceberg {
3231
33- // / \brief Write a value in little-endian format to the buffer .
32+ // / \brief Write a value in little-endian format and return as vector .
3433template <EndianConvertible T>
35- void WriteLittleEndian ( std::vector<uint8_t >& buffer, T value) {
34+ std::vector<uint8_t > WriteLittleEndian ( T value) {
3635 value = ToLittleEndian (value);
3736 const auto * bytes = reinterpret_cast <const uint8_t *>(&value);
38- buffer.insert (buffer.end (), bytes, bytes + sizeof (T));
37+ std::vector<uint8_t > result;
38+ result.insert (result.end (), bytes, bytes + sizeof (T));
39+ return result;
3940}
4041
4142// / \brief Read a value in little-endian format from the data.
@@ -58,78 +59,63 @@ Result<std::vector<uint8_t>> Conversions::ToBytes(const PrimitiveType& type,
5859
5960 switch (type_id) {
6061 case TypeId::kBoolean : {
61- // 0x00 for false, 0x01 for true
6262 result.push_back (std::get<bool >(value) ? 0x01 : 0x00 );
6363 return result;
6464 }
6565
6666 case TypeId::kInt : {
67- // Stored as 4-byte little-endian
68- WriteLittleEndian (result, std::get<int32_t >(value));
67+ result = WriteLittleEndian (std::get<int32_t >(value));
6968 return result;
7069 }
7170
7271 case TypeId::kDate : {
73- // Stores days from 1970-01-01 in a 4-byte little-endian int
74- WriteLittleEndian (result, std::get<int32_t >(value));
72+ result = WriteLittleEndian (std::get<int32_t >(value));
7573 return result;
7674 }
7775
7876 case TypeId::kLong : {
79- // Stored as 8-byte little-endian
80- WriteLittleEndian (result, std::get<int64_t >(value));
77+ result = WriteLittleEndian (std::get<int64_t >(value));
8178 return result;
8279 }
8380
8481 case TypeId::kTime : {
85- // Stores microseconds from midnight in an 8-byte little-endian long
86- WriteLittleEndian (result, std::get<int64_t >(value));
82+ result = WriteLittleEndian (std::get<int64_t >(value));
8783 return result;
8884 }
8985
9086 case TypeId::kTimestamp : {
91- // Stores microseconds from 1970-01-01 00:00:00.000000 in an 8-byte little-endian
92- // long
93- WriteLittleEndian (result, std::get<int64_t >(value));
87+ result = WriteLittleEndian (std::get<int64_t >(value));
9488 return result;
9589 }
9690
9791 case TypeId::kTimestampTz : {
98- // Stores microseconds from 1970-01-01 00:00:00.000000 UTC in an 8-byte
99- // little-endian long
100- WriteLittleEndian (result, std::get<int64_t >(value));
92+ result = WriteLittleEndian (std::get<int64_t >(value));
10193 return result;
10294 }
10395
10496 case TypeId::kFloat : {
105- // Stored as 4-byte little-endian
106- WriteLittleEndian (result, std::get<float >(value));
97+ result = WriteLittleEndian (std::get<float >(value));
10798 return result;
10899 }
109100
110101 case TypeId::kDouble : {
111- // Stored as 8-byte little-endian
112- WriteLittleEndian (result, std::get<double >(value));
102+ result = WriteLittleEndian (std::get<double >(value));
113103 return result;
114104 }
115105
116106 case TypeId::kString : {
117- // UTF-8 bytes (without length)
118107 const auto & str = std::get<std::string>(value);
119108 result.insert (result.end (), str.begin (), str.end ());
120109 return result;
121110 }
122111
123112 case TypeId::kBinary : {
124- // Binary value (without length)
125113 const auto & binary_data = std::get<std::vector<uint8_t >>(value);
126114 result.insert (result.end (), binary_data.begin (), binary_data.end ());
127115 return result;
128116 }
129117
130118 case TypeId::kFixed : {
131- // Fixed(L) - Binary value, could be stored in std::array<uint8_t, 16> or
132- // std::vector<uint8_t>
133119 if (std::holds_alternative<std::array<uint8_t , 16 >>(value)) {
134120 const auto & fixed_bytes = std::get<std::array<uint8_t , 16 >>(value);
135121 result.insert (result.end (), fixed_bytes.begin (), fixed_bytes.end ());
@@ -144,13 +130,7 @@ Result<std::vector<uint8_t>> Conversions::ToBytes(const PrimitiveType& type,
144130 }
145131 return result;
146132 }
147-
148- case TypeId::kUuid : {
149- // 16-byte big-endian value
150- const auto & uuid_bytes = std::get<std::array<uint8_t , 16 >>(value);
151- WriteBigEndian16 (result, uuid_bytes);
152- return result;
153- }
133+ // TODO(Li Feiyang): Add support for UUID and Decimal
154134
155135 default :
156136 return NotSupported (" Serialization for type {} is not supported" , type.ToString ());
@@ -174,9 +154,8 @@ Result<std::vector<uint8_t>> Conversions::ToBytes(const Literal& literal) {
174154
175155Result<Literal::Value> Conversions::FromBytes (const PrimitiveType& type,
176156 std::span<const uint8_t > data) {
177- // Empty data represents null value
178157 if (data.empty ()) {
179- return Literal::Value{std::monostate{}} ;
158+ return InvalidArgument ( " Data cannot be empty " ) ;
180159 }
181160
182161 const auto type_id = type.type_id ();
@@ -187,7 +166,6 @@ Result<Literal::Value> Conversions::FromBytes(const PrimitiveType& type,
187166 return InvalidArgument (" Boolean requires 1 byte, got {}" , data.size ());
188167 }
189168 ICEBERG_ASSIGN_OR_RAISE (auto value, ReadLittleEndian<uint8_t >(data));
190- // 0x00 for false, non-zero byte for true
191169 return Literal::Value{static_cast <bool >(value != 0x00 )};
192170 }
193171
@@ -215,20 +193,14 @@ Result<Literal::Value> Conversions::FromBytes(const PrimitiveType& type,
215193 case TypeId::kTimestampTz : {
216194 int64_t value;
217195 if (data.size () == 8 ) {
218- // Standard 8-byte long
219196 ICEBERG_ASSIGN_OR_RAISE (auto long_value, ReadLittleEndian<int64_t >(data));
220197 value = long_value;
221198 } else if (data.size () == 4 ) {
222199 // Type was promoted from int to long
223200 ICEBERG_ASSIGN_OR_RAISE (auto int_value, ReadLittleEndian<int32_t >(data));
224201 value = static_cast <int64_t >(int_value);
225202 } else {
226- auto type_name_view = ToString (type_id);
227- std::string type_name{type_name_view};
228- if (!type_name.empty ()) {
229- type_name[0 ] = static_cast <char >(std::toupper (type_name[0 ]));
230- }
231- return InvalidArgument (" {} requires 4 or 8 bytes, got {}" , type_name,
203+ return InvalidArgument (" {} requires 4 or 8 bytes, got {}" , ToString (type_id),
232204 data.size ());
233205 }
234206
@@ -246,7 +218,6 @@ Result<Literal::Value> Conversions::FromBytes(const PrimitiveType& type,
246218
247219 case TypeId::kDouble : {
248220 if (data.size () == 8 ) {
249- // Standard 8-byte double
250221 ICEBERG_ASSIGN_OR_RAISE (auto double_value, ReadLittleEndian<double >(data));
251222 return Literal::Value{double_value};
252223 } else if (data.size () == 4 ) {
@@ -276,27 +247,7 @@ Result<Literal::Value> Conversions::FromBytes(const PrimitiveType& type,
276247 return Literal::Value{std::vector<uint8_t >(data.begin (), data.end ())};
277248 }
278249 }
279-
280- case TypeId::kUuid : {
281- if (data.size () != 16 ) {
282- return InvalidArgument (" UUID requires 16 bytes, got {}" , data.size ());
283- }
284- ICEBERG_ASSIGN_OR_RAISE (auto uuid_value, ReadBigEndian16 (data));
285- return Literal::Value{uuid_value};
286- }
287-
288- case TypeId::kDecimal : {
289- if (data.size () > 16 ) {
290- return InvalidArgument (
291- " Decimal data too large, maximum 16 bytes supported, got {}" , data.size ());
292- }
293-
294- std::array<uint8_t , 16 > decimal_bytes{};
295- // Copy data to the end of the array (big-endian format for decimals)
296- // This handles variable-length decimals by right-aligning them
297- std::ranges::copy (data, decimal_bytes.end () - data.size ());
298- return Literal::Value{decimal_bytes};
299- }
250+ // TODO(Li Feiyang): Add support for UUID and Decimal
300251
301252 default :
302253 return NotSupported (" Deserialization for type {} is not supported" ,
@@ -314,10 +265,10 @@ Result<Literal> Conversions::FromBytes(std::shared_ptr<PrimitiveType> type,
314265
315266 // If we got a null value (monostate), create a null Literal
316267 if (std::holds_alternative<std::monostate>(value)) {
317- return Literal::Null (type);
268+ return Literal::Null (std::move ( type) );
318269 }
319270
320- return Literal (value, type);
271+ return Literal (std::move ( value), std::move ( type) );
321272}
322273
323274} // namespace iceberg
0 commit comments