Skip to content

Commit 6402f18

Browse files
committed
finish
1 parent 48d392e commit 6402f18

File tree

7 files changed

+402
-286
lines changed

7 files changed

+402
-286
lines changed

src/iceberg/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ set(ICEBERG_SOURCES
5151
util/murmurhash3_internal.cc
5252
util/timepoint.cc
5353
util/gzip_internal.cc
54+
util/conversions.cc
5455
util/literal_format.cc)
5556

5657
set(ICEBERG_STATIC_BUILD_INTERFACE_LIBS)

src/iceberg/expression/literal.cc

Lines changed: 3 additions & 286 deletions
Original file line numberDiff line numberDiff line change
@@ -25,24 +25,11 @@
2525
#include <utility>
2626

2727
#include "iceberg/exception.h"
28-
#include "iceberg/util/endian.h"
28+
#include "iceberg/util/conversions.h"
2929
#include "iceberg/util/literal_format.h"
30-
#include "iceberg/util/macros.h"
3130

3231
namespace iceberg {
3332

34-
/// \brief LiteralSerializer handles serialization/deserialization operations for Literal.
35-
/// This is an internal implementation class.
36-
class LiteralSerializer {
37-
public:
38-
/// \brief Serialize a literal value to binary format.
39-
static Result<std::vector<uint8_t>> ToBytes(const Literal& literal);
40-
41-
/// \brief Deserialize binary data to a literal value.
42-
static Result<Literal> FromBytes(std::span<const uint8_t> data,
43-
const std::shared_ptr<PrimitiveType>& type);
44-
};
45-
4633
/// \brief LiteralCaster handles type casting operations for Literal.
4734
/// This is an internal implementation class.
4835
class LiteralCaster {
@@ -168,11 +155,11 @@ Literal Literal::Binary(std::vector<uint8_t> value) {
168155

169156
/// \brief Build a Literal of the given primitive type from raw bytes.
///
/// Thin forwarder: no decoding is done here, the call is delegated as-is.
///
/// \param data Bytes to decode.
/// \param type Primitive type the bytes should be decoded as.
/// \return The result of the delegated FromBytes call.
Result<Literal> Literal::Deserialize(std::span<const uint8_t> data,
170157
std::shared_ptr<PrimitiveType> type) {
171-
return LiteralSerializer::FromBytes(data, type);
158+
// Argument order is (type, data) here, the reverse of the
// LiteralSerializer::FromBytes(data, type) call this line replaces.
return Conversions::FromBytes(type, data);
172159
}
173160

174161
/// \brief Encode this literal as a byte vector.
///
/// Thin forwarder: passes *this to the ToBytes helper and returns its result
/// unchanged (Conversions::ToBytes on the added side of this diff,
/// LiteralSerializer::ToBytes on the removed side).
///
/// \return The result of the delegated ToBytes call.
Result<std::vector<uint8_t>> Literal::Serialize() const {
175-
return LiteralSerializer::ToBytes(*this);
162+
return Conversions::ToBytes(*this);
176163
}
177164

178165
// Getters
@@ -380,274 +367,4 @@ Result<Literal> LiteralCaster::CastTo(const Literal& literal,
380367
target_type->ToString());
381368
}
382369

383-
// LiteralSerializer implementation
384-
385-
Result<std::vector<uint8_t>> LiteralSerializer::ToBytes(const Literal& literal) {
386-
// Cannot serialize special values
387-
if (literal.IsAboveMax()) {
388-
return NotSupported("Cannot serialize AboveMax");
389-
}
390-
if (literal.IsBelowMin()) {
391-
return NotSupported("Cannot serialize BelowMin");
392-
}
393-
394-
std::vector<uint8_t> result;
395-
396-
if (literal.IsNull()) {
397-
return NotSupported("Cannot serialize null");
398-
}
399-
400-
const auto& value = literal.value();
401-
const auto type_id = literal.type()->type_id();
402-
403-
switch (type_id) {
404-
case TypeId::kBoolean: {
405-
// 0x00 for false, 0x01 for true
406-
result.push_back(std::get<bool>(value) ? 0x01 : 0x00);
407-
return result;
408-
}
409-
410-
case TypeId::kInt: {
411-
// Stored as 4-byte little-endian
412-
util::WriteLittleEndian(result, std::get<int32_t>(value));
413-
return result;
414-
}
415-
416-
case TypeId::kDate: {
417-
// Stores days from 1970-01-01 in a 4-byte little-endian int
418-
util::WriteLittleEndian(result, std::get<int32_t>(value));
419-
return result;
420-
}
421-
422-
case TypeId::kLong: {
423-
// Stored as 8-byte little-endian
424-
util::WriteLittleEndian(result, std::get<int64_t>(value));
425-
return result;
426-
}
427-
428-
case TypeId::kTime: {
429-
// Stores microseconds from midnight in an 8-byte little-endian long
430-
util::WriteLittleEndian(result, std::get<int64_t>(value));
431-
return result;
432-
}
433-
434-
case TypeId::kTimestamp: {
435-
// Stores microseconds from 1970-01-01 00:00:00.000000 in an 8-byte little-endian
436-
// long
437-
util::WriteLittleEndian(result, std::get<int64_t>(value));
438-
return result;
439-
}
440-
441-
case TypeId::kTimestampTz: {
442-
// Stores microseconds from 1970-01-01 00:00:00.000000 UTC in an 8-byte
443-
// little-endian long
444-
util::WriteLittleEndian(result, std::get<int64_t>(value));
445-
return result;
446-
}
447-
448-
case TypeId::kFloat: {
449-
// Stored as 4-byte little-endian
450-
util::WriteLittleEndian(result, std::get<float>(value));
451-
return result;
452-
}
453-
454-
case TypeId::kDouble: {
455-
// Stored as 8-byte little-endian
456-
util::WriteLittleEndian(result, std::get<double>(value));
457-
return result;
458-
}
459-
460-
case TypeId::kString: {
461-
// UTF-8 bytes (without length)
462-
const auto& str = std::get<std::string>(value);
463-
result.insert(result.end(), str.begin(), str.end());
464-
return result;
465-
}
466-
467-
case TypeId::kBinary: {
468-
// Binary value (without length)
469-
const auto& binary_data = std::get<std::vector<uint8_t>>(value);
470-
result.insert(result.end(), binary_data.begin(), binary_data.end());
471-
return result;
472-
}
473-
474-
case TypeId::kFixed: {
475-
// Fixed(L) - Binary value, could be stored in std::array<uint8_t, 16> or
476-
// std::vector<uint8_t>
477-
if (std::holds_alternative<std::array<uint8_t, 16>>(value)) {
478-
const auto& fixed_bytes = std::get<std::array<uint8_t, 16>>(value);
479-
result.insert(result.end(), fixed_bytes.begin(), fixed_bytes.end());
480-
} else if (std::holds_alternative<std::vector<uint8_t>>(value)) {
481-
result = std::get<std::vector<uint8_t>>(value);
482-
} else {
483-
std::string actual_type = std::visit(
484-
[](auto&& arg) -> std::string { return typeid(arg).name(); }, value);
485-
486-
return InvalidArgument("Invalid value type for Fixed literal, got type: {}",
487-
actual_type);
488-
}
489-
return result;
490-
}
491-
492-
case TypeId::kUuid: {
493-
// 16-byte big-endian value
494-
const auto& uuid_bytes = std::get<std::array<uint8_t, 16>>(value);
495-
util::WriteBigEndian16(result, uuid_bytes);
496-
return result;
497-
}
498-
499-
default:
500-
return NotSupported("Serialization for type {} is not supported",
501-
literal.type()->ToString());
502-
}
503-
}
504-
505-
Result<Literal> LiteralSerializer::FromBytes(std::span<const uint8_t> data,
506-
const std::shared_ptr<PrimitiveType>& type) {
507-
if (!type) {
508-
return InvalidArgument("Type cannot be null");
509-
}
510-
511-
// Empty data represents null value
512-
if (data.empty()) {
513-
return Literal::Null(type);
514-
}
515-
516-
const auto type_id = type->type_id();
517-
518-
switch (type_id) {
519-
case TypeId::kBoolean: {
520-
if (data.size() != 1) {
521-
return InvalidArgument("Boolean requires 1 byte, got {}", data.size());
522-
}
523-
ICEBERG_ASSIGN_OR_RAISE(auto value, util::ReadLittleEndian<uint8_t>(data));
524-
// 0x00 for false, non-zero byte for true
525-
return Literal::Boolean(value != 0x00);
526-
}
527-
528-
case TypeId::kInt: {
529-
if (data.size() != sizeof(int32_t)) {
530-
return InvalidArgument("Int requires {} bytes, got {}", sizeof(int32_t),
531-
data.size());
532-
}
533-
ICEBERG_ASSIGN_OR_RAISE(auto value, util::ReadLittleEndian<int32_t>(data));
534-
return Literal::Int(value);
535-
}
536-
537-
case TypeId::kDate: {
538-
if (data.size() != sizeof(int32_t)) {
539-
return InvalidArgument("Date requires {} bytes, got {}", sizeof(int32_t),
540-
data.size());
541-
}
542-
ICEBERG_ASSIGN_OR_RAISE(auto value, util::ReadLittleEndian<int32_t>(data));
543-
return Literal::Date(value);
544-
}
545-
546-
case TypeId::kLong:
547-
case TypeId::kTime:
548-
case TypeId::kTimestamp:
549-
case TypeId::kTimestampTz: {
550-
int64_t value;
551-
if (data.size() == 8) {
552-
// Standard 8-byte long
553-
ICEBERG_ASSIGN_OR_RAISE(auto long_value, util::ReadLittleEndian<int64_t>(data));
554-
value = long_value;
555-
} else if (data.size() == 4) {
556-
// Type was promoted from int to long
557-
ICEBERG_ASSIGN_OR_RAISE(auto int_value, util::ReadLittleEndian<int32_t>(data));
558-
value = static_cast<int64_t>(int_value);
559-
} else {
560-
const char* type_name = [type_id]() {
561-
switch (type_id) {
562-
case TypeId::kLong:
563-
return "Long";
564-
case TypeId::kTime:
565-
return "Time";
566-
case TypeId::kTimestamp:
567-
return "Timestamp";
568-
case TypeId::kTimestampTz:
569-
return "TimestampTz";
570-
default:
571-
return "Unknown";
572-
}
573-
}();
574-
return InvalidArgument("{} requires 4 or 8 bytes, got {}", type_name,
575-
data.size());
576-
}
577-
578-
return Literal(value, type);
579-
}
580-
581-
case TypeId::kFloat: {
582-
if (data.size() != sizeof(float)) {
583-
return InvalidArgument("Float requires {} bytes, got {}", sizeof(float),
584-
data.size());
585-
}
586-
ICEBERG_ASSIGN_OR_RAISE(auto value, util::ReadLittleEndian<float>(data));
587-
return Literal::Float(value);
588-
}
589-
590-
case TypeId::kDouble: {
591-
if (data.size() == 8) {
592-
// Standard 8-byte double
593-
ICEBERG_ASSIGN_OR_RAISE(auto double_value, util::ReadLittleEndian<double>(data));
594-
return Literal::Double(double_value);
595-
} else if (data.size() == 4) {
596-
// Type was promoted from float to double
597-
ICEBERG_ASSIGN_OR_RAISE(auto float_value, util::ReadLittleEndian<float>(data));
598-
return Literal::Double(static_cast<double>(float_value));
599-
} else {
600-
return InvalidArgument("Double requires 4 or 8 bytes, got {}", data.size());
601-
}
602-
}
603-
604-
case TypeId::kString: {
605-
return Literal::String(
606-
std::string(reinterpret_cast<const char*>(data.data()), data.size()));
607-
}
608-
609-
case TypeId::kBinary: {
610-
return Literal::Binary(std::vector<uint8_t>(data.begin(), data.end()));
611-
}
612-
613-
case TypeId::kFixed: {
614-
if (data.size() == 16) {
615-
std::array<uint8_t, 16> fixed_bytes;
616-
std::ranges::copy(data, fixed_bytes.begin());
617-
return Literal(Literal::Value{fixed_bytes}, type);
618-
} else {
619-
return Literal(Literal::Value{std::vector<uint8_t>(data.begin(), data.end())},
620-
type);
621-
}
622-
}
623-
624-
case TypeId::kUuid: {
625-
if (data.size() != 16) {
626-
return InvalidArgument("UUID requires 16 bytes, got {}", data.size());
627-
}
628-
ICEBERG_ASSIGN_OR_RAISE(auto uuid_value, util::ReadBigEndian16(data));
629-
return Literal(Literal::Value{uuid_value}, type);
630-
}
631-
632-
case TypeId::kDecimal: {
633-
if (data.size() > 16) {
634-
return InvalidArgument(
635-
"Decimal data too large, maximum 16 bytes supported, got {}", data.size());
636-
}
637-
638-
std::array<uint8_t, 16> decimal_bytes{};
639-
// Copy data to the end of the array (big-endian format for decimals)
640-
// This handles variable-length decimals by right-aligning them
641-
std::ranges::copy(data, decimal_bytes.end() - data.size());
642-
return Literal(Literal::Value{decimal_bytes}, type);
643-
}
644-
645-
default:
646-
return NotSupported("Deserialization for type {} is not supported",
647-
type->ToString());
648-
}
649-
650-
std::unreachable();
651-
}
652-
653370
} // namespace iceberg

src/iceberg/expression/literal.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,7 @@ class ICEBERG_EXPORT Literal {
145145

146146
friend class LiteralCaster;
147147
friend class LiteralSerializer;
148+
friend class Conversions;
148149

149150
Value value_;
150151
std::shared_ptr<PrimitiveType> type_;

src/iceberg/type.cc

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include <format>
2323
#include <iterator>
2424
#include <memory>
25+
#include <utility>
2526

2627
#include "iceberg/exception.h"
2728
#include "iceberg/util/formatter.h" // IWYU pragma: keep
@@ -319,4 +320,45 @@ std::shared_ptr<FixedType> fixed(int32_t length) {
319320
return std::make_shared<FixedType>(length);
320321
}
321322

323+
/// \brief Map a TypeId to its lowercase name.
///
/// Every case returns a string literal, so the returned view never dangles.
/// kDecimal and kFixed map to the bare names "decimal" and "fixed"; no type
/// parameters are appended.
///
/// \param id The TypeId to name.
/// \return The lowercase name for \p id.
std::string_view ToString(TypeId id) {
324+
// No default case: each handled enumerator returns directly from the switch.
switch (id) {
325+
case TypeId::kStruct:
326+
return "struct";
327+
case TypeId::kList:
328+
return "list";
329+
case TypeId::kMap:
330+
return "map";
331+
case TypeId::kBoolean:
332+
return "boolean";
333+
case TypeId::kInt:
334+
return "int";
335+
case TypeId::kLong:
336+
return "long";
337+
case TypeId::kFloat:
338+
return "float";
339+
case TypeId::kDouble:
340+
return "double";
341+
case TypeId::kDecimal:
342+
return "decimal";
343+
case TypeId::kDate:
344+
return "date";
345+
case TypeId::kTime:
346+
return "time";
347+
case TypeId::kTimestamp:
348+
return "timestamp";
349+
case TypeId::kTimestampTz:
350+
return "timestamptz";
351+
case TypeId::kString:
352+
return "string";
353+
case TypeId::kUuid:
354+
return "uuid";
355+
case TypeId::kFixed:
356+
return "fixed";
357+
case TypeId::kBinary:
358+
return "binary";
359+
}
360+
361+
// Reached only when `id` matches none of the cases above (e.g. a value cast
// from an arbitrary integer); std::unreachable() makes that undefined behavior.
std::unreachable();
362+
}
363+
322364
} // namespace iceberg

src/iceberg/type.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -489,4 +489,13 @@ ICEBERG_EXPORT std::shared_ptr<FixedType> fixed(int32_t length);
489489

490490
/// @}
491491

492+
/// \brief Get the lowercase name of a TypeId.
493+
///
494+
/// Each TypeId maps to one fixed lowercase name.
495+
/// For example: TypeId::kBoolean -> "boolean", TypeId::kInt -> "int", etc.
496+
///
/// TypeId::kDecimal and TypeId::kFixed map to the bare names "decimal" and
/// "fixed"; no type parameters are included.
/// NOTE(review): intended to match the names used by the Type::ToString()
/// methods; confirm this for types whose ToString() carries parameters.
///
497+
/// \param id The TypeId to convert to string
498+
/// \return A string_view containing the lowercase type name
499+
ICEBERG_EXPORT std::string_view ToString(TypeId id);
500+
492501
} // namespace iceberg

0 commit comments

Comments
 (0)