|
25 | 25 | #include <utility> |
26 | 26 |
|
27 | 27 | #include "iceberg/exception.h" |
28 | | -#include "iceberg/util/endian.h" |
| 28 | +#include "iceberg/util/conversions.h" |
29 | 29 | #include "iceberg/util/literal_format.h" |
30 | | -#include "iceberg/util/macros.h" |
31 | 30 |
|
32 | 31 | namespace iceberg { |
33 | 32 |
|
// Review note: every row of this declaration is on the removed ("-") side of
// the diff, i.e. the LiteralSerializer class is deleted by this change.
// Literal::Deserialize and Literal::Serialize further down switch their calls
// to Conversions::FromBytes / Conversions::ToBytes.
34 | | -/// \brief LiteralSerializer handles serialization/deserialization operations for Literal. |
35 | | -/// This is an internal implementation class. |
36 | | -class LiteralSerializer { |
37 | | - public: |
38 | | - /// \brief Serialize a literal value to binary format. |
// Takes the literal by const reference; yields the byte vector or an error
// wrapped in Result.
39 | | - static Result<std::vector<uint8_t>> ToBytes(const Literal& literal); |
40 | | - |
41 | | - /// \brief Deserialize binary data to a literal value. |
// Argument order here is (data, type); the replacement call in
// Literal::Deserialize passes (type, data).
42 | | - static Result<Literal> FromBytes(std::span<const uint8_t> data, |
43 | | - const std::shared_ptr<PrimitiveType>& type); |
44 | | -}; |
45 | | - |
46 | 33 | /// \brief LiteralCaster handles type casting operations for Literal. |
47 | 34 | /// This is an internal implementation class. |
48 | 35 | class LiteralCaster { |
@@ -168,11 +155,11 @@ Literal Literal::Binary(std::vector<uint8_t> value) { |
168 | 155 |
|
// Builds a Literal of the primitive `type` from the raw bytes in `data`,
// returning either the literal or an error wrapped in Result.
// The removed ("-") row delegated to LiteralSerializer::FromBytes(data, type);
// the added ("+") row delegates to Conversions::FromBytes(type, data). Note
// that the argument order is reversed between the two calls.
// NOTE(review): `type` is received by value and forwarded as an lvalue. If
// Conversions::FromBytes takes the shared_ptr by value, std::move(type) would
// avoid one reference-count copy; confirm against iceberg/util/conversions.h.
169 | 156 | Result<Literal> Literal::Deserialize(std::span<const uint8_t> data, |
170 | 157 | std::shared_ptr<PrimitiveType> type) { |
171 | | - return LiteralSerializer::FromBytes(data, type); |
| 158 | + return Conversions::FromBytes(type, data); |
172 | 159 | } |
173 | 160 |
|
// Encodes this literal as bytes, returning either the byte vector or an error
// wrapped in Result. The function is pure delegation: the removed ("-") row
// called LiteralSerializer::ToBytes(*this), the added ("+") row calls
// Conversions::ToBytes(*this).
// NOTE(review): the removed LiteralSerializer::ToBytes returned NotSupported
// for AboveMax, BelowMin and null literals; confirm that Conversions::ToBytes
// preserves that contract for callers of Serialize().
174 | 161 | Result<std::vector<uint8_t>> Literal::Serialize() const { |
175 | | - return LiteralSerializer::ToBytes(*this); |
| 162 | + return Conversions::ToBytes(*this); |
176 | 163 | } |
177 | 164 |
|
178 | 165 | // Getters |
@@ -380,274 +367,4 @@ Result<Literal> LiteralCaster::CastTo(const Literal& literal, |
380 | 367 | target_type->ToString()); |
381 | 368 | } |
382 | 369 |
|
383 | | -// LiteralSerializer implementation |
384 | | - |
// Review note: this whole definition is on the removed ("-") side of the diff.
// It encodes `literal` into a byte vector chosen by the literal's type id.
// Returns NotSupported for AboveMax, BelowMin and null literals, and for any
// type id that has no case in the switch below; returns InvalidArgument when a
// Fixed literal holds neither of the two accepted value alternatives.
// NOTE(review): there is no TypeId::kDecimal case here although FromBytes
// below does accept kDecimal, so decimal literals take the default
// NotSupported branch. Confirm the replacement handles both directions.
385 | | -Result<std::vector<uint8_t>> LiteralSerializer::ToBytes(const Literal& literal) { |
386 | | - // Cannot serialize special values |
387 | | - if (literal.IsAboveMax()) { |
388 | | - return NotSupported("Cannot serialize AboveMax"); |
389 | | - } |
390 | | - if (literal.IsBelowMin()) { |
391 | | - return NotSupported("Cannot serialize BelowMin"); |
392 | | - } |
393 | | - |
// The output buffer is declared before the null check, so it is constructed
// even on the null early-return path.
394 | | - std::vector<uint8_t> result; |
395 | | - |
396 | | - if (literal.IsNull()) { |
397 | | - return NotSupported("Cannot serialize null"); |
398 | | - } |
399 | | - |
400 | | - const auto& value = literal.value(); |
401 | | - const auto type_id = literal.type()->type_id(); |
402 | | - |
// Each case extracts the alternative it expects with std::get and returns
// immediately; there is no fall-through between cases.
403 | | - switch (type_id) { |
404 | | - case TypeId::kBoolean: { |
405 | | - // 0x00 for false, 0x01 for true |
406 | | - result.push_back(std::get<bool>(value) ? 0x01 : 0x00); |
407 | | - return result; |
408 | | - } |
409 | | - |
// kInt and kDate share the same encoding call on an int32_t value.
410 | | - case TypeId::kInt: { |
411 | | - // Stored as 4-byte little-endian |
412 | | - util::WriteLittleEndian(result, std::get<int32_t>(value)); |
413 | | - return result; |
414 | | - } |
415 | | - |
416 | | - case TypeId::kDate: { |
417 | | - // Stores days from 1970-01-01 in a 4-byte little-endian int |
418 | | - util::WriteLittleEndian(result, std::get<int32_t>(value)); |
419 | | - return result; |
420 | | - } |
421 | | - |
// kLong, kTime, kTimestamp and kTimestampTz share the same encoding call on an
// int64_t value; only the meaning described in the row comments differs.
422 | | - case TypeId::kLong: { |
423 | | - // Stored as 8-byte little-endian |
424 | | - util::WriteLittleEndian(result, std::get<int64_t>(value)); |
425 | | - return result; |
426 | | - } |
427 | | - |
428 | | - case TypeId::kTime: { |
429 | | - // Stores microseconds from midnight in an 8-byte little-endian long |
430 | | - util::WriteLittleEndian(result, std::get<int64_t>(value)); |
431 | | - return result; |
432 | | - } |
433 | | - |
434 | | - case TypeId::kTimestamp: { |
435 | | - // Stores microseconds from 1970-01-01 00:00:00.000000 in an 8-byte little-endian |
436 | | - // long |
437 | | - util::WriteLittleEndian(result, std::get<int64_t>(value)); |
438 | | - return result; |
439 | | - } |
440 | | - |
441 | | - case TypeId::kTimestampTz: { |
442 | | - // Stores microseconds from 1970-01-01 00:00:00.000000 UTC in an 8-byte |
443 | | - // little-endian long |
444 | | - util::WriteLittleEndian(result, std::get<int64_t>(value)); |
445 | | - return result; |
446 | | - } |
447 | | - |
448 | | - case TypeId::kFloat: { |
449 | | - // Stored as 4-byte little-endian |
450 | | - util::WriteLittleEndian(result, std::get<float>(value)); |
451 | | - return result; |
452 | | - } |
453 | | - |
454 | | - case TypeId::kDouble: { |
455 | | - // Stored as 8-byte little-endian |
456 | | - util::WriteLittleEndian(result, std::get<double>(value)); |
457 | | - return result; |
458 | | - } |
459 | | - |
// String and binary payloads are appended as-is; no length prefix is written.
460 | | - case TypeId::kString: { |
461 | | - // UTF-8 bytes (without length) |
462 | | - const auto& str = std::get<std::string>(value); |
463 | | - result.insert(result.end(), str.begin(), str.end()); |
464 | | - return result; |
465 | | - } |
466 | | - |
467 | | - case TypeId::kBinary: { |
468 | | - // Binary value (without length) |
469 | | - const auto& binary_data = std::get<std::vector<uint8_t>>(value); |
470 | | - result.insert(result.end(), binary_data.begin(), binary_data.end()); |
471 | | - return result; |
472 | | - } |
473 | | - |
// Fixed is the only case that probes the variant before reading it: a 16-byte
// array is appended, a byte vector is copied wholesale, and any other
// alternative is reported with the typeid name of what was actually held.
474 | | - case TypeId::kFixed: { |
475 | | - // Fixed(L) - Binary value, could be stored in std::array<uint8_t, 16> or |
476 | | - // std::vector<uint8_t> |
477 | | - if (std::holds_alternative<std::array<uint8_t, 16>>(value)) { |
478 | | - const auto& fixed_bytes = std::get<std::array<uint8_t, 16>>(value); |
479 | | - result.insert(result.end(), fixed_bytes.begin(), fixed_bytes.end()); |
480 | | - } else if (std::holds_alternative<std::vector<uint8_t>>(value)) { |
481 | | - result = std::get<std::vector<uint8_t>>(value); |
482 | | - } else { |
483 | | - std::string actual_type = std::visit( |
484 | | - [](auto&& arg) -> std::string { return typeid(arg).name(); }, value); |
485 | | - |
486 | | - return InvalidArgument("Invalid value type for Fixed literal, got type: {}", |
487 | | - actual_type); |
488 | | - } |
489 | | - return result; |
490 | | - } |
491 | | - |
492 | | - case TypeId::kUuid: { |
493 | | - // 16-byte big-endian value |
494 | | - const auto& uuid_bytes = std::get<std::array<uint8_t, 16>>(value); |
495 | | - util::WriteBigEndian16(result, uuid_bytes); |
496 | | - return result; |
497 | | - } |
498 | | - |
// Every type id without an explicit case ends here.
499 | | - default: |
500 | | - return NotSupported("Serialization for type {} is not supported", |
501 | | - literal.type()->ToString()); |
502 | | - } |
503 | | -} |
504 | | - |
// Review note: this whole definition is on the removed ("-") side of the diff.
// It decodes `data` into a Literal of the primitive `type`.
// Returns InvalidArgument when `type` is null or when the byte count is not
// acceptable for the requested type, NotSupported for type ids without a case
// in the switch, and Literal::Null(type) when `data` is empty.
505 | | -Result<Literal> LiteralSerializer::FromBytes(std::span<const uint8_t> data, |
506 | | - const std::shared_ptr<PrimitiveType>& type) { |
507 | | - if (!type) { |
508 | | - return InvalidArgument("Type cannot be null"); |
509 | | - } |
510 | | - |
// NOTE(review): this check runs before the switch, so it applies to every
// type. A zero-length string or binary payload therefore decodes to a null
// literal rather than an empty value; confirm that is intended.
511 | | - // Empty data represents null value |
512 | | - if (data.empty()) { |
513 | | - return Literal::Null(type); |
514 | | - } |
515 | | - |
516 | | - const auto type_id = type->type_id(); |
517 | | - |
518 | | - switch (type_id) { |
519 | | - case TypeId::kBoolean: { |
520 | | - if (data.size() != 1) { |
521 | | - return InvalidArgument("Boolean requires 1 byte, got {}", data.size()); |
522 | | - } |
523 | | - ICEBERG_ASSIGN_OR_RAISE(auto value, util::ReadLittleEndian<uint8_t>(data)); |
524 | | - // 0x00 for false, non-zero byte for true |
525 | | - return Literal::Boolean(value != 0x00); |
526 | | - } |
527 | | - |
// kInt and kDate both require exactly sizeof(int32_t) bytes.
528 | | - case TypeId::kInt: { |
529 | | - if (data.size() != sizeof(int32_t)) { |
530 | | - return InvalidArgument("Int requires {} bytes, got {}", sizeof(int32_t), |
531 | | - data.size()); |
532 | | - } |
533 | | - ICEBERG_ASSIGN_OR_RAISE(auto value, util::ReadLittleEndian<int32_t>(data)); |
534 | | - return Literal::Int(value); |
535 | | - } |
536 | | - |
537 | | - case TypeId::kDate: { |
538 | | - if (data.size() != sizeof(int32_t)) { |
539 | | - return InvalidArgument("Date requires {} bytes, got {}", sizeof(int32_t), |
540 | | - data.size()); |
541 | | - } |
542 | | - ICEBERG_ASSIGN_OR_RAISE(auto value, util::ReadLittleEndian<int32_t>(data)); |
543 | | - return Literal::Date(value); |
544 | | - } |
545 | | - |
// The four 64-bit types share one body. Eight bytes are read as int64_t; four
// bytes are read as int32_t and widened with static_cast, which preserves the
// sign of the 32-bit value.
// NOTE(review): the 4-byte branch is reachable for kTime, kTimestamp and
// kTimestampTz as well, while the row comment only mentions int-to-long
// promotion; confirm that accepting 4 bytes for those types is intended.
546 | | - case TypeId::kLong: |
547 | | - case TypeId::kTime: |
548 | | - case TypeId::kTimestamp: |
549 | | - case TypeId::kTimestampTz: { |
550 | | - int64_t value; |
551 | | - if (data.size() == 8) { |
552 | | - // Standard 8-byte long |
553 | | - ICEBERG_ASSIGN_OR_RAISE(auto long_value, util::ReadLittleEndian<int64_t>(data)); |
554 | | - value = long_value; |
555 | | - } else if (data.size() == 4) { |
556 | | - // Type was promoted from int to long |
557 | | - ICEBERG_ASSIGN_OR_RAISE(auto int_value, util::ReadLittleEndian<int32_t>(data)); |
558 | | - value = static_cast<int64_t>(int_value); |
559 | | - } else { |
// The immediately-invoked lambda only picks a display name for the error
// message; its default branch cannot be hit from the four case labels above.
560 | | - const char* type_name = [type_id]() { |
561 | | - switch (type_id) { |
562 | | - case TypeId::kLong: |
563 | | - return "Long"; |
564 | | - case TypeId::kTime: |
565 | | - return "Time"; |
566 | | - case TypeId::kTimestamp: |
567 | | - return "Timestamp"; |
568 | | - case TypeId::kTimestampTz: |
569 | | - return "TimestampTz"; |
570 | | - default: |
571 | | - return "Unknown"; |
572 | | - } |
573 | | - }(); |
574 | | - return InvalidArgument("{} requires 4 or 8 bytes, got {}", type_name, |
575 | | - data.size()); |
576 | | - } |
577 | | - |
// Built through the (value, type) constructor so the literal keeps the
// requested type instead of always becoming a plain long.
578 | | - return Literal(value, type); |
579 | | - } |
580 | | - |
581 | | - case TypeId::kFloat: { |
582 | | - if (data.size() != sizeof(float)) { |
583 | | - return InvalidArgument("Float requires {} bytes, got {}", sizeof(float), |
584 | | - data.size()); |
585 | | - } |
586 | | - ICEBERG_ASSIGN_OR_RAISE(auto value, util::ReadLittleEndian<float>(data)); |
587 | | - return Literal::Float(value); |
588 | | - } |
589 | | - |
// Double mirrors the long case: eight bytes are read directly, four bytes are
// read as float and widened.
590 | | - case TypeId::kDouble: { |
591 | | - if (data.size() == 8) { |
592 | | - // Standard 8-byte double |
593 | | - ICEBERG_ASSIGN_OR_RAISE(auto double_value, util::ReadLittleEndian<double>(data)); |
594 | | - return Literal::Double(double_value); |
595 | | - } else if (data.size() == 4) { |
596 | | - // Type was promoted from float to double |
597 | | - ICEBERG_ASSIGN_OR_RAISE(auto float_value, util::ReadLittleEndian<float>(data)); |
598 | | - return Literal::Double(static_cast<double>(float_value)); |
599 | | - } else { |
600 | | - return InvalidArgument("Double requires 4 or 8 bytes, got {}", data.size()); |
601 | | - } |
602 | | - } |
603 | | - |
// String and binary take the whole span as the value; no validation of the
// byte content is performed here.
604 | | - case TypeId::kString: { |
605 | | - return Literal::String( |
606 | | - std::string(reinterpret_cast<const char*>(data.data()), data.size())); |
607 | | - } |
608 | | - |
609 | | - case TypeId::kBinary: { |
610 | | - return Literal::Binary(std::vector<uint8_t>(data.begin(), data.end())); |
611 | | - } |
612 | | - |
// A 16-byte fixed value is stored as std::array, any other length as a vector;
// ToBytes accepts both alternatives for kFixed.
// NOTE(review): no comparison of data.size() against the length declared by
// `type` is visible here; confirm whether that is validated elsewhere.
613 | | - case TypeId::kFixed: { |
614 | | - if (data.size() == 16) { |
615 | | - std::array<uint8_t, 16> fixed_bytes; |
616 | | - std::ranges::copy(data, fixed_bytes.begin()); |
617 | | - return Literal(Literal::Value{fixed_bytes}, type); |
618 | | - } else { |
619 | | - return Literal(Literal::Value{std::vector<uint8_t>(data.begin(), data.end())}, |
620 | | - type); |
621 | | - } |
622 | | - } |
623 | | - |
624 | | - case TypeId::kUuid: { |
625 | | - if (data.size() != 16) { |
626 | | - return InvalidArgument("UUID requires 16 bytes, got {}", data.size()); |
627 | | - } |
628 | | - ICEBERG_ASSIGN_OR_RAISE(auto uuid_value, util::ReadBigEndian16(data)); |
629 | | - return Literal(Literal::Value{uuid_value}, type); |
630 | | - } |
631 | | - |
// The array is zero-initialised and the input is copied into its tail, so any
// leading bytes not covered by `data` stay 0x00.
// NOTE(review): if these 16 bytes are later read as a two's-complement
// big-endian integer, a negative value shorter than 16 bytes would need 0xFF
// padding (sign extension) rather than zeros; confirm how the array is used.
632 | | - case TypeId::kDecimal: { |
633 | | - if (data.size() > 16) { |
634 | | - return InvalidArgument( |
635 | | - "Decimal data too large, maximum 16 bytes supported, got {}", data.size()); |
636 | | - } |
637 | | - |
638 | | - std::array<uint8_t, 16> decimal_bytes{}; |
639 | | - // Copy data to the end of the array (big-endian format for decimals) |
640 | | - // This handles variable-length decimals by right-aligning them |
641 | | - std::ranges::copy(data, decimal_bytes.end() - data.size()); |
642 | | - return Literal(Literal::Value{decimal_bytes}, type); |
643 | | - } |
644 | | - |
645 | | - default: |
646 | | - return NotSupported("Deserialization for type {} is not supported", |
647 | | - type->ToString()); |
648 | | - } |
649 | | - |
// Every branch of the switch above ends in a return statement, so control is
// not expected to arrive at this line.
650 | | - std::unreachable(); |
651 | | -} |
652 | | - |
653 | 370 | } // namespace iceberg |
0 commit comments