Skip to content

Commit 043199c

Browse files
committed
finish
1 parent a736e69 commit 043199c

File tree

3 files changed

+155
-59
lines changed

3 files changed

+155
-59
lines changed

src/iceberg/expression/literal.cc

Lines changed: 85 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,15 @@
2121

2222
#include <algorithm>
2323
#include <bit>
24+
#include <chrono>
2425
#include <cmath>
2526
#include <concepts>
2627
#include <cstring>
28+
#include <iomanip>
29+
#include <sstream>
2730

2831
#include "iceberg/exception.h"
32+
#include "iceberg/util/macros.h"
2933

3034
namespace iceberg {
3135

@@ -400,13 +404,21 @@ std::string Literal::ToString() const {
400404
}
401405
return result;
402406
}
407+
case TypeId::kDate: {
408+
return FormatDate(std::get<int32_t>(value_));
409+
}
410+
case TypeId::kTime: {
411+
return FormatTime(std::get<int64_t>(value_));
412+
}
413+
case TypeId::kTimestamp: {
414+
return FormatTimestamp(std::get<int64_t>(value_));
415+
}
416+
case TypeId::kTimestampTz: {
417+
return FormatTimestampTz(std::get<int64_t>(value_));
418+
}
403419
case TypeId::kDecimal:
404420
case TypeId::kUuid:
405-
case TypeId::kFixed:
406-
case TypeId::kDate:
407-
case TypeId::kTime:
408-
case TypeId::kTimestamp:
409-
case TypeId::kTimestampTz: {
421+
case TypeId::kFixed: {
410422
throw IcebergError("Not implemented: ToString for " + type_->ToString());
411423
}
412424
default: {
@@ -602,28 +614,19 @@ Result<Literal> LiteralSerializer::FromBytes(std::span<const uint8_t> data,
602614

603615
switch (type_id) {
604616
case TypeId::kBoolean: {
605-
auto result = ReadLittleEndian<uint8_t>(data);
606-
if (!result) {
607-
return std::unexpected<Error>(result.error());
608-
}
617+
ICEBERG_ASSIGN_OR_RAISE(auto value, ReadLittleEndian<uint8_t>(data));
609618
// 0x00 for false, non-zero byte for true
610-
return Literal::Boolean(*result != 0x00);
619+
return Literal::Boolean(value != 0x00);
611620
}
612621

613622
case TypeId::kInt: {
614-
auto result = ReadLittleEndian<int32_t>(data);
615-
if (!result) {
616-
return std::unexpected<Error>(result.error());
617-
}
618-
return Literal::Int(*result);
623+
ICEBERG_ASSIGN_OR_RAISE(auto value, ReadLittleEndian<int32_t>(data));
624+
return Literal::Int(value);
619625
}
620626

621627
case TypeId::kDate: {
622-
auto result = ReadLittleEndian<int32_t>(data);
623-
if (!result) {
624-
return std::unexpected<Error>(result.error());
625-
}
626-
return Literal::Date(*result);
628+
ICEBERG_ASSIGN_OR_RAISE(auto value, ReadLittleEndian<int32_t>(data));
629+
return Literal::Date(value);
627630
}
628631

629632
case TypeId::kLong:
@@ -634,18 +637,12 @@ Result<Literal> LiteralSerializer::FromBytes(std::span<const uint8_t> data,
634637

635638
if (data.size() == 4) {
636639
// Type was promoted from int to long
637-
auto int_result = ReadLittleEndian<int32_t>(data);
638-
if (!int_result) {
639-
return std::unexpected<Error>(int_result.error());
640-
}
641-
value = static_cast<int64_t>(*int_result);
640+
ICEBERG_ASSIGN_OR_RAISE(auto int_value, ReadLittleEndian<int32_t>(data));
641+
value = static_cast<int64_t>(int_value);
642642
} else if (data.size() == 8) {
643643
// Standard 8-byte long
644-
auto long_result = ReadLittleEndian<int64_t>(data);
645-
if (!long_result) {
646-
return std::unexpected<Error>(long_result.error());
647-
}
648-
value = *long_result;
644+
ICEBERG_ASSIGN_OR_RAISE(auto long_value, ReadLittleEndian<int64_t>(data));
645+
value = long_value;
649646
} else {
650647
return InvalidArgument("{} requires 4 or 8 bytes, got {}",
651648
GetLongTypeName(type_id), data.size());
@@ -655,28 +652,19 @@ Result<Literal> LiteralSerializer::FromBytes(std::span<const uint8_t> data,
655652
}
656653

657654
case TypeId::kFloat: {
658-
auto result = ReadLittleEndian<float>(data);
659-
if (!result) {
660-
return std::unexpected<Error>(result.error());
661-
}
662-
return Literal::Float(*result);
655+
ICEBERG_ASSIGN_OR_RAISE(auto value, ReadLittleEndian<float>(data));
656+
return Literal::Float(value);
663657
}
664658

665659
case TypeId::kDouble: {
666660
if (data.size() == 4) {
667661
// Type was promoted from float to double
668-
auto float_result = ReadLittleEndian<float>(data);
669-
if (!float_result) {
670-
return std::unexpected<Error>(float_result.error());
671-
}
672-
return Literal::Double(static_cast<double>(*float_result));
662+
ICEBERG_ASSIGN_OR_RAISE(auto float_value, ReadLittleEndian<float>(data));
663+
return Literal::Double(static_cast<double>(float_value));
673664
} else if (data.size() == 8) {
674665
// Standard 8-byte double
675-
auto double_result = ReadLittleEndian<double>(data);
676-
if (!double_result) {
677-
return std::unexpected<Error>(double_result.error());
678-
}
679-
return Literal::Double(*double_result);
666+
ICEBERG_ASSIGN_OR_RAISE(auto double_value, ReadLittleEndian<double>(data));
667+
return Literal::Double(double_value);
680668
} else {
681669
return InvalidArgument("Double requires 4 or 8 bytes, got {}", data.size());
682670
}
@@ -703,11 +691,8 @@ Result<Literal> LiteralSerializer::FromBytes(std::span<const uint8_t> data,
703691
}
704692

705693
case TypeId::kUuid: {
706-
auto uuid_result = ReadBigEndian16(data);
707-
if (!uuid_result) {
708-
return std::unexpected<Error>(uuid_result.error());
709-
}
710-
return Literal(Literal::Value{*uuid_result}, type);
694+
ICEBERG_ASSIGN_OR_RAISE(auto uuid_value, ReadBigEndian16(data));
695+
return Literal(Literal::Value{uuid_value}, type);
711696
}
712697

713698
case TypeId::kDecimal: {
@@ -732,4 +717,54 @@ Result<Literal> LiteralSerializer::FromBytes(std::span<const uint8_t> data,
732717
return InvalidArgument("Unexpected error in deserialization");
733718
}
734719

720+
// Literal formatting member functions
721+
722+
std::string Literal::FormatDate(int32_t days_since_epoch) const {
723+
// Convert days since Unix epoch to date
724+
auto time_point =
725+
std::chrono::system_clock::time_point{} + std::chrono::days{days_since_epoch};
726+
auto date = std::chrono::floor<std::chrono::days>(time_point);
727+
auto ymd = std::chrono::year_month_day{date};
728+
729+
std::ostringstream oss;
730+
oss << static_cast<int>(ymd.year()) << "-" << std::setfill('0') << std::setw(2)
731+
<< static_cast<unsigned>(ymd.month()) << "-" << std::setfill('0') << std::setw(2)
732+
<< static_cast<unsigned>(ymd.day());
733+
return oss.str();
734+
}
735+
736+
// Formats a time of day as "HH:MM:SS.ffffff".
//
// `microseconds_since_midnight` is split into hours, minutes, seconds and the
// remaining microseconds; each field is zero-padded to its fixed width.
std::string Literal::FormatTime(int64_t microseconds_since_midnight) const {
  constexpr auto kMicrosPerSecond = 1000000LL;
  constexpr auto kMicrosPerMinute = 1000000LL * 60;
  constexpr auto kMicrosPerHour = 1000000LL * 3600;

  const auto hour_part = microseconds_since_midnight / kMicrosPerHour;
  const auto minute_part =
      (microseconds_since_midnight % kMicrosPerHour) / kMicrosPerMinute;
  const auto second_part =
      (microseconds_since_midnight % kMicrosPerMinute) / kMicrosPerSecond;
  const auto micro_part = microseconds_since_midnight % kMicrosPerSecond;

  std::ostringstream out;
  // The fill character is sticky, so it is set once; the width is consumed by
  // each numeric field and therefore repeated.
  out << std::setfill('0');
  out << std::setw(2) << hour_part << ":";
  out << std::setw(2) << minute_part << ":";
  out << std::setw(2) << second_part << ".";
  out << std::setw(6) << micro_part;
  return out.str();
}
748+
749+
std::string Literal::FormatTimestamp(int64_t microseconds_since_epoch) const {
750+
auto time_point = std::chrono::system_clock::time_point{} +
751+
std::chrono::microseconds{microseconds_since_epoch};
752+
auto date = std::chrono::floor<std::chrono::days>(time_point);
753+
auto time_of_day = time_point - date;
754+
auto micros_of_day =
755+
std::chrono::duration_cast<std::chrono::microseconds>(time_of_day).count();
756+
757+
auto ymd = std::chrono::year_month_day{date};
758+
759+
std::ostringstream oss;
760+
oss << static_cast<int>(ymd.year()) << "-" << std::setfill('0') << std::setw(2)
761+
<< static_cast<unsigned>(ymd.month()) << "-" << std::setfill('0') << std::setw(2)
762+
<< static_cast<unsigned>(ymd.day()) << "T" << FormatTime(micros_of_day);
763+
return oss.str();
764+
}
765+
766+
std::string Literal::FormatTimestampTz(int64_t microseconds_since_epoch) const {
767+
return FormatTimestamp(microseconds_since_epoch) + "+00:00";
768+
}
769+
735770
} // namespace iceberg

src/iceberg/expression/literal.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,18 @@ class ICEBERG_EXPORT Literal {
146146
friend class LiteralCaster;
147147
friend class LiteralSerializer;
148148

149+
/// \brief Format a date, given as days since the Unix epoch (1970-01-01), as "YYYY-MM-DD".
150+
std::string FormatDate(int32_t days_since_epoch) const;
151+
152+
/// \brief Format a time of day, given as microseconds since midnight, as "HH:MM:SS.ffffff".
153+
std::string FormatTime(int64_t microseconds_since_midnight) const;
154+
155+
/// \brief Format a timestamp, given as microseconds since the Unix epoch, as "YYYY-MM-DDTHH:MM:SS.ffffff".
156+
std::string FormatTimestamp(int64_t microseconds_since_epoch) const;
157+
158+
/// \brief Format a timestamp with time zone, given as microseconds since the Unix epoch, as "YYYY-MM-DDTHH:MM:SS.ffffff+00:00".
159+
std::string FormatTimestampTz(int64_t microseconds_since_epoch) const;
160+
149161
private:
150162
Value value_;
151163
std::shared_ptr<PrimitiveType> type_;

test/literal_test.cc

Lines changed: 58 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919

2020
#include "iceberg/expression/literal.h"
2121

22-
#include <algorithm>
2322
#include <limits>
2423
#include <numbers>
2524
#include <vector>
@@ -407,45 +406,74 @@ void CheckBinaryRoundTrip(const std::vector<uint8_t>& input_bytes,
407406
EXPECT_EQ(final_literal->ToString(), expected_literal.ToString());
408407
}
409408

410-
// Boolean binary serialization tests
409+
// Binary serialization tests
411410
// Boolean: the single byte {1} is paired with Literal::Boolean(true) and {0}
// with Literal::Boolean(false); both go through CheckBinaryRoundTrip.
TEST(LiteralSerializationTest, BinaryBoolean) {
412411
CheckBinaryRoundTrip({1}, Literal::Boolean(true), boolean());
413412
CheckBinaryRoundTrip({0}, Literal::Boolean(false), boolean());
414413
}
415414

416-
// Integer binary serialization tests
417415
// Int: {32, 0, 0, 0} is the 4-byte little-endian form of 32.
TEST(LiteralSerializationTest, BinaryInt) {
418416
CheckBinaryRoundTrip({32, 0, 0, 0}, Literal::Int(32), int32());
419417
}
420418

421-
// Long binary serialization tests
422419
// Long: {32, 0, 0, 0, 0, 0, 0, 0} is the 8-byte little-endian form of 32.
TEST(LiteralSerializationTest, BinaryLong) {
423420
CheckBinaryRoundTrip({32, 0, 0, 0, 0, 0, 0, 0}, Literal::Long(32), int64());
424421
}
425422

426-
// Float binary serialization tests
427423
// Float: {0, 0, 128, 63} is 0x3F800000 stored least-significant byte first,
// i.e. the IEEE-754 single-precision bit pattern of 1.0f.
TEST(LiteralSerializationTest, BinaryFloat) {
428424
CheckBinaryRoundTrip({0, 0, 128, 63}, Literal::Float(1.0f), float32());
429425
}
430426

431-
// Double binary serialization tests
432427
// Double: {0, 0, 0, 0, 0, 0, 240, 63} is 0x3FF0000000000000 stored
// least-significant byte first, i.e. the IEEE-754 double bit pattern of 1.0.
TEST(LiteralSerializationTest, BinaryDouble) {
433428
CheckBinaryRoundTrip({0, 0, 0, 0, 0, 0, 240, 63}, Literal::Double(1.0), float64());
434429
}
435430

436-
// String binary serialization tests
437431
// String: the byte values are the ASCII codes of "iceberg".
TEST(LiteralSerializationTest, BinaryString) {
438432
CheckBinaryRoundTrip({105, 99, 101, 98, 101, 114, 103}, Literal::String("iceberg"),
439433
string());
440434
}
441435

442-
// Binary data type serialization tests
436+
// Date: each byte vector is the 4-byte little-endian form of the paired day
// count. Covers a small positive value, 19716, and the negative value -25567
// (two's complement, 0xFFFF9C21).
TEST(LiteralSerializationTest, BinaryDate) {
437+
CheckBinaryRoundTrip({32, 0, 0, 0}, Literal::Date(32), date());
438+
CheckBinaryRoundTrip({4, 77, 0, 0}, Literal::Date(19716), date());
439+
CheckBinaryRoundTrip({33, 156, 255, 255}, Literal::Date(-25567), date());
440+
}
441+
442+
// Time: each byte vector is the 8-byte little-endian form of the paired
// microsecond count. 43200000000 is 12 hours in microseconds; 52245123456 is
// the value the formatting test renders as "14:30:45.123456".
TEST(LiteralSerializationTest, BinaryTime) {
443+
CheckBinaryRoundTrip({32, 0, 0, 0, 0, 0, 0, 0}, Literal::Time(32), time());
444+
CheckBinaryRoundTrip({0, 176, 235, 14, 10, 0, 0, 0}, Literal::Time(43200000000LL),
445+
time());
446+
CheckBinaryRoundTrip({128, 81, 13, 42, 12, 0, 0, 0}, Literal::Time(52245123456LL),
447+
time());
448+
}
449+
450+
// Timestamp: each byte vector is the 8-byte little-endian form of the paired
// microsecond count. The last case uses eight 0xFF bytes, the two's-complement
// encoding of -1, to cover a negative value.
TEST(LiteralSerializationTest, BinaryTimestamp) {
451+
CheckBinaryRoundTrip({32, 0, 0, 0, 0, 0, 0, 0}, Literal::Timestamp(32), timestamp());
452+
CheckBinaryRoundTrip({0, 224, 55, 59, 1, 93, 3, 0},
453+
Literal::Timestamp(946684800000000LL), timestamp());
454+
CheckBinaryRoundTrip({128, 209, 74, 105, 86, 13, 6, 0},
455+
Literal::Timestamp(1703514645123456LL), timestamp());
456+
CheckBinaryRoundTrip({255, 255, 255, 255, 255, 255, 255, 255}, Literal::Timestamp(-1),
457+
timestamp());
458+
}
459+
460+
// TimestampTz: same byte vectors and values as the BinaryTimestamp test, but
// paired with Literal::TimestampTz and the timestamp_tz() type.
TEST(LiteralSerializationTest, BinaryTimestampTz) {
461+
CheckBinaryRoundTrip({32, 0, 0, 0, 0, 0, 0, 0}, Literal::TimestampTz(32),
462+
timestamp_tz());
463+
CheckBinaryRoundTrip({0, 224, 55, 59, 1, 93, 3, 0},
464+
Literal::TimestampTz(946684800000000LL), timestamp_tz());
465+
CheckBinaryRoundTrip({128, 209, 74, 105, 86, 13, 6, 0},
466+
Literal::TimestampTz(1703514645123456LL), timestamp_tz());
467+
CheckBinaryRoundTrip({255, 255, 255, 255, 255, 255, 255, 255}, Literal::TimestampTz(-1),
468+
timestamp_tz());
469+
}
470+
443471
// Binary: an arbitrary byte sequence (including 0xFF) is used both as the
// input bytes and as the payload of the expected Literal::Binary.
TEST(LiteralSerializationTest, BinaryData) {
444472
std::vector<uint8_t> data = {0x01, 0x02, 0x03, 0xFF};
445473
CheckBinaryRoundTrip(data, Literal::Binary(data), binary());
446474
}
447475

448-
// Type promotion tests - smaller types can be deserialized as larger types
476+
// Type promotion tests
449477
TEST(LiteralSerializationTest, TypePromotion) {
450478
// 4-byte int data can be deserialized as long
451479
std::vector<uint8_t> int_data = {32, 0, 0, 0};
@@ -514,6 +542,27 @@ TEST(LiteralSerializationTest, EdgeCases) {
514542
EXPECT_EQ(inf_bytes->size(), 4);
515543
}
516544

545+
// ToString formatting tests for date/time types
546+
// Pins the exact text Literal::ToString() produces for date, time, timestamp
// and timestamp-with-zone literals, using the epoch/zero value plus one
// non-trivial value per type.
TEST(LiteralFormattingTest, DateTimeToString) {
547+
// Date formatting tests: days since 1970-01-01, including a pre-epoch value
548+
EXPECT_EQ(Literal::Date(0).ToString(), "1970-01-01");
549+
EXPECT_EQ(Literal::Date(-25567).ToString(), "1900-01-01");
550+
551+
// Time formatting tests: microseconds since midnight
552+
EXPECT_EQ(Literal::Time(0).ToString(), "00:00:00.000000");
553+
EXPECT_EQ(Literal::Time(52245123456LL).ToString(), "14:30:45.123456");
554+
555+
// Timestamp formatting tests: microseconds since the epoch
556+
EXPECT_EQ(Literal::Timestamp(0).ToString(), "1970-01-01T00:00:00.000000");
557+
EXPECT_EQ(Literal::Timestamp(1703514645123456LL).ToString(),
558+
"2023-12-25T14:30:45.123456");
559+
560+
// TimestampTz formatting tests: same values, expected with a "+00:00" suffix
561+
EXPECT_EQ(Literal::TimestampTz(0).ToString(), "1970-01-01T00:00:00.000000+00:00");
562+
EXPECT_EQ(Literal::TimestampTz(1703514645123456LL).ToString(),
563+
"2023-12-25T14:30:45.123456+00:00");
564+
}
565+
517566
// Error case serialization tests
518567
TEST(LiteralSerializationTest, ErrorCases) {
519568
// AboveMax/BelowMin values cannot be serialized

0 commit comments

Comments
 (0)