diff --git a/src/iceberg/expression/literal.cc b/src/iceberg/expression/literal.cc index adfe5355a..bd76d9c28 100644 --- a/src/iceberg/expression/literal.cc +++ b/src/iceberg/expression/literal.cc @@ -147,6 +147,8 @@ Literal Literal::Double(double value) { return {Value{value}, float64()}; } Literal Literal::String(std::string value) { return {Value{std::move(value)}, string()}; } +Literal Literal::UUID(Uuid value) { return {Value{std::move(value)}, uuid()}; } + Literal Literal::Binary(std::vector value) { return {Value{std::move(value)}, binary()}; } @@ -251,6 +253,15 @@ std::partial_ordering Literal::operator<=>(const Literal& other) const { return this_val <=> other_val; } + case TypeId::kUuid: { + auto& this_val = std::get(value_); + auto& other_val = std::get(other.value_); + if (this_val == other_val) { + return std::partial_ordering::equivalent; + } + return std::partial_ordering::unordered; + } + case TypeId::kBinary: { auto& this_val = std::get>(value_); auto& other_val = std::get>(other.value_); @@ -299,6 +310,9 @@ std::string Literal::ToString() const { case TypeId::kString: { return std::get(value_); } + case TypeId::kUuid: { + return std::get(value_).ToString(); + } case TypeId::kBinary: { const auto& binary_data = std::get>(value_); std::string result; @@ -318,7 +332,6 @@ std::string Literal::ToString() const { return result; } case TypeId::kDecimal: - case TypeId::kUuid: case TypeId::kDate: case TypeId::kTime: case TypeId::kTimestamp: diff --git a/src/iceberg/expression/literal.h b/src/iceberg/expression/literal.h index c11d48f5b..70ff2d806 100644 --- a/src/iceberg/expression/literal.h +++ b/src/iceberg/expression/literal.h @@ -28,6 +28,7 @@ #include "iceberg/result.h" #include "iceberg/type.h" #include "iceberg/util/formattable.h" +#include "iceberg/util/uuid.h" namespace iceberg { @@ -56,8 +57,9 @@ class ICEBERG_EXPORT Literal : public util::Formattable { float, // for float double, // for double std::string, // for string + Uuid, // for uuid std::vector, // for binary, fixed - std::array, // for uuid and decimal + std::array, // for decimal BelowMin, AboveMax>; /// \brief Factory methods for primitive types @@ -71,6 +73,7 @@ class ICEBERG_EXPORT Literal : public util::Formattable { static Literal Float(float value); static Literal Double(double value); static Literal String(std::string value); + static Literal UUID(Uuid value); static Literal Binary(std::vector value); static Literal Fixed(std::vector value); @@ -207,6 +210,11 @@ struct LiteralTraits { using ValueType = std::string; }; +template <> +struct LiteralTraits { + using ValueType = Uuid; +}; + template <> struct LiteralTraits { using ValueType = std::vector; diff --git a/src/iceberg/test/literal_test.cc b/src/iceberg/test/literal_test.cc index bd7544bfa..58cc906da 100644 --- a/src/iceberg/test/literal_test.cc +++ b/src/iceberg/test/literal_test.cc @@ -216,6 +216,15 @@ TEST(LiteralTest, StringBasics) { EXPECT_EQ(empty_string.ToString(), ""); } +// Uuid type tests +TEST(LiteralTest, UuidBasics) { + auto uuid = Uuid::FromString("123e4567-e89b-12d3-a456-426614174000").value(); + auto uuid_literal = Literal::UUID(uuid); + + EXPECT_EQ(uuid_literal.type()->type_id(), TypeId::kUuid); + EXPECT_EQ(uuid_literal.ToString(), "123e4567-e89b-12d3-a456-426614174000"); +} + TEST(LiteralTest, StringComparison) { auto string1 = Literal::String("apple"); auto string2 = Literal::String("banana"); @@ -480,6 +489,15 @@ INSTANTIATE_TEST_SUITE_P( Literal::String("AAAAAAAAAAAAAAAA"), string()}, + // Uuid type + LiteralParam{ + "Uuid", + {0x12, 0x3E, 0x45, 0x67, 0xE8, 0x9B, 0x12, 0xD3, 0xA4, 0x56, 0x42, 0x66, 0x14, + 0x17, 0x40, 0x00}, + Literal::UUID( + Uuid::FromString("123e4567-e89b-12d3-a456-426614174000").value()), + uuid()}, + LiteralParam{"BinaryData", {0x01, 0x02, 0x03, 0xFF}, Literal::Binary({0x01, 0x02, 0x03, 0xFF}), diff --git a/src/iceberg/transform_function.cc b/src/iceberg/transform_function.cc index b043c397c..fd9a1659b 100644 --- a/src/iceberg/transform_function.cc +++ b/src/iceberg/transform_function.cc @@ -29,6 +29,7 @@ #include "iceberg/type.h" #include "iceberg/util/murmurhash3_internal.h" #include "iceberg/util/truncate_util.h" +#include "iceberg/util/uuid.h" namespace iceberg { @@ -75,6 +76,9 @@ Result BucketTransform::Transform(const Literal& literal) { MurmurHash3_x86_32(value.data(), sizeof(uint8_t) * 16, 0, &hash_value); } else if constexpr (std::is_same_v) { MurmurHash3_x86_32(value.data(), value.size(), 0, &hash_value); + } else if constexpr (std::is_same_v) { + MurmurHash3_x86_32(std::get(literal.value()).bytes().data(), + Uuid::kLength, 0, &hash_value); } else if constexpr (std::is_same_v>) { MurmurHash3_x86_32(value.data(), value.size(), 0, &hash_value); } else if constexpr (std::is_same_v || diff --git a/src/iceberg/util/conversions.cc b/src/iceberg/util/conversions.cc index c5dbcf359..e12e4815d 100644 --- a/src/iceberg/util/conversions.cc +++ b/src/iceberg/util/conversions.cc @@ -25,6 +25,7 @@ #include "iceberg/util/endian.h" #include "iceberg/util/macros.h" +#include "iceberg/util/uuid.h" namespace iceberg { @@ -69,6 +70,12 @@ Result> ToBytesImpl(const Literal::Value& return std::vector(str.begin(), str.end()); } +template <> +Result> ToBytesImpl(const Literal::Value& value) { + const auto& uuid = std::get(value); + return std::vector(uuid.bytes().begin(), uuid.bytes().end()); +} + template <> Result> ToBytesImpl(const Literal::Value& value) { return std::get>(value); @@ -98,9 +105,10 @@ Result> Conversions::ToBytes(const PrimitiveType& type, DISPATCH_LITERAL_TO_BYTES(TypeId::kDouble) DISPATCH_LITERAL_TO_BYTES(TypeId::kBoolean) DISPATCH_LITERAL_TO_BYTES(TypeId::kString) + DISPATCH_LITERAL_TO_BYTES(TypeId::kUuid) DISPATCH_LITERAL_TO_BYTES(TypeId::kBinary) DISPATCH_LITERAL_TO_BYTES(TypeId::kFixed) - // TODO(Li Feiyang): Add support for UUID and Decimal + // TODO(Li Feiyang): Add support for Decimal default: return NotSupported("Serialization for type {} is not supported", type.ToString()); @@ -172,6 +180,10 @@ Result Conversions::FromBytes(const PrimitiveType& type, case TypeId::kString: return Literal::Value{ std::string(reinterpret_cast(data.data()), data.size())}; + case TypeId::kUuid: { + ICEBERG_ASSIGN_OR_RAISE(auto uuid, Uuid::FromBytes(data)); + return Literal::Value{uuid}; + } case TypeId::kBinary: return Literal::Value{std::vector(data.begin(), data.end())}; case TypeId::kFixed: { @@ -182,7 +194,7 @@ Result Conversions::FromBytes(const PrimitiveType& type, } return Literal::Value{std::vector(data.begin(), data.end())}; } - // TODO(Li Feiyang): Add support for UUID and Decimal + // TODO(Li Feiyang): Add support for Decimal default: return NotSupported("Deserialization for type {} is not supported", type.ToString());