Skip to content

Commit 81bf29e

Browse files
authored
feat: Literal adapt Uuid representation (#253)
1 parent 046f149 commit 81bf29e

File tree

5 files changed

+59
-4
lines changed

5 files changed

+59
-4
lines changed

src/iceberg/expression/literal.cc

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,8 @@ Literal Literal::Double(double value) { return {Value{value}, float64()}; }
147147

148148
// Builds a string-typed literal, taking ownership of the given text.
Literal Literal::String(std::string value) {
  return Literal{Value{std::move(value)}, string()};
}
149149

150+
// Builds a uuid-typed literal that stores the given Uuid in the value variant.
Literal Literal::UUID(Uuid value) {
  return Literal{Value{std::move(value)}, uuid()};
}
151+
150152
// Builds a binary-typed literal, taking ownership of the given byte buffer.
Literal Literal::Binary(std::vector<uint8_t> value) {
  return Literal{Value{std::move(value)}, binary()};
}
@@ -251,6 +253,15 @@ std::partial_ordering Literal::operator<=>(const Literal& other) const {
251253
return this_val <=> other_val;
252254
}
253255

256+
case TypeId::kUuid: {
257+
auto& this_val = std::get<Uuid>(value_);
258+
auto& other_val = std::get<Uuid>(other.value_);
259+
if (this_val == other_val) {
260+
return std::partial_ordering::equivalent;
261+
}
262+
return std::partial_ordering::unordered;
263+
}
264+
254265
case TypeId::kBinary: {
255266
auto& this_val = std::get<std::vector<uint8_t>>(value_);
256267
auto& other_val = std::get<std::vector<uint8_t>>(other.value_);
@@ -299,6 +310,9 @@ std::string Literal::ToString() const {
299310
case TypeId::kString: {
300311
return std::get<std::string>(value_);
301312
}
313+
case TypeId::kUuid: {
314+
return std::get<Uuid>(value_).ToString();
315+
}
302316
case TypeId::kBinary: {
303317
const auto& binary_data = std::get<std::vector<uint8_t>>(value_);
304318
std::string result;
@@ -318,7 +332,6 @@ std::string Literal::ToString() const {
318332
return result;
319333
}
320334
case TypeId::kDecimal:
321-
case TypeId::kUuid:
322335
case TypeId::kDate:
323336
case TypeId::kTime:
324337
case TypeId::kTimestamp:

src/iceberg/expression/literal.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#include "iceberg/result.h"
2929
#include "iceberg/type.h"
3030
#include "iceberg/util/formattable.h"
31+
#include "iceberg/util/uuid.h"
3132

3233
namespace iceberg {
3334

@@ -56,8 +57,9 @@ class ICEBERG_EXPORT Literal : public util::Formattable {
5657
float, // for float
5758
double, // for double
5859
std::string, // for string
60+
Uuid, // for uuid
5961
std::vector<uint8_t>, // for binary, fixed
60-
std::array<uint8_t, 16>, // for uuid and decimal
62+
std::array<uint8_t, 16>, // for decimal
6163
BelowMin, AboveMax>;
6264

6365
/// \brief Factory methods for primitive types
@@ -71,6 +73,7 @@ class ICEBERG_EXPORT Literal : public util::Formattable {
7173
static Literal Float(float value);
7274
static Literal Double(double value);
7375
static Literal String(std::string value);
76+
static Literal UUID(Uuid value);
7477
static Literal Binary(std::vector<uint8_t> value);
7578
static Literal Fixed(std::vector<uint8_t> value);
7679

@@ -207,6 +210,11 @@ struct LiteralTraits<TypeId::kString> {
207210
using ValueType = std::string;
208211
};
209212

213+
/// \brief LiteralTraits specialization for TypeId::kUuid: its ValueType is Uuid.
template <>
214+
struct LiteralTraits<TypeId::kUuid> {
215+
using ValueType = Uuid;
216+
};
217+
210218
template <>
211219
struct LiteralTraits<TypeId::kBinary> {
212220
using ValueType = std::vector<uint8_t>;

src/iceberg/test/literal_test.cc

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,15 @@ TEST(LiteralTest, StringBasics) {
216216
EXPECT_EQ(empty_string.ToString(), "");
217217
}
218218

219+
// Uuid type tests
220+
// A literal built from a parsed UUID must report the uuid type id and render as the
// same text it was parsed from.
TEST(LiteralTest, UuidBasics) {
  const auto literal =
      Literal::UUID(Uuid::FromString("123e4567-e89b-12d3-a456-426614174000").value());

  EXPECT_EQ(literal.type()->type_id(), TypeId::kUuid);
  EXPECT_EQ(literal.ToString(), "123e4567-e89b-12d3-a456-426614174000");
}
227+
219228
TEST(LiteralTest, StringComparison) {
220229
auto string1 = Literal::String("apple");
221230
auto string2 = Literal::String("banana");
@@ -480,6 +489,15 @@ INSTANTIATE_TEST_SUITE_P(
480489
Literal::String("AAAAAAAAAAAAAAAA"),
481490
string()},
482491

492+
// Uuid type
493+
LiteralParam{
494+
"Uuid",
495+
{0x12, 0x3E, 0x45, 0x67, 0xE8, 0x9B, 0x12, 0xD3, 0xA4, 0x56, 0x42, 0x66, 0x14,
496+
0x17, 0x40, 0x00},
497+
Literal::UUID(
498+
Uuid::FromString("123e4567-e89b-12d3-a456-426614174000").value()),
499+
uuid()},
500+
483501
LiteralParam{"BinaryData",
484502
{0x01, 0x02, 0x03, 0xFF},
485503
Literal::Binary({0x01, 0x02, 0x03, 0xFF}),

src/iceberg/transform_function.cc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include "iceberg/type.h"
3030
#include "iceberg/util/murmurhash3_internal.h"
3131
#include "iceberg/util/truncate_util.h"
32+
#include "iceberg/util/uuid.h"
3233

3334
namespace iceberg {
3435

@@ -75,6 +76,9 @@ Result<Literal> BucketTransform::Transform(const Literal& literal) {
7576
MurmurHash3_x86_32(value.data(), sizeof(uint8_t) * 16, 0, &hash_value);
7677
} else if constexpr (std::is_same_v<T, std::string>) {
7778
MurmurHash3_x86_32(value.data(), value.size(), 0, &hash_value);
79+
} else if constexpr (std::is_same_v<T, Uuid>) {
80+
MurmurHash3_x86_32(std::get<Uuid>(literal.value()).bytes().data(),
81+
Uuid::kLength, 0, &hash_value);
7882
} else if constexpr (std::is_same_v<T, std::vector<uint8_t>>) {
7983
MurmurHash3_x86_32(value.data(), value.size(), 0, &hash_value);
8084
} else if constexpr (std::is_same_v<T, std::monostate> ||

src/iceberg/util/conversions.cc

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525

2626
#include "iceberg/util/endian.h"
2727
#include "iceberg/util/macros.h"
28+
#include "iceberg/util/uuid.h"
2829

2930
namespace iceberg {
3031

@@ -69,6 +70,12 @@ Result<std::vector<uint8_t>> ToBytesImpl<TypeId::kString>(const Literal::Value&
6970
return std::vector<uint8_t>(str.begin(), str.end());
7071
}
7172

73+
template <>
74+
Result<std::vector<uint8_t>> ToBytesImpl<TypeId::kUuid>(const Literal::Value& value) {
75+
const auto& uuid = std::get<Uuid>(value);
76+
return std::vector<uint8_t>(uuid.bytes().begin(), uuid.bytes().end());
77+
}
78+
7279
template <>
7380
Result<std::vector<uint8_t>> ToBytesImpl<TypeId::kBinary>(const Literal::Value& value) {
7481
return std::get<std::vector<uint8_t>>(value);
@@ -98,9 +105,10 @@ Result<std::vector<uint8_t>> Conversions::ToBytes(const PrimitiveType& type,
98105
DISPATCH_LITERAL_TO_BYTES(TypeId::kDouble)
99106
DISPATCH_LITERAL_TO_BYTES(TypeId::kBoolean)
100107
DISPATCH_LITERAL_TO_BYTES(TypeId::kString)
108+
DISPATCH_LITERAL_TO_BYTES(TypeId::kUuid)
101109
DISPATCH_LITERAL_TO_BYTES(TypeId::kBinary)
102110
DISPATCH_LITERAL_TO_BYTES(TypeId::kFixed)
103-
// TODO(Li Feiyang): Add support for UUID and Decimal
111+
// TODO(Li Feiyang): Add support for Decimal
104112

105113
default:
106114
return NotSupported("Serialization for type {} is not supported", type.ToString());
@@ -172,6 +180,10 @@ Result<Literal::Value> Conversions::FromBytes(const PrimitiveType& type,
172180
case TypeId::kString:
173181
return Literal::Value{
174182
std::string(reinterpret_cast<const char*>(data.data()), data.size())};
183+
case TypeId::kUuid: {
184+
ICEBERG_ASSIGN_OR_RAISE(auto uuid, Uuid::FromBytes(data));
185+
return Literal::Value{uuid};
186+
}
175187
case TypeId::kBinary:
176188
return Literal::Value{std::vector<uint8_t>(data.begin(), data.end())};
177189
case TypeId::kFixed: {
@@ -182,7 +194,7 @@ Result<Literal::Value> Conversions::FromBytes(const PrimitiveType& type,
182194
}
183195
return Literal::Value{std::vector<uint8_t>(data.begin(), data.end())};
184196
}
185-
// TODO(Li Feiyang): Add support for UUID and Decimal
197+
// TODO(Li Feiyang): Add support for Decimal
186198
default:
187199
return NotSupported("Deserialization for type {} is not supported",
188200
type.ToString());

0 commit comments

Comments
 (0)