Skip to content

Commit c6b8cc8

Browse files
committed
feat: Implement Type Casting and toString for Literals
1 parent 57418d4 commit c6b8cc8

File tree

3 files changed

+486
-19
lines changed

3 files changed

+486
-19
lines changed

src/iceberg/expression/literal.cc

Lines changed: 211 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,16 @@
2626

2727
namespace iceberg {
2828

29+
namespace {

/// Number of microseconds in one day (24 * 60 * 60 * 1000 * 1000).
constexpr int64_t kMicrosPerDay = 86400000000LL;

/// \brief Convert a microsecond count into a whole-day count, rounding toward
/// negative infinity.
///
/// Negative inputs map to the preceding day (e.g. -1 microsecond is day -1).
/// The computation is done purely in 64-bit integers: going through double
/// loses precision above 2^53 microseconds and can round a value that lies
/// just below a day boundary up onto the boundary, yielding the wrong day.
///
/// \param micros Microsecond count; may be negative.
/// \return The floored number of days. The result always fits in int32_t
///   because the magnitude of any int64_t divided by kMicrosPerDay is below
///   roughly 1.07e8.
int32_t MicrosToDays(int64_t micros) {
  int64_t days = micros / kMicrosPerDay;
  // Integer division truncates toward zero; step down one day when the
  // remainder is negative to obtain floor semantics.
  if (micros % kMicrosPerDay < 0) {
    --days;
  }
  return static_cast<int32_t>(days);
}

}  // namespace
38+
2939
/// \brief LiteralCaster handles type casting operations for Literal.
3040
/// This is an internal implementation class.
3141
class LiteralCaster {
@@ -52,6 +62,30 @@ class LiteralCaster {
5262
/// Cast from Float type to target type.
5363
static Result<Literal> CastFromFloat(const Literal& literal,
5464
const std::shared_ptr<PrimitiveType>& target_type);
65+
66+
/// Cast from Double type to target type.
67+
static Result<Literal> CastFromDouble(
68+
const Literal& literal, const std::shared_ptr<PrimitiveType>& target_type);
69+
70+
/// Cast from String type to target type.
71+
static Result<Literal> CastFromString(
72+
const Literal& literal, const std::shared_ptr<PrimitiveType>& target_type);
73+
74+
/// Cast from Timestamp type to target type.
75+
static Result<Literal> CastFromTimestamp(
76+
const Literal& literal, const std::shared_ptr<PrimitiveType>& target_type);
77+
78+
/// Cast from TimestampTz type to target type.
79+
static Result<Literal> CastFromTimestampTz(
80+
const Literal& literal, const std::shared_ptr<PrimitiveType>& target_type);
81+
82+
/// Cast from Binary type to target type.
83+
static Result<Literal> CastFromBinary(
84+
const Literal& literal, const std::shared_ptr<PrimitiveType>& target_type);
85+
86+
/// Cast from Fixed type to target type.
87+
static Result<Literal> CastFromFixed(const Literal& literal,
88+
const std::shared_ptr<PrimitiveType>& target_type);
5589
};
5690

5791
Literal LiteralCaster::BelowMinLiteral(std::shared_ptr<PrimitiveType> type) {
@@ -74,6 +108,8 @@ Result<Literal> LiteralCaster::CastFromInt(
74108
return Literal::Float(static_cast<float>(int_val));
75109
case TypeId::kDouble:
76110
return Literal::Double(static_cast<double>(int_val));
111+
case TypeId::kDate:
112+
return Literal::Date(int_val);
77113
default:
78114
return NotSupported("Cast from Int to {} is not implemented",
79115
target_type->ToString());
@@ -83,15 +119,14 @@ Result<Literal> LiteralCaster::CastFromInt(
83119
Result<Literal> LiteralCaster::CastFromLong(
84120
const Literal& literal, const std::shared_ptr<PrimitiveType>& target_type) {
85121
auto long_val = std::get<int64_t>(literal.value_);
86-
auto target_type_id = target_type->type_id();
87122

88-
switch (target_type_id) {
123+
switch (target_type->type_id()) {
89124
case TypeId::kInt: {
90125
// Check for overflow
91-
if (long_val >= std::numeric_limits<int32_t>::max()) {
126+
if (long_val > std::numeric_limits<int32_t>::max()) {
92127
return AboveMaxLiteral(target_type);
93128
}
94-
if (long_val <= std::numeric_limits<int32_t>::min()) {
129+
if (long_val < std::numeric_limits<int32_t>::min()) {
95130
return BelowMinLiteral(target_type);
96131
}
97132
return Literal::Int(static_cast<int32_t>(long_val));
@@ -100,6 +135,21 @@ Result<Literal> LiteralCaster::CastFromLong(
100135
return Literal::Float(static_cast<float>(long_val));
101136
case TypeId::kDouble:
102137
return Literal::Double(static_cast<double>(long_val));
138+
case TypeId::kDate: {
139+
if (long_val > static_cast<int64_t>(std::numeric_limits<int32_t>::max())) {
140+
return AboveMaxLiteral(target_type);
141+
}
142+
if (long_val < static_cast<int64_t>(std::numeric_limits<int32_t>::min())) {
143+
return BelowMinLiteral(target_type);
144+
}
145+
return Literal::Date(static_cast<int32_t>(long_val));
146+
}
147+
case TypeId::kTime:
148+
return Literal::Time(long_val);
149+
case TypeId::kTimestamp:
150+
return Literal::Timestamp(long_val);
151+
case TypeId::kTimestampTz:
152+
return Literal::TimestampTz(long_val);
103153
default:
104154
return NotSupported("Cast from Long to {} is not supported",
105155
target_type->ToString());
@@ -109,9 +159,8 @@ Result<Literal> LiteralCaster::CastFromLong(
109159
Result<Literal> LiteralCaster::CastFromFloat(
110160
const Literal& literal, const std::shared_ptr<PrimitiveType>& target_type) {
111161
auto float_val = std::get<float>(literal.value_);
112-
auto target_type_id = target_type->type_id();
113162

114-
switch (target_type_id) {
163+
switch (target_type->type_id()) {
115164
case TypeId::kDouble:
116165
return Literal::Double(static_cast<double>(float_val));
117166
default:
@@ -120,6 +169,131 @@ Result<Literal> LiteralCaster::CastFromFloat(
120169
}
121170
}
122171

172+
/// \brief Cast a Double literal to the requested primitive type.
///
/// Float is the only handled target. A value above the largest finite float
/// yields the AboveMax literal, a value below the negated largest finite float
/// yields the BelowMin literal, and anything in between is narrowed to float.
/// Every other target type results in NotSupported.
Result<Literal> LiteralCaster::CastFromDouble(
    const Literal& literal, const std::shared_ptr<PrimitiveType>& target_type) {
  const double source = std::get<double>(literal.value_);

  if (target_type->type_id() != TypeId::kFloat) {
    return NotSupported("Cast from Double to {} is not supported",
                        target_type->ToString());
  }

  // Largest finite float, widened so the range checks happen in double.
  const double float_limit = static_cast<double>(std::numeric_limits<float>::max());
  if (source > float_limit) {
    return AboveMaxLiteral(target_type);
  }
  if (source < -float_limit) {
    return BelowMinLiteral(target_type);
  }
  return Literal::Float(static_cast<float>(source));
}
191+
192+
/// \brief Cast a String literal to the requested primitive type.
///
/// No string parsing exists yet: Date, Time, Timestamp and TimestampTz targets
/// report NotImplemented, and every other target reports NotSupported.
Result<Literal> LiteralCaster::CastFromString(
    const Literal& literal, const std::shared_ptr<PrimitiveType>& target_type) {
  const auto target_id = target_type->type_id();

  // TODO(Li Feiyang): Implement the parsers below using std::chrono::parse once it
  // becomes available in the target libc++.
  if (target_id == TypeId::kDate) {
    // Expected text form: "YYYY-MM-DD"
    return NotImplemented("Cast from String to Date is not yet implemented.");
  }
  if (target_id == TypeId::kTime) {
    // Expected text form: "HH:MM:SS.ffffff"
    return NotImplemented("Cast from String to Time is not yet implemented.");
  }
  if (target_id == TypeId::kTimestamp) {
    // Expected text form: "YYYY-MM-DDTHH:MM:SS.ffffff"
    return NotImplemented("Cast from String to Timestamp is not yet implemented.");
  }
  if (target_id == TypeId::kTimestampTz) {
    // Expected text form: "YYYY-MM-DDTHH:MM:SS.ffffffZ"
    return NotImplemented("Cast from String to TimestampTz is not yet implemented.");
  }

  return NotSupported("Cast from String to {} is not supported",
                      target_type->ToString());
}
224+
225+
/// \brief Cast a Timestamp literal to the requested primitive type.
///
/// Date targets receive the stored 64-bit value converted by MicrosToDays;
/// TimestampTz targets receive the stored value unchanged. Every other target
/// type results in NotSupported.
Result<Literal> LiteralCaster::CastFromTimestamp(
    const Literal& literal, const std::shared_ptr<PrimitiveType>& target_type) {
  const int64_t micros = std::get<int64_t>(literal.value_);
  const auto target_id = target_type->type_id();

  if (target_id == TypeId::kDate) {
    return Literal::Date(MicrosToDays(micros));
  }
  if (target_id == TypeId::kTimestampTz) {
    return Literal::TimestampTz(micros);
  }
  return NotSupported("Cast from Timestamp to {} is not supported",
                      target_type->ToString());
}
239+
240+
/// \brief Cast a TimestampTz literal to the requested primitive type.
///
/// Date targets receive the stored 64-bit value converted by MicrosToDays;
/// Timestamp targets receive the stored value unchanged. Every other target
/// type results in NotSupported.
Result<Literal> LiteralCaster::CastFromTimestampTz(
    const Literal& literal, const std::shared_ptr<PrimitiveType>& target_type) {
  const int64_t stored_value = std::get<int64_t>(literal.value_);

  switch (target_type->type_id()) {
    case TypeId::kDate:
      return Literal::Date(MicrosToDays(stored_value));
    case TypeId::kTimestamp:
      return Literal::Timestamp(stored_value);
    default:
      break;
  }
  return NotSupported("Cast from TimestampTz to {} is not supported",
                      target_type->ToString());
}
256+
257+
/// \brief Cast a Binary literal to the requested primitive type.
///
/// Fixed is the only handled target, and only when the binary payload has
/// exactly the target's length; a length mismatch or any other target type
/// results in NotSupported.
Result<Literal> LiteralCaster::CastFromBinary(
    const Literal& literal, const std::shared_ptr<PrimitiveType>& target_type) {
  // Bind by reference: std::get returns a reference into the variant, and
  // taking it by value would deep-copy the payload even on the error paths.
  // Literal::Fixed takes its argument by value, so the one required copy
  // still happens there.
  const auto& binary_val = std::get<std::vector<uint8_t>>(literal.value_);

  switch (target_type->type_id()) {
    case TypeId::kFixed: {
      auto target_fixed_type = std::dynamic_pointer_cast<FixedType>(target_type);
      if (binary_val.size() == target_fixed_type->length()) {
        return Literal::Fixed(binary_val);
      }
      return NotSupported("Cannot cast Binary with length {} to Fixed({})",
                          binary_val.size(), target_fixed_type->length());
    }
    default:
      return NotSupported("Cast from Binary to {} is not supported",
                          target_type->ToString());
  }
}
274+
275+
/// \brief Cast a Fixed literal to the requested primitive type.
///
/// Binary targets receive a Binary literal built from the same bytes. Fixed
/// targets return the input literal itself when the byte count equals the
/// target length, and NotSupported otherwise. Every other target type results
/// in NotSupported.
Result<Literal> LiteralCaster::CastFromFixed(
    const Literal& literal, const std::shared_ptr<PrimitiveType>& target_type) {
  const auto& bytes = std::get<std::vector<uint8_t>>(literal.value_);
  const auto target_id = target_type->type_id();

  if (target_id == TypeId::kBinary) {
    return Literal::Binary(bytes);
  }
  if (target_id != TypeId::kFixed) {
    return NotSupported("Cast from Fixed to {} is not supported",
                        target_type->ToString());
  }

  auto fixed_target = std::dynamic_pointer_cast<FixedType>(target_type);
  if (bytes.size() != fixed_target->length()) {
    return NotSupported("Cannot cast Fixed({}) to Fixed({}) due to mismatched lengths",
                        bytes.size(), fixed_target->length());
  }
  return literal;
}
296+
123297
// Constructor
124298
Literal::Literal(Value value, std::shared_ptr<PrimitiveType> type)
125299
: value_(std::move(value)), type_(std::move(type)) {}
@@ -149,6 +323,11 @@ Literal Literal::Binary(std::vector<uint8_t> value) {
149323
return {Value{std::move(value)}, binary()};
150324
}
151325

326+
/// \brief Create a Fixed literal from raw bytes.
///
/// The literal's type is obtained from fixed() using the byte count of `value`.
Literal Literal::Fixed(std::vector<uint8_t> value) {
  // Build the type first: the length must be read before the vector is moved from.
  auto fixed_type = fixed(value.size());
  return {Value{std::move(value)}, std::move(fixed_type)};
}
330+
152331
Result<Literal> Literal::Deserialize(std::span<const uint8_t> data,
153332
std::shared_ptr<PrimitiveType> type) {
154333
return NotImplemented("Deserialization of Literal is not implemented yet");
@@ -216,6 +395,7 @@ std::partial_ordering Literal::operator<=>(const Literal& other) const {
216395
}
217396

218397
case TypeId::kLong:
398+
case TypeId::kTime:
219399
case TypeId::kTimestamp:
220400
case TypeId::kTimestampTz: {
221401
auto this_val = std::get<int64_t>(value_);
@@ -243,7 +423,8 @@ std::partial_ordering Literal::operator<=>(const Literal& other) const {
243423
return this_val <=> other_val;
244424
}
245425

246-
case TypeId::kBinary: {
426+
case TypeId::kBinary:
427+
case TypeId::kFixed: {
247428
auto& this_val = std::get<std::vector<uint8_t>>(value_);
248429
auto& other_val = std::get<std::vector<uint8_t>>(other.value_);
249430
return this_val <=> other_val;
@@ -285,22 +466,28 @@ std::string Literal::ToString() const {
285466
case TypeId::kString: {
286467
return std::get<std::string>(value_);
287468
}
288-
case TypeId::kBinary: {
469+
case TypeId::kBinary:
470+
case TypeId::kFixed: {
289471
const auto& binary_data = std::get<std::vector<uint8_t>>(value_);
290-
std::string result;
291-
result.reserve(binary_data.size() * 2); // 2 chars per byte
472+
std::string result = "X'";
473+
result.reserve(2 + binary_data.size() * 2 +
474+
1); // 2 chars per byte and 2 + 1 for prefix and suffix
292475
for (const auto& byte : binary_data) {
293476
std::format_to(std::back_inserter(result), "{:02X}", byte);
294477
}
478+
result.push_back('\'');
295479
return result;
296480
}
297-
case TypeId::kDecimal:
298-
case TypeId::kUuid:
299-
case TypeId::kFixed:
300-
case TypeId::kDate:
301481
case TypeId::kTime:
302482
case TypeId::kTimestamp:
303483
case TypeId::kTimestampTz: {
484+
return std::to_string(std::get<int64_t>(value_));
485+
}
486+
case TypeId::kDate: {
487+
return std::to_string(std::get<int32_t>(value_));
488+
}
489+
case TypeId::kDecimal:
490+
case TypeId::kUuid: {
304491
throw IcebergError("Not implemented: ToString for " + type_->ToString());
305492
}
306493
default: {
@@ -343,10 +530,18 @@ Result<Literal> LiteralCaster::CastTo(const Literal& literal,
343530
case TypeId::kFloat:
344531
return CastFromFloat(literal, target_type);
345532
case TypeId::kDouble:
346-
case TypeId::kBoolean:
533+
return CastFromDouble(literal, target_type);
347534
case TypeId::kString:
535+
return CastFromString(literal, target_type);
348536
case TypeId::kBinary:
349-
break;
537+
return CastFromBinary(literal, target_type);
538+
case TypeId::kFixed:
539+
return CastFromFixed(literal, target_type);
540+
case TypeId::kTimestamp:
541+
return CastFromTimestamp(literal, target_type);
542+
case TypeId::kTimestampTz:
543+
return CastFromTimestampTz(literal, target_type);
544+
case TypeId::kBoolean:
350545
default:
351546
break;
352547
}

src/iceberg/expression/literal.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ class ICEBERG_EXPORT Literal {
7171
static Literal Double(double value);
7272
static Literal String(std::string value);
7373
static Literal Binary(std::vector<uint8_t> value);
74+
static Literal Fixed(std::vector<uint8_t> value);
7475

7576
/// \brief Create a literal representing a null value.
7677
static Literal Null(std::shared_ptr<PrimitiveType> type) {

0 commit comments

Comments
 (0)