Skip to content

Commit 046f149

Browse files
authored
feat: implement literal expressions with binary serialization support (#185)
## Summary

Implements binary serialization and deserialization support for Literal values, enabling conversion between Literal objects and their binary representations. Adds comprehensive formatting support for date, time, and timestamp types.

## Changes

- Added `Conversions` utility class (`src/iceberg/util/conversions.cc/h`) with `ToBytes()` and `FromBytes()` methods for `Literal` binary serialization/deserialization
- Added literal formatting utilities (`src/iceberg/util/literal_format.cc/h`) for `date`, `time`, `timestamp`, and `timestamptz` formatting
- Implemented `Literal` serialization methods: replaced the placeholder implementations of `Serialize()` and `Deserialize()` with full functionality
- Enhanced `Literal::ToString()`: added support for `date`, `time`, `timestamp`, and `timestamptz` types
- Added `TypeId` string conversion: implemented a `ToString(TypeId)` utility function for type name lookups
- Updated CMake configuration: added the new util source files to the build system

## Test Plan

- Comprehensive binary round-trip tests for all primitive types (boolean, int, long, float, double, string, binary)
- Serialization correctness tests verify that exact byte representations match the expected formats
- Date/time formatting tests ensure proper ISO 8601-compatible string output
- Modified existing tests (e.g., manifest_reader_test.cc) to use binary serialization.
1 parent 3669b05 commit 046f149

File tree

10 files changed

+684
-69
lines changed

10 files changed

+684
-69
lines changed

src/iceberg/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,10 +55,11 @@ set(ICEBERG_SOURCES
5555
manifest_reader_internal.cc
5656
manifest_writer.cc
5757
arrow_c_data_guard_internal.cc
58+
util/conversions.cc
5859
util/decimal.cc
60+
util/gzip_internal.cc
5961
util/murmurhash3_internal.cc
6062
util/timepoint.cc
61-
util/gzip_internal.cc
6263
util/uuid.cc)
6364

6465
set(ICEBERG_STATIC_BUILD_INTERFACE_LIBS)

src/iceberg/expression/literal.cc

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323
#include <concepts>
2424

2525
#include "iceberg/exception.h"
26+
#include "iceberg/util/conversions.h"
27+
#include "iceberg/util/macros.h"
2628

2729
namespace iceberg {
2830

@@ -149,13 +151,18 @@ Literal Literal::Binary(std::vector<uint8_t> value) {
149151
return {Value{std::move(value)}, binary()};
150152
}
151153

154+
/// \brief Create a fixed-length binary literal from the given bytes.
///
/// The resulting literal's type is `fixed(length)`, where `length` is the number
/// of bytes in `value`. The byte vector is moved into the literal.
Literal Literal::Fixed(std::vector<uint8_t> value) {
155+
// Read the size before `value` is moved from in the return statement below.
// NOTE(review): the cast narrows size_t to int32_t; sizes above INT32_MAX are
// not checked here — confirm callers never pass buffers that large.
auto length = static_cast<int32_t>(value.size());
156+
return {Value{std::move(value)}, fixed(length)};
157+
}
158+
152159
/// \brief Build a Literal of the given primitive type from its binary form.
///
/// After this change the work is delegated to Conversions::FromBytes, which
/// receives the target type and the raw bytes; its result (value or error) is
/// returned unchanged. Previously this returned a NotImplemented error.
Result<Literal> Literal::Deserialize(std::span<const uint8_t> data,
153160
std::shared_ptr<PrimitiveType> type) {
154-
return NotImplemented("Deserialization of Literal is not implemented yet");
161+
// Note the argument order: FromBytes takes the type first, then the bytes.
return Conversions::FromBytes(std::move(type), data);
155162
}
156163

157164
/// \brief Produce the binary form of this literal.
///
/// After this change the work is delegated to Conversions::ToBytes, whose
/// result (bytes or error) is returned unchanged. Previously this returned a
/// NotImplemented error.
Result<std::vector<uint8_t>> Literal::Serialize() const {
158-
return NotImplemented("Serialization of Literal is not implemented yet");
165+
return Conversions::ToBytes(*this);
159166
}
160167

161168
// Getters
@@ -189,7 +196,7 @@ bool Literal::operator==(const Literal& other) const { return (*this <=> other)
189196
// Three-way comparison operator
190197
std::partial_ordering Literal::operator<=>(const Literal& other) const {
191198
// If types are different, comparison is unordered
192-
if (type_->type_id() != other.type_->type_id()) {
199+
if (*type_ != *other.type_) {
193200
return std::partial_ordering::unordered;
194201
}
195202

@@ -216,6 +223,7 @@ std::partial_ordering Literal::operator<=>(const Literal& other) const {
216223
}
217224

218225
case TypeId::kLong:
226+
case TypeId::kTime:
219227
case TypeId::kTimestamp:
220228
case TypeId::kTimestampTz: {
221229
auto this_val = std::get<int64_t>(value_);
@@ -249,6 +257,12 @@ std::partial_ordering Literal::operator<=>(const Literal& other) const {
249257
return this_val <=> other_val;
250258
}
251259

260+
case TypeId::kFixed: {
261+
auto& this_val = std::get<std::vector<uint8_t>>(value_);
262+
auto& other_val = std::get<std::vector<uint8_t>>(other.value_);
263+
return this_val <=> other_val;
264+
}
265+
252266
default:
253267
// For unsupported types, return unordered
254268
return std::partial_ordering::unordered;
@@ -294,9 +308,17 @@ std::string Literal::ToString() const {
294308
}
295309
return result;
296310
}
311+
case TypeId::kFixed: {
312+
const auto& fixed_data = std::get<std::vector<uint8_t>>(value_);
313+
std::string result;
314+
result.reserve(fixed_data.size() * 2); // 2 chars per byte
315+
for (const auto& byte : fixed_data) {
316+
std::format_to(std::back_inserter(result), "{:02X}", byte);
317+
}
318+
return result;
319+
}
297320
case TypeId::kDecimal:
298321
case TypeId::kUuid:
299-
case TypeId::kFixed:
300322
case TypeId::kDate:
301323
case TypeId::kTime:
302324
case TypeId::kTimestamp:

src/iceberg/expression/literal.h

Lines changed: 67 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ class ICEBERG_EXPORT Literal : public util::Formattable {
7272
static Literal Double(double value);
7373
static Literal String(std::string value);
7474
static Literal Binary(std::vector<uint8_t> value);
75+
static Literal Fixed(std::vector<uint8_t> value);
7576

7677
/// \brief Create a literal representing a null value.
7778
static Literal Null(std::shared_ptr<PrimitiveType> type) {
@@ -144,11 +145,76 @@ class ICEBERG_EXPORT Literal : public util::Formattable {
144145
private:
145146
Literal(Value value, std::shared_ptr<PrimitiveType> type);
146147

148+
friend class Conversions;
147149
friend class LiteralCaster;
148150

149-
private:
150151
Value value_;
151152
std::shared_ptr<PrimitiveType> type_;
152153
};
153154

155+
/// \brief Compile-time mapping from a TypeId to a C++ type, exposed as
/// `LiteralTraits<id>::ValueType`.
///
/// The primary template yields `void`, so any TypeId without an explicit
/// specialization below has no usable value type.
/// NOTE(review): presumably ValueType is the alternative held in
/// Literal::Value for that TypeId — confirm against the Value variant.
template <TypeId type_id>
156+
struct LiteralTraits {
157+
using ValueType = void;
158+
};
159+
160+
// Boolean literals map to bool.
template <>
161+
struct LiteralTraits<TypeId::kBoolean> {
162+
using ValueType = bool;
163+
};
164+
165+
// kInt and kDate both map to int32_t.
template <>
166+
struct LiteralTraits<TypeId::kInt> {
167+
using ValueType = int32_t;
168+
};
169+
170+
template <>
171+
struct LiteralTraits<TypeId::kDate> {
172+
using ValueType = int32_t;
173+
};
174+
175+
// kLong, kTime, kTimestamp and kTimestampTz all map to int64_t.
template <>
176+
struct LiteralTraits<TypeId::kLong> {
177+
using ValueType = int64_t;
178+
};
179+
180+
template <>
181+
struct LiteralTraits<TypeId::kTime> {
182+
using ValueType = int64_t;
183+
};
184+
185+
template <>
186+
struct LiteralTraits<TypeId::kTimestamp> {
187+
using ValueType = int64_t;
188+
};
189+
190+
template <>
191+
struct LiteralTraits<TypeId::kTimestampTz> {
192+
using ValueType = int64_t;
193+
};
194+
195+
// Floating-point types map to the C++ type of the same width.
template <>
196+
struct LiteralTraits<TypeId::kFloat> {
197+
using ValueType = float;
198+
};
199+
200+
template <>
201+
struct LiteralTraits<TypeId::kDouble> {
202+
using ValueType = double;
203+
};
204+
205+
// String literals map to an owning std::string.
template <>
206+
struct LiteralTraits<TypeId::kString> {
207+
using ValueType = std::string;
208+
};
209+
210+
// kBinary and kFixed both map to a byte vector.
template <>
211+
struct LiteralTraits<TypeId::kBinary> {
212+
using ValueType = std::vector<uint8_t>;
213+
};
214+
215+
template <>
216+
struct LiteralTraits<TypeId::kFixed> {
217+
using ValueType = std::vector<uint8_t>;
218+
};
219+
154220
} // namespace iceberg

0 commit comments

Comments
 (0)