Skip to content

Commit 4084db1

Browse files
committed
feat: implement literal expressions with binary serialization support
1 parent 71d5f11 commit 4084db1

File tree

8 files changed

+57
-285
lines changed

8 files changed

+57
-285
lines changed

src/iceberg/CMakeLists.txt

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,7 @@ set(ICEBERG_SOURCES
5151
util/murmurhash3_internal.cc
5252
util/timepoint.cc
5353
util/gzip_internal.cc
54-
util/conversions.cc
55-
util/literal_format.cc)
54+
util/conversions.cc)
5655

5756
set(ICEBERG_STATIC_BUILD_INTERFACE_LIBS)
5857
set(ICEBERG_SHARED_BUILD_INTERFACE_LIBS)

src/iceberg/expression/literal.cc

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424

2525
#include "iceberg/exception.h"
2626
#include "iceberg/util/conversions.h"
27-
#include "iceberg/util/literal_format.h"
2827

2928
namespace iceberg {
3029

@@ -153,7 +152,7 @@ Literal Literal::Binary(std::vector<uint8_t> value) {
153152

154153
Result<Literal> Literal::Deserialize(std::span<const uint8_t> data,
155154
std::shared_ptr<PrimitiveType> type) {
156-
return Conversions::FromBytes(type, data);
155+
return Conversions::FromBytes(std::move(type), data);
157156
}
158157

159158
Result<std::vector<uint8_t>> Literal::Serialize() const {

src/iceberg/util/conversions.cc

Lines changed: 26 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -19,17 +19,36 @@
1919

2020
#include "iceberg/util/conversions.h"
2121

22-
#include <cctype>
23-
#include <cstring>
24-
#include <ranges>
22+
#include <array>
23+
#include <span>
24+
#include <string>
2525

26-
#include "iceberg/exception.h"
27-
#include "iceberg/type.h"
2826
#include "iceberg/util/endian.h"
2927
#include "iceberg/util/macros.h"
3028

3129
namespace iceberg {
3230

31+
/// \brief Write a value in little-endian format to the buffer.
///
/// Appends exactly sizeof(T) bytes to the end of `buffer`; bytes already in the
/// buffer are left in place.
///
/// \param buffer Destination byte vector that receives the encoded bytes.
/// \param value Value to encode; taken by value so it can be converted in place.
32+
template <EndianConvertible T>
33+
void WriteLittleEndian(std::vector<uint8_t>& buffer, T value) {
34+
// Convert the local copy to little-endian order before exposing its bytes.
value = ToLittleEndian(value);
35+
// View the converted value's object representation as raw bytes.
const auto* bytes = reinterpret_cast<const uint8_t*>(&value);
36+
buffer.insert(buffer.end(), bytes, bytes + sizeof(T));
37+
}
38+
39+
/// \brief Read a value in little-endian format from the data.
///
/// Only the first sizeof(T) bytes of `data` are consumed; any bytes beyond that
/// are ignored rather than rejected.
///
/// \param data Source bytes holding the little-endian encoding of a T.
/// \return The decoded value, or an InvalidArgument error when `data` holds
/// fewer than sizeof(T) bytes.
40+
template <EndianConvertible T>
41+
Result<T> ReadLittleEndian(std::span<const uint8_t> data) {
42+
// Guard against short input before copying out of the span.
if (data.size() < sizeof(T)) [[unlikely]] {
43+
return InvalidArgument("Insufficient data to read {} bytes, got {}", sizeof(T),
44+
data.size());
45+
}
46+
47+
T value;
48+
// Copy the raw bytes into a T rather than casting the span's pointer.
// NOTE(review): std::memcpy is declared in <cstring>, which this file's include
// list no longer names directly -- presumably reached transitively; confirm.
std::memcpy(&value, data.data(), sizeof(T));
49+
return FromLittleEndian(value);
50+
}
51+
3352
Result<std::vector<uint8_t>> Conversions::ToBytes(const PrimitiveType& type,
3453
const Literal::Value& value) {
3554
std::vector<uint8_t> result;
@@ -123,13 +142,7 @@ Result<std::vector<uint8_t>> Conversions::ToBytes(const PrimitiveType& type,
123142
}
124143
return result;
125144
}
126-
127-
case TypeId::kUuid: {
128-
// 16-byte big-endian value
129-
const auto& uuid_bytes = std::get<std::array<uint8_t, 16>>(value);
130-
WriteBigEndian16(result, uuid_bytes);
131-
return result;
132-
}
145+
// TODO(Li Feiyang): Add support for UUID and Decimal
133146

134147
default:
135148
return NotSupported("Serialization for type {} is not supported", type.ToString());
@@ -255,27 +268,7 @@ Result<Literal::Value> Conversions::FromBytes(const PrimitiveType& type,
255268
return Literal::Value{std::vector<uint8_t>(data.begin(), data.end())};
256269
}
257270
}
258-
259-
case TypeId::kUuid: {
260-
if (data.size() != 16) {
261-
return InvalidArgument("UUID requires 16 bytes, got {}", data.size());
262-
}
263-
ICEBERG_ASSIGN_OR_RAISE(auto uuid_value, ReadBigEndian16(data));
264-
return Literal::Value{uuid_value};
265-
}
266-
267-
case TypeId::kDecimal: {
268-
if (data.size() > 16) {
269-
return InvalidArgument(
270-
"Decimal data too large, maximum 16 bytes supported, got {}", data.size());
271-
}
272-
273-
std::array<uint8_t, 16> decimal_bytes{};
274-
// Copy data to the end of the array (big-endian format for decimals)
275-
// This handles variable-length decimals by right-aligning them
276-
std::ranges::copy(data, decimal_bytes.end() - data.size());
277-
return Literal::Value{decimal_bytes};
278-
}
271+
// TODO(Li Feiyang): Add support for UUID and Decimal
279272

280273
default:
281274
return NotSupported("Deserialization for type {} is not supported",

src/iceberg/util/conversions.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,12 @@
1919

2020
#pragma once
2121

22+
#include <span>
2223
#include <vector>
2324

2425
#include "iceberg/expression/literal.h"
25-
#include "iceberg/iceberg_export.h"
2626
#include "iceberg/result.h"
27-
#include "iceberg/type.h"
27+
#include "iceberg/type_fwd.h"
2828

2929
namespace iceberg {
3030
class ICEBERG_EXPORT Conversions {

src/iceberg/util/endian.h

Lines changed: 27 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -19,26 +19,25 @@
1919

2020
#pragma once
2121

22-
#include <algorithm>
22+
#include <array>
2323
#include <bit>
24-
#include <cstring>
25-
#include <span>
26-
#include <vector>
27-
28-
#include "iceberg/result.h"
24+
#include <concepts>
2925

3026
/// \file iceberg/util/endian.h
3127
/// \brief Endianness conversion utilities
3228

3329
namespace iceberg {
3430

31+
/// \brief Concept for values that can be written in little-endian format.
///
/// Satisfied by every type for which std::is_arithmetic_v<T> is true, i.e. both
/// integral and floating-point types. The same concept also constrains the
/// big-endian helpers in this header.
///
/// NOTE(review): the conversion helpers constrained by this concept call
/// std::byteswap, which accepts integral types only. On a host where the swap
/// branch is instantiated, a floating-point T would presumably fail to compile
/// -- confirm whether a std::bit_cast round-trip is needed.
/// NOTE(review): std::is_arithmetic_v is declared in <type_traits>, which is not
/// among the includes visible here -- presumably pulled in transitively; confirm.
32+
template <typename T>
33+
concept EndianConvertible = std::is_arithmetic_v<T>;
34+
35+
/// \brief Concept for values that can be written in big-endian format.
3536
template <typename T>
36-
concept LittleEndianWritable =
37-
std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
38-
std::is_same_v<T, float> || std::is_same_v<T, double> || std::is_same_v<T, uint8_t>;
37+
concept BigEndianWritable = std::same_as<T, std::array<uint8_t, 16>>;
3938

4039
/// \brief Convert a value to little-endian format.
41-
template <LittleEndianWritable T>
40+
template <EndianConvertible T>
4241
T ToLittleEndian(T value) {
4342
if constexpr (std::endian::native != std::endian::little && sizeof(T) > 1) {
4443
return std::byteswap(value);
@@ -47,47 +46,31 @@ T ToLittleEndian(T value) {
4746
}
4847

4948
/// \brief Convert a value from little-endian format.
50-
template <LittleEndianWritable T>
49+
template <EndianConvertible T>
5150
T FromLittleEndian(T value) {
52-
return ToLittleEndian(value);
53-
}
54-
55-
/// \brief Write a value in little-endian format to the buffer.
56-
template <LittleEndianWritable T>
57-
void WriteLittleEndian(std::vector<uint8_t>& buffer, T value) {
58-
T le_value = ToLittleEndian(value);
59-
const auto* bytes = reinterpret_cast<const uint8_t*>(&le_value);
60-
buffer.insert(buffer.end(), bytes, bytes + sizeof(T));
61-
}
62-
63-
/// \brief Read a value in little-endian format from the data.
64-
template <LittleEndianWritable T>
65-
Result<T> ReadLittleEndian(std::span<const uint8_t> data) {
66-
if (data.size() < sizeof(T)) [[unlikely]] {
67-
return InvalidArgument("Insufficient data to read {} bytes, got {}", sizeof(T),
68-
data.size());
51+
if constexpr (std::endian::native != std::endian::little && sizeof(T) > 1) {
52+
return std::byteswap(value);
6953
}
70-
71-
T value;
72-
std::memcpy(&value, data.data(), sizeof(T));
73-
return FromLittleEndian(value);
54+
return value;
7455
}
7556

76-
/// \brief Write a 16-byte value in big-endian format (for UUID and Decimal).
77-
inline void WriteBigEndian16(std::vector<uint8_t>& buffer,
78-
const std::array<uint8_t, 16>& value) {
79-
buffer.insert(buffer.end(), value.begin(), value.end());
57+
/// \brief Convert a value from native to big-endian format.
///
/// \param value Value in the host's native byte order.
/// \return `value` unchanged when the host is big-endian or T is a single byte;
/// otherwise the result of std::byteswap(value).
///
/// NOTE(review): std::byteswap accepts integral types only, while
/// EndianConvertible also admits floating-point types -- confirm how float and
/// double are meant to be handled on little-endian hosts.
template <EndianConvertible T>
58+
constexpr T ToBigEndian(T value) {
59+
if constexpr (std::endian::native == std::endian::big || sizeof(T) <= 1) {
60+
return value;
61+
} else {
62+
return std::byteswap(value);
63+
}
8064
}
8165

82-
/// \brief Read a 16-byte value in big-endian format (for UUID and Decimal).
83-
inline Result<std::array<uint8_t, 16>> ReadBigEndian16(std::span<const uint8_t> data) {
84-
if (data.size() < 16) {
85-
return InvalidArgument("Insufficient data to read 16 bytes, got {}", data.size());
66+
/// \brief Convert a value from big-endian format to native.
67+
template <EndianConvertible T>
68+
constexpr T FromBigEndian(T value) {
69+
if constexpr (std::endian::native == std::endian::big || sizeof(T) <= 1) {
70+
return value;
71+
} else {
72+
return std::byteswap(value);
8673
}
87-
88-
std::array<uint8_t, 16> result;
89-
std::copy(data.begin(), data.begin() + 16, result.begin());
90-
return result;
9174
}
9275

9376
} // namespace iceberg

src/iceberg/util/literal_format.cc

Lines changed: 0 additions & 76 deletions
This file was deleted.

src/iceberg/util/literal_format.h

Lines changed: 0 additions & 43 deletions
This file was deleted.

0 commit comments

Comments
 (0)