Skip to content

Commit a13eed2

Browse files
authored
feat: implement Literal Transform (#156)
1 parent 8ecee31 commit a13eed2

File tree

11 files changed

+772
-111
lines changed

11 files changed

+772
-111
lines changed

.github/workflows/cpp-linter.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ jobs:
4747
with:
4848
style: file
4949
tidy-checks: ''
50-
version: 19
50+
version: 22
5151
files-changed-only: true
5252
lines-changed-only: true
5353
thread-comments: true

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ repos:
3030
- id: check-added-large-files
3131

3232
- repo: https://github.com/pre-commit/mirrors-clang-format
33-
rev: v19.1.5
33+
rev: v20.1.8
3434
hooks:
3535
- id: clang-format
3636
exclude: ^test/resources/.*\.json$

src/iceberg/expression/literal.cc

Lines changed: 30 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121

2222
#include <cmath>
2323
#include <concepts>
24-
#include <sstream>
2524

2625
#include "iceberg/exception.h"
2726

@@ -126,22 +125,28 @@ Literal::Literal(Value value, std::shared_ptr<PrimitiveType> type)
126125
: value_(std::move(value)), type_(std::move(type)) {}
127126

128127
// Factory methods
129-
Literal Literal::Boolean(bool value) { return {Value{value}, iceberg::boolean()}; }
128+
Literal Literal::Boolean(bool value) { return {Value{value}, boolean()}; }
130129

131-
Literal Literal::Int(int32_t value) { return {Value{value}, iceberg::int32()}; }
130+
Literal Literal::Int(int32_t value) { return {Value{value}, int32()}; }
132131

133-
Literal Literal::Long(int64_t value) { return {Value{value}, iceberg::int64()}; }
132+
Literal Literal::Date(int32_t value) { return {Value{value}, date()}; }
134133

135-
Literal Literal::Float(float value) { return {Value{value}, iceberg::float32()}; }
134+
Literal Literal::Long(int64_t value) { return {Value{value}, int64()}; }
136135

137-
Literal Literal::Double(double value) { return {Value{value}, iceberg::float64()}; }
136+
Literal Literal::Time(int64_t value) { return {Value{value}, time()}; }
138137

139-
Literal Literal::String(std::string value) {
140-
return {Value{std::move(value)}, iceberg::string()};
141-
}
138+
Literal Literal::Timestamp(int64_t value) { return {Value{value}, timestamp()}; }
139+
140+
Literal Literal::TimestampTz(int64_t value) { return {Value{value}, timestamp_tz()}; }
141+
142+
Literal Literal::Float(float value) { return {Value{value}, float32()}; }
143+
144+
Literal Literal::Double(double value) { return {Value{value}, float64()}; }
145+
146+
Literal Literal::String(std::string value) { return {Value{std::move(value)}, string()}; }
142147

143148
Literal Literal::Binary(std::vector<uint8_t> value) {
144-
return {Value{std::move(value)}, iceberg::binary()};
149+
return {Value{std::move(value)}, binary()};
145150
}
146151

147152
Result<Literal> Literal::Deserialize(std::span<const uint8_t> data,
@@ -188,8 +193,9 @@ std::partial_ordering Literal::operator<=>(const Literal& other) const {
188193
return std::partial_ordering::unordered;
189194
}
190195

191-
// If either value is AboveMax or BelowMin, comparison is unordered
192-
if (IsAboveMax() || IsBelowMin() || other.IsAboveMax() || other.IsBelowMin()) {
196+
// If either value is AboveMax, BelowMin or null, comparison is unordered
197+
if (IsAboveMax() || IsBelowMin() || other.IsAboveMax() || other.IsBelowMin() ||
198+
IsNull() || other.IsNull()) {
193199
return std::partial_ordering::unordered;
194200
}
195201

@@ -202,13 +208,16 @@ std::partial_ordering Literal::operator<=>(const Literal& other) const {
202208
return this_val ? std::partial_ordering::greater : std::partial_ordering::less;
203209
}
204210

205-
case TypeId::kInt: {
211+
case TypeId::kInt:
212+
case TypeId::kDate: {
206213
auto this_val = std::get<int32_t>(value_);
207214
auto other_val = std::get<int32_t>(other.value_);
208215
return this_val <=> other_val;
209216
}
210217

211-
case TypeId::kLong: {
218+
case TypeId::kLong:
219+
case TypeId::kTimestamp:
220+
case TypeId::kTimestampTz: {
212221
auto this_val = std::get<int64_t>(value_);
213222
auto other_val = std::get<int64_t>(other.value_);
214223
return this_val <=> other_val;
@@ -253,6 +262,9 @@ std::string Literal::ToString() const {
253262
if (std::holds_alternative<AboveMax>(value_)) {
254263
return "aboveMax";
255264
}
265+
if (std::holds_alternative<std::monostate>(value_)) {
266+
return "null";
267+
}
256268

257269
switch (type_->type_id()) {
258270
case TypeId::kBoolean: {
@@ -301,6 +313,8 @@ bool Literal::IsBelowMin() const { return std::holds_alternative<BelowMin>(value
301313

302314
bool Literal::IsAboveMax() const { return std::holds_alternative<AboveMax>(value_); }
303315

316+
bool Literal::IsNull() const { return std::holds_alternative<std::monostate>(value_); }
317+
304318
// LiteralCaster implementation
305319

306320
Result<Literal> LiteralCaster::CastTo(const Literal& literal,
@@ -312,7 +326,8 @@ Result<Literal> LiteralCaster::CastTo(const Literal& literal,
312326

313327
// Handle special values
314328
if (std::holds_alternative<Literal::BelowMin>(literal.value_) ||
315-
std::holds_alternative<Literal::AboveMax>(literal.value_)) {
329+
std::holds_alternative<Literal::AboveMax>(literal.value_) ||
330+
std::holds_alternative<std::monostate>(literal.value_)) {
316331
// Special values (BelowMin, AboveMax, null) cannot be cast to another type
317332
return NotSupported("Cannot cast type for {}", literal.ToString());
318333
}

src/iceberg/expression/literal.h

Lines changed: 24 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ namespace iceberg {
3232

3333
/// \brief Literal is a literal value that is associated with a primitive type.
3434
class ICEBERG_EXPORT Literal {
35-
private:
35+
public:
3636
/// \brief Sentinel value to indicate that the literal value is below the valid range
3737
/// of a specific primitive type. It can happen when casting a literal to a narrower
3838
/// primitive type.
@@ -48,27 +48,35 @@ class ICEBERG_EXPORT Literal {
4848
bool operator==(const AboveMax&) const = default;
4949
std::strong_ordering operator<=>(const AboveMax&) const = default;
5050
};
51-
52-
using Value = std::variant<bool, // for boolean
53-
int32_t, // for int, date
54-
int64_t, // for long, timestamp, timestamp_tz, time
55-
float, // for float
56-
double, // for double
57-
std::string, // for string
51+
using Value = std::variant<std::monostate, // for null
52+
bool, // for boolean
53+
int32_t, // for int, date
54+
int64_t, // for long, timestamp, timestamp_tz, time
55+
float, // for float
56+
double, // for double
57+
std::string, // for string
5858
std::vector<uint8_t>, // for binary, fixed
5959
std::array<uint8_t, 16>, // for uuid and decimal
6060
BelowMin, AboveMax>;
6161

62-
public:
6362
/// \brief Factory methods for primitive types
6463
static Literal Boolean(bool value);
6564
static Literal Int(int32_t value);
65+
static Literal Date(int32_t value);
6666
static Literal Long(int64_t value);
67+
static Literal Time(int64_t value);
68+
static Literal Timestamp(int64_t value);
69+
static Literal TimestampTz(int64_t value);
6770
static Literal Float(float value);
6871
static Literal Double(double value);
6972
static Literal String(std::string value);
7073
static Literal Binary(std::vector<uint8_t> value);
7174

75+
/// \brief Create a literal representing a null value.
76+
static Literal Null(std::shared_ptr<PrimitiveType> type) {
77+
return {Value{std::monostate{}}, std::move(type)};
78+
}
79+
7280
/// \brief Restore a literal from single-value serialization.
7381
///
7482
/// See [this spec](https://iceberg.apache.org/spec/#binary-single-value-serialization)
@@ -85,6 +93,9 @@ class ICEBERG_EXPORT Literal {
8593
/// \brief Get the literal type.
8694
const std::shared_ptr<PrimitiveType>& type() const;
8795

96+
/// \brief Get the literal value.
97+
const Value& value() const { return value_; }
98+
8899
/// \brief Converts this literal to a literal of the given type.
89100
///
90101
/// When a predicate is bound to a concrete data column, literals are converted to match
@@ -123,6 +134,10 @@ class ICEBERG_EXPORT Literal {
123134
/// \return true if this literal represents a BelowMin value, false otherwise
124135
bool IsBelowMin() const;
125136

137+
/// Check if this literal is null.
138+
/// \return true if this literal is null, false otherwise
139+
bool IsNull() const;
140+
126141
std::string ToString() const;
127142

128143
private:

src/iceberg/transform.h

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
#include <memory>
2626
#include <variant>
2727

28-
#include "iceberg/arrow_c_data.h"
28+
#include "iceberg/expression/literal.h"
2929
#include "iceberg/iceberg_export.h"
3030
#include "iceberg/result.h"
3131
#include "iceberg/type_fwd.h"
@@ -170,14 +170,16 @@ class ICEBERG_EXPORT TransformFunction {
170170
public:
171171
virtual ~TransformFunction() = default;
172172
TransformFunction(TransformType transform_type, std::shared_ptr<Type> source_type);
173-
/// \brief Transform an input array to a new array
174-
virtual Result<ArrowArray> Transform(const ArrowArray& data) = 0;
173+
/// \brief Transform an input Literal to a new Literal
174+
///
175+
/// All transforms must return null for a null input value.
176+
virtual Result<Literal> Transform(const Literal& literal) = 0;
175177
/// \brief Get the transform type
176178
TransformType transform_type() const;
177179
/// \brief Get the source type of transform function
178180
const std::shared_ptr<Type>& source_type() const;
179181
/// \brief Get the result type of transform function
180-
virtual Result<std::shared_ptr<Type>> ResultType() const = 0;
182+
virtual std::shared_ptr<Type> ResultType() const = 0;
181183

182184
friend bool operator==(const TransformFunction& lhs, const TransformFunction& rhs) {
183185
return lhs.Equals(rhs);

0 commit comments

Comments
 (0)