Skip to content

Commit 8c685e8

Browse files
committed
fix: get rid of NullType
1 parent 47da236 commit 8c685e8

File tree

12 files changed

+133
-87
lines changed

12 files changed

+133
-87
lines changed

src/iceberg/expression/literal.cc

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,6 @@ Literal::Literal(Value value, std::shared_ptr<PrimitiveType> type)
126126
: value_(std::move(value)), type_(std::move(type)) {}
127127

128128
// Factory methods
129-
Literal Literal::Null() { return {Value{std::monostate{}}, iceberg::null()}; }
130129

131130
Literal Literal::Boolean(bool value) { return {Value{value}, iceberg::boolean()}; }
132131

@@ -200,16 +199,14 @@ std::partial_ordering Literal::operator<=>(const Literal& other) const {
200199
return std::partial_ordering::unordered;
201200
}
202201

203-
// If either value is AboveMax or BelowMin, comparison is unordered
204-
if (IsAboveMax() || IsBelowMin() || other.IsAboveMax() || other.IsBelowMin()) {
202+
// If either value is AboveMax, BelowMin or null, comparison is unordered
203+
if (IsAboveMax() || IsBelowMin() || other.IsAboveMax() || other.IsBelowMin() ||
204+
IsNull() || other.IsNull()) {
205205
return std::partial_ordering::unordered;
206206
}
207207

208208
// Same type comparison for normal values
209209
switch (type_->type_id()) {
210-
case TypeId::kNull:
211-
// Nulls are equivalent
212-
return std::partial_ordering::equivalent;
213210
case TypeId::kBoolean: {
214211
auto this_val = std::get<bool>(value_);
215212
auto other_val = std::get<bool>(other.value_);
@@ -271,6 +268,9 @@ std::string Literal::ToString() const {
271268
if (std::holds_alternative<AboveMax>(value_)) {
272269
return "aboveMax";
273270
}
271+
if (std::holds_alternative<std::monostate>(value_)) {
272+
return "null";
273+
}
274274

275275
switch (type_->type_id()) {
276276
case TypeId::kBoolean: {
@@ -319,6 +319,8 @@ bool Literal::IsBelowMin() const { return std::holds_alternative<BelowMin>(value
319319

320320
bool Literal::IsAboveMax() const { return std::holds_alternative<AboveMax>(value_); }
321321

322+
bool Literal::IsNull() const { return std::holds_alternative<std::monostate>(value_); }
323+
322324
// LiteralCaster implementation
323325

324326
Result<Literal> LiteralCaster::CastTo(const Literal& literal,
@@ -330,7 +332,8 @@ Result<Literal> LiteralCaster::CastTo(const Literal& literal,
330332

331333
// Handle special values
332334
if (std::holds_alternative<Literal::BelowMin>(literal.value_) ||
333-
std::holds_alternative<Literal::AboveMax>(literal.value_)) {
335+
std::holds_alternative<Literal::AboveMax>(literal.value_) ||
336+
std::holds_alternative<std::monostate>(literal.value_)) {
334337
// Cannot cast type for special values
335338
return NotSupported("Cannot cast type for {}", literal.ToString());
336339
}

src/iceberg/expression/literal.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,6 @@ class ICEBERG_EXPORT Literal {
6060
BelowMin, AboveMax>;
6161

6262
/// \brief Factory methods for primitive types
63-
static Literal Null();
6463
static Literal Boolean(bool value);
6564
static Literal Int(int32_t value);
6665
static Literal Date(int32_t value);
@@ -73,6 +72,11 @@ class ICEBERG_EXPORT Literal {
7372
static Literal String(std::string value);
7473
static Literal Binary(std::vector<uint8_t> value);
7574

75+
/// \brief Create a literal representing a null value.
76+
static Literal Null(std::shared_ptr<PrimitiveType> type) {
77+
return {Value{std::monostate{}}, std::move(type)};
78+
}
79+
7680
/// \brief Restore a literal from single-value serialization.
7781
///
7882
/// See [this spec](https://iceberg.apache.org/spec/#binary-single-value-serialization)
@@ -130,6 +134,10 @@ class ICEBERG_EXPORT Literal {
130134
/// \return true if this literal represents a BelowMin value, false otherwise
131135
bool IsBelowMin() const;
132136

137+
/// Check if this literal is null.
138+
/// \return true if this literal is null, false otherwise
139+
bool IsNull() const;
140+
133141
std::string ToString() const;
134142

135143
private:

src/iceberg/json_internal.cc

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -477,9 +477,8 @@ nlohmann::json ToJson(const Type& type) {
477477
}
478478
case TypeId::kUuid:
479479
return "uuid";
480-
default:
481-
std::unreachable();
482480
}
481+
std::unreachable();
483482
}
484483

485484
nlohmann::json ToJson(const Schema& schema) {

src/iceberg/schema_internal.cc

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222
#include <cstring>
2323
#include <optional>
2424
#include <string>
25-
#include <utility>
2625

2726
#include "iceberg/schema.h"
2827
#include "iceberg/type.h"
@@ -140,8 +139,6 @@ ArrowErrorCode ToArrowSchema(const Type& type, bool optional, std::string_view n
140139
ArrowMetadataBuilderAppend(&metadata_buffer, ArrowCharView(kArrowExtensionName),
141140
ArrowCharView(kArrowUuidExtensionName)));
142141
} break;
143-
default:
144-
std::unreachable();
145142
}
146143

147144
if (!name.empty()) {

src/iceberg/transform.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323

2424
#include <cstdint>
2525
#include <memory>
26-
#include <optional>
2726
#include <variant>
2827

2928
#include "iceberg/expression/literal.h"
@@ -172,13 +171,15 @@ class ICEBERG_EXPORT TransformFunction {
172171
virtual ~TransformFunction() = default;
173172
TransformFunction(TransformType transform_type, std::shared_ptr<Type> source_type);
174173
/// \brief Transform an input Literal to a new Literal
174+
///
175+
/// All transforms must return null for a null input value.
175176
virtual Result<Literal> Transform(const Literal& literal) = 0;
176177
/// \brief Get the transform type
177178
TransformType transform_type() const;
178179
/// \brief Get the source type of transform function
179180
const std::shared_ptr<Type>& source_type() const;
180181
/// \brief Get the result type of transform function
181-
virtual Result<std::shared_ptr<Type>> ResultType() const = 0;
182+
virtual std::shared_ptr<Type> ResultType() const = 0;
182183

183184
friend bool operator==(const TransformFunction& lhs, const TransformFunction& rhs) {
184185
return lhs.Equals(rhs);

src/iceberg/transform_function.cc

Lines changed: 30 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,7 @@ IdentityTransform::IdentityTransform(std::shared_ptr<Type> const& source_type)
3535

3636
Result<Literal> IdentityTransform::Transform(const Literal& literal) { return literal; }
3737

38-
Result<std::shared_ptr<Type>> IdentityTransform::ResultType() const {
39-
return source_type();
40-
}
38+
std::shared_ptr<Type> IdentityTransform::ResultType() const { return source_type(); }
4139

4240
Result<std::unique_ptr<TransformFunction>> IdentityTransform::Make(
4341
std::shared_ptr<Type> const& source_type) {
@@ -59,6 +57,10 @@ Result<Literal> BucketTransform::Transform(const Literal& literal) {
5957
"Cannot apply bucket transform to literal with value {} of type {}",
6058
literal.ToString(), source_type()->ToString());
6159
}
60+
if (literal.IsNull()) {
61+
return Literal::Null(iceberg::int32());
62+
}
63+
6264
int32_t hash_value = 0;
6365
std::visit(
6466
[&](auto&& value) {
@@ -92,9 +94,7 @@ Result<Literal> BucketTransform::Transform(const Literal& literal) {
9294
return Literal::Int(bucket_index);
9395
}
9496

95-
Result<std::shared_ptr<Type>> BucketTransform::ResultType() const {
96-
return iceberg::int32();
97-
}
97+
std::shared_ptr<Type> BucketTransform::ResultType() const { return iceberg::int32(); }
9898

9999
Result<std::unique_ptr<TransformFunction>> BucketTransform::Make(
100100
std::shared_ptr<Type> const& source_type, int32_t num_buckets) {
@@ -135,6 +135,10 @@ Result<Literal> TruncateTransform::Transform(const Literal& literal) {
135135
"Cannot apply truncate transform to literal with value {} of type {}",
136136
literal.ToString(), source_type()->ToString());
137137
}
138+
if (literal.IsNull()) {
139+
// Return null as is
140+
return literal;
141+
}
138142

139143
switch (source_type()->type_id()) {
140144
case TypeId::kInt: {
@@ -183,9 +187,7 @@ Result<Literal> TruncateTransform::Transform(const Literal& literal) {
183187
}
184188
}
185189

186-
Result<std::shared_ptr<Type>> TruncateTransform::ResultType() const {
187-
return source_type();
188-
}
190+
std::shared_ptr<Type> TruncateTransform::ResultType() const { return source_type(); }
189191

190192
Result<std::unique_ptr<TransformFunction>> TruncateTransform::Make(
191193
std::shared_ptr<Type> const& source_type, int32_t width) {
@@ -219,6 +221,9 @@ Result<Literal> YearTransform::Transform(const Literal& literal) {
219221
"Cannot apply year transform to literal with value {} of type {}",
220222
literal.ToString(), source_type()->ToString());
221223
}
224+
if (literal.IsNull()) {
225+
return Literal::Null(iceberg::int32());
226+
}
222227

223228
using namespace std::chrono; // NOLINT
224229
switch (source_type()->type_id()) {
@@ -240,9 +245,7 @@ Result<Literal> YearTransform::Transform(const Literal& literal) {
240245
}
241246
}
242247

243-
Result<std::shared_ptr<Type>> YearTransform::ResultType() const {
244-
return iceberg::int32();
245-
}
248+
std::shared_ptr<Type> YearTransform::ResultType() const { return iceberg::int32(); }
246249

247250
Result<std::unique_ptr<TransformFunction>> YearTransform::Make(
248251
std::shared_ptr<Type> const& source_type) {
@@ -271,6 +274,9 @@ Result<Literal> MonthTransform::Transform(const Literal& literal) {
271274
"Cannot apply month transform to literal with value {} of type {}",
272275
literal.ToString(), source_type()->ToString());
273276
}
277+
if (literal.IsNull()) {
278+
return Literal::Null(iceberg::int32());
279+
}
274280

275281
using namespace std::chrono; // NOLINT
276282
switch (source_type()->type_id()) {
@@ -304,9 +310,7 @@ Result<Literal> MonthTransform::Transform(const Literal& literal) {
304310
}
305311
}
306312

307-
Result<std::shared_ptr<Type>> MonthTransform::ResultType() const {
308-
return iceberg::int32();
309-
}
313+
std::shared_ptr<Type> MonthTransform::ResultType() const { return iceberg::int32(); }
310314

311315
Result<std::unique_ptr<TransformFunction>> MonthTransform::Make(
312316
std::shared_ptr<Type> const& source_type) {
@@ -335,6 +339,9 @@ Result<Literal> DayTransform::Transform(const Literal& literal) {
335339
"Cannot apply day transform to literal with value {} of type {}",
336340
literal.ToString(), source_type()->ToString());
337341
}
342+
if (literal.IsNull()) {
343+
return Literal::Null(iceberg::int32());
344+
}
338345

339346
using namespace std::chrono; // NOLINT
340347
switch (source_type()->type_id()) {
@@ -357,7 +364,7 @@ Result<Literal> DayTransform::Transform(const Literal& literal) {
357364
}
358365
}
359366

360-
Result<std::shared_ptr<Type>> DayTransform::ResultType() const { return iceberg::date(); }
367+
std::shared_ptr<Type> DayTransform::ResultType() const { return iceberg::int32(); }
361368

362369
Result<std::unique_ptr<TransformFunction>> DayTransform::Make(
363370
std::shared_ptr<Type> const& source_type) {
@@ -387,6 +394,10 @@ Result<Literal> HourTransform::Transform(const Literal& literal) {
387394
literal.ToString(), source_type()->ToString());
388395
}
389396

397+
if (literal.IsNull()) {
398+
return Literal::Null(int32());
399+
}
400+
390401
using namespace std::chrono; // NOLINT
391402
switch (source_type()->type_id()) {
392403
case TypeId::kTimestamp:
@@ -405,9 +416,7 @@ Result<Literal> HourTransform::Transform(const Literal& literal) {
405416
}
406417
}
407418

408-
Result<std::shared_ptr<Type>> HourTransform::ResultType() const {
409-
return iceberg::int32();
410-
}
419+
std::shared_ptr<Type> HourTransform::ResultType() const { return iceberg::int32(); }
411420

412421
Result<std::unique_ptr<TransformFunction>> HourTransform::Make(
413422
std::shared_ptr<Type> const& source_type) {
@@ -429,10 +438,10 @@ VoidTransform::VoidTransform(std::shared_ptr<Type> const& source_type)
429438
: TransformFunction(TransformType::kVoid, source_type) {}
430439

431440
Result<Literal> VoidTransform::Transform(const Literal& literal) {
432-
return Literal::Null();
441+
return literal.IsNull() ? literal : Literal::Null(literal.type());
433442
}
434443

435-
Result<std::shared_ptr<Type>> VoidTransform::ResultType() const { return source_type(); }
444+
std::shared_ptr<Type> VoidTransform::ResultType() const { return source_type(); }
436445

437446
Result<std::unique_ptr<TransformFunction>> VoidTransform::Make(
438447
std::shared_ptr<Type> const& source_type) {

src/iceberg/transform_function.h

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,8 @@ class IdentityTransform : public TransformFunction {
3333
/// \brief Returns the same Literal as the input.
3434
Result<Literal> Transform(const Literal& literal) override;
3535

36-
/// \brief Returns the same type as the source type if it is valid.
37-
Result<std::shared_ptr<Type>> ResultType() const override;
36+
/// \brief Returns the same type as source_type.
37+
std::shared_ptr<Type> ResultType() const override;
3838

3939
/// \brief Create an IdentityTransform.
4040
/// \param source_type Type of the input data.
@@ -54,7 +54,7 @@ class BucketTransform : public TransformFunction {
5454
Result<Literal> Transform(const Literal& literal) override;
5555

5656
/// \brief Returns INT32 as the output type.
57-
Result<std::shared_ptr<Type>> ResultType() const override;
57+
std::shared_ptr<Type> ResultType() const override;
5858

5959
/// \brief Create a BucketTransform.
6060
/// \param source_type Type of the input data.
@@ -78,7 +78,7 @@ class TruncateTransform : public TransformFunction {
7878
Result<Literal> Transform(const Literal& literal) override;
7979

8080
/// \brief Returns the same type as source_type.
81-
Result<std::shared_ptr<Type>> ResultType() const override;
81+
std::shared_ptr<Type> ResultType() const override;
8282

8383
/// \brief Create a TruncateTransform.
8484
/// \param source_type Type of the input data.
@@ -101,7 +101,7 @@ class YearTransform : public TransformFunction {
101101
Result<Literal> Transform(const Literal& literal) override;
102102

103103
/// \brief Returns INT32 as the output type.
104-
Result<std::shared_ptr<Type>> ResultType() const override;
104+
std::shared_ptr<Type> ResultType() const override;
105105

106106
/// \brief Create a YearTransform.
107107
/// \param source_type Type of the input data.
@@ -120,7 +120,7 @@ class MonthTransform : public TransformFunction {
120120
Result<Literal> Transform(const Literal& literal) override;
121121

122122
/// \brief Returns INT32 as the output type.
123-
Result<std::shared_ptr<Type>> ResultType() const override;
123+
std::shared_ptr<Type> ResultType() const override;
124124

125125
/// \brief Create a MonthTransform.
126126
/// \param source_type Type of the input data.
@@ -139,7 +139,7 @@ class DayTransform : public TransformFunction {
139139
Result<Literal> Transform(const Literal& literal) override;
140140

141141
/// \brief Returns INT32 as the output type.
142-
Result<std::shared_ptr<Type>> ResultType() const override;
142+
std::shared_ptr<Type> ResultType() const override;
143143

144144
/// \brief Create a DayTransform.
145145
/// \param source_type Type of the input data.
@@ -158,7 +158,7 @@ class HourTransform : public TransformFunction {
158158
Result<Literal> Transform(const Literal& literal) override;
159159

160160
/// \brief Returns INT32 as the output type.
161-
Result<std::shared_ptr<Type>> ResultType() const override;
161+
std::shared_ptr<Type> ResultType() const override;
162162

163163
/// \brief Create a HourTransform.
164164
/// \param source_type Type of the input data.
@@ -176,8 +176,8 @@ class VoidTransform : public TransformFunction {
176176
/// \brief Returns a null literal.
177177
Result<Literal> Transform(const Literal& literal) override;
178178

179-
/// \brief Returns null type or a sentinel type indicating void.
180-
Result<std::shared_ptr<Type>> ResultType() const override;
179+
/// \brief Returns the same type as source_type.
180+
std::shared_ptr<Type> ResultType() const override;
181181

182182
/// \brief Create a VoidTransform.
183183
/// \param source_type Input type (ignored).

src/iceberg/type.cc

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -194,10 +194,6 @@ bool MapType::Equals(const Type& other) const {
194194
return fields_ == map.fields_;
195195
}
196196

197-
TypeId NullType::type_id() const { return TypeId::kNull; }
198-
std::string NullType::ToString() const { return "null"; }
199-
bool NullType::Equals(const Type& other) const { return other.type_id() == kTypeId; }
200-
201197
TypeId BooleanType::type_id() const { return kTypeId; }
202198
std::string BooleanType::ToString() const { return "boolean"; }
203199
bool BooleanType::Equals(const Type& other) const { return other.type_id() == kTypeId; }
@@ -300,7 +296,6 @@ bool BinaryType::Equals(const Type& other) const { return other.type_id() == kTy
300296
return result; \
301297
}
302298

303-
TYPE_FACTORY(null, NullType)
304299
TYPE_FACTORY(boolean, BooleanType)
305300
TYPE_FACTORY(int32, IntType)
306301
TYPE_FACTORY(int64, LongType)

0 commit comments

Comments
 (0)