Skip to content

Commit e1963ab

Browse files
committed
feat: add full bound predicate support
- Implemented Negate and ToString functions for bound predicates.
- Added hash support to Literal.
- Refactored BoundSetPredicate to use an unordered set for literals.
- Refactored the predicate unit tests to be better organized.
1 parent 6a8aeeb commit e1963ab

File tree

5 files changed

+628
-203
lines changed

5 files changed

+628
-203
lines changed

src/iceberg/expression/literal.cc

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -554,4 +554,51 @@ Result<Literal> LiteralCaster::CastTo(const Literal& literal,
554554
target_type->ToString());
555555
}
556556

557+
// LiteralValueHash implementation
558+
std::size_t LiteralValueHash::operator()(const Literal::Value& value) const noexcept {
559+
return std::visit(
560+
[](const auto& v) -> std::size_t {
561+
using T = std::decay_t<decltype(v)>;
562+
563+
constexpr size_t kHashPrime = 0x9e3779b9;
564+
565+
if constexpr (std::is_same_v<T, std::monostate>) {
566+
return 0;
567+
} else if constexpr (std::is_same_v<T, Literal::BelowMin>) {
568+
return std::numeric_limits<std::size_t>::min();
569+
} else if constexpr (std::is_same_v<T, Literal::AboveMax>) {
570+
return std::numeric_limits<std::size_t>::max();
571+
} else if constexpr (std::is_same_v<T, bool> || std::is_same_v<T, int32_t> ||
572+
std::is_same_v<T, int64_t> || std::is_same_v<T, float> ||
573+
std::is_same_v<T, double> ||
574+
std::is_same_v<T, std::string>) {
575+
return std::hash<T>{}(v);
576+
} else if constexpr (std::is_same_v<T, std::vector<uint8_t>>) {
577+
std::size_t hash = 0;
578+
for (size_t i = 0; i < v.size(); ++i) {
579+
hash ^= std::hash<uint8_t>{}(v[i]) + kHashPrime + (hash << 6) + (hash >> 2);
580+
}
581+
return hash;
582+
} else if constexpr (std::is_same_v<T, Decimal>) {
583+
const int128_t& val = v.value();
584+
std::size_t hash = std::hash<uint64_t>{}(static_cast<uint64_t>(val >> 64));
585+
hash ^= std::hash<uint64_t>{}(static_cast<uint64_t>(val)) + kHashPrime +
586+
(hash << 6) + (hash >> 2);
587+
return hash;
588+
} else if constexpr (std::is_same_v<T, Uuid>) {
589+
std::size_t hash = 0;
590+
const auto& bytes = v.bytes();
591+
for (size_t i = 0; i < bytes.size(); ++i) {
592+
hash ^=
593+
std::hash<uint8_t>{}(bytes[i]) + kHashPrime + (hash << 6) + (hash >> 2);
594+
}
595+
return hash;
596+
} else {
597+
static_assert(sizeof(T) == 0, "Unhandled variant type in LiteralValueHash");
598+
return 0;
599+
}
600+
},
601+
value);
602+
}
603+
557604
} // namespace iceberg

src/iceberg/expression/literal.h

Lines changed: 31 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -166,79 +166,43 @@ class ICEBERG_EXPORT Literal : public util::Formattable {
166166
std::shared_ptr<PrimitiveType> type_;
167167
};
168168

169-
template <TypeId type_id>
170-
struct LiteralTraits {
171-
using ValueType = void;
172-
};
173-
174-
template <>
175-
struct LiteralTraits<TypeId::kBoolean> {
176-
using ValueType = bool;
177-
};
178-
179-
template <>
180-
struct LiteralTraits<TypeId::kInt> {
181-
using ValueType = int32_t;
182-
};
183-
184-
template <>
185-
struct LiteralTraits<TypeId::kDate> {
186-
using ValueType = int32_t;
187-
};
188-
189-
template <>
190-
struct LiteralTraits<TypeId::kLong> {
191-
using ValueType = int64_t;
192-
};
193-
194-
template <>
195-
struct LiteralTraits<TypeId::kTime> {
196-
using ValueType = int64_t;
197-
};
198-
199-
template <>
200-
struct LiteralTraits<TypeId::kTimestamp> {
201-
using ValueType = int64_t;
169+
/// \brief Hash function for Literal to facilitate use in unordered containers
170+
struct ICEBERG_EXPORT LiteralValueHash {
171+
std::size_t operator()(const Literal::Value& value) const noexcept;
202172
};
203173

204-
template <>
205-
struct LiteralTraits<TypeId::kTimestampTz> {
206-
using ValueType = int64_t;
207-
};
208-
209-
template <>
210-
struct LiteralTraits<TypeId::kFloat> {
211-
using ValueType = float;
212-
};
213-
214-
template <>
215-
struct LiteralTraits<TypeId::kDouble> {
216-
using ValueType = double;
217-
};
218-
219-
template <>
220-
struct LiteralTraits<TypeId::kDecimal> {
221-
using ValueType = Decimal;
222-
};
223-
224-
template <>
225-
struct LiteralTraits<TypeId::kString> {
226-
using ValueType = std::string;
174+
struct ICEBERG_EXPORT LiteralHash {
175+
std::size_t operator()(const Literal& value) const noexcept {
176+
return LiteralValueHash{}(value.value());
177+
}
227178
};
228179

229-
template <>
230-
struct LiteralTraits<TypeId::kUuid> {
231-
using ValueType = Uuid;
180+
template <TypeId type_id>
181+
struct LiteralTraits {
182+
using ValueType = void;
232183
};
233184

234-
template <>
235-
struct LiteralTraits<TypeId::kBinary> {
236-
using ValueType = std::vector<uint8_t>;
237-
};
185+
#define DEFINE_LITERAL_TRAIT(TYPE_ID, VALUE_TYPE) \
186+
template <> \
187+
struct LiteralTraits<TypeId::TYPE_ID> { \
188+
using ValueType = VALUE_TYPE; \
189+
};
238190

239-
template <>
240-
struct LiteralTraits<TypeId::kFixed> {
241-
using ValueType = std::vector<uint8_t>;
242-
};
191+
DEFINE_LITERAL_TRAIT(kBoolean, bool)
192+
DEFINE_LITERAL_TRAIT(kInt, int32_t)
193+
DEFINE_LITERAL_TRAIT(kDate, int32_t)
194+
DEFINE_LITERAL_TRAIT(kLong, int64_t)
195+
DEFINE_LITERAL_TRAIT(kTime, int64_t)
196+
DEFINE_LITERAL_TRAIT(kTimestamp, int64_t)
197+
DEFINE_LITERAL_TRAIT(kTimestampTz, int64_t)
198+
DEFINE_LITERAL_TRAIT(kFloat, float)
199+
DEFINE_LITERAL_TRAIT(kDouble, double)
200+
DEFINE_LITERAL_TRAIT(kDecimal, Decimal)
201+
DEFINE_LITERAL_TRAIT(kString, std::string)
202+
DEFINE_LITERAL_TRAIT(kUuid, Uuid)
203+
DEFINE_LITERAL_TRAIT(kBinary, std::vector<uint8_t>)
204+
DEFINE_LITERAL_TRAIT(kFixed, std::vector<uint8_t>)
205+
206+
#undef DEFINE_LITERAL_TRAIT
243207

244208
} // namespace iceberg

src/iceberg/expression/predicate.cc

Lines changed: 106 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -304,8 +304,22 @@ Result<bool> BoundUnaryPredicate::Test(const Literal::Value& value) const {
304304
return NotImplemented("BoundUnaryPredicate::Test not implemented");
305305
}
306306

307+
Result<std::shared_ptr<Expression>> BoundUnaryPredicate::Negate() const {
308+
ICEBERG_ASSIGN_OR_RAISE(auto negated_op, ::iceberg::Negate(op()));
309+
return std::make_shared<BoundUnaryPredicate>(negated_op, term_);
310+
}
311+
307312
bool BoundUnaryPredicate::Equals(const Expression& other) const {
308-
throw IcebergError("BoundUnaryPredicate::Equals not implemented");
313+
if (op() != other.op()) {
314+
return false;
315+
}
316+
317+
if (const auto* other_pred = dynamic_cast<const BoundUnaryPredicate*>(&other);
318+
other_pred) {
319+
return term_->Equals(*other_pred->term());
320+
}
321+
322+
return false;
309323
}
310324

311325
std::string BoundUnaryPredicate::ToString() const {
@@ -335,8 +349,68 @@ Result<bool> BoundLiteralPredicate::Test(const Literal::Value& value) const {
335349
return NotImplemented("BoundLiteralPredicate::Test not implemented");
336350
}
337351

352+
Result<std::shared_ptr<Expression>> BoundLiteralPredicate::Negate() const {
353+
ICEBERG_ASSIGN_OR_RAISE(auto negated_op, ::iceberg::Negate(op()));
354+
return std::make_shared<BoundLiteralPredicate>(negated_op, term_, literal_);
355+
}
356+
338357
bool BoundLiteralPredicate::Equals(const Expression& other) const {
339-
throw IcebergError("BoundLiteralPredicate::Equals not implemented");
358+
const auto* other_pred = dynamic_cast<const BoundLiteralPredicate*>(&other);
359+
if (!other_pred) {
360+
return false;
361+
}
362+
363+
if (op() == other.op()) {
364+
if (term_->Equals(*other_pred->term())) {
365+
// because the term is equivalent, the literal must have the same type
366+
return literal_ == other_pred->literal();
367+
}
368+
}
369+
370+
// TODO(gangwu): add TypeId::kTimestampNano
371+
static const std::unordered_set<TypeId> kIntegralTypes = {
372+
TypeId::kInt, TypeId::kLong, TypeId::kDate,
373+
TypeId::kTime, TypeId::kTimestamp, TypeId::kTimestampTz};
374+
375+
if (kIntegralTypes.contains(term_->type()->type_id()) &&
376+
term_->Equals(*other_pred->term())) {
377+
auto get_long = [](const Literal& lit) -> std::optional<int64_t> {
378+
const auto& val = lit.value();
379+
if (std::holds_alternative<int32_t>(val)) {
380+
return std::get<int32_t>(val);
381+
} else if (std::holds_alternative<int64_t>(val)) {
382+
return std::get<int64_t>(val);
383+
}
384+
return std::nullopt;
385+
};
386+
387+
auto this_val = get_long(literal_);
388+
auto other_val = get_long(other_pred->literal());
389+
if (this_val && other_val) {
390+
switch (op()) {
391+
case Expression::Operation::kLt:
392+
// < 6 is equivalent to <= 5
393+
return other_pred->op() == Expression::Operation::kLtEq &&
394+
*this_val == *other_val + 1;
395+
case Expression::Operation::kLtEq:
396+
// <= 5 is equivalent to < 6
397+
return other_pred->op() == Expression::Operation::kLt &&
398+
*this_val == *other_val - 1;
399+
case Expression::Operation::kGt:
400+
// > 5 is equivalent to >= 6
401+
return other_pred->op() == Expression::Operation::kGtEq &&
402+
*this_val == *other_val - 1;
403+
case Expression::Operation::kGtEq:
404+
// >= 6 is equivalent to > 5
405+
return other_pred->op() == Expression::Operation::kGt &&
406+
*this_val == *other_val + 1;
407+
default:
408+
return false;
409+
}
410+
}
411+
}
412+
413+
return false;
340414
}
341415

342416
std::string BoundLiteralPredicate::ToString() const {
@@ -370,27 +444,47 @@ std::string BoundLiteralPredicate::ToString() const {
370444
BoundSetPredicate::BoundSetPredicate(Expression::Operation op,
371445
std::shared_ptr<BoundTerm> term,
372446
std::span<const Literal> literals)
373-
: BoundPredicate(op, std::move(term)) {
374-
for (const auto& literal : literals) {
375-
ICEBERG_DCHECK((*literal.type() == *term_->type()),
376-
"Literal type does not match term type");
377-
value_set_.push_back(literal.value());
378-
}
379-
}
447+
: BoundPredicate(op, std::move(term)), value_set_(literals.begin(), literals.end()) {}
448+
449+
BoundSetPredicate::BoundSetPredicate(Expression::Operation op,
450+
std::shared_ptr<BoundTerm> term,
451+
LiteralSet value_set)
452+
: BoundPredicate(op, std::move(term)), value_set_(std::move(value_set)) {}
380453

381454
BoundSetPredicate::~BoundSetPredicate() = default;
382455

383456
Result<bool> BoundSetPredicate::Test(const Literal::Value& value) const {
384457
return NotImplemented("BoundSetPredicate::Test not implemented");
385458
}
386459

460+
Result<std::shared_ptr<Expression>> BoundSetPredicate::Negate() const {
461+
ICEBERG_ASSIGN_OR_RAISE(auto negated_op, ::iceberg::Negate(op()));
462+
return std::make_shared<BoundSetPredicate>(negated_op, term_, value_set_);
463+
}
464+
387465
bool BoundSetPredicate::Equals(const Expression& other) const {
388-
throw IcebergError("BoundSetPredicate::Equals not implemented");
466+
if (op() != other.op()) {
467+
return false;
468+
}
469+
470+
if (const auto* other_pred = dynamic_cast<const BoundSetPredicate*>(&other);
471+
other_pred) {
472+
return value_set_ == other_pred->value_set_;
473+
}
474+
475+
return false;
389476
}
390477

/// \brief Render the predicate as text.
///
/// \return `<term> in (<v1>, <v2>, ...)` for kIn, `<term> not in (...)` for
/// kNotIn, and a diagnostic string naming the operation for anything else.
/// The literals appear in the iteration order of the underlying set.
std::string BoundSetPredicate::ToString() const {
  const auto operation = op();
  const bool is_in = operation == Expression::Operation::kIn;

  // Anything other than kIn / kNotIn is not a valid set operation.
  if (!is_in && operation != Expression::Operation::kNotIn) {
    return std::format("Invalid set predicate: operation = {}", operation);
  }

  return std::format("{} {} {}", *term(), is_in ? "in" : "not in",
                     FormatRange(value_set_, ", ", "(", ")"));
}
395489

396490
// Explicit template instantiations

src/iceberg/expression/predicate.h

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,10 @@
2323
/// Predicate interface for boolean expressions that test terms.
2424

2525
#include <concepts>
26+
#include <unordered_set>
2627

2728
#include "iceberg/expression/expression.h"
29+
#include "iceberg/expression/literal.h"
2830
#include "iceberg/expression/term.h"
2931

3032
namespace iceberg {
@@ -149,6 +151,8 @@ class ICEBERG_EXPORT BoundUnaryPredicate : public BoundPredicate {
149151

150152
std::string ToString() const override;
151153

154+
Result<std::shared_ptr<Expression>> Negate() const override;
155+
152156
bool Equals(const Expression& other) const override;
153157
};
154158

@@ -174,6 +178,8 @@ class ICEBERG_EXPORT BoundLiteralPredicate : public BoundPredicate {
174178

175179
std::string ToString() const override;
176180

181+
Result<std::shared_ptr<Expression>> Negate() const override;
182+
177183
bool Equals(const Expression& other) const override;
178184

179185
private:
@@ -183,6 +189,8 @@ class ICEBERG_EXPORT BoundLiteralPredicate : public BoundPredicate {
183189
/// \brief Bound set predicate (membership testing against a set of values).
184190
class ICEBERG_EXPORT BoundSetPredicate : public BoundPredicate {
185191
public:
192+
using LiteralSet = std::unordered_set<Literal, LiteralHash>;
193+
186194
/// \brief Create a bound set predicate.
187195
///
188196
/// \param op The set operation (kIn, kNotIn)
@@ -191,23 +199,27 @@ class ICEBERG_EXPORT BoundSetPredicate : public BoundPredicate {
191199
BoundSetPredicate(Expression::Operation op, std::shared_ptr<BoundTerm> term,
192200
std::span<const Literal> literals);
193201

202+
/// \brief Create a bound set predicate using a set of literals.
203+
BoundSetPredicate(Expression::Operation op, std::shared_ptr<BoundTerm> term,
204+
LiteralSet value_set);
205+
194206
~BoundSetPredicate() override;
195207

196208
/// \brief Returns the set of literals to test against.
197-
const std::vector<Literal::Value>& literal_set() const { return value_set_; }
209+
const LiteralSet& literal_set() const { return value_set_; }
198210

199211
Result<bool> Test(const Literal::Value& value) const override;
200212

201213
Kind kind() const override { return Kind::kSet; }
202214

203215
std::string ToString() const override;
204216

217+
Result<std::shared_ptr<Expression>> Negate() const override;
218+
205219
bool Equals(const Expression& other) const override;
206220

207221
private:
208-
/// FIXME: Literal::Value does not have hash support. We need to add this
209-
/// and replace the vector with a unordered_set.
210-
std::vector<Literal::Value> value_set_;
222+
LiteralSet value_set_;
211223
};
212224

213225
} // namespace iceberg

0 commit comments

Comments
 (0)