Skip to content

Commit dc23f76

Browse files
authored
feat: implement all functions of bound predicates (#280)
- Implemented the `Negate`, `ToString`, and `Test` functions for the bound predicate subclasses.
- Added hash support to `Literal`.
- Refactored `BoundSetPredicate` to use an unordered set for its literals.
- Refactored the predicate unit tests to be better organized.
1 parent 4ebe732 commit dc23f76

File tree

9 files changed

+762
-103
lines changed

9 files changed

+762
-103
lines changed

src/iceberg/expression/literal.cc

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -554,4 +554,51 @@ Result<Literal> LiteralCaster::CastTo(const Literal& literal,
554554
target_type->ToString());
555555
}
556556

557+
// LiteralValueHash implementation
558+
// Computes a hash for whichever alternative `value` currently holds.
//
// - std::monostate hashes to 0.
// - Literal::BelowMin / Literal::AboveMax hash to the minimum / maximum
//   std::size_t respectively.
// - bool, int32_t, int64_t, float, double and std::string use std::hash.
// - std::vector<uint8_t> and Uuid fold the std::hash of every byte, in order,
//   into a seed that starts at 0.
// - Decimal mixes the std::hash of the low 64 bits of its 128-bit value into
//   the std::hash of the high 64 bits.
//
// Any other alternative is rejected at compile time.
std::size_t LiteralValueHash::operator()(const Literal::Value& value) const noexcept {
  // Folds `element_hash` into `seed`; this is the single mixing step shared by
  // the byte-sequence and Decimal branches.
  const auto mix = [](std::size_t seed, std::size_t element_hash) -> std::size_t {
    constexpr std::size_t kMixConstant = 0x9e3779b9;
    return seed ^ (element_hash + kMixConstant + (seed << 6) + (seed >> 2));
  };

  // Hashes an indexable byte sequence front to back, starting from a zero seed.
  const auto hash_bytes = [&mix](const auto& bytes) -> std::size_t {
    std::size_t seed = 0;
    for (std::size_t idx = 0; idx < bytes.size(); ++idx) {
      seed = mix(seed, std::hash<uint8_t>{}(bytes[idx]));
    }
    return seed;
  };

  const auto hash_alternative = [&](const auto& held) -> std::size_t {
    using Held = std::decay_t<decltype(held)>;

    // Alternatives that are hashed directly with std::hash.
    constexpr bool kUsesStdHash =
        std::is_same_v<Held, bool> || std::is_same_v<Held, int32_t> ||
        std::is_same_v<Held, int64_t> || std::is_same_v<Held, float> ||
        std::is_same_v<Held, double> || std::is_same_v<Held, std::string>;

    if constexpr (std::is_same_v<Held, std::monostate>) {
      return 0;
    } else if constexpr (std::is_same_v<Held, Literal::BelowMin>) {
      return std::numeric_limits<std::size_t>::min();
    } else if constexpr (std::is_same_v<Held, Literal::AboveMax>) {
      return std::numeric_limits<std::size_t>::max();
    } else if constexpr (kUsesStdHash) {
      return std::hash<Held>{}(held);
    } else if constexpr (std::is_same_v<Held, std::vector<uint8_t>>) {
      return hash_bytes(held);
    } else if constexpr (std::is_same_v<Held, Decimal>) {
      // Hash the two 64-bit halves separately; the high half is the seed.
      const int128_t& unscaled = held.value();
      const std::size_t high_hash =
          std::hash<uint64_t>{}(static_cast<uint64_t>(unscaled >> 64));
      const std::size_t low_hash =
          std::hash<uint64_t>{}(static_cast<uint64_t>(unscaled));
      return mix(high_hash, low_hash);
    } else if constexpr (std::is_same_v<Held, Uuid>) {
      return hash_bytes(held.bytes());
    } else {
      // The condition depends on Held so it only fires for an unhandled type.
      static_assert(sizeof(Held) == 0, "Unhandled variant type in LiteralValueHash");
      return 0;
    }
  };

  return std::visit(hash_alternative, value);
}
603+
557604
} // namespace iceberg

src/iceberg/expression/literal.h

Lines changed: 31 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -166,79 +166,43 @@ class ICEBERG_EXPORT Literal : public util::Formattable {
166166
std::shared_ptr<PrimitiveType> type_;
167167
};
168168

169-
template <TypeId type_id>
170-
struct LiteralTraits {
171-
using ValueType = void;
172-
};
173-
174-
template <>
175-
struct LiteralTraits<TypeId::kBoolean> {
176-
using ValueType = bool;
177-
};
178-
179-
template <>
180-
struct LiteralTraits<TypeId::kInt> {
181-
using ValueType = int32_t;
182-
};
183-
184-
template <>
185-
struct LiteralTraits<TypeId::kDate> {
186-
using ValueType = int32_t;
187-
};
188-
189-
template <>
190-
struct LiteralTraits<TypeId::kLong> {
191-
using ValueType = int64_t;
192-
};
193-
194-
template <>
195-
struct LiteralTraits<TypeId::kTime> {
196-
using ValueType = int64_t;
197-
};
198-
199-
template <>
200-
struct LiteralTraits<TypeId::kTimestamp> {
201-
using ValueType = int64_t;
169+
/// \brief Hash functor for Literal::Value, to facilitate use in unordered containers
170+
struct ICEBERG_EXPORT LiteralValueHash {
171+
/// \brief Compute the hash of a literal value.
/// \param value The literal value to hash.
/// \return The hash of `value`.
std::size_t operator()(const Literal::Value& value) const noexcept;
202172
};
203173

204-
template <>
205-
struct LiteralTraits<TypeId::kTimestampTz> {
206-
using ValueType = int64_t;
207-
};
208-
209-
template <>
210-
struct LiteralTraits<TypeId::kFloat> {
211-
using ValueType = float;
212-
};
213-
214-
template <>
215-
struct LiteralTraits<TypeId::kDouble> {
216-
using ValueType = double;
217-
};
218-
219-
template <>
220-
struct LiteralTraits<TypeId::kDecimal> {
221-
using ValueType = Decimal;
222-
};
223-
224-
template <>
225-
struct LiteralTraits<TypeId::kString> {
226-
using ValueType = std::string;
174+
/// \brief Hash functor for Literal that hashes the literal's underlying value
struct ICEBERG_EXPORT LiteralHash {
  /// \brief Hash `value` by delegating to LiteralValueHash on `value.value()`.
  std::size_t operator()(const Literal& value) const noexcept {
    const LiteralValueHash value_hasher{};
    return value_hasher(value.value());
  }
};
228179

229-
template <>
230-
struct LiteralTraits<TypeId::kUuid> {
231-
using ValueType = Uuid;
180+
/// \brief Associates a TypeId with a C++ value type through the `ValueType` alias.
///
/// This is the primary template: any TypeId without an explicit specialization
/// gets `ValueType = void`.
template <TypeId type_id>
181+
struct LiteralTraits {
182+
using ValueType = void;
232183
};
233184

234-
template <>
235-
struct LiteralTraits<TypeId::kBinary> {
236-
using ValueType = std::vector<uint8_t>;
237-
};
185+
// Expands to the explicit specialization LiteralTraits<TypeId::TYPE_ID> whose
// ValueType alias is VALUE_TYPE.
#define DEFINE_LITERAL_TRAIT(TYPE_ID, VALUE_TYPE) \
186+
template <> \
187+
struct LiteralTraits<TypeId::TYPE_ID> { \
188+
using ValueType = VALUE_TYPE; \
189+
};
238190

239-
template <>
240-
struct LiteralTraits<TypeId::kFixed> {
241-
using ValueType = std::vector<uint8_t>;
242-
};
191+
// One LiteralTraits specialization per type id, pairing it with its value type.
// Several type ids share a value type (int32_t, int64_t, std::vector<uint8_t>).
DEFINE_LITERAL_TRAIT(kBoolean, bool)
192+
DEFINE_LITERAL_TRAIT(kInt, int32_t)
193+
DEFINE_LITERAL_TRAIT(kDate, int32_t)
194+
DEFINE_LITERAL_TRAIT(kLong, int64_t)
195+
DEFINE_LITERAL_TRAIT(kTime, int64_t)
196+
DEFINE_LITERAL_TRAIT(kTimestamp, int64_t)
197+
DEFINE_LITERAL_TRAIT(kTimestampTz, int64_t)
198+
DEFINE_LITERAL_TRAIT(kFloat, float)
199+
DEFINE_LITERAL_TRAIT(kDouble, double)
200+
DEFINE_LITERAL_TRAIT(kDecimal, Decimal)
201+
DEFINE_LITERAL_TRAIT(kString, std::string)
202+
DEFINE_LITERAL_TRAIT(kUuid, Uuid)
203+
DEFINE_LITERAL_TRAIT(kBinary, std::vector<uint8_t>)
204+
DEFINE_LITERAL_TRAIT(kFixed, std::vector<uint8_t>)
205+
206+
// The helper macro is only needed for the list above; remove its definition here.
#undef DEFINE_LITERAL_TRAIT
243207

244208
} // namespace iceberg

0 commit comments

Comments
 (0)