Skip to content

Commit 2ccdaa7

Browse files
committed
follow review
1 parent 579ec0e commit 2ccdaa7

File tree

4 files changed

+99
-109
lines changed

4 files changed

+99
-109
lines changed

src/iceberg/expression/aggregate.cc

Lines changed: 26 additions & 100 deletions
Original file line numberDiff line numberDiff line change
@@ -40,58 +40,19 @@ std::shared_ptr<PrimitiveType> GetPrimitiveType(const BoundTerm& term) {
4040
return internal::checked_pointer_cast<PrimitiveType>(term.type());
4141
}
4242

43-
Result<Scalar> LiteralToScalar(const Literal& literal) {
44-
if (literal.IsNull()) {
45-
return Scalar{std::monostate{}};
43+
Result<Literal> EvaluateBoundTerm(const BoundTerm& term,
44+
const std::optional<std::vector<uint8_t>>& bound) {
45+
auto ptype = GetPrimitiveType(term);
46+
if (!bound.has_value()) {
47+
SingleValueStructLike data(Literal::Null(ptype));
48+
return term.Evaluate(data);
4649
}
4750

48-
switch (literal.type()->type_id()) {
49-
case TypeId::kBoolean:
50-
return Scalar{std::get<bool>(literal.value())};
51-
case TypeId::kInt:
52-
case TypeId::kDate:
53-
return Scalar{std::get<int32_t>(literal.value())};
54-
case TypeId::kLong:
55-
case TypeId::kTime:
56-
case TypeId::kTimestamp:
57-
case TypeId::kTimestampTz:
58-
return Scalar{std::get<int64_t>(literal.value())};
59-
case TypeId::kFloat:
60-
return Scalar{std::get<float>(literal.value())};
61-
case TypeId::kDouble:
62-
return Scalar{std::get<double>(literal.value())};
63-
case TypeId::kString: {
64-
const auto& str = std::get<std::string>(literal.value());
65-
return Scalar{std::string_view(str)};
66-
}
67-
case TypeId::kBinary:
68-
case TypeId::kFixed: {
69-
const auto& bytes = std::get<std::vector<uint8_t>>(literal.value());
70-
return Scalar{
71-
std::string_view(reinterpret_cast<const char*>(bytes.data()), bytes.size())};
72-
}
73-
case TypeId::kDecimal:
74-
return Scalar{std::get<Decimal>(literal.value())};
75-
default:
76-
return NotSupported("Cannot convert literal of type {} to Scalar",
77-
literal.type()->ToString());
78-
}
51+
ICEBERG_ASSIGN_OR_RAISE(auto literal, Literal::Deserialize(*bound, ptype));
52+
SingleValueStructLike data(std::move(literal));
53+
return term.Evaluate(data);
7954
}
8055

81-
class SingleValueStructLike : public StructLike {
82-
public:
83-
explicit SingleValueStructLike(Literal literal) : literal_(std::move(literal)) {}
84-
85-
Result<Scalar> GetField(size_t /*pos*/) const override {
86-
return LiteralToScalar(literal_);
87-
}
88-
89-
size_t num_fields() const override { return 1; }
90-
91-
private:
92-
Literal literal_;
93-
};
94-
9556
class CountAggregator : public BoundAggregate::Aggregator {
9657
public:
9758
explicit CountAggregator(const CountAggregate& aggregate) : aggregate_(aggregate) {}
@@ -110,12 +71,8 @@ class CountAggregator : public BoundAggregate::Aggregator {
11071
valid_ = false;
11172
return {};
11273
}
113-
ICEBERG_ASSIGN_OR_RAISE(auto maybe_count, aggregate_.CountFor(file));
114-
if (!maybe_count.has_value()) {
115-
valid_ = false;
116-
return {};
117-
}
118-
count_ += *maybe_count;
74+
ICEBERG_ASSIGN_OR_RAISE(auto count, aggregate_.CountFor(file));
75+
count_ += count;
11976
return {};
12077
}
12178

@@ -321,10 +278,6 @@ std::string Aggregate<T>::ToString() const {
321278
}
322279
}
323280

324-
Result<Literal> BoundAggregate::Evaluate(const DataFile& /*file*/) const {
325-
return NotImplemented("Evaluate(DataFile) not implemented");
326-
}
327-
328281
// -------------------- CountAggregate --------------------
329282

330283
Result<Literal> CountAggregate::Evaluate(const StructLike& data) const {
@@ -333,18 +286,9 @@ Result<Literal> CountAggregate::Evaluate(const StructLike& data) const {
333286

334287
Result<Literal> CountAggregate::Evaluate(const DataFile& file) const {
335288
ICEBERG_ASSIGN_OR_RAISE(auto count, CountFor(file));
336-
if (!count.has_value()) {
337-
return Literal::Null(int64());
338-
}
339-
return Literal::Long(*count);
289+
return Literal::Long(count);
340290
}
341291

342-
Result<std::optional<int64_t>> CountAggregate::CountFor(const DataFile& file) const {
343-
return NotImplemented("CountFor(DataFile) not implemented");
344-
}
345-
346-
bool CountAggregate::HasValue(const DataFile& /*file*/) const { return false; }
347-
348292
std::unique_ptr<BoundAggregate::Aggregator> CountAggregate::NewAggregator() const {
349293
return std::unique_ptr<BoundAggregate::Aggregator>(new CountAggregator(*this));
350294
}
@@ -366,15 +310,14 @@ Result<int64_t> CountNonNullAggregate::CountFor(const StructLike& data) const {
366310
[](const auto& val) { return val.IsNull() ? 0 : 1; });
367311
}
368312

369-
Result<std::optional<int64_t>> CountNonNullAggregate::CountFor(
370-
const DataFile& file) const {
313+
Result<int64_t> CountNonNullAggregate::CountFor(const DataFile& file) const {
371314
auto field_id = GetFieldId(term());
372-
auto value_count = GetMapValue(file.value_counts, field_id);
373-
auto null_count = GetMapValue(file.null_value_counts, field_id).value_or(0);
374-
if (!value_count.has_value()) {
375-
return std::nullopt;
315+
if (!HasValue(file)) {
316+
return NotFound("Missing metrics for field id {}", field_id);
376317
}
377-
return *value_count - null_count;
318+
auto value_count = GetMapValue(file.value_counts, field_id).value();
319+
auto null_count = GetMapValue(file.null_value_counts, field_id).value();
320+
return value_count - null_count;
378321
}
379322

380323
bool CountNonNullAggregate::HasValue(const DataFile& file) const {
@@ -399,13 +342,12 @@ Result<int64_t> CountNullAggregate::CountFor(const StructLike& data) const {
399342
[](const auto& val) { return val.IsNull() ? 1 : 0; });
400343
}
401344

402-
Result<std::optional<int64_t>> CountNullAggregate::CountFor(const DataFile& file) const {
345+
Result<int64_t> CountNullAggregate::CountFor(const DataFile& file) const {
403346
auto field_id = GetFieldId(term());
404-
auto null_count = GetMapValue(file.null_value_counts, field_id);
405-
if (!null_count.has_value()) {
406-
return std::nullopt;
347+
if (!HasValue(file)) {
348+
return NotFound("Missing metrics for field id {}", field_id);
407349
}
408-
return *null_count;
350+
return GetMapValue(file.null_value_counts, field_id).value();
409351
}
410352

411353
bool CountNullAggregate::HasValue(const DataFile& file) const {
@@ -423,9 +365,9 @@ Result<int64_t> CountStarAggregate::CountFor(const StructLike& /*data*/) const {
423365
return 1;
424366
}
425367

426-
Result<std::optional<int64_t>> CountStarAggregate::CountFor(const DataFile& file) const {
368+
Result<int64_t> CountStarAggregate::CountFor(const DataFile& file) const {
427369
if (!HasValue(file)) {
428-
return std::nullopt;
370+
return NotFound("Record count is missing");
429371
}
430372
return file.record_count;
431373
}
@@ -448,15 +390,7 @@ Result<Literal> MaxAggregate::Evaluate(const StructLike& data) const {
448390
Result<Literal> MaxAggregate::Evaluate(const DataFile& file) const {
449391
auto field_id = GetFieldId(term());
450392
auto upper = GetMapValue(file.upper_bounds, field_id);
451-
auto ptype = GetPrimitiveType(*term());
452-
if (!upper.has_value()) {
453-
SingleValueStructLike data(Literal::Null(ptype));
454-
return term()->Evaluate(data);
455-
}
456-
457-
ICEBERG_ASSIGN_OR_RAISE(auto literal, Literal::Deserialize(*upper, ptype));
458-
SingleValueStructLike data(std::move(literal));
459-
return term()->Evaluate(data);
393+
return EvaluateBoundTerm(*term(), upper);
460394
}
461395

462396
std::unique_ptr<BoundAggregate::Aggregator> MaxAggregate::NewAggregator() const {
@@ -487,15 +421,7 @@ Result<Literal> MinAggregate::Evaluate(const StructLike& data) const {
487421
Result<Literal> MinAggregate::Evaluate(const DataFile& file) const {
488422
auto field_id = GetFieldId(term());
489423
auto lower = GetMapValue(file.lower_bounds, field_id);
490-
auto ptype = GetPrimitiveType(*term());
491-
if (!lower.has_value()) {
492-
SingleValueStructLike data(Literal::Null(ptype));
493-
return term()->Evaluate(data);
494-
}
495-
496-
ICEBERG_ASSIGN_OR_RAISE(auto literal, Literal::Deserialize(*lower, ptype));
497-
SingleValueStructLike data(std::move(literal));
498-
return term()->Evaluate(data);
424+
return EvaluateBoundTerm(*term(), lower);
499425
}
500426

501427
std::unique_ptr<BoundAggregate::Aggregator> MinAggregate::NewAggregator() const {

src/iceberg/expression/aggregate.h

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323
/// Aggregate expression definitions.
2424

2525
#include <memory>
26-
#include <optional>
2726
#include <span>
2827
#include <string>
2928
#include <vector>
@@ -128,10 +127,10 @@ class ICEBERG_EXPORT BoundAggregate : public Aggregate<BoundTerm>, public Bound
128127
}
129128

130129
Result<Literal> Evaluate(const StructLike& data) const override = 0;
131-
virtual Result<Literal> Evaluate(const DataFile& file) const;
130+
virtual Result<Literal> Evaluate(const DataFile& file) const = 0;
132131

133132
/// \brief Whether metrics in the data file are sufficient to evaluate.
134-
virtual bool HasValue(const DataFile& file) const { return false; }
133+
virtual bool HasValue(const DataFile& file) const = 0;
135134

136135
bool is_bound_aggregate() const override { return true; }
137136

@@ -154,10 +153,10 @@ class ICEBERG_EXPORT CountAggregate : public BoundAggregate {
154153

155154
/// \brief Count for a single row. Subclasses implement this.
156155
virtual Result<int64_t> CountFor(const StructLike& data) const = 0;
157-
/// \brief Count using metrics from a data file. Nullopt if not available.
158-
virtual Result<std::optional<int64_t>> CountFor(const DataFile& file) const;
156+
/// \brief Count using metrics from a data file.
157+
virtual Result<int64_t> CountFor(const DataFile& file) const = 0;
159158

160-
bool HasValue(const DataFile& file) const override;
159+
bool HasValue(const DataFile& file) const override = 0;
161160

162161
protected:
163162
CountAggregate(Expression::Operation op, std::shared_ptr<BoundTerm> term)
@@ -171,7 +170,7 @@ class ICEBERG_EXPORT CountNonNullAggregate : public CountAggregate {
171170
std::shared_ptr<BoundTerm> term);
172171

173172
Result<int64_t> CountFor(const StructLike& data) const override;
174-
Result<std::optional<int64_t>> CountFor(const DataFile& file) const override;
173+
Result<int64_t> CountFor(const DataFile& file) const override;
175174

176175
bool HasValue(const DataFile& file) const override;
177176

@@ -186,7 +185,7 @@ class ICEBERG_EXPORT CountNullAggregate : public CountAggregate {
186185
std::shared_ptr<BoundTerm> term);
187186

188187
Result<int64_t> CountFor(const StructLike& data) const override;
189-
Result<std::optional<int64_t>> CountFor(const DataFile& file) const override;
188+
Result<int64_t> CountFor(const DataFile& file) const override;
190189

191190
bool HasValue(const DataFile& file) const override;
192191

@@ -200,7 +199,7 @@ class ICEBERG_EXPORT CountStarAggregate : public CountAggregate {
200199
static Result<std::unique_ptr<CountStarAggregate>> Make();
201200

202201
Result<int64_t> CountFor(const StructLike& data) const override;
203-
Result<std::optional<int64_t>> CountFor(const DataFile& file) const override;
202+
Result<int64_t> CountFor(const DataFile& file) const override;
204203

205204
bool HasValue(const DataFile& file) const override;
206205

src/iceberg/row/struct_like.cc

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,9 @@
1919

2020
#include "iceberg/row/struct_like.h"
2121

22+
#include <string>
2223
#include <utility>
24+
#include <vector>
2325

2426
#include "iceberg/result.h"
2527
#include "iceberg/util/checked_cast.h"
@@ -28,6 +30,53 @@
2830

2931
namespace iceberg {
3032

33+
Result<Scalar> LiteralToScalar(const Literal& literal) {
34+
if (literal.IsNull()) {
35+
return Scalar{std::monostate{}};
36+
}
37+
38+
switch (literal.type()->type_id()) {
39+
case TypeId::kBoolean:
40+
return Scalar{std::get<bool>(literal.value())};
41+
case TypeId::kInt:
42+
case TypeId::kDate:
43+
return Scalar{std::get<int32_t>(literal.value())};
44+
case TypeId::kLong:
45+
case TypeId::kTime:
46+
case TypeId::kTimestamp:
47+
case TypeId::kTimestampTz:
48+
return Scalar{std::get<int64_t>(literal.value())};
49+
case TypeId::kFloat:
50+
return Scalar{std::get<float>(literal.value())};
51+
case TypeId::kDouble:
52+
return Scalar{std::get<double>(literal.value())};
53+
case TypeId::kString: {
54+
const auto& str = std::get<std::string>(literal.value());
55+
return Scalar{std::string_view(str)};
56+
}
57+
case TypeId::kBinary:
58+
case TypeId::kFixed: {
59+
const auto& bytes = std::get<std::vector<uint8_t>>(literal.value());
60+
return Scalar{
61+
std::string_view(reinterpret_cast<const char*>(bytes.data()), bytes.size())};
62+
}
63+
case TypeId::kDecimal:
64+
return Scalar{std::get<Decimal>(literal.value())};
65+
default:
66+
return NotSupported("Cannot convert literal of type {} to Scalar",
67+
literal.type()->ToString());
68+
}
69+
}
70+
71+
SingleValueStructLike::SingleValueStructLike(Literal literal)
72+
: literal_(std::move(literal)) {}
73+
74+
Result<Scalar> SingleValueStructLike::GetField(size_t /*pos*/) const {
75+
return LiteralToScalar(literal_);
76+
}
77+
78+
size_t SingleValueStructLike::num_fields() const { return 1; }
79+
3180
StructLikeAccessor::StructLikeAccessor(std::shared_ptr<Type> type,
3281
std::span<const size_t> position_path)
3382
: type_(std::move(type)) {

src/iceberg/row/struct_like.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,9 @@ using Scalar = std::variant<std::monostate, // for null
5555
std::shared_ptr<ArrayLike>, // for list
5656
std::shared_ptr<MapLike>>; // for map
5757

58+
/// \brief Convert a Literal to a Scalar
59+
Result<Scalar> LiteralToScalar(const Literal& literal);
60+
5861
/// \brief An immutable struct-like wrapper.
5962
class ICEBERG_EXPORT StructLike {
6063
public:
@@ -68,6 +71,19 @@ class ICEBERG_EXPORT StructLike {
6871
virtual size_t num_fields() const = 0;
6972
};
7073

74+
/// \brief A single-field StructLike that wraps a Literal
75+
class ICEBERG_EXPORT SingleValueStructLike : public StructLike {
76+
public:
77+
explicit SingleValueStructLike(Literal literal);
78+
79+
Result<Scalar> GetField(size_t pos) const override;
80+
81+
size_t num_fields() const override;
82+
83+
private:
84+
Literal literal_;
85+
};
86+
7187
/// \brief An immutable array-like wrapper.
7288
class ICEBERG_EXPORT ArrayLike {
7389
public:

0 commit comments

Comments
 (0)