Skip to content

Commit bf9822f

Browse files
committed
polish
1 parent 89ad9e9 commit bf9822f

File tree

6 files changed

+87
-110
lines changed

6 files changed

+87
-110
lines changed

src/iceberg/expression/inclusive_metrics_evaluator.cc

Lines changed: 62 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ class InclusiveMetricsVisitor : public BoundVisitor<bool> {
8686
if (ContainsNullsOnly(id)) {
8787
return kRowCannotMatch;
8888
}
89-
if (std::dynamic_pointer_cast<BoundReference>(expr) == nullptr) {
89+
if (dynamic_cast<const BoundReference*>(expr.get()) == nullptr) {
9090
return kRowsMightMatch;
9191
}
9292
auto it = data_file_.nan_value_counts.find(id);
@@ -97,13 +97,12 @@ class InclusiveMetricsVisitor : public BoundVisitor<bool> {
9797
}
9898

9999
Result<bool> NotNaN(const std::shared_ptr<Bound>& expr) override {
100-
if (std::dynamic_pointer_cast<BoundReference>(expr) == nullptr) {
100+
if (dynamic_cast<const BoundReference*>(expr.get()) == nullptr) {
101101
// identity transforms are already removed by this time
102102
return kRowsMightMatch;
103103
}
104104

105105
int32_t id = expr->reference()->field().field_id();
106-
107106
if (ContainsNaNsOnly(id)) {
108107
return kRowCannotMatch;
109108
}
@@ -117,20 +116,18 @@ class InclusiveMetricsVisitor : public BoundVisitor<bool> {
117116
if (ContainsNullsOnly(id) || ContainsNaNsOnly(id)) {
118117
return kRowCannotMatch;
119118
}
120-
ICEBERG_ASSIGN_OR_RAISE(auto lower_result, LowerBound(expr));
121-
if (!lower_result.has_value() || lower_result.value().IsNull() ||
122-
lower_result.value().IsNaN()) {
119+
ICEBERG_ASSIGN_OR_RAISE(auto lower, LowerBound(expr));
120+
if (!lower.has_value() || lower->IsNull() || lower->IsNaN()) {
123121
// NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
124122
return kRowsMightMatch;
125123
}
126-
const auto& lower = lower_result.value();
127124

128125
// this also works for transforms that are order preserving:
129126
// if a transform f is order preserving, a < b means that f(a) <= f(b).
130127
// because lower <= a for all values of a in the file, f(lower) <= f(a).
131128
// when f(lower) >= X then f(a) >= f(lower) >= X, so there is no a such that f(a) < X
132129
// f(lower) >= X means rows cannot match
133-
if (lower >= lit) {
130+
if (lower.value() >= lit) {
134131
return kRowCannotMatch;
135132
}
136133

@@ -144,20 +141,18 @@ class InclusiveMetricsVisitor : public BoundVisitor<bool> {
144141
return kRowCannotMatch;
145142
}
146143

147-
ICEBERG_ASSIGN_OR_RAISE(auto lower_result, LowerBound(expr));
148-
if (!lower_result.has_value() || lower_result.value().IsNull() ||
149-
lower_result.value().IsNaN()) {
144+
ICEBERG_ASSIGN_OR_RAISE(auto lower, LowerBound(expr));
145+
if (!lower.has_value() || lower->IsNull() || lower->IsNaN()) {
150146
// NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
151147
return kRowsMightMatch;
152148
}
153-
const auto& lower = lower_result.value();
154149

155150
// this also works for transforms that are order preserving:
156151
// if a transform f is order preserving, a < b means that f(a) <= f(b).
157152
// because lower <= a for all values of a in the file, f(lower) <= f(a).
158153
// when f(lower) > X then f(a) >= f(lower) > X, so there is no a such that f(a) <= X
159154
// f(lower) > X means rows cannot match
160-
if (lower > lit) {
155+
if (lower.value() > lit) {
161156
return kRowCannotMatch;
162157
}
163158

@@ -171,12 +166,11 @@ class InclusiveMetricsVisitor : public BoundVisitor<bool> {
171166
return kRowCannotMatch;
172167
}
173168

174-
ICEBERG_ASSIGN_OR_RAISE(auto upper_result, UpperBound(expr));
175-
if (!upper_result.has_value() || upper_result.value().IsNull()) {
169+
ICEBERG_ASSIGN_OR_RAISE(auto upper, UpperBound(expr));
170+
if (!upper.has_value() || upper->IsNull()) {
176171
return kRowsMightMatch;
177172
}
178-
const auto& upper = upper_result.value();
179-
if (upper <= lit) {
173+
if (upper.value() <= lit) {
180174
return kRowCannotMatch;
181175
}
182176

@@ -190,12 +184,11 @@ class InclusiveMetricsVisitor : public BoundVisitor<bool> {
190184
return kRowCannotMatch;
191185
}
192186

193-
ICEBERG_ASSIGN_OR_RAISE(auto upper_result, UpperBound(expr));
194-
if (!upper_result.has_value() || upper_result.value().IsNull()) {
187+
ICEBERG_ASSIGN_OR_RAISE(auto upper, UpperBound(expr));
188+
if (!upper.has_value() || upper->IsNull()) {
195189
return kRowsMightMatch;
196190
}
197-
const auto& upper = upper_result.value();
198-
if (upper < lit) {
191+
if (upper.value() < lit) {
199192
return kRowCannotMatch;
200193
}
201194

@@ -209,21 +202,18 @@ class InclusiveMetricsVisitor : public BoundVisitor<bool> {
209202
return kRowCannotMatch;
210203
}
211204

212-
ICEBERG_ASSIGN_OR_RAISE(auto lower_result, LowerBound(expr));
213-
if (lower_result.has_value() && !lower_result.value().IsNull() &&
214-
!lower_result.value().IsNaN()) {
215-
const auto& lower = lower_result.value();
216-
if (!lower.IsNaN() && lower > lit) {
205+
ICEBERG_ASSIGN_OR_RAISE(auto lower, LowerBound(expr));
206+
if (lower.has_value() && !lower->IsNull() && !lower->IsNaN()) {
207+
if (lower.value() > lit) {
217208
return kRowCannotMatch;
218209
}
219210
}
220211

221-
ICEBERG_ASSIGN_OR_RAISE(auto upper_result, UpperBound(expr));
222-
if (!upper_result.has_value() || upper_result.value().IsNull()) {
212+
ICEBERG_ASSIGN_OR_RAISE(auto upper, UpperBound(expr));
213+
if (!upper.has_value() || upper->IsNull()) {
223214
return kRowsMightMatch;
224215
}
225-
const auto& upper = upper_result.value();
226-
if (upper < lit) {
216+
if (upper.value() < lit) {
227217
return kRowCannotMatch;
228218
}
229219

@@ -249,28 +239,25 @@ class InclusiveMetricsVisitor : public BoundVisitor<bool> {
249239
return kRowsMightMatch;
250240
}
251241

252-
ICEBERG_ASSIGN_OR_RAISE(auto lower_result, LowerBound(expr));
253-
if (!lower_result.has_value() || lower_result.value().IsNull() ||
254-
lower_result.value().IsNaN()) {
242+
ICEBERG_ASSIGN_OR_RAISE(auto lower, LowerBound(expr));
243+
if (!lower.has_value() || lower->IsNull() || lower->IsNaN()) {
255244
// NaN indicates unreliable bounds. See the InclusiveMetricsEvaluator docs for more.
256245
return kRowsMightMatch;
257246
}
258-
const auto& lower = lower_result.value();
259247
auto literals_view = literal_set | std::views::filter([&](const Literal& lit) {
260-
return lower <= lit;
248+
return lower.value() <= lit;
261249
});
262250
// if all values are less than lower bound, rows cannot match
263251
if (literals_view.empty()) {
264252
return kRowCannotMatch;
265253
}
266254

267-
ICEBERG_ASSIGN_OR_RAISE(auto upper_result, UpperBound(expr));
268-
if (!upper_result.has_value() || upper_result.value().IsNull()) {
255+
ICEBERG_ASSIGN_OR_RAISE(auto upper, UpperBound(expr));
256+
if (!upper.has_value() || upper->IsNull()) {
269257
return kRowsMightMatch;
270258
}
271-
const auto& upper = upper_result.value();
272259
auto filtered_view = literals_view | std::views::filter([&](const Literal& lit) {
273-
return upper >= lit;
260+
return upper.value() >= lit;
274261
});
275262
// if remaining values are greater than upper bound, rows cannot match
276263
if (filtered_view.empty()) {
@@ -290,8 +277,8 @@ class InclusiveMetricsVisitor : public BoundVisitor<bool> {
290277

291278
Result<bool> StartsWith(const std::shared_ptr<Bound>& expr,
292279
const Literal& lit) override {
293-
auto transform = std::dynamic_pointer_cast<BoundTransform>(expr);
294-
if (transform != nullptr &&
280+
if (auto transform = dynamic_cast<const BoundTransform*>(expr.get());
281+
transform != nullptr &&
295282
transform->transform()->transform_type() != TransformType::kIdentity) {
296283
// truncate must be rewritten in binding. the result is either always or never
297284
// compatible
@@ -305,28 +292,26 @@ class InclusiveMetricsVisitor : public BoundVisitor<bool> {
305292
if (lit.type()->type_id() != TypeId::kString) {
306293
return kRowCannotMatch;
307294
}
308-
const auto& prefix = get<std::string>(lit.value());
295+
const auto& prefix = std::get<std::string>(lit.value());
309296

310-
ICEBERG_ASSIGN_OR_RAISE(auto lower_result, LowerBound(expr));
311-
if (!lower_result.has_value() || lower_result.value().IsNull()) {
297+
ICEBERG_ASSIGN_OR_RAISE(auto lower, LowerBound(expr));
298+
if (!lower.has_value() || lower->IsNull()) {
312299
return kRowsMightMatch;
313300
}
314-
const auto& lower = lower_result.value();
315-
const auto& lower_str = get<std::string>(lower.value());
301+
const auto& lower_str = std::get<std::string>(lower->value());
316302
// truncate lower bound so that its length in bytes is not greater than the length of
317303
// prefix
318-
int length = std::min(prefix.size(), lower_str.size());
304+
size_t length = std::min(prefix.size(), lower_str.size());
319305
// if prefix of lower bound is greater than prefix, rows cannot match
320306
if (lower_str.substr(0, length) > prefix) {
321307
return kRowCannotMatch;
322308
}
323309

324-
ICEBERG_ASSIGN_OR_RAISE(auto upper_result, UpperBound(expr));
325-
if (!upper_result.has_value() || upper_result.value().IsNull()) {
310+
ICEBERG_ASSIGN_OR_RAISE(auto upper, UpperBound(expr));
311+
if (!upper.has_value() || upper->IsNull()) {
326312
return kRowsMightMatch;
327313
}
328-
const auto& upper = upper_result.value();
329-
const auto& upper_str = get<std::string>(upper.value());
314+
const auto& upper_str = std::get<std::string>(upper->value());
330315
// truncate upper bound so that its length in bytes is not greater than the length of
331316
// prefix
332317
length = std::min(prefix.size(), upper_str.size());
@@ -350,20 +335,17 @@ class InclusiveMetricsVisitor : public BoundVisitor<bool> {
350335
if (lit.type()->type_id() != TypeId::kString) {
351336
return kRowCannotMatch;
352337
}
353-
const auto& prefix = get<std::string>(lit.value());
338+
const auto& prefix = std::get<std::string>(lit.value());
354339

355340
// notStartsWith will match unless all values must start with the prefix. This happens
356341
// when the lower and upper bounds both start with the prefix.
357-
ICEBERG_ASSIGN_OR_RAISE(auto lower_result, LowerBound(expr));
358-
ICEBERG_ASSIGN_OR_RAISE(auto upper_result, UpperBound(expr));
359-
if (!lower_result.has_value() || lower_result.value().IsNull() ||
360-
!upper_result.has_value() || upper_result.value().IsNull()) {
342+
ICEBERG_ASSIGN_OR_RAISE(auto lower, LowerBound(expr));
343+
ICEBERG_ASSIGN_OR_RAISE(auto upper, UpperBound(expr));
344+
if (!lower.has_value() || lower->IsNull() || !upper.has_value() || upper->IsNull()) {
361345
return kRowsMightMatch;
362346
}
363-
const auto& lower = lower_result.value();
364-
const auto& upper = upper_result.value();
365-
const auto& lower_str = get<std::string>(lower.value());
366-
const auto& upper_str = get<std::string>(upper.value());
347+
const auto& lower_str = std::get<std::string>(lower->value());
348+
const auto& upper_str = std::get<std::string>(upper->value());
367349

368350
// if lower is shorter than the prefix then lower doesn't start with the prefix
369351
if (lower_str.size() < prefix.size()) {
@@ -395,24 +377,24 @@ class InclusiveMetricsVisitor : public BoundVisitor<bool> {
395377
bool ContainsNullsOnly(int32_t id) {
396378
auto val_it = data_file_.value_counts.find(id);
397379
auto null_it = data_file_.null_value_counts.find(id);
398-
return val_it != data_file_.value_counts.end() &&
399-
null_it != data_file_.null_value_counts.end() &&
400-
val_it->second - null_it->second == 0;
380+
return val_it != data_file_.value_counts.cend() &&
381+
null_it != data_file_.null_value_counts.cend() &&
382+
val_it->second == null_it->second;
401383
}
402384

403385
bool ContainsNaNsOnly(int32_t id) {
404386
auto val_it = data_file_.value_counts.find(id);
405387
auto nan_it = data_file_.nan_value_counts.find(id);
406-
return val_it != data_file_.value_counts.end() &&
407-
nan_it != data_file_.nan_value_counts.end() &&
388+
return val_it != data_file_.value_counts.cend() &&
389+
nan_it != data_file_.nan_value_counts.cend() &&
408390
val_it->second == nan_it->second;
409391
}
410392

411393
Result<std::optional<Literal>> LowerBound(const std::shared_ptr<Bound>& expr) {
412-
if (auto reference = std::dynamic_pointer_cast<BoundReference>(expr);
394+
if (auto reference = dynamic_cast<const BoundReference*>(expr.get());
413395
reference != nullptr) {
414396
return ParseLowerBound(*reference);
415-
} else if (auto transform = std::dynamic_pointer_cast<BoundTransform>(expr);
397+
} else if (auto transform = dynamic_cast<BoundTransform*>(expr.get());
416398
transform != nullptr) {
417399
return TransformLowerBound(*transform);
418400
} else {
@@ -422,10 +404,10 @@ class InclusiveMetricsVisitor : public BoundVisitor<bool> {
422404
}
423405

424406
Result<std::optional<Literal>> UpperBound(const std::shared_ptr<Bound>& expr) {
425-
if (auto reference = std::dynamic_pointer_cast<BoundReference>(expr);
407+
if (auto reference = dynamic_cast<const BoundReference*>(expr.get());
426408
reference != nullptr) {
427409
return ParseUpperBound(*reference);
428-
} else if (auto transform = std::dynamic_pointer_cast<BoundTransform>(expr);
410+
} else if (auto transform = dynamic_cast<BoundTransform*>(expr.get());
429411
transform != nullptr) {
430412
return TransformUpperBound(*transform);
431413
} else {
@@ -441,11 +423,8 @@ class InclusiveMetricsVisitor : public BoundVisitor<bool> {
441423
return NotSupported("Lower bound of non-primitive type is not supported.");
442424
}
443425
auto primitive_type = internal::checked_pointer_cast<PrimitiveType>(type);
444-
if (!data_file_.lower_bounds.empty() && data_file_.lower_bounds.contains(id)) {
445-
ICEBERG_ASSIGN_OR_RAISE(
446-
auto lower,
447-
Literal::Deserialize(data_file_.lower_bounds.at(id), primitive_type));
448-
return lower;
426+
if (data_file_.lower_bounds.contains(id)) {
427+
return Literal::Deserialize(data_file_.lower_bounds.at(id), primitive_type);
449428
}
450429

451430
return std::nullopt;
@@ -458,11 +437,8 @@ class InclusiveMetricsVisitor : public BoundVisitor<bool> {
458437
return NotSupported("Upper bound of non-primitive type is not supported.");
459438
}
460439
auto primitive_type = internal::checked_pointer_cast<PrimitiveType>(type);
461-
if (!data_file_.upper_bounds.empty() && data_file_.upper_bounds.contains(id)) {
462-
ICEBERG_ASSIGN_OR_RAISE(
463-
auto upper,
464-
Literal::Deserialize(data_file_.upper_bounds.at(id), primitive_type));
465-
return upper;
440+
if (data_file_.upper_bounds.contains(id)) {
441+
return Literal::Deserialize(data_file_.upper_bounds.at(id), primitive_type);
466442
}
467443

468444
return std::nullopt;
@@ -472,9 +448,9 @@ class InclusiveMetricsVisitor : public BoundVisitor<bool> {
472448
auto transform = boundTransform.transform();
473449
if (transform->PreservesOrder()) {
474450
ICEBERG_ASSIGN_OR_RAISE(auto lower, ParseLowerBound(*boundTransform.reference()));
475-
ICEBERG_ASSIGN_OR_RAISE(auto transform_func,
476-
transform->Bind(boundTransform.reference()->type()));
477451
if (lower.has_value()) {
452+
ICEBERG_ASSIGN_OR_RAISE(auto transform_func,
453+
transform->Bind(boundTransform.reference()->type()));
478454
return transform_func->Transform(lower.value());
479455
}
480456
}
@@ -486,9 +462,9 @@ class InclusiveMetricsVisitor : public BoundVisitor<bool> {
486462
auto transform = boundTransform.transform();
487463
if (transform->PreservesOrder()) {
488464
ICEBERG_ASSIGN_OR_RAISE(auto upper, ParseUpperBound(*boundTransform.reference()));
489-
ICEBERG_ASSIGN_OR_RAISE(auto transform_func,
490-
transform->Bind(boundTransform.reference()->type()));
491465
if (upper.has_value()) {
466+
ICEBERG_ASSIGN_OR_RAISE(auto transform_func,
467+
transform->Bind(boundTransform.reference()->type()));
492468
return transform_func->Transform(upper.value());
493469
}
494470
}
@@ -498,10 +474,10 @@ class InclusiveMetricsVisitor : public BoundVisitor<bool> {
498474

499475
/** Returns true if the expression term produces a non-null value for non-null input. */
500476
bool IsNonNullPreserving(const std::shared_ptr<Bound>& expr) {
501-
if (auto reference = std::dynamic_pointer_cast<BoundReference>(expr);
477+
if (auto reference = dynamic_cast<const BoundReference*>(expr.get());
502478
reference != nullptr) {
503479
return true;
504-
} else if (auto transform = std::dynamic_pointer_cast<BoundTransform>(expr);
480+
} else if (auto transform = dynamic_cast<BoundTransform*>(expr.get());
505481
transform != nullptr) {
506482
return transform->transform()->PreservesOrder();
507483
}
@@ -528,7 +504,7 @@ Result<std::unique_ptr<InclusiveMetricsEvaluator>> InclusiveMetricsEvaluator::Ma
528504
new InclusiveMetricsEvaluator(std::move(bound_expr)));
529505
}
530506

531-
Result<bool> InclusiveMetricsEvaluator::Eval(const DataFile& data_file) const {
507+
Result<bool> InclusiveMetricsEvaluator::Evaluate(const DataFile& data_file) const {
532508
if (data_file.record_count == 0) {
533509
return kRowCannotMatch;
534510
}

src/iceberg/expression/inclusive_metrics_evaluator.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,12 +62,11 @@ class ICEBERG_EXPORT InclusiveMetricsEvaluator {
6262
///
6363
/// \param data_file The data file to evaluate
6464
/// \return true if the file matches the expression, false otherwise, or error
65-
Result<bool> Eval(const DataFile& data_file) const;
65+
Result<bool> Evaluate(const DataFile& data_file) const;
6666

6767
private:
6868
explicit InclusiveMetricsEvaluator(std::shared_ptr<Expression> expr);
6969

70-
private:
7170
std::shared_ptr<Expression> expr_;
7271
};
7372

0 commit comments

Comments
 (0)