From d2f2d661bc1014bb5fae92d1fe4066fff26bdf46 Mon Sep 17 00:00:00 2001
From: Arnav Balyan
Date: Sat, 16 May 2026 21:47:23 +0530
Subject: [PATCH] update

---
 .../paimon/predicate/FunctionVisitor.java     |  5 ++
 .../org/apache/paimon/predicate/IsNaN.java    | 70 +++++++++++++++++++
 .../paimon/predicate/PredicateBuilder.java    |  8 +++
 .../paimon/predicate/PredicateTest.java       | 30 ++++++++
 .../filter2/predicate/ParquetFilters.java     | 63 +++++++++++++++++
 .../format/parquet/ParquetFiltersTest.java    | 32 +++++++++
 6 files changed, 208 insertions(+)
 create mode 100644 paimon-common/src/main/java/org/apache/paimon/predicate/IsNaN.java

diff --git a/paimon-common/src/main/java/org/apache/paimon/predicate/FunctionVisitor.java b/paimon-common/src/main/java/org/apache/paimon/predicate/FunctionVisitor.java
index 5aa4ca1373ae..f7040dae06af 100644
--- a/paimon-common/src/main/java/org/apache/paimon/predicate/FunctionVisitor.java
+++ b/paimon-common/src/main/java/org/apache/paimon/predicate/FunctionVisitor.java
@@ -54,6 +54,11 @@ default T visit(CompoundPredicate predicate) {
 
     T visitIsNull(FieldRef fieldRef);
 
+    /** Visits an IS_NAN leaf; visitors that do not override this reject it with an exception. */
+    default T visitIsNaN(FieldRef fieldRef) {
+        throw new UnsupportedOperationException();
+    }
+
     // ----------------- Binary functions ------------------------
 
     T visitStartsWith(FieldRef fieldRef, Object literal);
diff --git a/paimon-common/src/main/java/org/apache/paimon/predicate/IsNaN.java b/paimon-common/src/main/java/org/apache/paimon/predicate/IsNaN.java
new file mode 100644
index 000000000000..42d3a40832de
--- /dev/null
+++ b/paimon-common/src/main/java/org/apache/paimon/predicate/IsNaN.java
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.predicate;
+
+import org.apache.paimon.types.DataType;
+
+import org.apache.paimon.shade.jackson2.com.fasterxml.jackson.annotation.JsonCreator;
+
+import java.util.List;
+import java.util.Optional;
+
+/** A {@link LeafUnaryFunction} that tests whether a FLOAT or DOUBLE field value is NaN. */
+public class IsNaN extends LeafUnaryFunction {
+
+    public static final String NAME = "IS_NAN";
+
+    public static final IsNaN INSTANCE = new IsNaN();
+
+    @JsonCreator
+    private IsNaN() {}
+
+    @Override
+    public boolean test(DataType type, Object field) {
+        if (field instanceof Float) {
+            return Float.isNaN((Float) field);
+        }
+        if (field instanceof Double) {
+            return Double.isNaN((Double) field);
+        }
+        // null and values that are neither Float nor Double are never reported as NaN.
+        return false;
+    }
+
+    @Override
+    public boolean test(DataType type, long rowCount, Object min, Object max, Long nullCount) {
+        // min/max are deliberately ignored; only a range made entirely of nulls is ruled out.
+        return nullCount == null || nullCount < rowCount;
+    }
+
+    @Override
+    public Optional<LeafFunction> negate() {
+        return Optional.empty();
+    }
+
+    @Override
+    public <T> T visit(FunctionVisitor<T> visitor, FieldRef fieldRef, List<Object> literals) {
+        return visitor.visitIsNaN(fieldRef);
+    }
+
+    @Override
+    public String toJson() {
+        return NAME;
+    }
+}
diff --git a/paimon-common/src/main/java/org/apache/paimon/predicate/PredicateBuilder.java b/paimon-common/src/main/java/org/apache/paimon/predicate/PredicateBuilder.java
index c4343475b478..05acce17290d 100644
--- a/paimon-common/src/main/java/org/apache/paimon/predicate/PredicateBuilder.java
+++ b/paimon-common/src/main/java/org/apache/paimon/predicate/PredicateBuilder.java
@@ -138,6 +138,14 @@ public Predicate isNotNull(Transform transform) {
         return leaf(IsNotNull.INSTANCE, transform);
     }
 
+    public Predicate isNaN(int idx) {
+        return leaf(IsNaN.INSTANCE, idx);
+    }
+
+    public Predicate isNaN(Transform transform) {
+        return leaf(IsNaN.INSTANCE, transform);
+    }
+
     public Predicate startsWith(int idx, Object patternLiteral) {
         return leaf(StartsWith.INSTANCE, idx, patternLiteral);
     }
diff --git a/paimon-common/src/test/java/org/apache/paimon/predicate/PredicateTest.java b/paimon-common/src/test/java/org/apache/paimon/predicate/PredicateTest.java
index 5bece366541e..0e67372d7280 100644
--- a/paimon-common/src/test/java/org/apache/paimon/predicate/PredicateTest.java
+++ b/paimon-common/src/test/java/org/apache/paimon/predicate/PredicateTest.java
@@ -23,6 +23,8 @@
 import org.apache.paimon.format.SimpleColStats;
 import org.apache.paimon.types.CharType;
 import org.apache.paimon.types.DataTypes;
+import org.apache.paimon.types.DoubleType;
+import org.apache.paimon.types.FloatType;
 import org.apache.paimon.types.IntType;
 import org.apache.paimon.types.RowType;
 import org.apache.paimon.types.VarCharType;
@@ -295,6 +297,34 @@ public void testIsNotNull() {
         assertThat(predicate.negate().orElse(null)).isEqualTo(builder.isNull(0));
     }
 
+    @Test
+    public void testIsNaNDouble() {
+        PredicateBuilder builder = new PredicateBuilder(RowType.of(new DoubleType()));
+        Predicate predicate = builder.isNaN(0);
+
+        assertThat(predicate.test(GenericRow.of(Double.NaN))).isEqualTo(true);
+        assertThat(predicate.test(GenericRow.of(1.5))).isEqualTo(false);
+        assertThat(predicate.test(GenericRow.of(Double.POSITIVE_INFINITY))).isEqualTo(false);
+        assertThat(predicate.test(GenericRow.of((Object) null))).isEqualTo(false);
+
+        assertThat(test(predicate, 3, new SimpleColStats[] {new SimpleColStats(0.0, 1.0, 0L)}))
+                .isEqualTo(true);
+        assertThat(test(predicate, 3, new SimpleColStats[] {new SimpleColStats(null, null, 3L)}))
+                .isEqualTo(false);
+
+        assertThat(predicate.negate()).isEmpty();
+    }
+
+    @Test
+    public void testIsNaNFloat() {
+        PredicateBuilder builder = new PredicateBuilder(RowType.of(new FloatType()));
+        Predicate predicate = builder.isNaN(0);
+
+        assertThat(predicate.test(GenericRow.of(Float.NaN))).isEqualTo(true);
+        assertThat(predicate.test(GenericRow.of(1.5f))).isEqualTo(false);
+        assertThat(predicate.test(GenericRow.of((Object) null))).isEqualTo(false);
+    }
+
     @Test
     public void testIn() {
         PredicateBuilder builder = new PredicateBuilder(RowType.of(new IntType()));
diff --git a/paimon-format/src/main/java/org/apache/parquet/filter2/predicate/ParquetFilters.java b/paimon-format/src/main/java/org/apache/parquet/filter2/predicate/ParquetFilters.java
index dacd12f492c1..29feeb1b5ecd 100644
--- a/paimon-format/src/main/java/org/apache/parquet/filter2/predicate/ParquetFilters.java
+++ b/paimon-format/src/main/java/org/apache/parquet/filter2/predicate/ParquetFilters.java
@@ -52,8 +52,11 @@
 import org.apache.paimon.types.VectorType;
 
 import org.apache.parquet.filter2.compat.FilterCompat;
+import org.apache.parquet.filter2.predicate.Operators.DoubleColumn;
+import org.apache.parquet.filter2.predicate.Operators.FloatColumn;
 import org.apache.parquet.io.api.Binary;
 
+import java.io.Serializable;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
@@ -97,6 +100,18 @@ public FilterPredicate visitIsNull(FieldRef fieldRef) {
             return new Operators.Eq<>(toParquetColumn(fieldRef), null);
         }
 
+        @Override
+        public FilterPredicate visitIsNaN(FieldRef fieldRef) {
+            Operators.Column<?> column = toParquetColumn(fieldRef);
+            if (column instanceof DoubleColumn) {
+                return FilterApi.userDefined((DoubleColumn) column, new IsNaNDoublePredicate());
+            }
+            if (column instanceof FloatColumn) {
+                return FilterApi.userDefined((FloatColumn) column, new IsNaNFloatPredicate());
+            }
+            throw new UnsupportedOperationException();
+        }
+
         @Override
         public FilterPredicate visitLessThan(FieldRef fieldRef, Object literal) {
             return new Operators.Lt(
@@ -441,4 +456,52 @@ public Operators.Column visit(RowType rowType) {
             throw new UnsupportedOperationException();
         }
     }
+
+    /**
+     * {@link UserDefinedPredicate} that keeps only DOUBLE values that are NaN. Statistics are
+     * never used to skip data: {@code canDrop} and {@code inverseCanDrop} always return false.
+     */
+    public static class IsNaNDoublePredicate extends UserDefinedPredicate<Double>
+            implements Serializable {
+        private static final long serialVersionUID = 1L;
+
+        @Override
+        public boolean keep(Double value) {
+            return value != null && Double.isNaN(value);
+        }
+
+        @Override
+        public boolean canDrop(Statistics<Double> statistics) {
+            return false;
+        }
+
+        @Override
+        public boolean inverseCanDrop(Statistics<Double> statistics) {
+            return false;
+        }
+    }
+
+    /**
+     * {@link UserDefinedPredicate} that keeps only FLOAT values that are NaN. Statistics are
+     * never used to skip data: {@code canDrop} and {@code inverseCanDrop} always return false.
+     */
+    public static class IsNaNFloatPredicate extends UserDefinedPredicate<Float>
+            implements Serializable {
+        private static final long serialVersionUID = 1L;
+
+        @Override
+        public boolean keep(Float value) {
+            return value != null && Float.isNaN(value);
+        }
+
+        @Override
+        public boolean canDrop(Statistics<Float> statistics) {
+            return false;
+        }
+
+        @Override
+        public boolean inverseCanDrop(Statistics<Float> statistics) {
+            return false;
+        }
+    }
 }
diff --git a/paimon-format/src/test/java/org/apache/paimon/format/parquet/ParquetFiltersTest.java b/paimon-format/src/test/java/org/apache/paimon/format/parquet/ParquetFiltersTest.java
index 18fe1ef28c78..4fdd1e3927aa 100644
--- a/paimon-format/src/test/java/org/apache/paimon/format/parquet/ParquetFiltersTest.java
+++ b/paimon-format/src/test/java/org/apache/paimon/format/parquet/ParquetFiltersTest.java
@@ -198,6 +198,38 @@ public void testInFilterString() {
                 true);
     }
 
+    @Test
+    public void testIsNaNDouble() {
+        PredicateBuilder builder =
+                new PredicateBuilder(
+                        new RowType(
+                                Collections.singletonList(
+                                        new DataField(0, "d1", new DoubleType()))));
+
+        FilterCompat.Filter filter =
+                ParquetFilters.convert(Collections.singletonList(builder.isNaN(0)));
+        FilterPredicateCompat compat = (FilterPredicateCompat) filter;
+        assertThat(compat.getFilterPredicate().toString())
+                .contains(
+                        "userdefinedbyinstance(d1, org.apache.parquet.filter2.predicate.ParquetFilters$IsNaNDoublePredicate");
+    }
+
+    @Test
+    public void testIsNaNFloat() {
+        PredicateBuilder builder =
+                new PredicateBuilder(
+                        new RowType(
+                                Collections.singletonList(
+                                        new DataField(0, "f1", new FloatType()))));
+
+        FilterCompat.Filter filter =
+                ParquetFilters.convert(Collections.singletonList(builder.isNaN(0)));
+        FilterPredicateCompat compat = (FilterPredicateCompat) filter;
+        assertThat(compat.getFilterPredicate().toString())
+                .contains(
+                        "userdefinedbyinstance(f1, org.apache.parquet.filter2.predicate.ParquetFilters$IsNaNFloatPredicate");
+    }
+
     @Test
     public void testInFilterFloat() {
         PredicateBuilder builder =