Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,10 @@ default T visit(CompoundPredicate predicate) {

/**
 * Visits an is-null check on the given field.
 *
 * @param fieldRef reference to the field being checked
 * @return the visitor's result for this function
 */
T visitIsNull(FieldRef fieldRef);

/**
 * Visits an is-NaN check on the given field.
 *
 * <p>This default implementation always throws; visitors that can handle the function must
 * override it.
 *
 * @param fieldRef reference to the field being checked
 * @return the visitor's result for this function (never returned by this default)
 * @throws UnsupportedOperationException always, naming the visitor class that has no override
 */
default T visitIsNaN(FieldRef fieldRef) {
    // Name the concrete visitor so an unsupported call can be traced to the missing override.
    throw new UnsupportedOperationException(
            "IsNaN is not supported by " + getClass().getName());
}

// ----------------- Binary functions ------------------------

/**
 * Visits a starts-with function on the given field.
 *
 * @param fieldRef reference to the field the function is applied to
 * @param literal the literal operand of the function (presumably the prefix to match)
 * @return the visitor's result for this function
 */
T visitStartsWith(FieldRef fieldRef, Object literal);
Expand Down
68 changes: 68 additions & 0 deletions paimon-common/src/main/java/org/apache/paimon/predicate/IsNaN.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.paimon.predicate;

import org.apache.paimon.types.DataType;

import org.apache.paimon.shade.jackson2.com.fasterxml.jackson.annotation.JsonCreator;

import java.util.List;
import java.util.Optional;

/**
 * A {@link LeafUnaryFunction} that matches NaN values of float and double columns.
 *
 * <p>A value that is neither a {@link Float} nor a {@link Double} (including {@code null}) never
 * matches.
 */
public class IsNaN extends LeafUnaryFunction {

    public static final String NAME = "IS_NAN";

    public static final IsNaN INSTANCE = new IsNaN();

    @JsonCreator
    private IsNaN() {}

    /**
     * Tests a single value.
     *
     * @param type the declared type of the field (not consulted)
     * @param field the value to test, may be {@code null}
     * @return {@code true} only when {@code field} is a {@link Double} or {@link Float} NaN
     */
    @Override
    public boolean test(DataType type, Object field) {
        if (field instanceof Double) {
            return ((Double) field).isNaN();
        }
        return field instanceof Float && ((Float) field).isNaN();
    }

    /**
     * Tests column statistics.
     *
     * <p>None of the statistics are consulted: the answer is always {@code true}, i.e. the
     * function never rules out a match from statistics.
     */
    @Override
    public boolean test(DataType type, long rowCount, Object min, Object max, Long nullCount) {
        return true;
    }

    /** Returns an empty {@link Optional}: this function provides no negated counterpart. */
    @Override
    public Optional<LeafFunction> negate() {
        return Optional.empty();
    }

    /** Dispatches to {@link FunctionVisitor#visitIsNaN(FieldRef)}; {@code literals} is unused. */
    @Override
    public <T> T visit(FunctionVisitor<T> visitor, FieldRef fieldRef, List<Object> literals) {
        return visitor.visitIsNaN(fieldRef);
    }

    /** Returns {@link #NAME} as the JSON representation of this function. */
    @Override
    public String toJson() {
        return NAME;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,14 @@ public Predicate isNotNull(Transform transform) {
return leaf(IsNotNull.INSTANCE, transform);
}

/**
 * Builds a leaf predicate that applies {@link IsNaN} to the field at the given index.
 *
 * @param idx index of the field to test
 * @return the predicate produced by {@code leaf} for {@link IsNaN#INSTANCE}
 */
public Predicate isNaN(int idx) {
return leaf(IsNaN.INSTANCE, idx);
}

/**
 * Builds a leaf predicate that applies {@link IsNaN} to the given transform.
 *
 * @param transform the transform whose result is tested
 * @return the predicate produced by {@code leaf} for {@link IsNaN#INSTANCE}
 */
public Predicate isNaN(Transform transform) {
return leaf(IsNaN.INSTANCE, transform);
}

/**
 * Builds a leaf predicate that applies {@link StartsWith} to the field at the given index.
 *
 * @param idx index of the field to test
 * @param patternLiteral the literal passed to the function (presumably the prefix to match)
 * @return the predicate produced by {@code leaf} for {@link StartsWith#INSTANCE}
 */
public Predicate startsWith(int idx, Object patternLiteral) {
return leaf(StartsWith.INSTANCE, idx, patternLiteral);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
import org.apache.paimon.format.SimpleColStats;
import org.apache.paimon.types.CharType;
import org.apache.paimon.types.DataTypes;
import org.apache.paimon.types.DoubleType;
import org.apache.paimon.types.FloatType;
import org.apache.paimon.types.IntType;
import org.apache.paimon.types.RowType;
import org.apache.paimon.types.VarCharType;
Expand Down Expand Up @@ -295,6 +297,32 @@ public void testIsNotNull() {
assertThat(predicate.negate().orElse(null)).isEqualTo(builder.isNull(0));
}

/** Checks row-level, stats-level and negation behavior of IsNaN on a double column. */
@Test
public void testIsNaNDouble() {
    Predicate isNaN = new PredicateBuilder(RowType.of(new DoubleType())).isNaN(0);

    // Row-level evaluation: only NaN matches; ordinary values, infinity and null do not.
    GenericRow nanRow = GenericRow.of(Double.NaN);
    GenericRow finiteRow = GenericRow.of(1.5);
    GenericRow infiniteRow = GenericRow.of(Double.POSITIVE_INFINITY);
    GenericRow nullRow = GenericRow.of((Object) null);
    assertThat(isNaN.test(nanRow)).isEqualTo(true);
    assertThat(isNaN.test(finiteRow)).isEqualTo(false);
    assertThat(isNaN.test(infiniteRow)).isEqualTo(false);
    assertThat(isNaN.test(nullRow)).isEqualTo(false);

    // Stats-level evaluation never excludes a match.
    SimpleColStats[] stats = new SimpleColStats[] {new SimpleColStats(0.0, 1.0, 0L)};
    assertThat(test(isNaN, 3, stats)).isEqualTo(true);

    // There is no negated form.
    assertThat(isNaN.negate()).isEmpty();
}

/**
 * Checks row-level, stats-level and negation behavior of IsNaN on a float column, mirroring the
 * coverage of the double-column test.
 */
@Test
public void testIsNaNFloat() {
    PredicateBuilder builder = new PredicateBuilder(RowType.of(new FloatType()));
    Predicate predicate = builder.isNaN(0);

    // Row-level evaluation: only NaN matches; ordinary values, infinities and null do not.
    assertThat(predicate.test(GenericRow.of(Float.NaN))).isEqualTo(true);
    assertThat(predicate.test(GenericRow.of(1.5f))).isEqualTo(false);
    assertThat(predicate.test(GenericRow.of(Float.POSITIVE_INFINITY))).isEqualTo(false);
    assertThat(predicate.test(GenericRow.of(Float.NEGATIVE_INFINITY))).isEqualTo(false);
    assertThat(predicate.test(GenericRow.of((Object) null))).isEqualTo(false);

    // Stats-level evaluation never excludes a match (float-typed min/max to match the column).
    assertThat(test(predicate, 3, new SimpleColStats[] {new SimpleColStats(0.0f, 1.0f, 0L)}))
            .isEqualTo(true);

    // There is no negated form.
    assertThat(predicate.negate()).isEmpty();
}

@Test
public void testIn() {
PredicateBuilder builder = new PredicateBuilder(RowType.of(new IntType()));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,11 @@
import org.apache.paimon.types.VectorType;

import org.apache.parquet.filter2.compat.FilterCompat;
import org.apache.parquet.filter2.predicate.Operators.DoubleColumn;
import org.apache.parquet.filter2.predicate.Operators.FloatColumn;
import org.apache.parquet.io.api.Binary;

import java.io.Serializable;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
Expand Down Expand Up @@ -97,6 +100,18 @@ public FilterPredicate visitIsNull(FieldRef fieldRef) {
return new Operators.Eq<>(toParquetColumn(fieldRef), null);
}

/**
 * Converts an is-NaN check into a Parquet user-defined predicate.
 *
 * <p>Only float and double Parquet columns are handled; any other column type results in an
 * {@link UnsupportedOperationException}.
 */
@Override
public FilterPredicate visitIsNaN(FieldRef fieldRef) {
    Operators.Column<?> parquetColumn = toParquetColumn(fieldRef);
    if (parquetColumn instanceof FloatColumn) {
        FloatColumn floatColumn = (FloatColumn) parquetColumn;
        return FilterApi.userDefined(floatColumn, new IsNaNFloatPredicate());
    } else if (parquetColumn instanceof DoubleColumn) {
        DoubleColumn doubleColumn = (DoubleColumn) parquetColumn;
        return FilterApi.userDefined(doubleColumn, new IsNaNDoublePredicate());
    }
    throw new UnsupportedOperationException();
}

@Override
public FilterPredicate visitLessThan(FieldRef fieldRef, Object literal) {
return new Operators.Lt(
Expand Down Expand Up @@ -441,4 +456,46 @@ public Operators.Column<?> visit(RowType rowType) {
throw new UnsupportedOperationException();
}
}

/**
 * User-defined Parquet predicate that keeps only double values that are NaN.
 *
 * <p>Statistics are never used to drop data: both {@link #canDrop} and {@link #inverseCanDrop}
 * return {@code false}.
 */
public static class IsNaNDoublePredicate extends UserDefinedPredicate<Double>
        implements Serializable {
    private static final long serialVersionUID = 1L;

    /** Returns {@code true} only for a non-null NaN value. */
    @Override
    public boolean keep(Double value) {
        if (value == null) {
            return false;
        }
        return value.isNaN();
    }

    /** Always {@code false}: statistics are not used to skip data for this predicate. */
    @Override
    public boolean canDrop(Statistics<Double> statistics) {
        return false;
    }

    /** Always {@code false}: statistics are not used to skip data for the inverse either. */
    @Override
    public boolean inverseCanDrop(Statistics<Double> statistics) {
        return false;
    }
}

/**
 * User-defined Parquet predicate that keeps only float values that are NaN.
 *
 * <p>Statistics are never used to drop data: both {@link #canDrop} and {@link #inverseCanDrop}
 * return {@code false}.
 */
public static class IsNaNFloatPredicate extends UserDefinedPredicate<Float>
        implements Serializable {
    private static final long serialVersionUID = 1L;

    /** Returns {@code true} only for a non-null NaN value. */
    @Override
    public boolean keep(Float value) {
        if (value == null) {
            return false;
        }
        return value.isNaN();
    }

    /** Always {@code false}: statistics are not used to skip data for this predicate. */
    @Override
    public boolean canDrop(Statistics<Float> statistics) {
        return false;
    }

    /** Always {@code false}: statistics are not used to skip data for the inverse either. */
    @Override
    public boolean inverseCanDrop(Statistics<Float> statistics) {
        return false;
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,38 @@ public void testInFilterString() {
true);
}

/** Verifies that IsNaN on a double column converts to the double user-defined predicate. */
@Test
public void testIsNaNDouble() {
    DataField doubleField = new DataField(0, "d1", new DoubleType());
    RowType rowType = new RowType(Collections.singletonList(doubleField));
    PredicateBuilder builder = new PredicateBuilder(rowType);

    FilterCompat.Filter converted =
            ParquetFilters.convert(Collections.singletonList(builder.isNaN(0)));
    String rendered = ((FilterPredicateCompat) converted).getFilterPredicate().toString();

    // Only the prefix is compared; the rendered text continues after the class name.
    assertThat(rendered)
            .contains(
                    "userdefinedbyinstance(d1, org.apache.parquet.filter2.predicate.ParquetFilters$IsNaNDoublePredicate");
}

/** Verifies that IsNaN on a float column converts to the float user-defined predicate. */
@Test
public void testIsNaNFloat() {
    DataField floatField = new DataField(0, "f1", new FloatType());
    RowType rowType = new RowType(Collections.singletonList(floatField));
    PredicateBuilder builder = new PredicateBuilder(rowType);

    FilterCompat.Filter converted =
            ParquetFilters.convert(Collections.singletonList(builder.isNaN(0)));
    String rendered = ((FilterPredicateCompat) converted).getFilterPredicate().toString();

    // Only the prefix is compared; the rendered text continues after the class name.
    assertThat(rendered)
            .contains(
                    "userdefinedbyinstance(f1, org.apache.parquet.filter2.predicate.ParquetFilters$IsNaNFloatPredicate");
}

@Test
public void testInFilterFloat() {
PredicateBuilder builder =
Expand Down