Skip to content

Commit 6d47aa4

Browse files
authored
[core] introduce Between LeafFunction (#7209)
This PR introduces a Between leaf function. Previously, a between predicate was implemented as a compound predicate combining GreaterOrEqual and LessOrEqual, which is very inefficient for some index types (btree, range bitmap, and more). It also introduces NotBetween for internal use as the negation of the Between function.
1 parent 766d08b commit 6d47aa4

File tree

16 files changed

+459
-302
lines changed

16 files changed

+459
-302
lines changed

paimon-common/src/main/java/org/apache/paimon/globalindex/OffsetGlobalIndexReader.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,11 @@ public Optional<GlobalIndexResult> visitNotIn(FieldRef fieldRef, List<Object> li
111111
return applyOffset(wrapped.visitNotIn(fieldRef, literals));
112112
}
113113

114+
/**
 * Forwards the between query to the wrapped reader and passes its result through
 * {@code applyOffset}, the same way the other visit methods of this reader do.
 *
 * @param fieldRef the field being filtered
 * @param from first bound of the range, forwarded unchanged
 * @param to second bound of the range, forwarded unchanged
 * @return whatever {@code applyOffset} produces for the wrapped reader's result
 */
@Override
115+
public Optional<GlobalIndexResult> visitBetween(FieldRef fieldRef, Object from, Object to) {
116+
return applyOffset(wrapped.visitBetween(fieldRef, from, to));
117+
}
118+
114119
@Override
115120
public Optional<GlobalIndexResult> visitVectorSearch(VectorSearch vectorSearch) {
116121
return applyOffset(

paimon-common/src/main/java/org/apache/paimon/globalindex/UnionGlobalIndexReader.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,11 @@ public Optional<GlobalIndexResult> visitNotIn(FieldRef fieldRef, List<Object> li
108108
return union(reader -> reader.visitNotIn(fieldRef, literals));
109109
}
110110

111+
/**
 * Hands {@code union} a per-reader callback that issues the same between query on that reader.
 *
 * @param fieldRef the field being filtered
 * @param from first bound of the range, forwarded unchanged to each reader
 * @param to second bound of the range, forwarded unchanged to each reader
 * @return the value returned by {@code union}
 */
@Override
112+
public Optional<GlobalIndexResult> visitBetween(FieldRef fieldRef, Object from, Object to) {
113+
// NOTE(review): presumably union(...) merges the per-reader results; confirm in union().
return union(reader -> reader.visitBetween(fieldRef, from, to));
114+
}
115+
111116
@Override
112117
public Optional<GlobalIndexResult> visitVectorSearch(VectorSearch vectorSearch) {
113118
return union(reader -> reader.visitVectorSearch(vectorSearch));

paimon-common/src/main/java/org/apache/paimon/globalindex/btree/BTreeFileMetaSelector.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,22 @@ public Optional<List<GlobalIndexIOMeta>> visitNotIn(FieldRef fieldRef, List<Obje
178178
return Optional.of(filter(meta -> true));
179179
}
180180

181+
/**
 * Selects the index file metas whose key range can contain a value in {@code [from, to]}.
 *
 * <p>A meta is kept when it is not flagged as only-nulls and its {@code [firstKey, lastKey]}
 * span overlaps {@code [from, to]}; both comparisons are non-strict, so touching ranges count
 * as overlapping. The returned Optional is always present.
 *
 * @param fieldRef the field being filtered (not used by the predicate itself)
 * @param from lower bound of the queried range
 * @param to upper bound of the queried range
 * @return the metas accepted by the overlap predicate, wrapped in an Optional
 */
@Override
182+
public Optional<List<GlobalIndexIOMeta>> visitBetween(
183+
FieldRef fieldRef, Object from, Object to) {
184+
return Optional.of(
185+
filter(
186+
meta -> {
187+
// A file flagged as only-nulls is never selected for a between query.
if (meta.onlyNulls()) {
188+
return false;
189+
}
190+
Object minKey = deserialize(meta.getFirstKey());
191+
Object maxKey = deserialize(meta.getLastKey());
192+
// Interval overlap: from <= maxKey and to >= minKey.
return comparator.compare(from, maxKey) <= 0
193+
&& comparator.compare(to, minKey) >= 0;
194+
}));
195+
}
196+
181197
@Override
182198
public Optional<List<GlobalIndexIOMeta>> visitAnd(
183199
List<Optional<List<GlobalIndexIOMeta>>> children) {

paimon-common/src/main/java/org/apache/paimon/globalindex/btree/BTreeIndexReader.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -342,6 +342,19 @@ public Optional<GlobalIndexResult> visitNotIn(FieldRef fieldRef, List<Object> li
342342
}));
343343
}
344344

345+
/**
 * Answers a between query with a single {@code rangeQuery(from, to, true, true)} call.
 *
 * <p>The range query is wrapped in a supplier lambda handed to
 * {@code GlobalIndexResult.create}; an {@link IOException} raised inside it is rethrown as a
 * {@link RuntimeException} that keeps the original exception as its cause.
 *
 * @param fieldRef the field being filtered (not used here)
 * @param from first bound passed to {@code rangeQuery}
 * @param to second bound passed to {@code rangeQuery}
 * @return a present Optional holding the created result
 */
@Override
346+
public Optional<GlobalIndexResult> visitBetween(FieldRef fieldRef, Object from, Object to) {
347+
return Optional.of(
348+
GlobalIndexResult.create(
349+
() -> {
350+
try {
351+
// NOTE(review): the two `true` flags presumably mark both bounds as
// inclusive; confirm against rangeQuery's signature.
return rangeQuery(from, to, true, true);
352+
} catch (IOException ioe) {
353+
throw new RuntimeException("fail to read btree index file.", ioe);
354+
}
355+
}));
356+
}
357+
345358
private RoaringNavigableMap64 allNonNullRows() throws IOException {
346359
// Traverse all data to avoid returning null values, which is very advantageous in
347360
// situations where there are many null values

paimon-common/src/main/java/org/apache/paimon/globalindex/btree/LazyFilteredBTreeReader.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,20 @@ public Optional<GlobalIndexResult> visitNotIn(FieldRef fieldRef, List<Object> li
249249
return createUnionReader(selected).visitNotIn(fieldRef, literals);
250250
}
251251

252+
/**
 * Evaluates a between query by first asking {@code fileSelector} which index files are
 * relevant and then querying only those files.
 *
 * @param fieldRef the field being filtered
 * @param from first bound of the range
 * @param to second bound of the range
 * @return {@code Optional.empty()} when the selector returns no answer, an empty result when
 *     it selects no files, otherwise the result of the union reader built over the selection
 */
@Override
253+
public Optional<GlobalIndexResult> visitBetween(FieldRef fieldRef, Object from, Object to) {
254+
Optional<List<GlobalIndexIOMeta>> selectedOpt =
255+
fileSelector.visitBetween(fieldRef, from, to);
256+
// The selector gave no answer: propagate the absence to the caller.
if (!selectedOpt.isPresent()) {
257+
return Optional.empty();
258+
}
259+
List<GlobalIndexIOMeta> selected = selectedOpt.get();
260+
// No file was selected: return an empty result without creating any reader.
if (selected.isEmpty()) {
261+
return Optional.of(GlobalIndexResult.createEmpty());
262+
}
263+
return createUnionReader(selected).visitBetween(fieldRef, from, to);
264+
}
265+
252266
/**
253267
* Create a Union Reader for given files. The union reader is composed by readers from reader
254268
* cache, so please do not close it.
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package org.apache.paimon.predicate;
20+
21+
import org.apache.paimon.types.DataType;
22+
23+
import org.apache.paimon.shade.jackson2.com.fasterxml.jackson.annotation.JsonCreator;
24+
25+
import java.util.List;
26+
import java.util.Optional;
27+
28+
import static org.apache.paimon.predicate.CompareUtils.compareLiteral;
29+
30+
/**
 * The {@link LeafFunction} that evaluates a between predicate with two literals, where both
 * bounds are accepted (the comparisons are non-strict). Its negation is {@link NotBetween}.
 */
31+
public class Between extends LeafTernaryFunction {
32+
33+
private static final long serialVersionUID = 1L;
34+
35+
// Name used to identify this function; the JSON (de)serialization in LeafFunction switches on it.
public static final String NAME = "BETWEEN";
36+
37+
// Shared instance; the class declares no instance fields.
public static final Between INSTANCE = new Between();
38+
39+
@JsonCreator
40+
public Between() {}
41+
42+
// Row-level test: true when literal1 <= field <= literal2.
// NOTE(review): assumes compareLiteral(type, a, b) orders a relative to b like a comparator.
@Override
43+
public boolean test(DataType type, Object field, Object literal1, Object literal2) {
44+
return compareLiteral(type, literal1, field) <= 0
45+
&& compareLiteral(type, literal2, field) >= 0;
46+
}
47+
48+
// Statistics-level test; rowCount and nullCount are accepted but not read here.
@Override
49+
public boolean test(
50+
DataType type,
51+
long rowCount,
52+
Object min,
53+
Object max,
54+
Long nullCount,
55+
Object literal1,
56+
Object literal2) {
57+
// true if [min, max] and [literal1, literal2] intersect
58+
return compareLiteral(type, literal1, max) <= 0 && compareLiteral(type, literal2, min) >= 0;
59+
}
60+
61+
// The negation of BETWEEN is the NotBetween singleton.
@Override
62+
public Optional<LeafFunction> negate() {
63+
return Optional.of(NotBetween.INSTANCE);
64+
}
65+
66+
// Dispatches to the visitor, using literals[0] as the first bound and literals[1] as the second.
@Override
67+
public <T> T visit(FunctionVisitor<T> visitor, FieldRef fieldRef, List<Object> literals) {
68+
return visitor.visitBetween(fieldRef, literals.get(0), literals.get(1));
69+
}
70+
}

paimon-common/src/main/java/org/apache/paimon/predicate/FunctionVisitor.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
package org.apache.paimon.predicate;
2020

21+
import java.util.Arrays;
2122
import java.util.List;
2223
import java.util.Optional;
2324
import java.util.stream.Collectors;
@@ -81,6 +82,16 @@ default T visit(CompoundPredicate predicate) {
8182

8283
T visitNotIn(FieldRef fieldRef, List<Object> literals);
8384

85+
/**
 * Default handling of a between predicate: combines {@code visitGreaterOrEqual(fieldRef, from)}
 * and {@code visitLessOrEqual(fieldRef, to)} through {@code visitAnd}. Implementations may
 * override this method to handle the range directly.
 */
default T visitBetween(FieldRef fieldRef, Object from, Object to) {
86+
return visitAnd(
87+
Arrays.asList(visitGreaterOrEqual(fieldRef, from), visitLessOrEqual(fieldRef, to)));
88+
}
89+
90+
/**
 * Default handling of a not-between predicate: combines {@code visitLessThan(fieldRef, from)}
 * and {@code visitGreaterThan(fieldRef, to)} through {@code visitOr}. Implementations may
 * override this method to handle the negated range directly.
 */
default T visitNotBetween(FieldRef fieldRef, Object from, Object to) {
91+
return visitOr(
92+
Arrays.asList(visitLessThan(fieldRef, from), visitGreaterThan(fieldRef, to)));
93+
}
94+
8495
// ----------------- Compound functions ------------------------
8596

8697
T visitAnd(List<T> children);

paimon-common/src/main/java/org/apache/paimon/predicate/LeafFunction.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,10 @@ public static LeafFunction fromJson(String name) throws IOException {
6262
return In.INSTANCE;
6363
case NotIn.NAME:
6464
return NotIn.INSTANCE;
65+
case Between.NAME:
66+
return Between.INSTANCE;
67+
case NotBetween.NAME:
68+
return NotBetween.INSTANCE;
6569
default:
6670
throw new IllegalArgumentException(
6771
"Could not resolve leaf function '" + name + "'");
@@ -98,6 +102,10 @@ public String toJson() {
98102
return In.NAME;
99103
} else if (this instanceof NotIn) {
100104
return NotIn.NAME;
105+
} else if (this instanceof Between) {
106+
return Between.NAME;
107+
} else if (this instanceof NotBetween) {
108+
return NotBetween.NAME;
101109
} else {
102110
throw new IllegalArgumentException(
103111
"Unknown leaf function class for JSON serialization: " + getClass());
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package org.apache.paimon.predicate;
20+
21+
import org.apache.paimon.types.DataType;
22+
23+
import java.util.List;
24+
25+
/**
 * Abstract {@link LeafFunction} for functions that take the field plus two literals. It unpacks
 * the first two entries of the literal list and applies null guards before delegating to the
 * two-literal {@code test} overloads that subclasses implement.
 */
26+
public abstract class LeafTernaryFunction extends LeafFunction {
27+
28+
// Row-level test of a single field value against the two literals.
public abstract boolean test(DataType type, Object field, Object literal1, Object literal2);
29+
30+
// Statistics-level test against min/max/null-count statistics and the two literals.
public abstract boolean test(
31+
DataType type,
32+
long rowCount,
33+
Object min,
34+
Object max,
35+
Long nullCount,
36+
Object literal1,
37+
Object literal2);
38+
39+
// Returns false when the field or either literal is null; otherwise delegates to the
// two-literal overload with literals[0] and literals[1].
@Override
40+
public boolean test(DataType type, Object field, List<Object> literals) {
41+
if (field == null || literals.get(0) == null || literals.get(1) == null) {
42+
return false;
43+
}
44+
45+
return test(type, field, literals.get(0), literals.get(1));
46+
}
47+
48+
// When nullCount is known, returns false if nullCount equals rowCount or either literal is
// null; otherwise delegates to the two-literal statistics overload.
// NOTE(review): the null-literal check sits inside the nullCount != null branch, so with an
// unknown nullCount a null literal is forwarded to the subclass; confirm this is intended.
@Override
49+
public boolean test(
50+
DataType type,
51+
long rowCount,
52+
Object min,
53+
Object max,
54+
Long nullCount,
55+
List<Object> literals) {
56+
if (nullCount != null) {
57+
if (rowCount == nullCount || literals.get(0) == null || literals.get(1) == null) {
58+
return false;
59+
}
60+
}
61+
62+
return test(type, rowCount, min, max, nullCount, literals.get(0), literals.get(1));
63+
}
64+
}
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package org.apache.paimon.predicate;
20+
21+
import org.apache.paimon.types.DataType;
22+
23+
import org.apache.paimon.shade.jackson2.com.fasterxml.jackson.annotation.JsonCreator;
24+
25+
import java.util.List;
26+
import java.util.Optional;
27+
28+
import static org.apache.paimon.predicate.CompareUtils.compareLiteral;
29+
30+
/**
31+
* The {@link LeafFunction} for not between. Now this is just an internal function as the negation
32+
* of {@link Between}.
33+
*/
34+
public class NotBetween extends LeafTernaryFunction {
35+
36+
private static final long serialVersionUID = 1L;
37+
38+
// Name used to identify this function; the JSON (de)serialization in LeafFunction switches on it.
public static final String NAME = "NOT_BETWEEN";
39+
40+
// Shared instance; the class declares no instance fields.
public static final NotBetween INSTANCE = new NotBetween();
41+
42+
@JsonCreator
43+
public NotBetween() {}
44+
45+
// Row-level test: true when field < literal1 or field > literal2 (both comparisons strict).
// NOTE(review): assumes compareLiteral(type, a, b) orders a relative to b like a comparator.
@Override
46+
public boolean test(DataType type, Object field, Object literal1, Object literal2) {
47+
return compareLiteral(type, literal1, field) > 0
48+
|| compareLiteral(type, literal2, field) < 0;
49+
}
50+
51+
// Statistics-level test: true when min < literal1 or max > literal2, i.e. when [min, max]
// is not fully contained in [literal1, literal2]. rowCount and nullCount are not read here.
@Override
52+
public boolean test(
53+
DataType type,
54+
long rowCount,
55+
Object min,
56+
Object max,
57+
Long nullCount,
58+
Object literal1,
59+
Object literal2) {
60+
return compareLiteral(type, literal1, min) > 0 || compareLiteral(type, literal2, max) < 0;
61+
}
62+
63+
// The negation of NOT_BETWEEN is the Between singleton.
@Override
64+
public Optional<LeafFunction> negate() {
65+
return Optional.of(Between.INSTANCE);
66+
}
67+
68+
// Dispatches to the visitor, using literals[0] as the first bound and literals[1] as the second.
@Override
69+
public <T> T visit(FunctionVisitor<T> visitor, FieldRef fieldRef, List<Object> literals) {
70+
return visitor.visitNotBetween(fieldRef, literals.get(0), literals.get(1));
71+
}
72+
}

0 commit comments

Comments
 (0)