Skip to content

Commit f400839

Browse files
authored
[ESQL] Adding a Lucene min/max operator (#113785)
This operator only optimises the computation of the min/max value if the field contains a BKD tree, no deletes and we are visiting all documents for the segment. Otherwise it computes the value iterating on a tight loop.
1 parent c45977a commit f400839

13 files changed

+1592
-0
lines changed
Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0; you may not use this file except in compliance with the Elastic License
5+
* 2.0.
6+
*/
7+
8+
package org.elasticsearch.compute.lucene;
9+
10+
import org.apache.lucene.index.NumericDocValues;
11+
import org.apache.lucene.index.PointValues;
12+
import org.apache.lucene.index.SortedNumericDocValues;
13+
import org.apache.lucene.search.Query;
14+
import org.apache.lucene.search.ScoreMode;
15+
import org.apache.lucene.util.NumericUtils;
16+
import org.elasticsearch.compute.data.Block;
17+
import org.elasticsearch.compute.data.BlockFactory;
18+
import org.elasticsearch.compute.operator.DriverContext;
19+
import org.elasticsearch.compute.operator.SourceOperator;
20+
import org.elasticsearch.search.MultiValueMode;
21+
22+
import java.io.IOException;
23+
import java.util.List;
24+
import java.util.function.Function;
25+
26+
/**
27+
* Factory that generates an operator that finds the max value of a field using the {@link LuceneMinMaxOperator}.
28+
*/
29+
public final class LuceneMaxFactory extends LuceneOperator.Factory {
30+
31+
public enum NumberType implements LuceneMinMaxOperator.NumberType {
32+
INTEGER {
33+
@Override
34+
public Block buildResult(BlockFactory blockFactory, long result, int pageSize) {
35+
return blockFactory.newConstantIntBlockWith(Math.toIntExact(result), pageSize);
36+
}
37+
38+
@Override
39+
public Block buildEmptyResult(BlockFactory blockFactory, int pageSize) {
40+
return blockFactory.newConstantIntBlockWith(Integer.MIN_VALUE, pageSize);
41+
}
42+
43+
@Override
44+
long bytesToLong(byte[] bytes) {
45+
return NumericUtils.sortableBytesToInt(bytes, 0);
46+
}
47+
},
48+
FLOAT {
49+
@Override
50+
public Block buildResult(BlockFactory blockFactory, long result, int pageSize) {
51+
return blockFactory.newConstantFloatBlockWith(NumericUtils.sortableIntToFloat(Math.toIntExact(result)), pageSize);
52+
}
53+
54+
@Override
55+
public Block buildEmptyResult(BlockFactory blockFactory, int pageSize) {
56+
return blockFactory.newConstantFloatBlockWith(-Float.MAX_VALUE, pageSize);
57+
}
58+
59+
@Override
60+
long bytesToLong(byte[] bytes) {
61+
return NumericUtils.sortableBytesToInt(bytes, 0);
62+
}
63+
},
64+
LONG {
65+
@Override
66+
public Block buildResult(BlockFactory blockFactory, long result, int pageSize) {
67+
return blockFactory.newConstantLongBlockWith(result, pageSize);
68+
}
69+
70+
@Override
71+
public Block buildEmptyResult(BlockFactory blockFactory, int pageSize) {
72+
return blockFactory.newConstantLongBlockWith(Long.MIN_VALUE, pageSize);
73+
}
74+
75+
@Override
76+
long bytesToLong(byte[] bytes) {
77+
return NumericUtils.sortableBytesToLong(bytes, 0);
78+
}
79+
},
80+
DOUBLE {
81+
@Override
82+
public Block buildResult(BlockFactory blockFactory, long result, int pageSize) {
83+
return blockFactory.newConstantDoubleBlockWith(NumericUtils.sortableLongToDouble(result), pageSize);
84+
}
85+
86+
@Override
87+
public Block buildEmptyResult(BlockFactory blockFactory, int pageSize) {
88+
return blockFactory.newConstantDoubleBlockWith(-Double.MAX_VALUE, pageSize);
89+
}
90+
91+
@Override
92+
long bytesToLong(byte[] bytes) {
93+
return NumericUtils.sortableBytesToLong(bytes, 0);
94+
}
95+
};
96+
97+
public final NumericDocValues multiValueMode(SortedNumericDocValues sortedNumericDocValues) {
98+
return MultiValueMode.MAX.select(sortedNumericDocValues);
99+
}
100+
101+
public final long fromPointValues(PointValues pointValues) throws IOException {
102+
return bytesToLong(pointValues.getMaxPackedValue());
103+
}
104+
105+
public final long evaluate(long value1, long value2) {
106+
return Math.max(value1, value2);
107+
}
108+
109+
abstract long bytesToLong(byte[] bytes);
110+
}
111+
112+
private final String fieldName;
113+
private final NumberType numberType;
114+
115+
public LuceneMaxFactory(
116+
List<? extends ShardContext> contexts,
117+
Function<ShardContext, Query> queryFunction,
118+
DataPartitioning dataPartitioning,
119+
int taskConcurrency,
120+
String fieldName,
121+
NumberType numberType,
122+
int limit
123+
) {
124+
super(contexts, queryFunction, dataPartitioning, taskConcurrency, limit, ScoreMode.COMPLETE_NO_SCORES);
125+
this.fieldName = fieldName;
126+
this.numberType = numberType;
127+
}
128+
129+
@Override
130+
public SourceOperator get(DriverContext driverContext) {
131+
return new LuceneMinMaxOperator(driverContext.blockFactory(), sliceQueue, fieldName, numberType, limit, Long.MIN_VALUE);
132+
}
133+
134+
@Override
135+
public String describe() {
136+
return "LuceneMaxOperator[type = "
137+
+ numberType.name()
138+
+ ", dataPartitioning = "
139+
+ dataPartitioning
140+
+ ", fieldName = "
141+
+ fieldName
142+
+ ", limit = "
143+
+ limit
144+
+ "]";
145+
}
146+
}
Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0; you may not use this file except in compliance with the Elastic License
5+
* 2.0.
6+
*/
7+
8+
package org.elasticsearch.compute.lucene;
9+
10+
import org.apache.lucene.index.NumericDocValues;
11+
import org.apache.lucene.index.PointValues;
12+
import org.apache.lucene.index.SortedNumericDocValues;
13+
import org.apache.lucene.search.Query;
14+
import org.apache.lucene.search.ScoreMode;
15+
import org.apache.lucene.util.NumericUtils;
16+
import org.elasticsearch.compute.data.Block;
17+
import org.elasticsearch.compute.data.BlockFactory;
18+
import org.elasticsearch.compute.operator.DriverContext;
19+
import org.elasticsearch.compute.operator.SourceOperator;
20+
import org.elasticsearch.search.MultiValueMode;
21+
22+
import java.io.IOException;
23+
import java.util.List;
24+
import java.util.function.Function;
25+
26+
/**
27+
* Factory that generates an operator that finds the min value of a field using the {@link LuceneMinMaxOperator}.
28+
*/
29+
public final class LuceneMinFactory extends LuceneOperator.Factory {
30+
31+
public enum NumberType implements LuceneMinMaxOperator.NumberType {
32+
INTEGER {
33+
@Override
34+
public Block buildResult(BlockFactory blockFactory, long result, int pageSize) {
35+
return blockFactory.newConstantIntBlockWith(Math.toIntExact(result), pageSize);
36+
}
37+
38+
@Override
39+
public Block buildEmptyResult(BlockFactory blockFactory, int pageSize) {
40+
return blockFactory.newConstantIntBlockWith(Integer.MAX_VALUE, pageSize);
41+
}
42+
43+
@Override
44+
long bytesToLong(byte[] bytes) {
45+
return NumericUtils.sortableBytesToInt(bytes, 0);
46+
}
47+
},
48+
FLOAT {
49+
@Override
50+
public Block buildResult(BlockFactory blockFactory, long result, int pageSize) {
51+
return blockFactory.newConstantFloatBlockWith(NumericUtils.sortableIntToFloat(Math.toIntExact(result)), pageSize);
52+
}
53+
54+
@Override
55+
public Block buildEmptyResult(BlockFactory blockFactory, int pageSize) {
56+
return blockFactory.newConstantFloatBlockWith(Float.POSITIVE_INFINITY, pageSize);
57+
}
58+
59+
@Override
60+
long bytesToLong(byte[] bytes) {
61+
return NumericUtils.sortableBytesToInt(bytes, 0);
62+
}
63+
},
64+
LONG {
65+
@Override
66+
public Block buildResult(BlockFactory blockFactory, long result, int pageSize) {
67+
return blockFactory.newConstantLongBlockWith(result, pageSize);
68+
}
69+
70+
@Override
71+
public Block buildEmptyResult(BlockFactory blockFactory, int pageSize) {
72+
return blockFactory.newConstantLongBlockWith(Long.MAX_VALUE, pageSize);
73+
}
74+
75+
@Override
76+
long bytesToLong(byte[] bytes) {
77+
return NumericUtils.sortableBytesToLong(bytes, 0);
78+
}
79+
},
80+
DOUBLE {
81+
@Override
82+
public Block buildResult(BlockFactory blockFactory, long result, int pageSize) {
83+
return blockFactory.newConstantDoubleBlockWith(NumericUtils.sortableLongToDouble(result), pageSize);
84+
}
85+
86+
@Override
87+
public Block buildEmptyResult(BlockFactory blockFactory, int pageSize) {
88+
return blockFactory.newConstantDoubleBlockWith(Double.POSITIVE_INFINITY, pageSize);
89+
}
90+
91+
@Override
92+
long bytesToLong(byte[] bytes) {
93+
return NumericUtils.sortableBytesToLong(bytes, 0);
94+
}
95+
};
96+
97+
public final NumericDocValues multiValueMode(SortedNumericDocValues sortedNumericDocValues) {
98+
return MultiValueMode.MIN.select(sortedNumericDocValues);
99+
}
100+
101+
public final long fromPointValues(PointValues pointValues) throws IOException {
102+
return bytesToLong(pointValues.getMinPackedValue());
103+
}
104+
105+
public final long evaluate(long value1, long value2) {
106+
return Math.min(value1, value2);
107+
}
108+
109+
abstract long bytesToLong(byte[] bytes);
110+
}
111+
112+
private final String fieldName;
113+
private final NumberType numberType;
114+
115+
public LuceneMinFactory(
116+
List<? extends ShardContext> contexts,
117+
Function<ShardContext, Query> queryFunction,
118+
DataPartitioning dataPartitioning,
119+
int taskConcurrency,
120+
String fieldName,
121+
NumberType numberType,
122+
int limit
123+
) {
124+
super(contexts, queryFunction, dataPartitioning, taskConcurrency, limit, ScoreMode.COMPLETE_NO_SCORES);
125+
this.fieldName = fieldName;
126+
this.numberType = numberType;
127+
}
128+
129+
@Override
130+
public SourceOperator get(DriverContext driverContext) {
131+
return new LuceneMinMaxOperator(driverContext.blockFactory(), sliceQueue, fieldName, numberType, limit, Long.MAX_VALUE);
132+
}
133+
134+
@Override
135+
public String describe() {
136+
return "LuceneMinOperator[type = "
137+
+ numberType.name()
138+
+ ", dataPartitioning = "
139+
+ dataPartitioning
140+
+ ", fieldName = "
141+
+ fieldName
142+
+ ", limit = "
143+
+ limit
144+
+ "]";
145+
}
146+
}

0 commit comments

Comments
 (0)