Skip to content

Commit 8afbb52

Browse files
authored
ESQL: Compute infrastructure for LEFT JOIN (#118889)
This adds some infrastructure that we can use to run LOOKUP JOIN using real LEFT JOIN semantics. Right now if LOOKUP JOIN matches many rows in the `lookup` index we merge all of the values into a multivalued field. So the number of rows emitted from LOOKUP JOIN is the same as the number of rows that comes into LOOKUP JOIN. This change builds the infrastructure to emit one row per match, mostly reusing the infrastructure from ENRICH.
1 parent cda2b69 commit 8afbb52

File tree

16 files changed

+883
-46
lines changed

16 files changed

+883
-46
lines changed

x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BooleanArrayBlock.java

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/BytesRefArrayBlock.java

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/DoubleArrayBlock.java

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/FloatArrayBlock.java

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/IntArrayBlock.java

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/data/LongArrayBlock.java

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/Block.java

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -212,10 +212,46 @@ default boolean mvSortedAscending() {
212212
/**
213213
* Expand multivalued fields into one row per value. Returns the same block if there aren't any multivalued
214214
* fields to expand. The returned block needs to be closed by the caller to release the block's resources.
215-
* TODO: pass BlockFactory
216215
*/
217216
Block expand();
218217

218+
/**
219+
* Build a {@link Block} with a {@code null} inserted {@code before} each
220+
* listed position.
221+
* <p>
222+
* Note: {@code before} must be non-decreasing.
223+
* </p>
224+
*/
225+
default Block insertNulls(IntVector before) {
226+
// TODO remove default and scatter to implementation where it can be a lot more efficient
227+
int myCount = getPositionCount();
228+
int beforeCount = before.getPositionCount();
229+
try (Builder builder = elementType().newBlockBuilder(myCount + beforeCount, blockFactory())) {
230+
int beforeP = 0;
231+
int nextNull = before.getInt(beforeP);
232+
for (int mainP = 0; mainP < myCount; mainP++) {
233+
while (mainP == nextNull) {
234+
builder.appendNull();
235+
beforeP++;
236+
if (beforeP >= beforeCount) {
237+
builder.copyFrom(this, mainP, myCount);
238+
return builder.build();
239+
}
240+
nextNull = before.getInt(beforeP);
241+
}
242+
// This line right below this is the super inefficient one.
243+
builder.copyFrom(this, mainP, mainP + 1);
244+
}
245+
assert nextNull == myCount;
246+
while (beforeP < beforeCount) {
247+
nextNull = before.getInt(beforeP++);
248+
assert nextNull == myCount;
249+
builder.appendNull();
250+
}
251+
return builder.build();
252+
}
253+
}
254+
219255
/**
220256
* Builds {@link Block}s. Typically, you use one of its direct subinterfaces like {@link IntBlock.Builder}.
221257
* This is {@link Releasable} and should be released after building the block or if building the block fails.

x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/X-ArrayBlock.java.st

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ $endif$
149149
int valueCount = getValueCount(pos);
150150
int first = getFirstValueIndex(pos);
151151
if (valueCount == 1) {
152-
builder.append$Type$(get$Type$(getFirstValueIndex(pos)$if(BytesRef)$, scratch$endif$));
152+
builder.append$Type$(get$Type$(first$if(BytesRef)$, scratch$endif$));
153153
} else {
154154
builder.beginPositionEntry();
155155
for (int c = 0; c < valueCount; c++) {

x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/lookup/MergePositionsOperator.java

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -20,28 +20,32 @@
2020
import java.util.Objects;
2121

2222
/**
23-
* Combines values at the given blocks with the same positions into a single position for the blocks at the given channels
23+
* Combines values at the given blocks with the same positions into a single position
24+
* for the blocks at the given channels.
25+
* <p>
2426
* Example, input pages consisting of three blocks:
25-
* positions | field-1 | field-2 |
26-
* -----------------------------------
27+
* </p>
28+
* <pre>{@code
29+
* | positions | field-1 | field-2 |
30+
* ------------------------------------
2731
* Page 1:
28-
* 1 | a,b | 2020 |
29-
* 1 | c | 2021 |
30-
* ---------------------------------
32+
* | 1 | a,b | 2020 |
33+
* | 1 | c | 2021 |
3134
* Page 2:
32-
* 2 | a,e | 2021 |
33-
* ---------------------------------
35+
* | 2 | a,e | 2021 |
3436
* Page 3:
35-
* 4 | d | null |
36-
* ---------------------------------
37+
* | 4 | d | null |
38+
* }</pre>
3739
* Output:
40+
* <pre>{@code
3841
* | field-1 | field-2 |
3942
* ---------------------------
4043
* | null | null |
4144
* | a,b,c | 2020,2021 |
4245
* | a,e | 2021 |
4346
* | null | null |
4447
* | d | null |
48+
* }</pre>
4549
*/
4650
public final class MergePositionsOperator implements Operator {
4751
private boolean finished = false;

0 commit comments

Comments
 (0)