Skip to content

Commit cd7625e

Browse files
committed
Processing review, changing null handling
1 parent fdfb149 commit cd7625e

File tree

3 files changed

+74
-36
lines changed

3 files changed

+74
-36
lines changed

x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1931,33 +1931,37 @@ l1:integer | l2:integer
19311931
null | 0
19321932
;
19331933

1934-
mvContainsAll#[skip:-9.1.99,reason:new mvContainsall function added in 9.2]
1934+
mvContainsAll
1935+
required_capability: fn_mv_contains_all
19351936
// tag::mv_contains_all[]
1936-
ROW a = "a", b = ["a", "b", "c"]
1937-
| EVAL a_subset_of_b = mv_contains_all(b, a)
1937+
ROW set = ["a", "b", "c"], element = "a"
1938+
| EVAL set_contains_element = mv_contains_all(set, element)
19381939
// end::mv_contains_all[]
19391940
;
19401941

19411942
// tag::mv_contains_all-result[]
1942-
a:keyword | b:keyword | a_subset_of_b:boolean
1943-
a | [a, b, c] | true
1943+
set:keyword | element:keyword | set_contains_element:boolean
1944+
[a, b, c] | a | true
19441945
// end::mv_contains_all-result[]
19451946
;
19461947

1947-
mvContainsAll_bothsides#[skip:-9.1.99,reason:new mvContainsall function added in 9.2]
1948+
mvContainsAll_bothsides
1949+
required_capability: fn_mv_contains_all
19481950
// tag::mv_contains_all_bothsides[]
1949-
ROW a = ["a","c"], b = ["a", "b", "c"]
1950-
| EVAL a_subset_of_b = mv_contains_all(b, a)
1951+
ROW setA = ["a","c"], setB = ["a", "b", "c"]
1952+
| EVAL a_subset_of_b = mv_contains_all(setB, setA)
1953+
| EVAL b_subset_of_a = mv_contains_all(setA, setB)
19511954
// end::mv_contains_all_bothsides[]
19521955
;
19531956

19541957
// tag::mv_contains_all_bothsides-result[]
1955-
a:keyword | b:keyword | a_subset_of_b:boolean
1956-
[a, c] | [a, b, c] | true
1958+
setA:keyword | setB:keyword | a_subset_of_b:boolean | b_subset_of_a:boolean
1959+
[a, c] | [a, b, c] | true | false
19571960
// end::mv_contains_all_bothsides-result[]
19581961
;
19591962

1960-
mvContainsAllCombinations#[skip:-9.1.99,reason:new mvContainsall function added in 9.2]
1963+
mvContainsAllCombinations
1964+
required_capability: fn_mv_contains_all
19611965

19621966
ROW a = "a", b = ["a", "b", "c"], n = null
19631967
| EVAL aa = mv_contains_all(a, a),
@@ -1969,11 +1973,12 @@ ROW a = "a", b = ["a", "b", "c"], n = null
19691973
nn = mv_contains_all(n,n)
19701974
;
19711975

1972-
a:keyword | b:keyword | n:null | aa:boolean | bb:boolean | ab:boolean | ba:boolean | na:boolean | an:boolean | nn:null
1973-
a | [a, b, c] | null | true | true | false | true | null | null | null
1976+
a:keyword | b:keyword | n:null | aa:boolean | bb:boolean | ab:boolean | ba:boolean | na:boolean | an:boolean | nn:boolean
1977+
a | [a, b, c] | null | true | true | false | true | false | true | true
19741978
;
19751979

1976-
mvContainsAll_where#[skip:-9.1.99,reason:new mvContainsall function added in 9.2]
1980+
mvContainsAll_where
1981+
required_capability: fn_mv_contains_all
19771982
// tag::mv_contains_all_where[]
19781983
FROM airports
19791984
| WHERE mv_contains_all(type, ["major","military"]) AND scalerank == 9

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,13 @@ public enum Cap {
245245
*/
246246
FN_MONTH_NAME,
247247

248+
249+
/**
250+
* support for MV_CONTAINS_ALL function
251+
*/
252+
FN_MV_CONTAINS_ALL,
253+
254+
248255
/**
249256
* Fixes for multiple functions not serializing their source, and emitting warnings with wrong line number and text.
250257
*/

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvContainsAll.java

Lines changed: 48 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,17 @@
1515
import org.elasticsearch.compute.data.BooleanBlock;
1616
import org.elasticsearch.compute.data.BytesRefBlock;
1717
import org.elasticsearch.compute.data.DoubleBlock;
18+
import org.elasticsearch.compute.data.ElementType;
1819
import org.elasticsearch.compute.data.IntBlock;
1920
import org.elasticsearch.compute.data.LongBlock;
20-
import org.elasticsearch.compute.operator.EvalOperator;
21+
import org.elasticsearch.compute.data.Page;
22+
import org.elasticsearch.compute.operator.DriverContext;
2123
import org.elasticsearch.compute.operator.EvalOperator.ExpressionEvaluator;
24+
import org.elasticsearch.core.Releasables;
2225
import org.elasticsearch.xpack.esql.EsqlIllegalArgumentException;
2326
import org.elasticsearch.xpack.esql.core.expression.Expression;
2427
import org.elasticsearch.xpack.esql.core.expression.FoldContext;
28+
import org.elasticsearch.xpack.esql.core.expression.Nullability;
2529
import org.elasticsearch.xpack.esql.core.expression.function.scalar.BinaryScalarFunction;
2630
import org.elasticsearch.xpack.esql.core.tree.NodeInfo;
2731
import org.elasticsearch.xpack.esql.core.tree.Source;
@@ -50,17 +54,16 @@ public class MvContainsAll extends BinaryScalarFunction implements EvaluatorMapp
5054
"MvContainsAll",
5155
MvContainsAll::new
5256
);
53-
private DataType dataType;
5457

5558
@FunctionInfo(
5659
returnType = "boolean",
57-
description = "\"Checks if all values yielded by the second multivalue expression are present in the values yielded by "
58-
+ "the first multivalue expression. Returns a boolean, or null if either expression is null.",
60+
description = "Checks if all values yielded by the second multivalue expression are present in the values yielded by "
61+
+ "the first multivalue expression. Returns a boolean. Null values are treated as an empty set.",
5962
examples = {
6063
@Example(file = "string", tag = "mv_contains_all"),
6164
@Example(file = "string", tag = "mv_contains_all_bothsides"),
62-
@Example(file = "string", tag = "mv_contains_all_where"), },
63-
appliesTo = { @FunctionAppliesTo(lifeCycle = FunctionAppliesToLifecycle.PREVIEW, version = "9.2.0") }
65+
@Example(file = "string", tag = "mv_contains_all_where"),},
66+
appliesTo = {@FunctionAppliesTo(lifeCycle = FunctionAppliesToLifecycle.PREVIEW, version = "9.2.0")}
6467
)
6568
public MvContainsAll(
6669
Source source,
@@ -81,7 +84,7 @@ public MvContainsAll(
8184
"long",
8285
"text",
8386
"unsigned_long",
84-
"version" },
87+
"version"},
8588
description = "Multivalue expression."
8689
) Expression superset,
8790
@Param(
@@ -101,7 +104,7 @@ public MvContainsAll(
101104
"long",
102105
"text",
103106
"unsigned_long",
104-
"version" },
107+
"version"},
105108
description = "Multivalue expression."
106109
) Expression subset
107110
) {
@@ -127,24 +130,24 @@ protected TypeResolution resolveType() {
127130
if (resolution.unresolved()) {
128131
return resolution;
129132
}
130-
dataType = left().dataType() == DataType.NULL ? DataType.NULL : DataType.BOOLEAN;
131133
if (left().dataType() == DataType.NULL) {
132-
dataType = right().dataType() == DataType.NULL ? DataType.NULL : DataType.BOOLEAN;
133134
return isRepresentableExceptCounters(right(), sourceText(), SECOND);
134135
}
135136
return isType(right(), t -> t.noText() == left().dataType().noText(), sourceText(), SECOND, left().dataType().noText().typeName());
136137
}
137138

138139
@Override
139140
public DataType dataType() {
140-
if (dataType == null) {
141-
resolveType();
142-
}
143-
return dataType;
141+
return DataType.BOOLEAN;
142+
}
143+
144+
@Override
145+
public Nullability nullable() {
146+
return Nullability.FALSE;
144147
}
145148

146149
@Override
147-
protected BinaryScalarFunction replaceChildren(Expression newLeft, Expression newRight) {
150+
protected MvContainsAll replaceChildren(Expression newLeft, Expression newRight) {
148151
return new MvContainsAll(source(), newLeft, newRight);
149152
}
150153

@@ -162,7 +165,7 @@ public Object fold(FoldContext ctx) {
162165
public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) {
163166
var supersetType = PlannerUtils.toElementType(left().dataType());
164167
var subsetType = PlannerUtils.toElementType(right().dataType());
165-
if (supersetType != subsetType) {
168+
if (supersetType != ElementType.NULL && subsetType != ElementType.NULL && supersetType != subsetType) {
166169
throw new EsqlIllegalArgumentException(
167170
"Incompatible data types for MvContainsAll, superset type({}) value({}) and subset type({}) value({}) don't match.",
168171
supersetType,
@@ -177,11 +180,12 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) {
177180
case DOUBLE -> new MvContainsAllDoubleEvaluator.Factory(source(), toEvaluator.apply(left()), toEvaluator.apply(right()));
178181
case INT -> new MvContainsAllIntEvaluator.Factory(source(), toEvaluator.apply(left()), toEvaluator.apply(right()));
179182
case LONG -> new MvContainsAllLongEvaluator.Factory(source(), toEvaluator.apply(left()), toEvaluator.apply(right()));
180-
case NULL -> EvalOperator.CONSTANT_NULL_FACTORY;
183+
case NULL -> new MvContainsAllNullEvaluator(toEvaluator.apply(right()));
181184
default -> throw EsqlIllegalArgumentException.illegalDataType(dataType());
182185
};
183186
}
184187

188+
185189
@Evaluator(extraName = "Int")
186190
static void process(BooleanBlock.Builder builder, int position, IntBlock field1, IntBlock field2) {
187191
appendTo(builder, containsAll(field1, field2, position, IntBlock::getInt));
@@ -228,10 +232,10 @@ static void appendTo(BooleanBlock.Builder builder, Boolean bool) {
228232
* A block is considered a subset if the superset contains values that test equal for all the values in the subset, independent of
229233
* order. Duplicates are ignored in the sense that for each duplicate in the subset, we will search/match against the first/any value
230234
* in the superset.
235+
*
231236
* @param superset block to check against
232-
* @param subset block containing values that should be present in the other block.
233-
* @return {@code true} if the given blocks are a superset and subset to each other, {@code false} if not and {@code null} if the subset
234-
* or superset contains only null values.
237+
* @param subset block containing values that should be present in the other block.
238+
* @return {@code true} if the given blocks are a superset and subset to each other, {@code false} if not.
235239
*/
236240
static <BlockType extends Block, Type> Boolean containsAll(
237241
BlockType superset,
@@ -242,8 +246,8 @@ static <BlockType extends Block, Type> Boolean containsAll(
242246
if (superset == subset) {
243247
return true;
244248
}
245-
if (subset.areAllValuesNull() || superset.areAllValuesNull()) {
246-
return null;
249+
if (subset.areAllValuesNull()) {
250+
return true;
247251
}
248252

249253
final var subsetCount = subset.getValueCount(position);
@@ -283,4 +287,26 @@ static <BlockType extends Block, Type> boolean hasValue(
283287
interface ValueExtractor<BlockType extends Block, Type> {
284288
Type extractValue(BlockType block, int position);
285289
}
290+
291+
private record MvContainsAllNullEvaluator(ExpressionEvaluator.Factory toEvaluator) implements ExpressionEvaluator.Factory {
292+
@Override
293+
public ExpressionEvaluator get(DriverContext context) {
294+
return new ExpressionEvaluator() {
295+
final ExpressionEvaluator subsetField = toEvaluator.get(context);
296+
297+
@Override
298+
public Block eval(Page page) {
299+
try (Block block = subsetField.eval(page)) {
300+
var position = page.getPositionCount();
301+
return context.blockFactory().newConstantBooleanBlockWith(block.isNull(position), position);
302+
}
303+
}
304+
305+
@Override
306+
public void close() {
307+
Releasables.closeExpectNoException(subsetField);
308+
}
309+
};
310+
}
311+
}
286312
}

0 commit comments

Comments
 (0)