Skip to content

Commit 6248657

Browse files
committed
better test coverage + polish code
1 parent bc0cbf8 commit 6248657

File tree

4 files changed

+61
-25
lines changed

4 files changed

+61
-25
lines changed

x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/CategorizePackedValuesBlockHash.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@ public class CategorizePackedValuesBlockHash extends BlockHash {
5656
int emitBatchSize
5757
) {
5858
super(blockFactory);
59+
assert specs.get(0).categorizeDef() != null;
60+
5961
this.specs = specs;
6062
this.aggregatorMode = aggregatorMode;
6163
blocks = new Block[specs.size()];

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/Options.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
import java.util.HashMap;
2222
import java.util.Map;
23+
import java.util.function.Consumer;
2324

2425
import static org.elasticsearch.common.logging.LoggerMessageFormat.format;
2526
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isFoldable;
@@ -33,6 +34,16 @@ public static Expression.TypeResolution resolve(
3334
Source source,
3435
TypeResolutions.ParamOrdinal paramOrdinal,
3536
Map<String, DataType> allowedOptions
37+
) {
38+
return resolve(options, source, paramOrdinal, allowedOptions, null);
39+
}
40+
41+
public static Expression.TypeResolution resolve(
42+
Expression options,
43+
Source source,
44+
TypeResolutions.ParamOrdinal paramOrdinal,
45+
Map<String, DataType> allowedOptions,
46+
Consumer<Map<String, Object>> verifyOptions
3647
) {
3748
if (options != null) {
3849
Expression.TypeResolution resolution = isNotNull(options, source.text(), paramOrdinal);
@@ -47,6 +58,9 @@ public static Expression.TypeResolution resolve(
4758
try {
4859
Map<String, Object> optionsMap = new HashMap<>();
4960
populateMap((MapExpression) options, optionsMap, source, paramOrdinal, allowedOptions);
61+
if (verifyOptions != null) {
62+
verifyOptions.accept(optionsMap);
63+
}
5064
} catch (InvalidArgumentException e) {
5165
return new Expression.TypeResolution(e.getMessage());
5266
}

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Categorize.java

Lines changed: 31 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
import java.util.List;
3939
import java.util.Locale;
4040
import java.util.Map;
41+
import java.util.TreeMap;
4142

4243
import static java.util.Map.entry;
4344
import static org.elasticsearch.common.logging.LoggerMessageFormat.format;
@@ -66,10 +67,12 @@ public class Categorize extends GroupingFunction.NonEvaluatableGroupingFunction
6667
Categorize::new
6768
);
6869

69-
public static final Map<String, DataType> ALLOWED_OPTIONS = Map.ofEntries(
70-
entry("analyzer", KEYWORD),
71-
entry("output_format", KEYWORD),
72-
entry("similarity_threshold", INTEGER)
70+
private static final String ANALYZER = "analyzer";
71+
private static final String OUTPUT_FORMAT = "output_format";
72+
private static final String SIMILARITY_THRESHOLD = "similarity_threshold";
73+
74+
private static final Map<String, DataType> ALLOWED_OPTIONS = new TreeMap<>(
75+
Map.ofEntries(entry(ANALYZER, KEYWORD), entry(OUTPUT_FORMAT, KEYWORD), entry(SIMILARITY_THRESHOLD, INTEGER))
7376
);
7477

7578
private final Expression field;
@@ -100,19 +103,19 @@ public Categorize(
100103
description = "(Optional) Categorize additional options as <<esql-function-named-params,function named parameters>>.",
101104
params = {
102105
@MapParam.MapParamEntry(
103-
name = "analyzer",
106+
name = ANALYZER,
104107
type = "keyword",
105108
valueHint = { "standard" },
106109
description = "Analyzer used to convert the field into tokens for text categorization."
107110
),
108111
@MapParam.MapParamEntry(
109-
name = "output_format",
112+
name = OUTPUT_FORMAT,
110113
type = "keyword",
111114
valueHint = { "regex", "tokens" },
112115
description = "The output format of the categories. Defaults to regex."
113116
),
114117
@MapParam.MapParamEntry(
115-
name = "similarity_threshold",
118+
name = SIMILARITY_THRESHOLD,
116119
type = "integer",
117120
valueHint = { "70" },
118121
description = "The minimum percentage of token weight that must match for text to be added to the category bucket. "
@@ -166,40 +169,43 @@ public Nullability nullable() {
166169

167170
@Override
168171
protected TypeResolution resolveType() {
169-
return isString(field(), sourceText(), DEFAULT).and(Options.resolve(options, source(), SECOND, ALLOWED_OPTIONS)).and(() -> {
170-
try {
171-
categorizeDef();
172-
} catch (InvalidArgumentException e) {
173-
return new TypeResolution(e.getMessage());
174-
}
175-
return TypeResolution.TYPE_RESOLVED;
176-
});
172+
return isString(field(), sourceText(), DEFAULT).and(
173+
Options.resolve(options, source(), SECOND, ALLOWED_OPTIONS, this::verifyOptions)
174+
);
177175
}
178176

179-
public CategorizeDef categorizeDef() {
180-
Map<String, Object> optionsMap = new HashMap<>();
181-
if (options != null) {
182-
Options.populateMap((MapExpression) options, optionsMap, source(), SECOND, ALLOWED_OPTIONS);
177+
private void verifyOptions(Map<String, Object> optionsMap) {
178+
if (options == null) {
179+
return;
183180
}
184-
Integer similarityThreshold = (Integer) optionsMap.get("similarity_threshold");
181+
Integer similarityThreshold = (Integer) optionsMap.get(SIMILARITY_THRESHOLD);
185182
if (similarityThreshold != null) {
186183
if (similarityThreshold <= 0 || similarityThreshold > 100) {
187184
throw new InvalidArgumentException(
188185
format("invalid similarity threshold [{}], expecting a number between 1 and 100, inclusive", similarityThreshold)
189186
);
190187
}
191188
}
192-
OutputFormat outputFormat = null;
193-
String outputFormatString = (String) optionsMap.get("output_format");
194-
if (outputFormatString != null) {
189+
String outputFormat = (String) optionsMap.get(OUTPUT_FORMAT);
190+
if (outputFormat != null) {
195191
try {
196-
outputFormat = OutputFormat.valueOf(outputFormatString.toUpperCase(Locale.ROOT));
192+
OutputFormat.valueOf(outputFormat.toUpperCase(Locale.ROOT));
197193
} catch (IllegalArgumentException e) {
198194
throw new InvalidArgumentException(
199-
format(null, "invalid output format [{}], expecting one of [REGEX, TOKENS]", outputFormatString)
195+
format(null, "invalid output format [{}], expecting one of [REGEX, TOKENS]", outputFormat)
200196
);
201197
}
202198
}
199+
}
200+
201+
public CategorizeDef categorizeDef() {
202+
Map<String, Object> optionsMap = new HashMap<>();
203+
if (options != null) {
204+
Options.populateMap((MapExpression) options, optionsMap, source(), SECOND, ALLOWED_OPTIONS);
205+
}
206+
Integer similarityThreshold = (Integer) optionsMap.get(SIMILARITY_THRESHOLD);
207+
String outputFormatString = (String) optionsMap.get(OUTPUT_FORMAT);
208+
OutputFormat outputFormat = outputFormatString == null ? null : OutputFormat.valueOf(outputFormatString.toUpperCase(Locale.ROOT));
203209
return new CategorizeDef(
204210
(String) optionsMap.get("analyzer"),
205211
outputFormat == null ? REGEX : outputFormat,

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1972,6 +1972,20 @@ public void testCategorizeWithFilteredAggregations() {
19721972
);
19731973
}
19741974

1975+
public void testCategorizeInvalidOptionsField() {
1976+
assumeTrue("categorize options must be enabled", EsqlCapabilities.Cap.CATEGORIZE_OPTIONS.isEnabled());
1977+
1978+
assertEquals(
1979+
"1:31: second argument of [CATEGORIZE(last_name, first_name)] must be a map expression, received [first_name]",
1980+
error("FROM test | STATS COUNT(*) BY CATEGORIZE(last_name, first_name)")
1981+
);
1982+
assertEquals(
1983+
"1:31: Invalid option [blah] in [CATEGORIZE(last_name, { \"blah\": 42 })], "
1984+
+ "expected one of [analyzer, output_format, similarity_threshold]",
1985+
error("FROM test | STATS COUNT(*) BY CATEGORIZE(last_name, { \"blah\": 42 })")
1986+
);
1987+
}
1988+
19751989
public void testCategorizeOptionOutputFormat() {
19761990
assumeTrue("categorize options must be enabled", EsqlCapabilities.Cap.CATEGORIZE_OPTIONS.isEnabled());
19771991

0 commit comments

Comments
 (0)