Improve test coverage of CATEGORIZE options and polish the option-validation code

jan-elastic · jan-elastic · commit 624865743c65 · 2025-07-16T16:34:15.000+02:00
diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/CategorizePackedValuesBlockHash.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/CategorizePackedValuesBlockHash.java
@@ -56,6 +56,8 @@ public class CategorizePackedValuesBlockHash extends BlockHash {
         int emitBatchSize
     ) {
         super(blockFactory);
+        assert specs.get(0).categorizeDef() != null;
+
         this.specs = specs;
         this.aggregatorMode = aggregatorMode;
         blocks = new Block[specs.size()];
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/Options.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/Options.java
@@ -20,6 +20,7 @@
 
 import java.util.HashMap;
 import java.util.Map;
+import java.util.function.Consumer;
 
 import static org.elasticsearch.common.logging.LoggerMessageFormat.format;
 import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isFoldable;
@@ -33,6 +34,16 @@ public static Expression.TypeResolution resolve(
         Source source,
         TypeResolutions.ParamOrdinal paramOrdinal,
         Map<String, DataType> allowedOptions
+    ) {
+        return resolve(options, source, paramOrdinal, allowedOptions, null);
+    }
+
+    public static Expression.TypeResolution resolve(
+        Expression options,
+        Source source,
+        TypeResolutions.ParamOrdinal paramOrdinal,
+        Map<String, DataType> allowedOptions,
+        Consumer<Map<String, Object>> verifyOptions
     ) {
         if (options != null) {
             Expression.TypeResolution resolution = isNotNull(options, source.text(), paramOrdinal);
@@ -47,6 +58,9 @@ public static Expression.TypeResolution resolve(
             try {
                 Map<String, Object> optionsMap = new HashMap<>();
                 populateMap((MapExpression) options, optionsMap, source, paramOrdinal, allowedOptions);
+                if (verifyOptions != null) {
+                    verifyOptions.accept(optionsMap);
+                }
             } catch (InvalidArgumentException e) {
                 return new Expression.TypeResolution(e.getMessage());
             }
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Categorize.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Categorize.java
@@ -38,6 +38,7 @@
 import java.util.List;
 import java.util.Locale;
 import java.util.Map;
+import java.util.TreeMap;
 
 import static java.util.Map.entry;
 import static org.elasticsearch.common.logging.LoggerMessageFormat.format;
@@ -66,10 +67,12 @@ public class Categorize extends GroupingFunction.NonEvaluatableGroupingFunction
         Categorize::new
     );
 
-    public static final Map<String, DataType> ALLOWED_OPTIONS = Map.ofEntries(
-        entry("analyzer", KEYWORD),
-        entry("output_format", KEYWORD),
-        entry("similarity_threshold", INTEGER)
+    private static final String ANALYZER = "analyzer";
+    private static final String OUTPUT_FORMAT = "output_format";
+    private static final String SIMILARITY_THRESHOLD = "similarity_threshold";
+
+    private static final Map<String, DataType> ALLOWED_OPTIONS = new TreeMap<>(
+        Map.ofEntries(entry(ANALYZER, KEYWORD), entry(OUTPUT_FORMAT, KEYWORD), entry(SIMILARITY_THRESHOLD, INTEGER))
     );
 
     private final Expression field;
@@ -100,19 +103,19 @@ public Categorize(
             description = "(Optional) Categorize additional options as <<esql-function-named-params,function named parameters>>.",
             params = {
                 @MapParam.MapParamEntry(
-                    name = "analyzer",
+                    name = ANALYZER,
                     type = "keyword",
                     valueHint = { "standard" },
                     description = "Analyzer used to convert the field into tokens for text categorization."
                 ),
                 @MapParam.MapParamEntry(
-                    name = "output_format",
+                    name = OUTPUT_FORMAT,
                     type = "keyword",
                     valueHint = { "regex", "tokens" },
                     description = "The output format of the categories. Defaults to regex."
                 ),
                 @MapParam.MapParamEntry(
-                    name = "similarity_threshold",
+                    name = SIMILARITY_THRESHOLD,
                     type = "integer",
                     valueHint = { "70" },
                     description = "The minimum percentage of token weight that must match for text to be added to the category bucket. "
@@ -166,40 +169,43 @@ public Nullability nullable() {
 
     @Override
     protected TypeResolution resolveType() {
-        return isString(field(), sourceText(), DEFAULT).and(Options.resolve(options, source(), SECOND, ALLOWED_OPTIONS)).and(() -> {
-            try {
-                categorizeDef();
-            } catch (InvalidArgumentException e) {
-                return new TypeResolution(e.getMessage());
-            }
-            return TypeResolution.TYPE_RESOLVED;
-        });
+        return isString(field(), sourceText(), DEFAULT).and(
+            Options.resolve(options, source(), SECOND, ALLOWED_OPTIONS, this::verifyOptions)
+        );
     }
 
-    public CategorizeDef categorizeDef() {
-        Map<String, Object> optionsMap = new HashMap<>();
-        if (options != null) {
-            Options.populateMap((MapExpression) options, optionsMap, source(), SECOND, ALLOWED_OPTIONS);
+    private void verifyOptions(Map<String, Object> optionsMap) {
+        if (options == null) {
+            return;
         }
-        Integer similarityThreshold = (Integer) optionsMap.get("similarity_threshold");
+        Integer similarityThreshold = (Integer) optionsMap.get(SIMILARITY_THRESHOLD);
         if (similarityThreshold != null) {
             if (similarityThreshold <= 0 || similarityThreshold > 100) {
                 throw new InvalidArgumentException(
                     format("invalid similarity threshold [{}], expecting a number between 1 and 100, inclusive", similarityThreshold)
                 );
             }
         }
-        OutputFormat outputFormat = null;
-        String outputFormatString = (String) optionsMap.get("output_format");
-        if (outputFormatString != null) {
+        String outputFormat = (String) optionsMap.get(OUTPUT_FORMAT);
+        if (outputFormat != null) {
             try {
-                outputFormat = OutputFormat.valueOf(outputFormatString.toUpperCase(Locale.ROOT));
+                OutputFormat.valueOf(outputFormat.toUpperCase(Locale.ROOT));
             } catch (IllegalArgumentException e) {
                 throw new InvalidArgumentException(
-                    format(null, "invalid output format [{}], expecting one of [REGEX, TOKENS]", outputFormatString)
+                    format(null, "invalid output format [{}], expecting one of [REGEX, TOKENS]", outputFormat)
                 );
             }
         }
+    }
+
+    public CategorizeDef categorizeDef() {
+        Map<String, Object> optionsMap = new HashMap<>();
+        if (options != null) {
+            Options.populateMap((MapExpression) options, optionsMap, source(), SECOND, ALLOWED_OPTIONS);
+        }
+        Integer similarityThreshold = (Integer) optionsMap.get(SIMILARITY_THRESHOLD);
+        String outputFormatString = (String) optionsMap.get(OUTPUT_FORMAT);
+        OutputFormat outputFormat = outputFormatString == null ? null : OutputFormat.valueOf(outputFormatString.toUpperCase(Locale.ROOT));
         return new CategorizeDef(
             (String) optionsMap.get("analyzer"),
             outputFormat == null ? REGEX : outputFormat,
diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java
@@ -1972,6 +1972,20 @@ public void testCategorizeWithFilteredAggregations() {
         );
     }
 
+    public void testCategorizeInvalidOptionsField() {
+        assumeTrue("categorize options must be enabled", EsqlCapabilities.Cap.CATEGORIZE_OPTIONS.isEnabled());
+
+        assertEquals(
+            "1:31: second argument of [CATEGORIZE(last_name, first_name)] must be a map expression, received [first_name]",
+            error("FROM test | STATS COUNT(*) BY CATEGORIZE(last_name, first_name)")
+        );
+        assertEquals(
+            "1:31: Invalid option [blah] in [CATEGORIZE(last_name, { \"blah\": 42 })], "
+                + "expected one of [analyzer, output_format, similarity_threshold]",
+            error("FROM test | STATS COUNT(*) BY CATEGORIZE(last_name, { \"blah\": 42 })")
+        );
+    }
+
     public void testCategorizeOptionOutputFormat() {
         assumeTrue("categorize options must be enabled", EsqlCapabilities.Cap.CATEGORIZE_OPTIONS.isEnabled());