Skip to content

Commit 8ebc808

Browse files
kderusso, elasticsearchmachine, and leemthompo
authored
Fix CHUNK and TOP_SNIPPETS documentation to include optional MapExpression params (elastic#139945)
* Fix TOP_SNIPPETS docs
* Fix CHUNK docs
* [CI] Auto commit changes from spotless
* Update x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/TopSnippets.java

Co-authored-by: Liam Thompson <[email protected]>

* Regenerate docs with updated link
* Spotless
* Replace missing period symbol

---------

Co-authored-by: elasticsearchmachine <[email protected]>
Co-authored-by: Liam Thompson <[email protected]>
1 parent 00bdb63 commit 8ebc808

File tree

8 files changed

+227
-33
lines changed

8 files changed

+227
-33
lines changed

docs/reference/query-languages/esql/_snippets/functions/parameters/top_snippets.md

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/reference/query-languages/esql/_snippets/functions/types/chunk.md

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/reference/query-languages/esql/_snippets/functions/types/top_snippets.md

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/reference/query-languages/esql/kibana/definition/functions/chunk.json

Lines changed: 38 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/reference/query-languages/esql/kibana/definition/functions/top_snippets.json

Lines changed: 50 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/TopSnippets.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -92,16 +92,17 @@ public TopSnippets(
9292
""") Expression query,
9393
@MapParam(
9494
name = "options",
95-
description = "Options to customize snippet extraction behavior.",
95+
description = "(Optional) TopSnippets additional options as "
96+
+ "[function named parameters](/reference/query-languages/esql/esql-syntax.md#esql-function-named-params).",
9697
optional = true,
9798
params = {
9899
@MapParam.MapParamEntry(
99100
name = "num_snippets",
100-
type = { "integer" },
101+
type = "integer",
101102
description = "The maximum number of matching snippets to return.",
102103
valueHint = { "3" }
103104
),
104-
@MapParam.MapParamEntry(name = "num_words", type = { "integer" }, description = """
105+
@MapParam.MapParamEntry(name = "num_words", type = "integer", description = """
105106
The maximum number of words to return in each snippet.
106107
This allows better control of inference costs by limiting the size of tokens per snippet.
107108
""", valueHint = { "300" }) }

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ChunkTests.java

Lines changed: 61 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636

3737
import static org.elasticsearch.compute.data.BlockUtils.toJavaObject;
3838
import static org.elasticsearch.xpack.core.inference.chunking.ChunkingSettingsTests.createRandomChunkingSettings;
39+
import static org.elasticsearch.xpack.esql.core.type.DataType.UNSUPPORTED;
3940
import static org.elasticsearch.xpack.esql.expression.function.scalar.string.Chunk.ALLOWED_CHUNKING_SETTING_OPTIONS;
4041
import static org.elasticsearch.xpack.esql.expression.function.scalar.string.Chunk.DEFAULT_CHUNKING_SETTINGS;
4142
import static org.elasticsearch.xpack.esql.expression.function.scalar.util.ChunkUtils.chunkText;
@@ -64,42 +65,70 @@ private static String randomWordsBetween(int min, int max) {
6465

6566
@ParametersFactory
6667
public static Iterable<Object[]> parameters() {
67-
return parameterSuppliersFromTypedDataWithDefaultChecks(
68-
true,
69-
List.of(new TestCaseSupplier("Chunk with defaults", List.of(DataType.KEYWORD), () -> {
70-
String text = randomWordsBetween(25, 50);
71-
ChunkingSettings chunkingSettings = new SentenceBoundaryChunkingSettings(Chunk.DEFAULT_CHUNK_SIZE, 0);
68+
return parameterSuppliersFromTypedData(testCaseSuppliers());
69+
}
7270

73-
List<String> chunks = chunkText(text, chunkingSettings);
74-
Object expectedResult = chunks.size() == 1
75-
? new BytesRef(chunks.get(0).trim())
76-
: chunks.stream().map(s -> new BytesRef(s.trim())).toList();
71+
private static List<TestCaseSupplier> testCaseSuppliers() {
72+
List<TestCaseSupplier> suppliers = new ArrayList<>();
73+
suppliers.add(createTestCaseSupplier("Chunk with defaults", DataType.KEYWORD));
74+
suppliers.add(createTestCaseSupplier("Chunk with defaults text input", DataType.TEXT));
75+
return addFunctionNamedParams(suppliers);
76+
}
7777

78-
return new TestCaseSupplier.TestCase(
79-
List.of(new TestCaseSupplier.TypedData(new BytesRef(text), DataType.KEYWORD, "str")),
80-
"ChunkBytesRefEvaluator[str=Attribute[channel=0], "
81-
+ "chunkingSettings={\"strategy\":\"sentence\",\"max_chunk_size\":300,\"sentence_overlap\":0}]",
82-
DataType.KEYWORD,
83-
equalTo(expectedResult)
84-
);
85-
}), new TestCaseSupplier("Chunk with defaults text input", List.of(DataType.TEXT), () -> {
78+
private static TestCaseSupplier createTestCaseSupplier(String description, DataType fieldDataType) {
79+
return new TestCaseSupplier(description, List.of(fieldDataType), () -> {
80+
String text = randomWordsBetween(25, 50);
81+
ChunkingSettings chunkingSettings = new SentenceBoundaryChunkingSettings(Chunk.DEFAULT_CHUNK_SIZE, 0);
82+
83+
List<String> chunks = chunkText(text, chunkingSettings);
84+
Object expectedResult = chunks.size() == 1
85+
? new BytesRef(chunks.get(0).trim())
86+
: chunks.stream().map(s -> new BytesRef(s.trim())).toList();
87+
88+
return new TestCaseSupplier.TestCase(
89+
List.of(new TestCaseSupplier.TypedData(new BytesRef(text), fieldDataType, "str")),
90+
"ChunkBytesRefEvaluator[str=Attribute[channel=0], "
91+
+ "chunkingSettings={\"strategy\":\"sentence\",\"max_chunk_size\":300,\"sentence_overlap\":0}]",
92+
DataType.KEYWORD,
93+
equalTo(expectedResult)
94+
);
95+
});
96+
}
97+
98+
/**
99+
* Adds function named parameters to all the test case suppliers provided
100+
*/
101+
private static List<TestCaseSupplier> addFunctionNamedParams(List<TestCaseSupplier> suppliers) {
102+
List<TestCaseSupplier> result = new ArrayList<>(suppliers);
103+
for (TestCaseSupplier supplier : suppliers) {
104+
List<DataType> dataTypes = new ArrayList<>(supplier.types());
105+
dataTypes.add(UNSUPPORTED);
106+
result.add(new TestCaseSupplier(supplier.name() + ", with chunking_settings", dataTypes, () -> {
86107
String text = randomWordsBetween(25, 50);
87-
ChunkingSettings chunkingSettings = new SentenceBoundaryChunkingSettings(Chunk.DEFAULT_CHUNK_SIZE, 0);
108+
int chunkSize = 25;
109+
ChunkingSettings chunkingSettings = new SentenceBoundaryChunkingSettings(chunkSize, 0);
88110

89111
List<String> chunks = chunkText(text, chunkingSettings);
90112
Object expectedResult = chunks.size() == 1
91113
? new BytesRef(chunks.get(0).trim())
92114
: chunks.stream().map(s -> new BytesRef(s.trim())).toList();
93115

116+
List<TestCaseSupplier.TypedData> values = List.of(
117+
new TestCaseSupplier.TypedData(new BytesRef(text), supplier.types().get(0), "str"),
118+
new TestCaseSupplier.TypedData(createChunkingSettings(chunkingSettings), UNSUPPORTED, "chunking_settings")
119+
.forceLiteral()
120+
);
121+
94122
return new TestCaseSupplier.TestCase(
95-
List.of(new TestCaseSupplier.TypedData(new BytesRef(text), DataType.TEXT, "str")),
123+
values,
96124
"ChunkBytesRefEvaluator[str=Attribute[channel=0], "
97-
+ "chunkingSettings={\"strategy\":\"sentence\",\"max_chunk_size\":300,\"sentence_overlap\":0}]",
125+
+ "chunkingSettings={\"strategy\":\"sentence\",\"max_chunk_size\":25,\"sentence_overlap\":0}]",
98126
DataType.KEYWORD,
99127
equalTo(expectedResult)
100128
);
101-
}))
102-
);
129+
}));
130+
}
131+
return result;
103132
}
104133

105134
private static MapExpression createChunkingSettings(ChunkingSettings chunkingSettings) {
@@ -131,6 +160,16 @@ protected Expression build(Source source, List<Expression> args) {
131160
return new Chunk(source, args.get(0), options);
132161
}
133162

163+
@Override
164+
public void testFold() {
165+
Expression expression = buildFieldExpression(testCase);
166+
// Skip testFold if the expression is not foldable (e.g., when chunking_settings contains MapExpression)
167+
if (expression.foldable() == false) {
168+
return;
169+
}
170+
super.testFold();
171+
}
172+
134173
public void testDefaults() {
135174
// Default of 300 is huge, only one chunk returned in this case
136175
verifyChunks(null, 1);

0 commit comments

Comments
 (0)