Skip to content

Commit 4e681bd

Browse files
committed
add 8.x tx version; fix yaml tests; optimizations
1 parent a47b915 commit 4e681bd

File tree

8 files changed

+258
-165
lines changed

8 files changed

+258
-165
lines changed

server/src/main/java/org/elasticsearch/TransportVersions.java

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -168,6 +168,7 @@ static TransportVersion def(int id) {
168168
public static final TransportVersion ESQL_AGGREGATE_METRIC_DOUBLE_BLOCK_8_19 = def(8_841_0_24);
169169
public static final TransportVersion INTRODUCE_FAILURES_LIFECYCLE_BACKPORT_8_19 = def(8_841_0_25);
170170
public static final TransportVersion INTRODUCE_FAILURES_DEFAULT_RETENTION_BACKPORT_8_19 = def(8_841_0_26);
171+
public static final TransportVersion SPARSE_VECTOR_FIELD_PRUNING_OPTIONS_8_19 = def(8_841_0_27);
171172
public static final TransportVersion V_9_0_0 = def(9_000_0_09);
172173
public static final TransportVersion INITIAL_ELASTICSEARCH_9_0_1 = def(9_000_0_10);
173174
public static final TransportVersion COHERE_BIT_EMBEDDING_TYPE_SUPPORT_ADDED = def(9_001_0_00);

server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -515,11 +515,16 @@ public static TokenPruningConfig parseIndexOptionsPruningConfig(Boolean prune, M
515515
if (pruningConfiguration == null) {
516516
return null;
517517
}
518+
519+
if (prune == null) {
520+
throw new MapperParsingException("[index_options] field [pruning_config] should only be set if [prune] is set to true");
521+
}
522+
518523
if ((pruningConfiguration instanceof Map) == false) {
519524
throw new MapperParsingException("[index_options] field [pruning_config] should be a map");
520525
}
521526

522-
if (prune != null && prune == false) {
527+
if (prune == false) {
523528
throw new MapperParsingException("[index_options] field [pruning_config] should not be set if [prune] is false");
524529
}
525530

server/src/main/java/org/elasticsearch/index/mapper/vectors/TokenPruningConfig.java

Lines changed: 13 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -204,33 +204,28 @@ public static TokenPruningConfig parseFromMap(Map<String, Object> pruningConfigM
204204
return null;
205205
}
206206

207-
private static Float parseFloatNumberFromObject(Object numberObject) {
207+
private static Float parseFloatNumberFromObject(Object numberObject, String fieldName, String exceptionDetails) {
208208
if (numberObject instanceof Integer intValue) {
209209
return (float) intValue;
210210
} else if (numberObject instanceof Float floatValue) {
211211
return floatValue;
212212
} else if (numberObject instanceof Double doubleValue) {
213213
return ((Double) numberObject).floatValue();
214214
}
215-
return null;
215+
216+
throw new MapperParsingException("[" + PRUNING_CONFIG_FIELD + "] field [" + fieldName + "]" + exceptionDetails);
216217
}
217218

218219
private static Float parseTokensWeightThreshold(Object mappedTokensWeightThreshold) {
219220
if (mappedTokensWeightThreshold == null) {
220221
return DEFAULT_TOKENS_WEIGHT_THRESHOLD;
221222
}
222223

223-
Float tokensWeightThreshold = parseFloatNumberFromObject(mappedTokensWeightThreshold);
224-
225-
if (tokensWeightThreshold == null) {
226-
throw new MapperParsingException(
227-
"["
228-
+ PRUNING_CONFIG_FIELD
229-
+ "] field ["
230-
+ TOKENS_WEIGHT_THRESHOLD.getPreferredName()
231-
+ "] field should be a number between 0.0 and 1.0"
232-
);
233-
}
224+
Float tokensWeightThreshold = parseFloatNumberFromObject(
225+
mappedTokensWeightThreshold,
226+
TOKENS_WEIGHT_THRESHOLD.getPreferredName(),
227+
"field should be a number between 0.0 and 1.0"
228+
);
234229

235230
if (tokensWeightThreshold < MIN_TOKENS_WEIGHT_THRESHOLD || tokensWeightThreshold > MAX_TOKENS_WEIGHT_THRESHOLD) {
236231
throw new MapperParsingException(
@@ -249,17 +244,11 @@ private static Float parseTokensFreqRatioThreshold(Object mappedTokensFreqRatioT
249244
return DEFAULT_TOKENS_FREQ_RATIO_THRESHOLD;
250245
}
251246

252-
Float tokensFreqRatioThreshold = parseFloatNumberFromObject(mappedTokensFreqRatioThreshold);
253-
254-
if (tokensFreqRatioThreshold == null) {
255-
throw new MapperParsingException(
256-
"["
257-
+ PRUNING_CONFIG_FIELD
258-
+ "] field ["
259-
+ TOKENS_FREQ_RATIO_THRESHOLD.getPreferredName()
260-
+ "] field should be a number between 1 and 100"
261-
);
262-
}
247+
Float tokensFreqRatioThreshold = parseFloatNumberFromObject(
248+
mappedTokensFreqRatioThreshold,
249+
TOKENS_FREQ_RATIO_THRESHOLD.getPreferredName(),
250+
"field should be a number between 1 and 100"
251+
);
263252

264253
if (tokensFreqRatioThreshold < MIN_TOKENS_FREQ_RATIO_THRESHOLD || tokensFreqRatioThreshold > MAX_TOKENS_FREQ_RATIO_THRESHOLD) {
265254
throw new MapperParsingException(

server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java

Lines changed: 42 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -88,6 +88,7 @@ protected void mappingWithIndexOptionsPrune(XContentBuilder b) throws IOExceptio
8888
protected void mappingWithIndexOptionsPruningConfig(XContentBuilder b) throws IOException {
8989
b.field("type", "sparse_vector");
9090
b.startObject("index_options");
91+
b.field("prune", true);
9192
b.startObject("pruning_config");
9293
b.field("tokens_freq_ratio_threshold", 5.0);
9394
b.field("tokens_weight_threshold", 0.4);
@@ -296,29 +297,64 @@ public void testPruningConfigurationIsMap() {
296297
Exception e = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> {
297298
b.field("type", "sparse_vector");
298299
b.startObject("index_options");
300+
b.field("prune", true);
299301
b.field("pruning_config", "this_is_not_a_map");
300302
b.endObject();
301303
})));
302304
assertThat(e.getMessage(), containsString("index_options] field [pruning_config] should be a map"));
303305
}
304306

307+
public void testWithIndexOptionsPruningConfigPruneRequired() throws Exception {
308+
309+
Exception eTestPruneIsFalse = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> {
310+
b.field("type", "sparse_vector");
311+
b.startObject("index_options");
312+
b.field("prune", false);
313+
b.startObject("pruning_config");
314+
b.field("tokens_freq_ratio_threshold", 5.0);
315+
b.field("tokens_weight_threshold", 0.4);
316+
b.endObject();
317+
b.endObject();
318+
})));
319+
assertThat(
320+
eTestPruneIsFalse.getMessage(),
321+
containsString("Failed to parse mapping: [index_options] field [pruning_config] should not be set if [prune] is false")
322+
);
323+
324+
Exception eTestPruneIsMissing = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> {
325+
b.field("type", "sparse_vector");
326+
b.startObject("index_options");
327+
b.startObject("pruning_config");
328+
b.field("tokens_freq_ratio_threshold", 5.0);
329+
b.field("tokens_weight_threshold", 0.4);
330+
b.endObject();
331+
b.endObject();
332+
})));
333+
assertThat(
334+
eTestPruneIsMissing.getMessage(),
335+
containsString("[index_options] field [pruning_config] should only be set if [prune] is set to true")
336+
);
337+
}
338+
305339
public void testTokensFreqRatioCorrect() {
306340
Exception eTestInteger = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> {
307341
b.field("type", "sparse_vector");
308342
b.startObject("index_options");
343+
b.field("prune", true);
309344
b.startObject("pruning_config");
310345
b.field("tokens_freq_ratio_threshold", "notaninteger");
311346
b.endObject();
312347
b.endObject();
313348
})));
314349
assertThat(
315350
eTestInteger.getMessage(),
316-
containsString("[pruning_config] field [tokens_freq_ratio_threshold] field should be a number between 1 and 100")
351+
containsString("Failed to parse mapping: [pruning_config] field [tokens_freq_ratio_threshold]field should be a number between 1 and 100")
317352
);
318353

319354
Exception eTestRangeLower = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> {
320355
b.field("type", "sparse_vector");
321356
b.startObject("index_options");
357+
b.field("prune", true);
322358
b.startObject("pruning_config");
323359
b.field("tokens_freq_ratio_threshold", -2);
324360
b.endObject();
@@ -332,6 +368,7 @@ public void testTokensFreqRatioCorrect() {
332368
Exception eTestRangeHigher = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> {
333369
b.field("type", "sparse_vector");
334370
b.startObject("index_options");
371+
b.field("prune", true);
335372
b.startObject("pruning_config");
336373
b.field("tokens_freq_ratio_threshold", 101);
337374
b.endObject();
@@ -347,19 +384,21 @@ public void testTokensWeightThresholdCorrect() {
347384
Exception eTestDouble = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> {
348385
b.field("type", "sparse_vector");
349386
b.startObject("index_options");
387+
b.field("prune", true);
350388
b.startObject("pruning_config");
351389
b.field("tokens_weight_threshold", "notadouble");
352390
b.endObject();
353391
b.endObject();
354392
})));
355393
assertThat(
356394
eTestDouble.getMessage(),
357-
containsString("[pruning_config] field [tokens_weight_threshold] field should be a number between 0.0 and 1.0")
395+
containsString("Failed to parse mapping: [pruning_config] field [tokens_weight_threshold]field should be a number between 0.0 and 1.0")
358396
);
359397

360398
Exception eTestRangeLower = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> {
361399
b.field("type", "sparse_vector");
362400
b.startObject("index_options");
401+
b.field("prune", true);
363402
b.startObject("pruning_config");
364403
b.field("tokens_weight_threshold", -0.1);
365404
b.endObject();
@@ -373,6 +412,7 @@ public void testTokensWeightThresholdCorrect() {
373412
Exception eTestRangeHigher = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> {
374413
b.field("type", "sparse_vector");
375414
b.startObject("index_options");
415+
b.field("prune", true);
376416
b.startObject("pruning_config");
377417
b.field("tokens_weight_threshold", 1.1);
378418
b.endObject();

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryBuilder.java

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -235,6 +235,9 @@ protected Query doToQuery(SearchExecutionContext context) throws IOException {
235235
);
236236
}
237237

238+
Boolean pruneTokensToUse = shouldPruneTokens;
239+
TokenPruningConfig pruningConfigToUse = tokenPruningConfig;
240+
238241
// if the query options for pruning are not set,
239242
// we need to check the index options for this field
240243
// and use those if set - however, only if the index
@@ -247,13 +250,12 @@ protected Query doToQuery(SearchExecutionContext context) throws IOException {
247250
sparseVectorFieldMapper
248251
);
249252

250-
return pruningOptions.pruneTokens
251-
? WeightedTokensUtils.queryBuilderWithPrunedTokens(fieldName, pruningOptions.pruningConfig, queryVectors, ft, context)
252-
: WeightedTokensUtils.queryBuilderWithAllTokens(fieldName, queryVectors, ft, context);
253+
pruneTokensToUse = pruningOptions.pruneTokens;
254+
pruningConfigToUse = pruningOptions.pruningConfig;
253255
}
254256

255-
return (shouldPruneTokens != null && shouldPruneTokens)
256-
? WeightedTokensUtils.queryBuilderWithPrunedTokens(fieldName, tokenPruningConfig, queryVectors, ft, context)
257+
return (pruneTokensToUse != null && pruneTokensToUse)
258+
? WeightedTokensUtils.queryBuilderWithPrunedTokens(fieldName, pruningConfigToUse, queryVectors, ft, context)
257259
: WeightedTokensUtils.queryBuilderWithAllTokens(fieldName, queryVectors, ft, context);
258260
}
259261

0 commit comments

Comments
 (0)