Skip to content

Commit be78331

Browse files
committed
some cleanups; refactoring
1 parent d8f3c63 commit be78331

File tree

9 files changed

+149
-109
lines changed

9 files changed

+149
-109
lines changed

docs/changelog/126739.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
pr: 126739
22
summary: Update `sparse_vector` field mapping to include default setting for token
33
pruning
4-
area: Relevance
4+
area: Mapping
55
type: enhancement
66
issues: []

libs/x-content/src/main/java/org/elasticsearch/xcontent/AbstractObjectParser.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -287,6 +287,15 @@ public void declareBoolean(BiConsumer<Value, Boolean> consumer, ParseField field
287287
declareField(consumer, XContentParser::booleanValue, field, ValueType.BOOLEAN);
288288
}
289289

290+
public void declareBooleanOrNull(BiConsumer<Value, Boolean> consumer, boolean nullValue, ParseField field) {
291+
declareField(
292+
consumer,
293+
p -> p.currentToken() == XContentParser.Token.VALUE_NULL ? nullValue : p.booleanValue(),
294+
field,
295+
ValueType.BOOLEAN_OR_NULL
296+
);
297+
}
298+
290299
public <T> void declareObjectArray(BiConsumer<Value, List<T>> consumer, ContextParser<Context, T> objectParser, ParseField field) {
291300
declareFieldArray(consumer, objectParser, field, ValueType.OBJECT_ARRAY);
292301
}

server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ public class SparseVectorFieldMapper extends FieldMapper {
7878

7979
private final SparseVectorFieldMapper.IndexOptions indexOptions;
8080

81-
public static final NodeFeature SPARSE_VECTOR_INDEX_OPTIONS_FEATURE = new NodeFeature("sparse_vector_index_options_supported");
81+
public static final NodeFeature SPARSE_VECTOR_INDEX_OPTIONS_FEATURE = new NodeFeature("sparse_vector.index_options_supported");
8282

8383
private static SparseVectorFieldMapper toType(FieldMapper in) {
8484
return (SparseVectorFieldMapper) in;
@@ -484,7 +484,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
484484
}
485485

486486
public static Boolean parseIndexOptionsPruneValue(Map<String, Object> indexOptionsMap) {
487-
Object shouldPrune = indexOptionsMap.remove(IndexOptions.PRUNE_FIELD_NAME);
487+
Object shouldPrune = indexOptionsMap.get(IndexOptions.PRUNE_FIELD_NAME);
488488
if (shouldPrune == null) {
489489
return null;
490490
}
@@ -493,17 +493,31 @@ public static Boolean parseIndexOptionsPruneValue(Map<String, Object> indexOptio
493493
return boolValue;
494494
}
495495

496-
throw new MapperParsingException("[index_options] field [prune] should be true or false");
496+
throw new MapperParsingException(
497+
"["
498+
+ SPARSE_VECTOR_INDEX_OPTIONS
499+
+ "] field ["
500+
+ PRUNE_FIELD_NAME
501+
+ "] should be true or false"
502+
);
497503
}
498504

499505
public static TokenPruningConfig parseIndexOptionsPruningConfig(Boolean prune, Map<String, Object> indexOptionsMap) {
500-
Object pruningConfiguration = indexOptionsMap.remove(IndexOptions.PRUNING_CONFIG_FIELD_NAME);
506+
Object pruningConfiguration = indexOptionsMap.get(IndexOptions.PRUNING_CONFIG_FIELD_NAME);
501507
if (pruningConfiguration == null) {
502508
return null;
503509
}
504510

505511
if (prune == null || prune == false) {
506-
throw new MapperParsingException("[index_options] field [pruning_config] should only be set if [prune] is set to true");
512+
throw new MapperParsingException(
513+
"["
514+
+ SPARSE_VECTOR_INDEX_OPTIONS
515+
+ "] field ["
516+
+ PRUNING_CONFIG_FIELD_NAME
517+
+"] should only be set if ["
518+
+ PRUNE_FIELD_NAME
519+
+ "] is set to true"
520+
);
507521
}
508522

509523
Map<String, Object> pruningConfigurationMap = XContentMapValues.nodeMapValue(pruningConfiguration, PRUNING_CONFIG_FIELD_NAME);

server/src/main/java/org/elasticsearch/index/mapper/vectors/TokenPruningConfig.java

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,23 +9,32 @@
99

1010
package org.elasticsearch.index.mapper.vectors;
1111

12+
import org.elasticsearch.ElasticsearchException;
1213
import org.elasticsearch.common.ParsingException;
1314
import org.elasticsearch.common.io.stream.StreamInput;
1415
import org.elasticsearch.common.io.stream.StreamOutput;
1516
import org.elasticsearch.common.io.stream.Writeable;
1617
import org.elasticsearch.index.mapper.MapperParsingException;
1718
import org.elasticsearch.index.query.QueryBuilder;
19+
import org.elasticsearch.xcontent.ConstructingObjectParser;
20+
import org.elasticsearch.xcontent.DeprecationHandler;
21+
import org.elasticsearch.xcontent.NamedXContentRegistry;
1822
import org.elasticsearch.xcontent.ParseField;
1923
import org.elasticsearch.xcontent.ToXContentObject;
2024
import org.elasticsearch.xcontent.XContentBuilder;
2125
import org.elasticsearch.xcontent.XContentParser;
26+
import org.elasticsearch.xcontent.XContentType;
27+
import org.elasticsearch.xcontent.support.MapXContentParser;
2228

2329
import java.io.IOException;
2430
import java.util.Locale;
2531
import java.util.Map;
2632
import java.util.Objects;
2733
import java.util.Set;
2834

35+
import static org.elasticsearch.xcontent.ConstructingObjectParser.constructorArg;
36+
import static org.elasticsearch.xcontent.ConstructingObjectParser.optionalConstructorArg;
37+
2938
public class TokenPruningConfig implements Writeable, ToXContentObject {
3039
public static final String PRUNING_CONFIG_FIELD = "pruning_config";
3140

@@ -184,7 +193,42 @@ public static TokenPruningConfig fromXContent(XContentParser parser) throws IOEx
184193
return new TokenPruningConfig(ratioThreshold, weightThreshold, onlyScorePrunedTokens);
185194
}
186195

196+
private static final ConstructingObjectParser<TokenPruningConfig, Void> PARSER = new ConstructingObjectParser<>(
197+
PRUNING_CONFIG_FIELD,
198+
args -> new TokenPruningConfig(
199+
args[0] == null ? DEFAULT_TOKENS_FREQ_RATIO_THRESHOLD : (Float)args[0],
200+
args[1] == null ? DEFAULT_TOKENS_WEIGHT_THRESHOLD : (Float)args[1],
201+
args[2] != null && (Boolean) args[2]
202+
)
203+
);
204+
205+
static {
206+
PARSER.declareFloatOrNull(optionalConstructorArg(), DEFAULT_TOKENS_FREQ_RATIO_THRESHOLD, TOKENS_FREQ_RATIO_THRESHOLD);
207+
PARSER.declareFloatOrNull(optionalConstructorArg(), DEFAULT_TOKENS_WEIGHT_THRESHOLD, TOKENS_WEIGHT_THRESHOLD);
208+
PARSER.declareBooleanOrNull(optionalConstructorArg(), false, ONLY_SCORE_PRUNED_TOKENS_FIELD);
209+
}
210+
187211
public static TokenPruningConfig parseFromMap(Map<String, Object> pruningConfigMap) {
212+
if (pruningConfigMap == null) {
213+
return null;
214+
}
215+
216+
try {
217+
XContentParser parser = new MapXContentParser(NamedXContentRegistry.EMPTY,
218+
DeprecationHandler.IGNORE_DEPRECATIONS,
219+
pruningConfigMap,
220+
XContentType.JSON
221+
);
222+
223+
return PARSER.parse(parser, null);
224+
} catch (Exception exc) {
225+
if (exc.getCause() != null && exc.getCause().getClass().equals(IllegalArgumentException.class)) {
226+
throw new ElasticsearchException(exc.getCause());
227+
}
228+
throw new ElasticsearchException(exc);
229+
}
230+
231+
/*
188232
Object mappedTokensFreqRatioThreshold = pruningConfigMap.remove(TOKENS_FREQ_RATIO_THRESHOLD.getPreferredName());
189233
Object mappedTokensWeightThreshold = pruningConfigMap.remove(TOKENS_WEIGHT_THRESHOLD.getPreferredName());
190234
Object mappedOnlyScorePrunedTokens = pruningConfigMap.remove(ONLY_SCORE_PRUNED_TOKENS_FIELD.getPreferredName());
@@ -202,8 +246,10 @@ public static TokenPruningConfig parseFromMap(Map<String, Object> pruningConfigM
202246
}
203247
204248
return null;
249+
*/
205250
}
206251

252+
/*
207253
private static Float parseFloatNumberFromObject(Object numberObject, String fieldName, String exceptionDetails) {
208254
if (numberObject instanceof Integer intValue) {
209255
return (float) intValue;
@@ -276,4 +322,5 @@ private static boolean parseScorePrunedTokens(Object mappedScorePrunedTokens) {
276322
"[" + PRUNING_CONFIG_FIELD + "] field [" + ONLY_SCORE_PRUNED_TOKENS_FIELD.getPreferredName() + "] field should be true or false"
277323
);
278324
}
325+
*/
279326
}

server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java

Lines changed: 43 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -127,78 +127,28 @@ public void testDefaults() throws Exception {
127127
DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping));
128128
assertEquals(Strings.toString(fieldMapping(this::minimalMapping)), mapper.mappingSource().toString());
129129

130-
ParsedDocument doc1 = mapper.parse(source(this::writeField));
130+
checkParsedDocument(mapper);
131+
}
131132

132-
List<IndexableField> fields = doc1.rootDoc().getFields("field");
133-
assertEquals(2, fields.size());
134-
assertThat(fields.get(0), Matchers.instanceOf(XFeatureField.class));
135-
XFeatureField featureField1 = null;
136-
XFeatureField featureField2 = null;
137-
for (IndexableField field : fields) {
138-
if (field.stringValue().equals("ten")) {
139-
featureField1 = (XFeatureField) field;
140-
} else if (field.stringValue().equals("twenty")) {
141-
featureField2 = (XFeatureField) field;
142-
} else {
143-
throw new UnsupportedOperationException();
144-
}
145-
}
133+
public void testDefaultsPreIndexOptions() throws Exception {
134+
DocumentMapper mapper = getDocumentMapperPreviousVersion(fieldMapping(this::minimalMapping));
135+
assertEquals(Strings.toString(fieldMapping(this::minimalMapping)), mapper.mappingSource().toString());
146136

147-
int freq1 = getFrequency(featureField1.tokenStream(null, null));
148-
int freq2 = getFrequency(featureField2.tokenStream(null, null));
149-
assertTrue(freq1 < freq2);
137+
checkParsedDocument(mapper);
150138
}
151139

152140
public void testWithIndexOptionsPrune() throws Exception {
153141
DocumentMapper mapper = createDocumentMapper(fieldMapping(this::mappingWithIndexOptionsPrune));
154142
assertEquals(Strings.toString(fieldMapping(this::mappingWithIndexOptionsPrune)), mapper.mappingSource().toString());
155143

156-
ParsedDocument doc1 = mapper.parse(source(this::writeField));
157-
158-
List<IndexableField> fields = doc1.rootDoc().getFields("field");
159-
assertEquals(2, fields.size());
160-
assertThat(fields.get(0), Matchers.instanceOf(XFeatureField.class));
161-
XFeatureField featureField1 = null;
162-
XFeatureField featureField2 = null;
163-
for (IndexableField field : fields) {
164-
if (field.stringValue().equals("ten")) {
165-
featureField1 = (XFeatureField) field;
166-
} else if (field.stringValue().equals("twenty")) {
167-
featureField2 = (XFeatureField) field;
168-
} else {
169-
throw new UnsupportedOperationException();
170-
}
171-
}
172-
173-
int freq1 = getFrequency(featureField1.tokenStream(null, null));
174-
int freq2 = getFrequency(featureField2.tokenStream(null, null));
175-
assertTrue(freq1 < freq2);
144+
checkParsedDocument(mapper);
176145
}
177146

178147
public void testWithIndexOptionsPruningConfigOnly() throws Exception {
179148
DocumentMapper mapper = createDocumentMapper(fieldMapping(this::mappingWithIndexOptionsPruningConfig));
180149
assertEquals(Strings.toString(fieldMapping(this::mappingWithIndexOptionsPruningConfig)), mapper.mappingSource().toString());
181150

182-
ParsedDocument doc1 = mapper.parse(source(this::writeField));
183-
184-
List<IndexableField> fields = doc1.rootDoc().getFields("field");
185-
assertEquals(2, fields.size());
186-
assertThat(fields.get(0), Matchers.instanceOf(XFeatureField.class));
187-
XFeatureField featureField1 = null;
188-
XFeatureField featureField2 = null;
189-
for (IndexableField field : fields) {
190-
if (field.stringValue().equals("ten")) {
191-
featureField1 = (XFeatureField) field;
192-
} else if (field.stringValue().equals("twenty")) {
193-
featureField2 = (XFeatureField) field;
194-
} else {
195-
throw new UnsupportedOperationException();
196-
}
197-
}
198-
199-
int freq1 = getFrequency(featureField1.tokenStream(null, null));
200-
int freq2 = getFrequency(featureField2.tokenStream(null, null));
201-
assertTrue(freq1 < freq2);
151+
checkParsedDocument(mapper);
202152
}
203153

204154
public void testDotInFieldName() throws Exception {
@@ -349,7 +299,7 @@ public void testTokensFreqRatioCorrect() {
349299
assertThat(
350300
eTestInteger.getMessage(),
351301
containsString(
352-
"Failed to parse mapping: [pruning_config] field [tokens_freq_ratio_threshold]field should be a number between 1 and 100"
302+
"Failed to parse mapping: org.elasticsearch.xcontent.XContentParseException: [0:0] [pruning_config] failed to parse field [tokens_freq_ratio_threshold]"
353303
)
354304
);
355305

@@ -364,7 +314,7 @@ public void testTokensFreqRatioCorrect() {
364314
})));
365315
assertThat(
366316
eTestRangeLower.getMessage(),
367-
containsString("[pruning_config] field [tokens_freq_ratio_threshold] field should be a number between 1 and 100")
317+
containsString("Failed to parse mapping: java.lang.IllegalArgumentException: [tokens_freq_ratio_threshold] must be between [1] and [100], got -2.0")
368318
);
369319

370320
Exception eTestRangeHigher = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> {
@@ -378,7 +328,7 @@ public void testTokensFreqRatioCorrect() {
378328
})));
379329
assertThat(
380330
eTestRangeHigher.getMessage(),
381-
containsString("[pruning_config] field [tokens_freq_ratio_threshold] field should be a number between 1 and 100")
331+
containsString("Failed to parse mapping: java.lang.IllegalArgumentException: [tokens_freq_ratio_threshold] must be between [1] and [100], got 101")
382332
);
383333
}
384334

@@ -395,7 +345,7 @@ public void testTokensWeightThresholdCorrect() {
395345
assertThat(
396346
eTestDouble.getMessage(),
397347
containsString(
398-
"Failed to parse mapping: [pruning_config] field [tokens_weight_threshold]field should be a number between 0.0 and 1.0"
348+
"Failed to parse mapping: org.elasticsearch.xcontent.XContentParseException: [0:0] [pruning_config] failed to parse field [tokens_weight_threshold]"
399349
)
400350
);
401351

@@ -410,7 +360,7 @@ public void testTokensWeightThresholdCorrect() {
410360
})));
411361
assertThat(
412362
eTestRangeLower.getMessage(),
413-
containsString("[pruning_config] field [tokens_weight_threshold] field should be a number between 0.0 and 1.0")
363+
containsString("Failed to parse mapping: java.lang.IllegalArgumentException: [tokens_weight_threshold] must be between 0 and 1")
414364
);
415365

416366
Exception eTestRangeHigher = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> {
@@ -424,7 +374,7 @@ public void testTokensWeightThresholdCorrect() {
424374
})));
425375
assertThat(
426376
eTestRangeHigher.getMessage(),
427-
containsString("[pruning_config] field [tokens_weight_threshold] field should be a number between 0.0 and 1.0")
377+
containsString("Failed to parse mapping: java.lang.IllegalArgumentException: [tokens_weight_threshold] must be between 0 and 1")
428378
);
429379
}
430380

@@ -559,4 +509,33 @@ private Map<String, Float> toFloats(Map<String, ?> value) {
559509
}
560510
return result;
561511
}
512+
513+
private void checkParsedDocument(DocumentMapper mapper) throws IOException {
514+
ParsedDocument doc1 = mapper.parse(source(this::writeField));
515+
516+
List<IndexableField> fields = doc1.rootDoc().getFields("field");
517+
assertEquals(2, fields.size());
518+
assertThat(fields.get(0), Matchers.instanceOf(XFeatureField.class));
519+
XFeatureField featureField1 = null;
520+
XFeatureField featureField2 = null;
521+
for (IndexableField field : fields) {
522+
if (field.stringValue().equals("ten")) {
523+
featureField1 = (XFeatureField) field;
524+
} else if (field.stringValue().equals("twenty")) {
525+
featureField2 = (XFeatureField) field;
526+
} else {
527+
throw new UnsupportedOperationException();
528+
}
529+
}
530+
531+
int freq1 = getFrequency(featureField1.tokenStream(null, null));
532+
int freq2 = getFrequency(featureField2.tokenStream(null, null));
533+
assertTrue(freq1 < freq2);
534+
}
535+
536+
private final IndexVersion PRE_SPARSE_VECTOR_INDEX_OPTIONS_VERSION = IndexVersions.DEFAULT_TO_ACORN_HNSW_FILTER_HEURISTIC;
537+
538+
private DocumentMapper getDocumentMapperPreviousVersion(XContentBuilder mappings) throws IOException {
539+
return createMapperService(PRE_SPARSE_VECTOR_INDEX_OPTIONS_VERSION, mappings).documentMapper();
540+
}
562541
}

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryBuilder.java

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -165,8 +165,8 @@ public String getQuery() {
165165
return query;
166166
}
167167

168-
public boolean shouldPruneTokens() {
169-
return shouldPruneTokens != null ? shouldPruneTokens : DEFAULT_PRUNE;
168+
public Boolean shouldPruneTokens() {
169+
return shouldPruneTokens;
170170
}
171171

172172
public TokenPruningConfig getTokenPruningConfig() {
@@ -481,10 +481,15 @@ private TokenPruningSet setPruningConfigFromIndexIfNeeded(
481481
boolean doPruneTokens = false;
482482
TokenPruningConfig setTokenPruningConfig = queryPruningConfig;
483483
if (queryPruneTokens == null || queryPruningConfig == null) {
484+
if (queryPruneTokens != null) {
485+
doPruneTokens = queryPruneTokens;
486+
}
487+
484488
IndexFieldPruningSettings indexPruningSettings = getIndexFieldPruningSettings(fieldMapper);
485489
if (shouldPruneTokens == null && indexPruningSettings.prune != null && indexPruningSettings.prune) {
486490
doPruneTokens = true;
487491
}
492+
488493
if (setTokenPruningConfig == null && indexPruningSettings.pruningConfig != null) {
489494
setTokenPruningConfig = indexPruningSettings.pruningConfig;
490495
}

0 commit comments

Comments
 (0)