Skip to content

Commit 4be9290

Browse files
committed
Initial checkin of refactored index_options code
1 parent ffa423d commit 4be9290

File tree

5 files changed

+268
-16
lines changed

5 files changed

+268
-16
lines changed

server/src/main/java/org/elasticsearch/TransportVersions.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,7 @@ static TransportVersion def(int id) {
191191
public static final TransportVersion ILM_ADD_SKIP_SETTING_8_19 = def(8_841_0_43);
192192
public static final TransportVersion ESQL_REGEX_MATCH_WITH_CASE_INSENSITIVITY_8_19 = def(8_841_0_44);
193193
public static final TransportVersion ESQL_QUERY_PLANNING_DURATION_8_19 = def(8_841_0_45);
194+
// 8.19 backport: continues the 8_841_0_xx patch series directly after ESQL_QUERY_PLANNING_DURATION_8_19 (8_841_0_45).
public static final TransportVersion SPARSE_VECTOR_FIELD_PRUNING_OPTIONS_8_19 = def(8_841_0_46);
194195
public static final TransportVersion V_9_0_0 = def(9_000_0_09);
195196
public static final TransportVersion INITIAL_ELASTICSEARCH_9_0_1 = def(9_000_0_10);
196197
public static final TransportVersion INITIAL_ELASTICSEARCH_9_0_2 = def(9_000_0_11);
@@ -286,6 +287,7 @@ static TransportVersion def(int id) {
286287
public static final TransportVersion ILM_ADD_SKIP_SETTING = def(9_089_0_00);
287288
public static final TransportVersion ML_INFERENCE_MISTRAL_CHAT_COMPLETION_ADDED = def(9_090_0_00);
288289
public static final TransportVersion IDP_CUSTOM_SAML_ATTRIBUTES_ALLOW_LIST = def(9_091_0_00);
290+
public static final TransportVersion SPARSE_VECTOR_FIELD_PRUNING_OPTIONS = def(9_092_0_00);
289291

290292
/*
291293
* STOP! READ THIS FIRST! No, really,

server/src/main/java/org/elasticsearch/index/IndexVersions.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,7 @@ private static Version parseUnchecked(String version) {
142142
public static final IndexVersion DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ_BACKPORT_8_X = def(8_530_0_00, Version.LUCENE_9_12_1);
143143
public static final IndexVersion SEMANTIC_TEXT_DEFAULTS_TO_BBQ_BACKPORT_8_X = def(8_531_0_00, Version.LUCENE_9_12_1);
144144
public static final IndexVersion INDEX_INT_SORT_INT_TYPE_8_19 = def(8_532_0_00, Version.LUCENE_9_12_1);
145+
public static final IndexVersion SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT_BACKPORT_8_X = def(8_533_0_00, Version.LUCENE_9_12_1);
145146
public static final IndexVersion UPGRADE_TO_LUCENE_10_0_0 = def(9_000_0_00, Version.LUCENE_10_0_0);
146147
public static final IndexVersion LOGSDB_DEFAULT_IGNORE_DYNAMIC_BEYOND_LIMIT = def(9_001_0_00, Version.LUCENE_10_0_0);
147148
public static final IndexVersion TIME_BASED_K_ORDERED_DOC_ID = def(9_002_0_00, Version.LUCENE_10_0_0);
@@ -171,6 +172,7 @@ private static Version parseUnchecked(String version) {
171172
public static final IndexVersion DEFAULT_TO_ACORN_HNSW_FILTER_HEURISTIC = def(9_026_0_00, Version.LUCENE_10_2_1);
172173
public static final IndexVersion SEQ_NO_WITHOUT_POINTS = def(9_027_0_00, Version.LUCENE_10_2_1);
173174
public static final IndexVersion INDEX_INT_SORT_INT_TYPE = def(9_028_0_00, Version.LUCENE_10_2_1);
175+
public static final IndexVersion SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT = def(9_029_0_00, Version.LUCENE_10_2_1);
174176

175177
/*
176178
* STOP! READ THIS FIRST! No, really,

server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java

Lines changed: 186 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@
2222
import org.apache.lucene.util.BytesRef;
2323
import org.elasticsearch.common.logging.DeprecationCategory;
2424
import org.elasticsearch.common.lucene.Lucene;
25+
import org.elasticsearch.common.xcontent.support.XContentMapValues;
26+
import org.elasticsearch.core.Nullable;
27+
import org.elasticsearch.features.NodeFeature;
2528
import org.elasticsearch.index.IndexVersion;
2629
import org.elasticsearch.index.IndexVersions;
2730
import org.elasticsearch.index.analysis.NamedAnalyzer;
@@ -31,6 +34,7 @@
3134
import org.elasticsearch.index.mapper.FieldMapper;
3235
import org.elasticsearch.index.mapper.MappedFieldType;
3336
import org.elasticsearch.index.mapper.MapperBuilderContext;
37+
import org.elasticsearch.index.mapper.MappingParserContext;
3438
import org.elasticsearch.index.mapper.SourceLoader;
3539
import org.elasticsearch.index.mapper.SourceValueFetcher;
3640
import org.elasticsearch.index.mapper.TextSearchInfo;
@@ -40,17 +44,27 @@
4044
import org.elasticsearch.inference.WeightedTokensUtils;
4145
import org.elasticsearch.search.fetch.StoredFieldsSpec;
4246
import org.elasticsearch.search.lookup.Source;
47+
import org.elasticsearch.xcontent.ConstructingObjectParser;
48+
import org.elasticsearch.xcontent.DeprecationHandler;
49+
import org.elasticsearch.xcontent.NamedXContentRegistry;
50+
import org.elasticsearch.xcontent.ParseField;
51+
import org.elasticsearch.xcontent.ToXContent;
4352
import org.elasticsearch.xcontent.XContentBuilder;
53+
import org.elasticsearch.xcontent.XContentParser;
4454
import org.elasticsearch.xcontent.XContentParser.Token;
55+
import org.elasticsearch.xcontent.XContentType;
56+
import org.elasticsearch.xcontent.support.MapXContentParser;
4557

4658
import java.io.IOException;
4759
import java.io.UncheckedIOException;
4860
import java.util.LinkedHashMap;
4961
import java.util.List;
5062
import java.util.Map;
63+
import java.util.Objects;
5164
import java.util.stream.Stream;
5265

5366
import static org.elasticsearch.index.query.AbstractQueryBuilder.DEFAULT_BOOST;
67+
import static org.elasticsearch.xcontent.ConstructingObjectParser.optionalConstructorArg;
5468

5569
/**
5670
* A {@link FieldMapper} that exposes Lucene's {@link FeatureField} as a sparse
@@ -59,6 +73,7 @@
5973
public class SparseVectorFieldMapper extends FieldMapper {
6074

6175
public static final String CONTENT_TYPE = "sparse_vector";
76+
public static final String SPARSE_VECTOR_INDEX_OPTIONS = "index_options";
6277

6378
static final String ERROR_MESSAGE_7X = "[sparse_vector] field type in old 7.x indices is allowed to "
6479
+ "contain [sparse_vector] fields, but they cannot be indexed or searched.";
@@ -67,6 +82,10 @@ public class SparseVectorFieldMapper extends FieldMapper {
6782

6883
static final IndexVersion NEW_SPARSE_VECTOR_INDEX_VERSION = IndexVersions.NEW_SPARSE_VECTOR;
6984
static final IndexVersion SPARSE_VECTOR_IN_FIELD_NAMES_INDEX_VERSION = IndexVersions.SPARSE_VECTOR_IN_FIELD_NAMES_SUPPORT;
85+
static final IndexVersion SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION = IndexVersions.SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT;
86+
static final IndexVersion SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION_8_X = IndexVersions.SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT_BACKPORT_8_X;
87+
88+
public static final NodeFeature SPARSE_VECTOR_INDEX_OPTIONS_FEATURE = new NodeFeature("sparse_vector.index_options_supported");
7089

7190
private static SparseVectorFieldMapper toType(FieldMapper in) {
7291
return (SparseVectorFieldMapper) in;
@@ -75,6 +94,15 @@ private static SparseVectorFieldMapper toType(FieldMapper in) {
7594
public static class Builder extends FieldMapper.Builder {
7695
private final Parameter<Boolean> stored = Parameter.storeParam(m -> toType(m).fieldType().isStored(), false);
7796
private final Parameter<Map<String, String>> meta = Parameter.metaParam();
97+
// Mapping parameter for "index_options": defaults to null, is parsed by parseIndexOptions,
// is read back from the mapper's field type, and explicitly accepts a null value.
// NOTE(review): the literal `true` is presumably the "updateable" flag - confirm against FieldMapper.Parameter.
private final Parameter<IndexOptions> indexOptions = new Parameter<>(
98+
SPARSE_VECTOR_INDEX_OPTIONS,
99+
true,
100+
() -> null,
101+
(n, c, o) -> parseIndexOptions(c, o),
102+
m -> toType(m).fieldType().indexOptions,
103+
XContentBuilder::field,
104+
Objects::toString
105+
).acceptsNull();
78106

79107
public Builder(String name) {
80108
super(name);
@@ -87,19 +115,54 @@ public Builder setStored(boolean value) {
87115

88116
@Override
89117
protected Parameter<?>[] getParameters() {
90-
return new Parameter<?>[] { stored, meta };
118+
return new Parameter<?>[] { stored, meta, indexOptions };
91119
}
92120

93121
@Override
94122
public SparseVectorFieldMapper build(MapperBuilderContext context) {
95123
return new SparseVectorFieldMapper(
96124
leafName(),
97-
new SparseVectorFieldType(context.buildFullName(leafName()), stored.getValue(), meta.getValue()),
125+
new SparseVectorFieldType(context.buildFullName(leafName()), stored.getValue(), meta.getValue(), indexOptions.getValue()),
98126
builderParams(this, context)
99127
);
100128
}
101129
}
102130

131+
/** Returns the index_options held by this mapper's field type, as reported by the field type's own getter. */
public IndexOptions getIndexOptions() {
132+
return fieldType().getIndexOptions();
133+
}
134+
135+
/**
 * Object parser for the index_options mapping node. Constructor arguments are positional:
 * slot 0 is the optional "prune" boolean, slot 1 the optional "pruning_config" object.
 */
private static final ConstructingObjectParser<IndexOptions, Void> INDEX_OPTIONS_PARSER = new ConstructingObjectParser<>(
    SPARSE_VECTOR_INDEX_OPTIONS,
    args -> {
        final Boolean prune = (Boolean) args[0];
        final TokenPruningConfig pruningConfig = (TokenPruningConfig) args[1];
        return new IndexOptions(prune, pruningConfig);
    }
);

static {
    // Declaration order defines the args[] positions used by the constructor lambda above.
    INDEX_OPTIONS_PARSER.declareBoolean(optionalConstructorArg(), IndexOptions.PRUNE_FIELD_NAME);
    INDEX_OPTIONS_PARSER.declareObject(optionalConstructorArg(), TokenPruningConfig.PARSER, IndexOptions.PRUNING_CONFIG_FIELD_NAME);
}
144+
145+
private static SparseVectorFieldMapper.IndexOptions parseIndexOptions(MappingParserContext context, Object propNode) {
146+
if (propNode == null) {
147+
return null;
148+
}
149+
150+
Map<String, Object> indexOptionsMap = XContentMapValues.nodeMapValue(propNode, SPARSE_VECTOR_INDEX_OPTIONS);
151+
152+
XContentParser parser = new MapXContentParser(
153+
NamedXContentRegistry.EMPTY,
154+
DeprecationHandler.IGNORE_DEPRECATIONS,
155+
indexOptionsMap,
156+
XContentType.JSON
157+
);
158+
159+
try {
160+
return INDEX_OPTIONS_PARSER.parse(parser, null);
161+
} catch (IOException e) {
162+
throw new UncheckedIOException(e);
163+
}
164+
}
165+
103166
public static final TypeParser PARSER = new TypeParser((n, c) -> {
104167
if (c.indexVersionCreated().before(PREVIOUS_SPARSE_VECTOR_INDEX_VERSION)) {
105168
deprecationLogger.warn(DeprecationCategory.MAPPINGS, "sparse_vector", ERROR_MESSAGE_7X);
@@ -111,9 +174,19 @@ public SparseVectorFieldMapper build(MapperBuilderContext context) {
111174
}, notInMultiFields(CONTENT_TYPE));
112175

113176
public static final class SparseVectorFieldType extends MappedFieldType {
177+
private final IndexOptions indexOptions;
114178

115179
/** Creates a field type without index_options by delegating to the four-argument constructor with null. */
public SparseVectorFieldType(String name, boolean isStored, Map<String, String> meta) {
180+
this(name, isStored, meta, null);
181+
}
182+
183+
/**
 * Creates a field type that also carries the mapping's index_options.
 *
 * @param indexOptions the parsed index_options, or null when none were configured
 */
public SparseVectorFieldType(String name, boolean isStored, Map<String, String> meta, @Nullable SparseVectorFieldMapper.IndexOptions indexOptions) {
116184
super(name, true, isStored, false, TextSearchInfo.SIMPLE_MATCH_ONLY, meta);
185+
this.indexOptions = indexOptions;
186+
}
187+
188+
/** Returns the index_options given at construction time; null when none were supplied. */
public IndexOptions getIndexOptions() {
189+
return indexOptions;
117190
}
118191

119192
@Override
@@ -155,14 +228,54 @@ public Query finalizeSparseVectorQuery(
155228
SearchExecutionContext context,
156229
String fieldName,
157230
List<WeightedToken> queryVectors,
158-
boolean shouldPruneTokens,
159-
TokenPruningConfig tokenPruningConfig
231+
Boolean shouldPruneTokensFromQuery,
232+
TokenPruningConfig tokenPruningConfigFromQuery
160233
) throws IOException {
161-
return (shouldPruneTokens)
162-
? WeightedTokensUtils.queryBuilderWithPrunedTokens(fieldName, tokenPruningConfig, queryVectors, this, context)
234+
TokenPruningConfig pruningConfig = null;
235+
236+
if (shouldPruneTokensFromQuery != null) {
237+
// if this is not null, the query is overriding the index config
238+
pruningConfig = shouldPruneTokensFromQuery ? tokenPruningConfigFromQuery : null;
239+
} else {
240+
// check and see if we explicitly do not prune in the index_options
241+
boolean explicitlyDoNotPrune = this.indexOptions != null
242+
&& this.indexOptions.prune != null
243+
&& this.indexOptions.prune == false;
244+
245+
if (explicitlyDoNotPrune == false) {
246+
// get the explicit pruning config from the index_options if available
247+
pruningConfig = this.indexOptions != null ? this.indexOptions.pruningConfig : null;
248+
249+
// if we're still null, set the default based on the index version
250+
// newer index versions default to true, while older is false
251+
pruningConfig = pruningConfig == null ? getDefaultPruningConfig(context) : pruningConfig;
252+
}
253+
}
254+
255+
return (pruningConfig != null)
256+
? WeightedTokensUtils.queryBuilderWithPrunedTokens(fieldName, pruningConfig, queryVectors, this, context)
163257
: WeightedTokensUtils.queryBuilderWithAllTokens(fieldName, queryVectors, this, context);
164258
}
165259

260+
private TokenPruningConfig getDefaultPruningConfig(SearchExecutionContext context) {
261+
IndexVersion indexVersion = context.indexVersionCreated();
262+
263+
if (indexVersion.after(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION)) {
264+
// default pruning for 9.1.0+ is true for this index
265+
return new TokenPruningConfig();
266+
}
267+
268+
if (indexVersion.between(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION_8_X, IndexVersions.UPGRADE_TO_LUCENE_10_0_0))
269+
{
270+
// default pruning for 8.19.0+ is true for this index
271+
return new TokenPruningConfig();
272+
}
273+
274+
// the index version is before we added index_options support
275+
// so pruning is off by default
276+
return null;
277+
}
278+
166279
private static String indexedValueForSearch(Object value) {
167280
if (value instanceof BytesRef) {
168281
return ((BytesRef) value).utf8ToString();
@@ -378,4 +491,71 @@ public void reset() {
378491
}
379492
}
380493

494+
public static class IndexOptions implements ToXContent {
495+
public static final ParseField PRUNE_FIELD_NAME = new ParseField("prune");
496+
public static final ParseField PRUNING_CONFIG_FIELD_NAME = new ParseField("pruning_config");
497+
498+
final Boolean prune;
499+
final TokenPruningConfig pruningConfig;
500+
501+
IndexOptions(@Nullable Boolean prune, @Nullable TokenPruningConfig pruningConfig) {
502+
if (pruningConfig != null && (prune == null || prune == false)) {
503+
throw new IllegalArgumentException(
504+
"["
505+
+ SPARSE_VECTOR_INDEX_OPTIONS
506+
+ "] field ["
507+
+ PRUNING_CONFIG_FIELD_NAME.getPreferredName()
508+
+ "] should only be set if ["
509+
+ PRUNE_FIELD_NAME.getPreferredName()
510+
+ "] is set to true"
511+
);
512+
}
513+
514+
this.prune = prune;
515+
this.pruningConfig = pruningConfig;
516+
}
517+
518+
public Boolean getPrune() {
519+
return prune;
520+
}
521+
522+
public TokenPruningConfig getPruningConfig() {
523+
return pruningConfig;
524+
}
525+
526+
@Override
527+
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
528+
builder.startObject();
529+
530+
if (prune != null) {
531+
builder.field(PRUNE_FIELD_NAME.getPreferredName(), prune);
532+
}
533+
if (pruningConfig != null) {
534+
builder.field(PRUNING_CONFIG_FIELD_NAME.getPreferredName(), pruningConfig);
535+
}
536+
537+
builder.endObject();
538+
return builder;
539+
}
540+
541+
@Override
542+
public final boolean equals(Object other) {
543+
if (other == this) {
544+
return true;
545+
}
546+
547+
if (other == null || getClass() != other.getClass()) {
548+
return false;
549+
}
550+
551+
IndexOptions otherAsIndexOptions = (IndexOptions) other;
552+
return Objects.equals(prune, otherAsIndexOptions.prune) && Objects.equals(pruningConfig, otherAsIndexOptions.pruningConfig);
553+
}
554+
555+
@Override
556+
public final int hashCode() {
557+
return Objects.hash(prune, pruningConfig);
558+
}
559+
}
560+
381561
}

server/src/main/java/org/elasticsearch/index/mapper/vectors/TokenPruningConfig.java

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,16 +14,25 @@
1414
import org.elasticsearch.common.io.stream.StreamOutput;
1515
import org.elasticsearch.common.io.stream.Writeable;
1616
import org.elasticsearch.index.query.QueryBuilder;
17+
import org.elasticsearch.xcontent.ConstructingObjectParser;
18+
import org.elasticsearch.xcontent.DeprecationHandler;
19+
import org.elasticsearch.xcontent.NamedXContentRegistry;
1720
import org.elasticsearch.xcontent.ParseField;
1821
import org.elasticsearch.xcontent.ToXContentObject;
1922
import org.elasticsearch.xcontent.XContentBuilder;
2023
import org.elasticsearch.xcontent.XContentParser;
24+
import org.elasticsearch.xcontent.XContentType;
25+
import org.elasticsearch.xcontent.support.MapXContentParser;
2126

2227
import java.io.IOException;
28+
import java.io.UncheckedIOException;
2329
import java.util.Locale;
30+
import java.util.Map;
2431
import java.util.Objects;
2532
import java.util.Set;
2633

34+
import static org.elasticsearch.xcontent.ConstructingObjectParser.optionalConstructorArg;
35+
2736
public class TokenPruningConfig implements Writeable, ToXContentObject {
2837
public static final String PRUNING_CONFIG_FIELD = "pruning_config";
2938
public static final ParseField TOKENS_FREQ_RATIO_THRESHOLD = new ParseField("tokens_freq_ratio_threshold");
@@ -176,4 +185,38 @@ public static TokenPruningConfig fromXContent(XContentParser parser) throws IOEx
176185
}
177186
return new TokenPruningConfig(ratioThreshold, weightThreshold, onlyScorePrunedTokens);
178187
}
188+
189+
/**
 * Object parser for a pruning configuration. Constructor arguments are positional:
 * slot 0 is the frequency-ratio threshold, slot 1 the weight threshold and slot 2 the
 * only-score-pruned-tokens flag. A missing threshold falls back to its DEFAULT_* constant
 * and a missing flag is treated as false.
 */
public static final ConstructingObjectParser<TokenPruningConfig, Void> PARSER = new ConstructingObjectParser<>(
    PRUNING_CONFIG_FIELD,
    args -> {
        return new TokenPruningConfig(
            args[0] != null ? (Float) args[0] : DEFAULT_TOKENS_FREQ_RATIO_THRESHOLD,
            args[1] != null ? (Float) args[1] : DEFAULT_TOKENS_WEIGHT_THRESHOLD,
            args[2] == null ? false : (Boolean) args[2]
        );
    }
);

static {
    // Declaration order defines the args[] positions used by the constructor lambda above.
    PARSER.declareFloat(optionalConstructorArg(), TOKENS_FREQ_RATIO_THRESHOLD);
    PARSER.declareFloat(optionalConstructorArg(), TOKENS_WEIGHT_THRESHOLD);
    PARSER.declareBoolean(optionalConstructorArg(), ONLY_SCORE_PRUNED_TOKENS_FIELD);
}
203+
204+
public static TokenPruningConfig parseFromMap(Map<String, Object> pruningConfigMap) {
205+
if (pruningConfigMap == null) {
206+
return null;
207+
}
208+
209+
try {
210+
XContentParser parser = new MapXContentParser(
211+
NamedXContentRegistry.EMPTY,
212+
DeprecationHandler.IGNORE_DEPRECATIONS,
213+
pruningConfigMap,
214+
XContentType.JSON
215+
);
216+
217+
return PARSER.parse(parser, null);
218+
} catch (IOException ioEx) {
219+
throw new UncheckedIOException(ioEx);
220+
}
221+
}
179222
}

0 commit comments

Comments
 (0)