Skip to content

Commit e02cd3a

Browse files
committed
Initial checkin - needs tests
1 parent 0360db2 commit e02cd3a

File tree

5 files changed

+426
-18
lines changed

5 files changed

+426
-18
lines changed

server/src/main/java/org/elasticsearch/index/IndexVersions.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,7 @@ private static Version parseUnchecked(String version) {
159159
public static final IndexVersion SYNTHETIC_SOURCE_STORE_ARRAYS_NATIVELY_UNSIGNED_LONG = def(9_019_0_00, Version.LUCENE_10_1_0);
160160
public static final IndexVersion SYNTHETIC_SOURCE_STORE_ARRAYS_NATIVELY_SCALED_FLOAT = def(9_020_0_00, Version.LUCENE_10_1_0);
161161
public static final IndexVersion USE_LUCENE101_POSTINGS_FORMAT = def(9_021_0_00, Version.LUCENE_10_1_0);
162+
public static final IndexVersion SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT = def(9_022_0_00, Version.LUCENE_10_1_0);
162163
/*
163164
* STOP! READ THIS FIRST! No, really,
164165
* ____ _____ ___ ____ _ ____ _____ _ ____ _____ _ _ ___ ____ _____ ___ ____ ____ _____ _

server/src/main/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapper.java

Lines changed: 254 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import org.apache.lucene.util.BytesRef;
2323
import org.elasticsearch.common.logging.DeprecationCategory;
2424
import org.elasticsearch.common.lucene.Lucene;
25+
import org.elasticsearch.core.Nullable;
2526
import org.elasticsearch.index.IndexVersion;
2627
import org.elasticsearch.index.IndexVersions;
2728
import org.elasticsearch.index.analysis.NamedAnalyzer;
@@ -31,13 +32,16 @@
3132
import org.elasticsearch.index.mapper.FieldMapper;
3233
import org.elasticsearch.index.mapper.MappedFieldType;
3334
import org.elasticsearch.index.mapper.MapperBuilderContext;
35+
import org.elasticsearch.index.mapper.MapperParsingException;
36+
import org.elasticsearch.index.mapper.MappingParserContext;
3437
import org.elasticsearch.index.mapper.SourceLoader;
3538
import org.elasticsearch.index.mapper.SourceValueFetcher;
3639
import org.elasticsearch.index.mapper.TextSearchInfo;
3740
import org.elasticsearch.index.mapper.ValueFetcher;
3841
import org.elasticsearch.index.query.SearchExecutionContext;
3942
import org.elasticsearch.search.fetch.StoredFieldsSpec;
4043
import org.elasticsearch.search.lookup.Source;
44+
import org.elasticsearch.xcontent.ToXContent;
4145
import org.elasticsearch.xcontent.XContentBuilder;
4246
import org.elasticsearch.xcontent.XContentParser.Token;
4347

@@ -46,6 +50,7 @@
4650
import java.util.LinkedHashMap;
4751
import java.util.List;
4852
import java.util.Map;
53+
import java.util.Objects;
4954
import java.util.stream.Stream;
5055

5156
import static org.elasticsearch.index.query.AbstractQueryBuilder.DEFAULT_BOOST;
@@ -65,6 +70,9 @@ public class SparseVectorFieldMapper extends FieldMapper {
6570

6671
static final IndexVersion NEW_SPARSE_VECTOR_INDEX_VERSION = IndexVersions.NEW_SPARSE_VECTOR;
6772
static final IndexVersion SPARSE_VECTOR_IN_FIELD_NAMES_INDEX_VERSION = IndexVersions.SPARSE_VECTOR_IN_FIELD_NAMES_SUPPORT;
73+
// First index version that supports pruning index options on sparse_vector fields.
// The constant is declared in IndexVersions (plural), like the two sibling version fields above.
static final IndexVersion SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION = IndexVersions.SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT;
74+
75+
private final SparseVectorFieldMapper.IndexOptions indexOptions;
6876

6977
private static SparseVectorFieldMapper toType(FieldMapper in) {
7078
return (SparseVectorFieldMapper) in;
@@ -73,9 +81,23 @@ private static SparseVectorFieldMapper toType(FieldMapper in) {
7381
public static class Builder extends FieldMapper.Builder {
7482
private final Parameter<Boolean> stored = Parameter.storeParam(m -> toType(m).fieldType().isStored(), false);
7583
private final Parameter<Map<String, String>> meta = Parameter.metaParam();
84+
private final Parameter<IndexOptions> indexOptions;
7685

7786
/**
 * Creates a builder for the named sparse_vector field and wires up the
 * {@code index_options} parameter, which defaults to {@code null}.
 */
public Builder(String name) {
    super(name);
    this.indexOptions = new Parameter<>(
        "index_options",
        true,
        () -> null,
        // an absent node stays null; anything else goes through parseIndexOptions
        (fieldName, parserContext, node) -> {
            if (node == null) {
                return null;
            }
            return parseIndexOptions(fieldName, parserContext, node);
        },
        mapper -> toType(mapper).fieldType().indexOptions,
        // only write the parameter out when a value is present
        (builder, paramName, value) -> {
            if (value == null) {
                return;
            }
            builder.field(paramName, value);
        },
        Objects::toString
    );
}
80102

81103
public Builder setStored(boolean value) {
@@ -85,17 +107,113 @@ public Builder setStored(boolean value) {
85107

86108
@Override
87109
protected Parameter<?>[] getParameters() {
88-
return new Parameter<?>[] { stored, meta };
110+
return new Parameter<?>[] { stored, meta, indexOptions };
89111
}
90112

91113
@Override
92114
public SparseVectorFieldMapper build(MapperBuilderContext context) {
93115
return new SparseVectorFieldMapper(
94116
leafName(),
95-
new SparseVectorFieldType(context.buildFullName(leafName()), stored.getValue(), meta.getValue()),
96-
builderParams(this, context)
117+
new SparseVectorFieldType(context.buildFullName(leafName()), stored.getValue(), meta.getValue(), indexOptions.getValue()),
118+
builderParams(this, context),
119+
indexOptions.getValue()
120+
);
121+
}
122+
}
123+
124+
public IndexOptions getIndexOptions() {
125+
return this.indexOptions;
126+
}
127+
128+
private static SparseVectorFieldMapper.IndexOptions parseIndexOptions(String fieldName, MappingParserContext context, Object propNode) {
129+
@SuppressWarnings("unchecked")
130+
Map<String, ?> indexOptionsMap = (Map<String, ?>) propNode;
131+
132+
boolean hasOneOption = false;
133+
Boolean prune = null;
134+
PruningConfig pruningConfig = null;
135+
136+
Object shouldPrune = indexOptionsMap.remove(IndexOptions.PRUNE_FIELD_NAME);
137+
if (shouldPrune != null) {
138+
if ((shouldPrune instanceof Boolean) == false) {
139+
throw new MapperParsingException("[index_options] field [prune] should be true or false");
140+
}
141+
hasOneOption = true;
142+
prune = ((Boolean) shouldPrune);
143+
}
144+
145+
Object hasPruningConfiguration = indexOptionsMap.remove(IndexOptions.PRUNING_CONFIG_FIELD_NAME);
146+
if (hasPruningConfiguration != null) {
147+
if ((hasPruningConfiguration instanceof Map) == false) {
148+
throw new MapperParsingException("[index_options] field [pruning_config] should be a map");
149+
}
150+
151+
Integer tokensFreqRatioThreshold = null;
152+
Double tokensWeightThreshold = null;
153+
154+
@SuppressWarnings("unchecked")
155+
Map<String, ?> pruningConfigMap = (Map<String, ?>) hasPruningConfiguration;
156+
Object hasTokensFreqRatioThreshold = pruningConfigMap.remove(PruningConfig.TOKENS_FREQ_RATIO_THRESHOLD_FIELD_NAME);
157+
Object hasTokensWeightThreshold = pruningConfigMap.remove(PruningConfig.TOKENS_WEIGHT_THRESHOLD_FIELD_NAME);
158+
159+
if (pruningConfigMap.isEmpty() == false) {
160+
throw new MapperParsingException("[index_options] field [pruning_config] has unknown fields");
161+
}
162+
163+
if (hasTokensFreqRatioThreshold != null) {
164+
if ((hasTokensFreqRatioThreshold instanceof Integer) == false) {
165+
throw new MapperParsingException(
166+
"[pruning_config] field [tokens_freq_ratio_threshold] field should be an integer between 1 and 100"
167+
);
168+
}
169+
tokensFreqRatioThreshold = (Integer) hasTokensFreqRatioThreshold;
170+
if (tokensFreqRatioThreshold < PruningConfig.MIN_TOKENS_FREQ_RATIO_THRESHOLD
171+
|| tokensFreqRatioThreshold > PruningConfig.MAX_TOKENS_FREQ_RATIO_THRESHOLD) {
172+
throw new MapperParsingException(
173+
"[pruning_config] field [tokens_freq_ratio_threshold] field should be an integer between 1 and 100"
174+
);
175+
}
176+
}
177+
178+
if (hasTokensWeightThreshold != null) {
179+
if ((hasTokensWeightThreshold instanceof Double) == false) {
180+
throw new MapperParsingException(
181+
"[pruning_config] field [tokens_weight_threshold] field should be an number between 0.0 and 1.0"
182+
);
183+
}
184+
tokensWeightThreshold = (Double) hasTokensWeightThreshold;
185+
if (tokensWeightThreshold < PruningConfig.MIN_TOKENS_WEIGHT_THRESHOLD
186+
|| tokensWeightThreshold > PruningConfig.MAX_TOKENS_WEIGHT_THRESHOLD) {
187+
throw new MapperParsingException(
188+
"[pruning_config] field [tokens_weight_threshold] field should be an number between 0.0 and 1.0"
189+
);
190+
}
191+
}
192+
193+
if (tokensFreqRatioThreshold != null || tokensWeightThreshold != null) {
194+
pruningConfig = new PruningConfig(tokensFreqRatioThreshold, tokensWeightThreshold);
195+
hasOneOption = true;
196+
}
197+
}
198+
199+
if (hasOneOption == false) {
200+
if (context.indexVersionCreated().before(SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION)) {
201+
// don't set defaults if this index was created before
202+
// we added this functionality in, so it will
203+
// not change current index behaviour
204+
return null;
205+
}
206+
207+
// index options are not set - for new indices, we
208+
// need to set pruning to true by default
209+
// with a default pruning configuration
210+
return new IndexOptions(
211+
true,
212+
new PruningConfig(PruningConfig.DEFAULT_TOKENS_FREQ_RATIO_THRESHOLD, PruningConfig.DEFAULT_TOKENS_WEIGHT_THRESHOLD)
97213
);
98214
}
215+
216+
return new SparseVectorFieldMapper.IndexOptions(prune, pruningConfig);
99217
}
100218

101219
public static final TypeParser PARSER = new TypeParser((n, c) -> {
@@ -109,9 +227,21 @@ public SparseVectorFieldMapper build(MapperBuilderContext context) {
109227
}, notInMultiFields(CONTENT_TYPE));
110228

111229
public static final class SparseVectorFieldType extends MappedFieldType {
230+
private final IndexOptions indexOptions;
112231

113232
/** Creates a field type without index options; equivalent to passing {@code null} for them. */
public SparseVectorFieldType(String name, boolean isStored, Map<String, String> meta) {
    this(name, isStored, meta, null);
}
236+
237+
/**
 * Creates a field type that carries the given index options.
 *
 * @param indexOptions the index options for this field, or {@code null} when none are configured
 */
public SparseVectorFieldType(String name, boolean isStored, Map<String, String> meta, @Nullable SparseVectorFieldMapper.IndexOptions indexOptions) {
    super(name, true, isStored, false, TextSearchInfo.SIMPLE_MATCH_ONLY, meta);
    this.indexOptions = indexOptions;
}
116246

117247
@Override
@@ -157,8 +287,14 @@ private static String indexedValueForSearch(Object value) {
157287
}
158288
}
159289

160-
private SparseVectorFieldMapper(String simpleName, MappedFieldType mappedFieldType, BuilderParams builderParams) {
290+
/**
 * Creates the mapper and stores the supplied index options.
 *
 * @param indexOptions the index options for this field, or {@code null} when none are configured
 */
private SparseVectorFieldMapper(String simpleName, MappedFieldType mappedFieldType, BuilderParams builderParams, @Nullable IndexOptions indexOptions) {
    super(simpleName, mappedFieldType, builderParams);
    this.indexOptions = indexOptions;
}
163299

164300
@Override
@@ -364,4 +500,118 @@ public void reset() {
364500
}
365501
}
366502

503+
public static class IndexOptions implements ToXContent {
504+
public static final String PRUNE_FIELD_NAME = "prune";
505+
public static final String PRUNING_CONFIG_FIELD_NAME = "pruning_config";
506+
507+
final Boolean prune;
508+
final PruningConfig pruningConfig;
509+
510+
IndexOptions(@Nullable Boolean prune, @Nullable PruningConfig pruningConfig) {
511+
this.prune = prune;
512+
this.pruningConfig = pruningConfig;
513+
}
514+
515+
public Boolean getPrune() {
516+
return prune;
517+
}
518+
519+
public PruningConfig getPruningConfig() {
520+
return pruningConfig;
521+
}
522+
523+
@Override
524+
public final boolean equals(Object other) {
525+
if (other == this) {
526+
return true;
527+
}
528+
if (other instanceof IndexOptions otherOptions) {
529+
return Objects.equals(prune, otherOptions.prune) && Objects.equals(pruningConfig, otherOptions.pruningConfig);
530+
}
531+
return false;
532+
}
533+
534+
@Override
535+
public final int hashCode() {
536+
return Objects.hash(prune, pruningConfig);
537+
}
538+
539+
@Override
540+
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
541+
builder.startObject();
542+
if (prune != null) {
543+
builder.field(PRUNE_FIELD_NAME, prune);
544+
}
545+
if (pruningConfig != null) {
546+
builder.field(PRUNING_CONFIG_FIELD_NAME, pruningConfig);
547+
}
548+
builder.endObject();
549+
return builder;
550+
}
551+
}
552+
553+
public static class PruningConfig implements ToXContent {
554+
public static final String TOKENS_FREQ_RATIO_THRESHOLD_FIELD_NAME = "tokens_freq_ratio_threshold";
555+
public static final String TOKENS_WEIGHT_THRESHOLD_FIELD_NAME = "tokens_weight_threshold";
556+
557+
public static Integer DEFAULT_TOKENS_FREQ_RATIO_THRESHOLD = 5;
558+
public static Integer MIN_TOKENS_FREQ_RATIO_THRESHOLD = 1;
559+
public static Integer MAX_TOKENS_FREQ_RATIO_THRESHOLD = 100;
560+
561+
public static Double DEFAULT_TOKENS_WEIGHT_THRESHOLD = 0.4;
562+
public static Double MIN_TOKENS_WEIGHT_THRESHOLD = 0.0;
563+
public static Double MAX_TOKENS_WEIGHT_THRESHOLD = 1.0;
564+
565+
final Integer tokens_freq_ratio_threshold;
566+
final Double tokens_weight_threshold;
567+
568+
PruningConfig(@Nullable Integer tokens_freq_ratio_threshold, @Nullable Double tokens_weight_threshold) {
569+
this.tokens_freq_ratio_threshold = tokens_freq_ratio_threshold;
570+
this.tokens_weight_threshold = tokens_weight_threshold;
571+
}
572+
573+
public int getTokensFreqRatioThresholdOrDefault() {
574+
if (tokens_freq_ratio_threshold == null) {
575+
return DEFAULT_TOKENS_FREQ_RATIO_THRESHOLD;
576+
}
577+
return tokens_freq_ratio_threshold;
578+
}
579+
580+
public double getTokensWeightThresholdOrDefault() {
581+
if (tokens_weight_threshold == null) {
582+
return DEFAULT_TOKENS_WEIGHT_THRESHOLD;
583+
}
584+
return tokens_weight_threshold;
585+
}
586+
587+
@Override
588+
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
589+
builder.startObject();
590+
if (tokens_freq_ratio_threshold != null) {
591+
builder.field(TOKENS_FREQ_RATIO_THRESHOLD_FIELD_NAME, tokens_freq_ratio_threshold);
592+
}
593+
if (tokens_weight_threshold != null) {
594+
builder.field(TOKENS_WEIGHT_THRESHOLD_FIELD_NAME, tokens_weight_threshold);
595+
}
596+
builder.endObject();
597+
return builder;
598+
}
599+
600+
@Override
601+
public final boolean equals(Object other) {
602+
if (other == this) {
603+
return true;
604+
}
605+
if (other instanceof PruningConfig otherConfig) {
606+
return Objects.equals(tokens_freq_ratio_threshold, otherConfig.tokens_freq_ratio_threshold)
607+
&& Objects.equals(tokens_weight_threshold, otherConfig.tokens_weight_threshold);
608+
}
609+
return false;
610+
}
611+
612+
@Override
613+
public final int hashCode() {
614+
return Objects.hash(tokens_freq_ratio_threshold, tokens_weight_threshold);
615+
}
616+
}
367617
}

server/src/test/java/org/elasticsearch/index/mapper/vectors/SparseVectorFieldMapperTests.java

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,23 @@ protected void minimalMapping(XContentBuilder b) throws IOException {
6767
b.field("type", "sparse_vector");
6868
}
6969

70+
protected void mappingWithIndexOptionsPrune(XContentBuilder b) throws IOException {
71+
b.field("type", "sparse_vector");
72+
b.startObject("index_options");
73+
b.field("prune", true);
74+
b.endObject();
75+
}
76+
77+
protected void mappingWithIndexOptionsPruningConfig(XContentBuilder b) throws IOException {
78+
b.field("type", "sparse_vector");
79+
b.startObject("index_options");
80+
b.startObject("pruning_config");
81+
b.field("tokens_freq_ratio_threshold", 5);
82+
b.field("tokens_weight_threshold", 0.4);
83+
b.endObject();
84+
b.endObject();
85+
}
86+
7087
@Override
7188
protected boolean supportsStoredFields() {
7289
return false;
@@ -120,6 +137,20 @@ public void testDefaults() throws Exception {
120137
assertTrue(freq1 < freq2);
121138
}
122139

140+
public void testWithIndexOptionsPrune() throws Exception {
141+
DocumentMapper mapper = createDocumentMapper(fieldMapping(this::mappingWithIndexOptionsPrune));
142+
assertEquals(Strings.toString(fieldMapping(this::mappingWithIndexOptionsPrune)), mapper.mappingSource().toString());
143+
144+
// TODO -- finish
145+
}
146+
147+
public void testWithIndexOptionsPruningConfig() throws Exception {
148+
DocumentMapper mapper = createDocumentMapper(fieldMapping(this::mappingWithIndexOptionsPruningConfig));
149+
assertEquals(Strings.toString(fieldMapping(this::mappingWithIndexOptionsPruningConfig)), mapper.mappingSource().toString());
150+
151+
// TODO -- finish
152+
}
153+
123154
public void testDotInFieldName() throws Exception {
124155
DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping));
125156
ParsedDocument parsedDocument = mapper.parse(source(b -> b.field("field", Map.of("foo.bar", 10, "foobar", 20))));

0 commit comments

Comments
 (0)