2222import org .apache .lucene .util .BytesRef ;
2323import org .elasticsearch .common .logging .DeprecationCategory ;
2424import org .elasticsearch .common .lucene .Lucene ;
25+ import org .elasticsearch .core .Nullable ;
2526import org .elasticsearch .index .IndexVersion ;
2627import org .elasticsearch .index .IndexVersions ;
2728import org .elasticsearch .index .analysis .NamedAnalyzer ;
3132import org .elasticsearch .index .mapper .FieldMapper ;
3233import org .elasticsearch .index .mapper .MappedFieldType ;
3334import org .elasticsearch .index .mapper .MapperBuilderContext ;
35+ import org .elasticsearch .index .mapper .MapperParsingException ;
36+ import org .elasticsearch .index .mapper .MappingParserContext ;
3437import org .elasticsearch .index .mapper .SourceLoader ;
3538import org .elasticsearch .index .mapper .SourceValueFetcher ;
3639import org .elasticsearch .index .mapper .TextSearchInfo ;
3740import org .elasticsearch .index .mapper .ValueFetcher ;
3841import org .elasticsearch .index .query .SearchExecutionContext ;
3942import org .elasticsearch .search .fetch .StoredFieldsSpec ;
4043import org .elasticsearch .search .lookup .Source ;
44+ import org .elasticsearch .xcontent .ToXContent ;
4145import org .elasticsearch .xcontent .XContentBuilder ;
4246import org .elasticsearch .xcontent .XContentParser .Token ;
4347
4650import java .util .LinkedHashMap ;
4751import java .util .List ;
4852import java .util .Map ;
53+ import java .util .Objects ;
4954import java .util .stream .Stream ;
5055
5156import static org .elasticsearch .index .query .AbstractQueryBuilder .DEFAULT_BOOST ;
@@ -65,6 +70,9 @@ public class SparseVectorFieldMapper extends FieldMapper {
6570
6671 static final IndexVersion NEW_SPARSE_VECTOR_INDEX_VERSION = IndexVersions .NEW_SPARSE_VECTOR ;
6772 static final IndexVersion SPARSE_VECTOR_IN_FIELD_NAMES_INDEX_VERSION = IndexVersions .SPARSE_VECTOR_IN_FIELD_NAMES_SUPPORT ;
73+ static final IndexVersion SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION = IndexVersion .SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT ;
74+
75+ private final SparseVectorFieldMapper .IndexOptions indexOptions ;
6876
6977 private static SparseVectorFieldMapper toType (FieldMapper in ) {
7078 return (SparseVectorFieldMapper ) in ;
@@ -73,9 +81,23 @@ private static SparseVectorFieldMapper toType(FieldMapper in) {
7381 public static class Builder extends FieldMapper .Builder {
7482 private final Parameter <Boolean > stored = Parameter .storeParam (m -> toType (m ).fieldType ().isStored (), false );
7583 private final Parameter <Map <String , String >> meta = Parameter .metaParam ();
84+ private final Parameter <IndexOptions > indexOptions ;
7685
/**
 * Creates a builder for the field with the given name and sets up the
 * {@code index_options} parameter.
 *
 * @param name the name handed to the parent builder
 */
public Builder(String name) {
    super(name);
    this.indexOptions = new Parameter<>(
        "index_options",
        true,
        () -> null,
        (fieldName, parserContext, node) -> {
            // a null node leaves the options unset instead of invoking the parser
            if (node == null) {
                return null;
            }
            return parseIndexOptions(fieldName, parserContext, node);
        },
        mapper -> toType(mapper).fieldType().indexOptions,
        (builder, paramName, value) -> {
            // nothing is written when no index options are present
            if (value == null) {
                return;
            }
            builder.field(paramName, value);
        },
        Objects::toString
    );
}
80102
81103 public Builder setStored (boolean value ) {
@@ -85,17 +107,113 @@ public Builder setStored(boolean value) {
85107
86108 @ Override
87109 protected Parameter <?>[] getParameters () {
88- return new Parameter <?>[] { stored , meta };
110+ return new Parameter <?>[] { stored , meta , indexOptions };
89111 }
90112
91113 @ Override
92114 public SparseVectorFieldMapper build (MapperBuilderContext context ) {
93115 return new SparseVectorFieldMapper (
94116 leafName (),
95- new SparseVectorFieldType (context .buildFullName (leafName ()), stored .getValue (), meta .getValue ()),
96- builderParams (this , context )
117+ new SparseVectorFieldType (context .buildFullName (leafName ()), stored .getValue (), meta .getValue (), indexOptions .getValue ()),
118+ builderParams (this , context ),
119+ indexOptions .getValue ()
120+ );
121+ }
122+ }
123+
124+ public IndexOptions getIndexOptions () {
125+ return this .indexOptions ;
126+ }
127+
128+ private static SparseVectorFieldMapper .IndexOptions parseIndexOptions (String fieldName , MappingParserContext context , Object propNode ) {
129+ @ SuppressWarnings ("unchecked" )
130+ Map <String , ?> indexOptionsMap = (Map <String , ?>) propNode ;
131+
132+ boolean hasOneOption = false ;
133+ Boolean prune = null ;
134+ PruningConfig pruningConfig = null ;
135+
136+ Object shouldPrune = indexOptionsMap .remove (IndexOptions .PRUNE_FIELD_NAME );
137+ if (shouldPrune != null ) {
138+ if ((shouldPrune instanceof Boolean ) == false ) {
139+ throw new MapperParsingException ("[index_options] field [prune] should be true or false" );
140+ }
141+ hasOneOption = true ;
142+ prune = ((Boolean ) shouldPrune );
143+ }
144+
145+ Object hasPruningConfiguration = indexOptionsMap .remove (IndexOptions .PRUNING_CONFIG_FIELD_NAME );
146+ if (hasPruningConfiguration != null ) {
147+ if ((hasPruningConfiguration instanceof Map ) == false ) {
148+ throw new MapperParsingException ("[index_options] field [pruning_config] should be a map" );
149+ }
150+
151+ Integer tokensFreqRatioThreshold = null ;
152+ Double tokensWeightThreshold = null ;
153+
154+ @ SuppressWarnings ("unchecked" )
155+ Map <String , ?> pruningConfigMap = (Map <String , ?>) hasPruningConfiguration ;
156+ Object hasTokensFreqRatioThreshold = pruningConfigMap .remove (PruningConfig .TOKENS_FREQ_RATIO_THRESHOLD_FIELD_NAME );
157+ Object hasTokensWeightThreshold = pruningConfigMap .remove (PruningConfig .TOKENS_WEIGHT_THRESHOLD_FIELD_NAME );
158+
159+ if (pruningConfigMap .isEmpty () == false ) {
160+ throw new MapperParsingException ("[index_options] field [pruning_config] has unknown fields" );
161+ }
162+
163+ if (hasTokensFreqRatioThreshold != null ) {
164+ if ((hasTokensFreqRatioThreshold instanceof Integer ) == false ) {
165+ throw new MapperParsingException (
166+ "[pruning_config] field [tokens_freq_ratio_threshold] field should be an integer between 1 and 100"
167+ );
168+ }
169+ tokensFreqRatioThreshold = (Integer ) hasTokensFreqRatioThreshold ;
170+ if (tokensFreqRatioThreshold < PruningConfig .MIN_TOKENS_FREQ_RATIO_THRESHOLD
171+ || tokensFreqRatioThreshold > PruningConfig .MAX_TOKENS_FREQ_RATIO_THRESHOLD ) {
172+ throw new MapperParsingException (
173+ "[pruning_config] field [tokens_freq_ratio_threshold] field should be an integer between 1 and 100"
174+ );
175+ }
176+ }
177+
178+ if (hasTokensWeightThreshold != null ) {
179+ if ((hasTokensWeightThreshold instanceof Double ) == false ) {
180+ throw new MapperParsingException (
181+ "[pruning_config] field [tokens_weight_threshold] field should be an number between 0.0 and 1.0"
182+ );
183+ }
184+ tokensWeightThreshold = (Double ) hasTokensWeightThreshold ;
185+ if (tokensWeightThreshold < PruningConfig .MIN_TOKENS_WEIGHT_THRESHOLD
186+ || tokensWeightThreshold > PruningConfig .MAX_TOKENS_WEIGHT_THRESHOLD ) {
187+ throw new MapperParsingException (
188+ "[pruning_config] field [tokens_weight_threshold] field should be an number between 0.0 and 1.0"
189+ );
190+ }
191+ }
192+
193+ if (tokensFreqRatioThreshold != null || tokensWeightThreshold != null ) {
194+ pruningConfig = new PruningConfig (tokensFreqRatioThreshold , tokensWeightThreshold );
195+ hasOneOption = true ;
196+ }
197+ }
198+
199+ if (hasOneOption == false ) {
200+ if (context .indexVersionCreated ().before (SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION )) {
201+ // don't set defaults if this index was created before
202+ // we added this functionality in, so it will
203+ // not change current index behaviour
204+ return null ;
205+ }
206+
207+ // index options are not set - for new indices, we
208+ // need to set pruning to true by default
209+ // with a default pruning configuration
210+ return new IndexOptions (
211+ true ,
212+ new PruningConfig (PruningConfig .DEFAULT_TOKENS_FREQ_RATIO_THRESHOLD , PruningConfig .DEFAULT_TOKENS_WEIGHT_THRESHOLD )
97213 );
98214 }
215+
216+ return new SparseVectorFieldMapper .IndexOptions (prune , pruningConfig );
99217 }
100218
101219 public static final TypeParser PARSER = new TypeParser ((n , c ) -> {
@@ -109,9 +227,21 @@ public SparseVectorFieldMapper build(MapperBuilderContext context) {
109227 }, notInMultiFields (CONTENT_TYPE ));
110228
111229 public static final class SparseVectorFieldType extends MappedFieldType {
// index options attached to this field type; null when none were supplied
private final IndexOptions indexOptions;

/**
 * Creates a field type without index options.
 *
 * @param name     field name
 * @param isStored whether the field is stored
 * @param meta     field metadata
 */
public SparseVectorFieldType(String name, boolean isStored, Map<String, String> meta) {
    this(name, isStored, meta, null);
}

/**
 * Creates a field type carrying the given index options.
 *
 * @param name         field name
 * @param isStored     whether the field is stored
 * @param meta         field metadata
 * @param indexOptions index options for the field; may be {@code null}
 */
public SparseVectorFieldType(
    String name,
    boolean isStored,
    Map<String, String> meta,
    @Nullable SparseVectorFieldMapper.IndexOptions indexOptions
) {
    super(name, true, isStored, false, TextSearchInfo.SIMPLE_MATCH_ONLY, meta);
    this.indexOptions = indexOptions;
}
116246
117247 @ Override
@@ -157,8 +287,14 @@ private static String indexedValueForSearch(Object value) {
157287 }
158288 }
159289
/**
 * Creates the mapper and records the index options it was built with.
 *
 * @param simpleName      name passed to the parent mapper
 * @param mappedFieldType field type passed to the parent mapper
 * @param builderParams   builder params passed to the parent mapper
 * @param indexOptions    index options for this field; may be {@code null}
 */
private SparseVectorFieldMapper(
    String simpleName,
    MappedFieldType mappedFieldType,
    BuilderParams builderParams,
    @Nullable IndexOptions indexOptions
) {
    super(simpleName, mappedFieldType, builderParams);
    this.indexOptions = indexOptions;
}
163299
164300 @ Override
@@ -364,4 +500,118 @@ public void reset() {
364500 }
365501 }
366502
503+ public static class IndexOptions implements ToXContent {
504+ public static final String PRUNE_FIELD_NAME = "prune" ;
505+ public static final String PRUNING_CONFIG_FIELD_NAME = "pruning_config" ;
506+
507+ final Boolean prune ;
508+ final PruningConfig pruningConfig ;
509+
510+ IndexOptions (@ Nullable Boolean prune , @ Nullable PruningConfig pruningConfig ) {
511+ this .prune = prune ;
512+ this .pruningConfig = pruningConfig ;
513+ }
514+
515+ public Boolean getPrune () {
516+ return prune ;
517+ }
518+
519+ public PruningConfig getPruningConfig () {
520+ return pruningConfig ;
521+ }
522+
523+ @ Override
524+ public final boolean equals (Object other ) {
525+ if (other == this ) {
526+ return true ;
527+ }
528+ if (other instanceof IndexOptions otherOptions ) {
529+ return Objects .equals (prune , otherOptions .prune ) && Objects .equals (pruningConfig , otherOptions .pruningConfig );
530+ }
531+ return false ;
532+ }
533+
534+ @ Override
535+ public final int hashCode () {
536+ return Objects .hash (prune , pruningConfig );
537+ }
538+
539+ @ Override
540+ public XContentBuilder toXContent (XContentBuilder builder , Params params ) throws IOException {
541+ builder .startObject ();
542+ if (prune != null ) {
543+ builder .field (PRUNE_FIELD_NAME , prune );
544+ }
545+ if (pruningConfig != null ) {
546+ builder .field (PRUNING_CONFIG_FIELD_NAME , pruningConfig );
547+ }
548+ builder .endObject ();
549+ return builder ;
550+ }
551+ }
552+
553+ public static class PruningConfig implements ToXContent {
554+ public static final String TOKENS_FREQ_RATIO_THRESHOLD_FIELD_NAME = "tokens_freq_ratio_threshold" ;
555+ public static final String TOKENS_WEIGHT_THRESHOLD_FIELD_NAME = "tokens_weight_threshold" ;
556+
557+ public static Integer DEFAULT_TOKENS_FREQ_RATIO_THRESHOLD = 5 ;
558+ public static Integer MIN_TOKENS_FREQ_RATIO_THRESHOLD = 1 ;
559+ public static Integer MAX_TOKENS_FREQ_RATIO_THRESHOLD = 100 ;
560+
561+ public static Double DEFAULT_TOKENS_WEIGHT_THRESHOLD = 0.4 ;
562+ public static Double MIN_TOKENS_WEIGHT_THRESHOLD = 0.0 ;
563+ public static Double MAX_TOKENS_WEIGHT_THRESHOLD = 1.0 ;
564+
565+ final Integer tokens_freq_ratio_threshold ;
566+ final Double tokens_weight_threshold ;
567+
568+ PruningConfig (@ Nullable Integer tokens_freq_ratio_threshold , @ Nullable Double tokens_weight_threshold ) {
569+ this .tokens_freq_ratio_threshold = tokens_freq_ratio_threshold ;
570+ this .tokens_weight_threshold = tokens_weight_threshold ;
571+ }
572+
573+ public int getTokensFreqRatioThresholdOrDefault () {
574+ if (tokens_freq_ratio_threshold == null ) {
575+ return DEFAULT_TOKENS_FREQ_RATIO_THRESHOLD ;
576+ }
577+ return tokens_freq_ratio_threshold ;
578+ }
579+
580+ public double getTokensWeightThresholdOrDefault () {
581+ if (tokens_weight_threshold == null ) {
582+ return DEFAULT_TOKENS_WEIGHT_THRESHOLD ;
583+ }
584+ return tokens_weight_threshold ;
585+ }
586+
587+ @ Override
588+ public XContentBuilder toXContent (XContentBuilder builder , Params params ) throws IOException {
589+ builder .startObject ();
590+ if (tokens_freq_ratio_threshold != null ) {
591+ builder .field (TOKENS_FREQ_RATIO_THRESHOLD_FIELD_NAME , tokens_freq_ratio_threshold );
592+ }
593+ if (tokens_weight_threshold != null ) {
594+ builder .field (TOKENS_WEIGHT_THRESHOLD_FIELD_NAME , tokens_weight_threshold );
595+ }
596+ builder .endObject ();
597+ return builder ;
598+ }
599+
600+ @ Override
601+ public final boolean equals (Object other ) {
602+ if (other == this ) {
603+ return true ;
604+ }
605+ if (other instanceof PruningConfig otherConfig ) {
606+ return Objects .equals (tokens_freq_ratio_threshold , otherConfig .tokens_freq_ratio_threshold )
607+ && Objects .equals (tokens_weight_threshold , otherConfig .tokens_weight_threshold );
608+ }
609+ return false ;
610+ }
611+
612+ @ Override
613+ public final int hashCode () {
614+ return Objects .hash (tokens_freq_ratio_threshold , tokens_weight_threshold );
615+ }
616+ }
367617}
0 commit comments