2222import org .apache .lucene .util .BytesRef ;
2323import org .elasticsearch .common .logging .DeprecationCategory ;
2424import org .elasticsearch .common .lucene .Lucene ;
25+ import org .elasticsearch .common .xcontent .support .XContentMapValues ;
26+ import org .elasticsearch .core .Nullable ;
27+ import org .elasticsearch .features .NodeFeature ;
2528import org .elasticsearch .index .IndexVersion ;
2629import org .elasticsearch .index .IndexVersions ;
2730import org .elasticsearch .index .analysis .NamedAnalyzer ;
3134import org .elasticsearch .index .mapper .FieldMapper ;
3235import org .elasticsearch .index .mapper .MappedFieldType ;
3336import org .elasticsearch .index .mapper .MapperBuilderContext ;
37+ import org .elasticsearch .index .mapper .MappingParserContext ;
3438import org .elasticsearch .index .mapper .SourceLoader ;
3539import org .elasticsearch .index .mapper .SourceValueFetcher ;
3640import org .elasticsearch .index .mapper .TextSearchInfo ;
4044import org .elasticsearch .inference .WeightedTokensUtils ;
4145import org .elasticsearch .search .fetch .StoredFieldsSpec ;
4246import org .elasticsearch .search .lookup .Source ;
47+ import org .elasticsearch .xcontent .ConstructingObjectParser ;
48+ import org .elasticsearch .xcontent .DeprecationHandler ;
49+ import org .elasticsearch .xcontent .NamedXContentRegistry ;
50+ import org .elasticsearch .xcontent .ParseField ;
51+ import org .elasticsearch .xcontent .ToXContent ;
4352import org .elasticsearch .xcontent .XContentBuilder ;
53+ import org .elasticsearch .xcontent .XContentParser ;
4454import org .elasticsearch .xcontent .XContentParser .Token ;
55+ import org .elasticsearch .xcontent .XContentType ;
56+ import org .elasticsearch .xcontent .support .MapXContentParser ;
4557
4658import java .io .IOException ;
4759import java .io .UncheckedIOException ;
4860import java .util .LinkedHashMap ;
4961import java .util .List ;
5062import java .util .Map ;
63+ import java .util .Objects ;
5164import java .util .stream .Stream ;
5265
5366import static org .elasticsearch .index .query .AbstractQueryBuilder .DEFAULT_BOOST ;
67+ import static org .elasticsearch .xcontent .ConstructingObjectParser .optionalConstructorArg ;
5468
5569/**
5670 * A {@link FieldMapper} that exposes Lucene's {@link FeatureField} as a sparse
5973public class SparseVectorFieldMapper extends FieldMapper {
6074
// Type name of this field; also passed to notInMultiFields(...) when the TypeParser is built.
6175 public static final String CONTENT_TYPE = "sparse_vector" ;
// Name of the mapping parameter holding the index options; reused as the name of the
// Builder parameter and of the ConstructingObjectParser that reads it.
76+ public static final String SPARSE_VECTOR_INDEX_OPTIONS = "index_options" ;
6277
// Deprecation warning text logged by PARSER when the index was created before
// PREVIOUS_SPARSE_VECTOR_INDEX_VERSION.
6378 static final String ERROR_MESSAGE_7X = "[sparse_vector] field type in old 7.x indices is allowed to "
6479 + "contain [sparse_vector] fields, but they cannot be indexed or searched." ;
@@ -67,6 +82,10 @@ public class SparseVectorFieldMapper extends FieldMapper {
6782
// Local aliases for the IndexVersions constants this mapper compares against.
6883 static final IndexVersion NEW_SPARSE_VECTOR_INDEX_VERSION = IndexVersions .NEW_SPARSE_VECTOR ;
6984 static final IndexVersion SPARSE_VECTOR_IN_FIELD_NAMES_INDEX_VERSION = IndexVersions .SPARSE_VECTOR_IN_FIELD_NAMES_SUPPORT ;
// Version gates consulted by getDefaultPruningConfig(...) to decide whether token pruning
// is on by default: the first is the main-line gate, the second is the lower bound of the 8.x backport range.
85+ static final IndexVersion SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION = IndexVersions .SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT ;
86+ static final IndexVersion SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION_8_X = IndexVersions .SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT_BACKPORT_8_X ;
87+
// Node feature flag named "sparse_vector.index_options_supported".
// NOTE(review): presumably advertised so callers can check cluster-wide support for index_options - confirm where it is registered.
88+ public static final NodeFeature SPARSE_VECTOR_INDEX_OPTIONS_FEATURE = new NodeFeature ("sparse_vector.index_options_supported" );
7089
7190 private static SparseVectorFieldMapper toType (FieldMapper in ) {
7291 return (SparseVectorFieldMapper ) in ;
@@ -75,6 +94,15 @@ private static SparseVectorFieldMapper toType(FieldMapper in) {
7594 public static class Builder extends FieldMapper .Builder {
7695 private final Parameter <Boolean > stored = Parameter .storeParam (m -> toType (m ).fieldType ().isStored (), false );
7796 private final Parameter <Map <String , String >> meta = Parameter .metaParam ();
// Mapping parameter "index_options". Defaults to null, is parsed by parseIndexOptions(...),
// reads its initial value from an existing mapper's field type, and explicitly accepts null.
// The last two arguments serialize the value: XContentBuilder::field writes it to the mapping,
// Objects::toString renders it as a string.
// NOTE(review): the positional `true` is presumably the "updateable" flag, and the Objects::toString
// argument presumably feeds merge-conflict messages - confirm against FieldMapper.Parameter.
97+ private final Parameter <IndexOptions > indexOptions = new Parameter <>(
98+ SPARSE_VECTOR_INDEX_OPTIONS ,
99+ true ,
100+ () -> null ,
101+ (n , c , o ) -> parseIndexOptions (c , o ),
102+ m -> toType (m ).fieldType ().indexOptions ,
103+ XContentBuilder ::field ,
104+ Objects ::toString
105+ ).acceptsNull ();
78106
79107 public Builder (String name ) {
80108 super (name );
@@ -87,19 +115,54 @@ public Builder setStored(boolean value) {
87115
88116 @ Override
89117 protected Parameter <?>[] getParameters () {
90- return new Parameter <?>[] { stored , meta };
118+ return new Parameter <?>[] { stored , meta , indexOptions };
91119 }
92120
93121 @ Override
94122 public SparseVectorFieldMapper build (MapperBuilderContext context ) {
95123 return new SparseVectorFieldMapper (
96124 leafName (),
97- new SparseVectorFieldType (context .buildFullName (leafName ()), stored .getValue (), meta .getValue ()),
125+ new SparseVectorFieldType (context .buildFullName (leafName ()), stored .getValue (), meta .getValue (), indexOptions . getValue () ),
98126 builderParams (this , context )
99127 );
100128 }
101129 }
102130
131+ public IndexOptions getIndexOptions () {
132+ return fieldType ().getIndexOptions ();
133+ }
134+
// Parser for the "index_options" object. Both constructor arguments are optional, so an absent
// field reaches the IndexOptions constructor as null: args[0] is the "prune" boolean and
// args[1] is the "pruning_config" object, matching the declaration order in the static block below.
135+ private static final ConstructingObjectParser <IndexOptions , Void > INDEX_OPTIONS_PARSER = new ConstructingObjectParser <>(
136+ SPARSE_VECTOR_INDEX_OPTIONS ,
137+ args -> new IndexOptions ((Boolean ) args [0 ], (TokenPruningConfig ) args [1 ])
138+ );
139+
// Declaration order here fixes the positions of args[0] and args[1] used above.
140+ static {
141+ INDEX_OPTIONS_PARSER .declareBoolean (optionalConstructorArg (), IndexOptions .PRUNE_FIELD_NAME );
142+ INDEX_OPTIONS_PARSER .declareObject (optionalConstructorArg (), TokenPruningConfig .PARSER , IndexOptions .PRUNING_CONFIG_FIELD_NAME );
143+ }
144+
145+ private static SparseVectorFieldMapper .IndexOptions parseIndexOptions (MappingParserContext context , Object propNode ) {
146+ if (propNode == null ) {
147+ return null ;
148+ }
149+
150+ Map <String , Object > indexOptionsMap = XContentMapValues .nodeMapValue (propNode , SPARSE_VECTOR_INDEX_OPTIONS );
151+
152+ XContentParser parser = new MapXContentParser (
153+ NamedXContentRegistry .EMPTY ,
154+ DeprecationHandler .IGNORE_DEPRECATIONS ,
155+ indexOptionsMap ,
156+ XContentType .JSON
157+ );
158+
159+ try {
160+ return INDEX_OPTIONS_PARSER .parse (parser , null );
161+ } catch (IOException e ) {
162+ throw new UncheckedIOException (e );
163+ }
164+ }
165+
103166 public static final TypeParser PARSER = new TypeParser ((n , c ) -> {
104167 if (c .indexVersionCreated ().before (PREVIOUS_SPARSE_VECTOR_INDEX_VERSION )) {
105168 deprecationLogger .warn (DeprecationCategory .MAPPINGS , "sparse_vector" , ERROR_MESSAGE_7X );
@@ -111,9 +174,19 @@ public SparseVectorFieldMapper build(MapperBuilderContext context) {
111174 }, notInMultiFields (CONTENT_TYPE ));
112175
113176 public static final class SparseVectorFieldType extends MappedFieldType {
/** Options parsed from the mapping's {@code index_options}; {@code null} when none were supplied. */
private final IndexOptions indexOptions;

/**
 * Creates a field type with no index options.
 *
 * @param name     full field name
 * @param isStored whether the field is stored
 * @param meta     field metadata
 */
public SparseVectorFieldType(String name, boolean isStored, Map<String, String> meta) {
    this(name, isStored, meta, null);
}

/**
 * Creates a field type with optional index options.
 *
 * @param name         full field name
 * @param isStored     whether the field is stored
 * @param meta         field metadata
 * @param indexOptions index options for this field, or {@code null} for none
 */
public SparseVectorFieldType(
    String name,
    boolean isStored,
    Map<String, String> meta,
    @Nullable SparseVectorFieldMapper.IndexOptions indexOptions
) {
    super(name, true, isStored, false, TextSearchInfo.SIMPLE_MATCH_ONLY, meta);
    this.indexOptions = indexOptions;
}

/**
 * @return the index options given at construction, or {@code null} when there are none
 */
public IndexOptions getIndexOptions() {
    return this.indexOptions;
}
118191
119192 @ Override
@@ -155,14 +228,54 @@ public Query finalizeSparseVectorQuery(
155228 SearchExecutionContext context ,
156229 String fieldName ,
157230 List <WeightedToken > queryVectors ,
158- boolean shouldPruneTokens ,
159- TokenPruningConfig tokenPruningConfig
231+ Boolean shouldPruneTokensFromQuery ,
232+ TokenPruningConfig tokenPruningConfigFromQuery
160233 ) throws IOException {
161- return (shouldPruneTokens )
162- ? WeightedTokensUtils .queryBuilderWithPrunedTokens (fieldName , tokenPruningConfig , queryVectors , this , context )
234+ TokenPruningConfig pruningConfig = null ;
235+
236+ if (shouldPruneTokensFromQuery != null ) {
237+ // if this is not null, the query is overriding the index config
238+ pruningConfig = shouldPruneTokensFromQuery ? tokenPruningConfigFromQuery : null ;
239+ } else {
240+ // check and see if we explicitly do not prune in the index_options
241+ boolean explicitlyDoNotPrune = this .indexOptions != null
242+ && this .indexOptions .prune != null
243+ && this .indexOptions .prune == false ;
244+
245+ if (explicitlyDoNotPrune == false ) {
246+ // get the explicit pruning config from the index_options if available
247+ pruningConfig = this .indexOptions != null ? this .indexOptions .pruningConfig : null ;
248+
249+ // if we're still null, set the default based on the index version
250+ // newer index versions default to true, while older is false
251+ pruningConfig = pruningConfig == null ? getDefaultPruningConfig (context ) : pruningConfig ;
252+ }
253+ }
254+
255+ return (pruningConfig != null )
256+ ? WeightedTokensUtils .queryBuilderWithPrunedTokens (fieldName , pruningConfig , queryVectors , this , context )
163257 : WeightedTokensUtils .queryBuilderWithAllTokens (fieldName , queryVectors , this , context );
164258 }
165259
260+ private TokenPruningConfig getDefaultPruningConfig (SearchExecutionContext context ) {
261+ IndexVersion indexVersion = context .indexVersionCreated ();
262+
263+ if (indexVersion .after (SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION )) {
264+ // default pruning for 9.1.0+ is true for this index
265+ return new TokenPruningConfig ();
266+ }
267+
268+ if (indexVersion .between (SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_VERSION_8_X , IndexVersions .UPGRADE_TO_LUCENE_10_0_0 ))
269+ {
270+ // default pruning for 8.19.0+ is true for this index
271+ return new TokenPruningConfig ();
272+ }
273+
274+ // the index version is before we added index_options support
275+ // so pruning is off by default
276+ return null ;
277+ }
278+
166279 private static String indexedValueForSearch (Object value ) {
167280 if (value instanceof BytesRef ) {
168281 return ((BytesRef ) value ).utf8ToString ();
@@ -378,4 +491,71 @@ public void reset() {
378491 }
379492 }
380493
494+ public static class IndexOptions implements ToXContent {
495+ public static final ParseField PRUNE_FIELD_NAME = new ParseField ("prune" );
496+ public static final ParseField PRUNING_CONFIG_FIELD_NAME = new ParseField ("pruning_config" );
497+
498+ final Boolean prune ;
499+ final TokenPruningConfig pruningConfig ;
500+
501+ IndexOptions (@ Nullable Boolean prune , @ Nullable TokenPruningConfig pruningConfig ) {
502+ if (pruningConfig != null && (prune == null || prune == false )) {
503+ throw new IllegalArgumentException (
504+ "["
505+ + SPARSE_VECTOR_INDEX_OPTIONS
506+ + "] field ["
507+ + PRUNING_CONFIG_FIELD_NAME .getPreferredName ()
508+ + "] should only be set if ["
509+ + PRUNE_FIELD_NAME .getPreferredName ()
510+ + "] is set to true"
511+ );
512+ }
513+
514+ this .prune = prune ;
515+ this .pruningConfig = pruningConfig ;
516+ }
517+
518+ public Boolean getPrune () {
519+ return prune ;
520+ }
521+
522+ public TokenPruningConfig getPruningConfig () {
523+ return pruningConfig ;
524+ }
525+
526+ @ Override
527+ public XContentBuilder toXContent (XContentBuilder builder , Params params ) throws IOException {
528+ builder .startObject ();
529+
530+ if (prune != null ) {
531+ builder .field (PRUNE_FIELD_NAME .getPreferredName (), prune );
532+ }
533+ if (pruningConfig != null ) {
534+ builder .field (PRUNING_CONFIG_FIELD_NAME .getPreferredName (), pruningConfig );
535+ }
536+
537+ builder .endObject ();
538+ return builder ;
539+ }
540+
541+ @ Override
542+ public final boolean equals (Object other ) {
543+ if (other == this ) {
544+ return true ;
545+ }
546+
547+ if (other == null || getClass () != other .getClass ()) {
548+ return false ;
549+ }
550+
551+ IndexOptions otherAsIndexOptions = (IndexOptions ) other ;
552+ return Objects .equals (prune , otherAsIndexOptions .prune ) && Objects .equals (pruningConfig , otherAsIndexOptions .pruningConfig );
553+ }
554+
555+ @ Override
556+ public final int hashCode () {
557+ return Objects .hash (prune , pruningConfig );
558+ }
559+ }
560+
381561}
0 commit comments