6969import org .elasticsearch .inference .InferenceResults ;
7070import org .elasticsearch .inference .MinimalServiceSettings ;
7171import org .elasticsearch .inference .SimilarityMeasure ;
72+ import org .elasticsearch .inference .TaskType ;
7273import org .elasticsearch .search .fetch .StoredFieldsSpec ;
7374import org .elasticsearch .search .lookup .Source ;
7475import org .elasticsearch .search .vectors .KnnVectorQueryBuilder ;
@@ -139,6 +140,9 @@ public class SemanticTextFieldMapper extends FieldMapper implements InferenceFie
139140 "semantic_text.exclude_sub_fields_from_field_caps"
140141 );
141142 public static final NodeFeature SEMANTIC_TEXT_INDEX_OPTIONS = new NodeFeature ("semantic_text.index_options" );
143+ public static final NodeFeature SEMANTIC_TEXT_INDEX_OPTIONS_WITH_DEFAULTS = new NodeFeature (
144+ "semantic_text.index_options_with_defaults"
145+ );
142146
143147 public static final String CONTENT_TYPE = "semantic_text" ;
144148 public static final String DEFAULT_ELSER_2_INFERENCE_ID = DEFAULT_ELSER_ID ;
@@ -166,19 +170,9 @@ public static BiConsumer<String, MappingParserContext> validateParserContext(Str
166170 public static class Builder extends FieldMapper .Builder {
167171 private final ModelRegistry modelRegistry ;
168172 private final boolean useLegacyFormat ;
173+ private final IndexVersion indexVersionCreated ;
169174
170- private final Parameter <String > inferenceId = Parameter .stringParam (
171- INFERENCE_ID_FIELD ,
172- false ,
173- mapper -> ((SemanticTextFieldType ) mapper .fieldType ()).inferenceId ,
174- DEFAULT_ELSER_2_INFERENCE_ID
175- ).addValidator (v -> {
176- if (Strings .isEmpty (v )) {
177- throw new IllegalArgumentException (
178- "[" + INFERENCE_ID_FIELD + "] on mapper [" + leafName () + "] of type [" + CONTENT_TYPE + "] must not be empty"
179- );
180- }
181- }).alwaysSerialize ();
175+ private final Parameter <String > inferenceId ;
182176
183177 private final Parameter <String > searchInferenceId = Parameter .stringParam (
184178 SEARCH_INFERENCE_ID_FIELD ,
@@ -193,25 +187,9 @@ public static class Builder extends FieldMapper.Builder {
193187 }
194188 });
195189
196- private final Parameter <MinimalServiceSettings > modelSettings = new Parameter <>(
197- MODEL_SETTINGS_FIELD ,
198- true ,
199- () -> null ,
200- (n , c , o ) -> SemanticTextField .parseModelSettingsFromMap (o ),
201- mapper -> ((SemanticTextFieldType ) mapper .fieldType ()).modelSettings ,
202- XContentBuilder ::field ,
203- Objects ::toString
204- ).acceptsNull ().setMergeValidator (SemanticTextFieldMapper ::canMergeModelSettings );
190+ private final Parameter <MinimalServiceSettings > modelSettings ;
205191
206- private final Parameter <SemanticTextIndexOptions > indexOptions = new Parameter <>(
207- INDEX_OPTIONS_FIELD ,
208- true ,
209- () -> null ,
210- (n , c , o ) -> parseIndexOptionsFromMap (n , o , c .indexVersionCreated ()),
211- mapper -> ((SemanticTextFieldType ) mapper .fieldType ()).indexOptions ,
212- XContentBuilder ::field ,
213- Objects ::toString
214- ).acceptsNull ();
192+ private final Parameter <SemanticTextIndexOptions > indexOptions ;
215193
216194 @ SuppressWarnings ("unchecked" )
217195 private final Parameter <ChunkingSettings > chunkingSettings = new Parameter <>(
@@ -248,6 +226,50 @@ public Builder(
248226 super (name );
249227 this .modelRegistry = modelRegistry ;
250228 this .useLegacyFormat = InferenceMetadataFieldsMapper .isEnabled (indexSettings .getSettings ()) == false ;
229+ this .indexVersionCreated = indexSettings .getIndexVersionCreated ();
230+
231+ this .inferenceId = Parameter .stringParam (
232+ INFERENCE_ID_FIELD ,
233+ false ,
234+ mapper -> ((SemanticTextFieldType ) mapper .fieldType ()).inferenceId ,
235+ DEFAULT_ELSER_2_INFERENCE_ID
236+ ).addValidator (v -> {
237+ if (Strings .isEmpty (v )) {
238+ throw new IllegalArgumentException (
239+ "[" + INFERENCE_ID_FIELD + "] on mapper [" + leafName () + "] of type [" + CONTENT_TYPE + "] must not be empty"
240+ );
241+ }
242+ }).alwaysSerialize ();
243+
244+ this .modelSettings = new Parameter <>(
245+ MODEL_SETTINGS_FIELD ,
246+ true ,
247+ () -> null ,
248+ (n , c , o ) -> SemanticTextField .parseModelSettingsFromMap (o ),
249+ mapper -> ((SemanticTextFieldType ) mapper .fieldType ()).modelSettings ,
250+ XContentBuilder ::field ,
251+ Objects ::toString
252+ ).acceptsNull ().setMergeValidator (SemanticTextFieldMapper ::canMergeModelSettings );
253+
254+ this .indexOptions = new Parameter <>(
255+ INDEX_OPTIONS_FIELD ,
256+ true ,
257+ () -> null ,
258+ (n , c , o ) -> parseIndexOptionsFromMap (n , o , c .indexVersionCreated ()),
259+ mapper -> ((SemanticTextFieldType ) mapper .fieldType ()).indexOptions ,
260+ (b , n , v ) -> {
261+ if (v == null ) {
262+ MinimalServiceSettings resolvedModelSettings = modelSettings .get () != null
263+ ? modelSettings .get ()
264+ : modelRegistry .getMinimalServiceSettings (inferenceId .get ());
265+ b .field (INDEX_OPTIONS_FIELD , defaultIndexOptions (indexVersionCreated , resolvedModelSettings ));
266+ } else {
267+ b .field (INDEX_OPTIONS_FIELD , v );
268+ }
269+ },
270+ Objects ::toString
271+ ).acceptsNull ();
272+
251273 this .inferenceFieldBuilder = c -> {
252274 // Resolve the model setting from the registry if it has not been set yet.
253275 var resolvedModelSettings = modelSettings .get () != null ? modelSettings .get () : getResolvedModelSettings (c , false );
@@ -365,8 +387,11 @@ public SemanticTextFieldMapper build(MapperBuilderContext context) {
365387 validateServiceSettings (modelSettings .get (), resolvedModelSettings );
366388 }
367389
368- if (context .getMergeReason () != MapperService .MergeReason .MAPPING_RECOVERY && indexOptions .get () != null ) {
369- validateIndexOptions (indexOptions .get (), inferenceId .getValue (), resolvedModelSettings );
390+ // If index_options are specified by the user, we will validate them against the model settings to ensure compatibility.
391+ // We do not serialize or otherwise store model settings at this time, this happens when the underlying vector field is created.
392+ SemanticTextIndexOptions builderIndexOptions = indexOptions .get ();
393+ if (context .getMergeReason () != MapperService .MergeReason .MAPPING_RECOVERY && builderIndexOptions != null ) {
394+ validateIndexOptions (builderIndexOptions , inferenceId .getValue (), resolvedModelSettings );
370395 }
371396
372397 final String fullName = context .buildFullName (leafName ());
@@ -1166,6 +1191,9 @@ private static Mapper.Builder createEmbeddingsField(
11661191 }
11671192 denseVectorMapperBuilder .dimensions (modelSettings .dimensions ());
11681193 denseVectorMapperBuilder .elementType (modelSettings .elementType ());
1194+ // Here is where we persist index_options. If they are specified by the user, we will use those index_options,
1195+ // otherwise we will determine if we can set default index options. If we can't, we won't persist any index_options
1196+ // and the field will use the defaults for the dense_vector field.
11691197 if (indexOptions != null ) {
11701198 DenseVectorFieldMapper .DenseVectorIndexOptions denseVectorIndexOptions =
11711199 (DenseVectorFieldMapper .DenseVectorIndexOptions ) indexOptions .indexOptions ();
@@ -1208,7 +1236,6 @@ static DenseVectorFieldMapper.DenseVectorIndexOptions defaultDenseVectorIndexOpt
12081236 // As embedding models for text perform better with BBQ, we aggressively default semantic_text fields to use optimized index
12091237 // options
12101238 if (indexVersionDefaultsToBbqHnsw (indexVersionCreated )) {
1211-
12121239 DenseVectorFieldMapper .DenseVectorIndexOptions defaultBbqHnswIndexOptions = defaultBbqHnswDenseVectorIndexOptions ();
12131240 return defaultBbqHnswIndexOptions .validate (modelSettings .elementType (), modelSettings .dimensions (), false )
12141241 ? defaultBbqHnswIndexOptions
@@ -1230,11 +1257,24 @@ static DenseVectorFieldMapper.DenseVectorIndexOptions defaultBbqHnswDenseVectorI
12301257 return new DenseVectorFieldMapper .BBQHnswIndexOptions (m , efConstruction , rescoreVector );
12311258 }
12321259
1233- static SemanticTextIndexOptions defaultBbqHnswSemanticTextIndexOptions () {
1234- return new SemanticTextIndexOptions (
1235- SemanticTextIndexOptions .SupportedIndexOptions .DENSE_VECTOR ,
1236- defaultBbqHnswDenseVectorIndexOptions ()
1237- );
1260+ static SemanticTextIndexOptions defaultIndexOptions (IndexVersion indexVersionCreated , MinimalServiceSettings modelSettings ) {
1261+
1262+ if (modelSettings == null ) {
1263+ return null ;
1264+ }
1265+
1266+ SemanticTextIndexOptions defaultIndexOptions = null ;
1267+ if (modelSettings .taskType () == TaskType .TEXT_EMBEDDING ) {
1268+ DenseVectorFieldMapper .DenseVectorIndexOptions denseVectorIndexOptions = defaultDenseVectorIndexOptions (
1269+ indexVersionCreated ,
1270+ modelSettings
1271+ );
1272+ defaultIndexOptions = denseVectorIndexOptions == null
1273+ ? null
1274+ : new SemanticTextIndexOptions (SemanticTextIndexOptions .SupportedIndexOptions .DENSE_VECTOR , denseVectorIndexOptions );
1275+ }
1276+
1277+ return defaultIndexOptions ;
12381278 }
12391279
12401280 private static boolean canMergeModelSettings (MinimalServiceSettings previous , MinimalServiceSettings current , Conflicts conflicts ) {
0 commit comments