Skip to content

Commit d421439

Browse files
committed
semantic_text add index_options
1 parent 5023cdd commit d421439

File tree

4 files changed

+184
-10
lines changed

4 files changed

+184
-10
lines changed

server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1191,7 +1191,7 @@ public abstract static class IndexOptions implements ToXContent {
11911191

11921192
abstract KnnVectorsFormat getVectorsFormat(ElementType elementType);
11931193

1194-
final void validateElementType(ElementType elementType) {
1194+
public final void validateElementType(ElementType elementType) {
11951195
if (type.supportsElementType(elementType) == false) {
11961196
throw new IllegalArgumentException(
11971197
"[element_type] cannot be [" + elementType.toString() + "] when using index type [" + type + "]"
@@ -2324,6 +2324,10 @@ public FieldMapper.Builder getMergeBuilder() {
23242324
return new Builder(leafName(), indexCreatedVersion).init(this);
23252325
}
23262326

2327+
public IndexOptions indexOptions() {
2328+
return indexOptions;
2329+
}
2330+
23272331
public static IndexOptions parseIndexOptions(String fieldName, Object propNode) {
23282332
@SuppressWarnings("unchecked")
23292333
Map<String, ?> indexOptionsMap = (Map<String, ?>) propNode;

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -997,7 +997,11 @@ private static Mapper.Builder createEmbeddingsField(
997997
}
998998
denseVectorMapperBuilder.dimensions(modelSettings.dimensions());
999999
denseVectorMapperBuilder.elementType(modelSettings.elementType());
1000-
denseVectorMapperBuilder.indexOptions(indexOptions);
1000+
if (indexOptions != null) {
1001+
indexOptions.validateDimension(modelSettings.dimensions());
1002+
indexOptions.validateElementType(modelSettings.elementType());
1003+
denseVectorMapperBuilder.indexOptions(indexOptions);
1004+
}
10011005

10021006
yield denseVectorMapperBuilder;
10031007
}

x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java

Lines changed: 119 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323
import org.apache.lucene.search.join.BitSetProducer;
2424
import org.apache.lucene.search.join.QueryBitSetProducer;
2525
import org.apache.lucene.search.join.ScoreMode;
26-
import org.elasticsearch.action.admin.indices.mapping.put.PutMappingRequest;
2726
import org.elasticsearch.cluster.metadata.IndexMetadata;
2827
import org.elasticsearch.common.CheckedBiConsumer;
2928
import org.elasticsearch.common.CheckedBiFunction;
@@ -73,6 +72,7 @@
7372

7473
import java.io.IOException;
7574
import java.util.Collection;
75+
import java.util.HashMap;
7676
import java.util.HashSet;
7777
import java.util.LinkedHashMap;
7878
import java.util.List;
@@ -879,17 +879,29 @@ private MapperService mapperServiceForFieldWithModelSettings(
879879
String searchInferenceId,
880880
MinimalServiceSettings modelSettings
881881
) throws IOException {
882-
String mappingParams = "type=semantic_text,inference_id=" + inferenceId;
882+
return mapperServiceForFieldWithModelSettingsAndIndexOptions(fieldName, inferenceId, searchInferenceId, modelSettings, null);
883+
}
884+
885+
private MapperService mapperServiceForFieldWithModelSettingsAndIndexOptions(
886+
String fieldName,
887+
String inferenceId,
888+
String searchInferenceId,
889+
MinimalServiceSettings modelSettings,
890+
DenseVectorFieldMapper.IndexOptions indexOptions
891+
) throws IOException {
892+
XContentBuilder mappingBuilder = JsonXContent.contentBuilder().startObject();
893+
mappingBuilder.startObject("properties").startObject(fieldName).field("type", "semantic_text").field("inference_id", inferenceId);
883894
if (searchInferenceId != null) {
884-
mappingParams += ",search_inference_id=" + searchInferenceId;
895+
mappingBuilder.field("search_inference_id", searchInferenceId);
885896
}
897+
if (indexOptions != null) {
898+
mappingBuilder.field("index_options", indexOptions);
899+
}
900+
901+
mappingBuilder.endObject().endObject().endObject();
886902

887903
MapperService mapperService = createMapperService(mapping(b -> {}), useLegacyFormat);
888-
mapperService.merge(
889-
"_doc",
890-
new CompressedXContent(Strings.toString(PutMappingRequest.simpleMapping(fieldName, mappingParams))),
891-
MapperService.MergeReason.MAPPING_UPDATE
892-
);
904+
mapperService.merge("_doc", new CompressedXContent(Strings.toString(mappingBuilder)), MapperService.MergeReason.MAPPING_UPDATE);
893905

894906
SemanticTextField semanticTextField = new SemanticTextField(
895907
useLegacyFormat,
@@ -951,6 +963,105 @@ public void testExistsQueryDenseVector() throws IOException {
951963
assertThat(existsQuery, instanceOf(ESToParentBlockJoinQuery.class));
952964
}
953965

966+
public void testDenseVectorIndexOptions() throws IOException {
967+
final String fieldName = "field";
968+
final String inferenceId = "test_service";
969+
970+
List<DenseVectorFieldMapper.IndexOptions> indexOptionsList = List.of(
971+
DenseVectorFieldMapper.parseIndexOptions(fieldName, new HashMap<>(Map.of("type", "hnsw"))),
972+
DenseVectorFieldMapper.parseIndexOptions(fieldName, new HashMap<>(Map.of("type", "int8_hnsw"))),
973+
DenseVectorFieldMapper.parseIndexOptions(fieldName, new HashMap<>(Map.of("type", "int4_hnsw"))),
974+
DenseVectorFieldMapper.parseIndexOptions(fieldName, new HashMap<>(Map.of("type", "bbq_hnsw"))),
975+
DenseVectorFieldMapper.parseIndexOptions(fieldName, new HashMap<>(Map.of("type", "flat"))),
976+
DenseVectorFieldMapper.parseIndexOptions(fieldName, new HashMap<>(Map.of("type", "int8_flat"))),
977+
DenseVectorFieldMapper.parseIndexOptions(fieldName, new HashMap<>(Map.of("type", "int4_flat"))),
978+
DenseVectorFieldMapper.parseIndexOptions(fieldName, new HashMap<>(Map.of("type", "bbq_flat"))),
979+
DenseVectorFieldMapper.parseIndexOptions(fieldName, new HashMap<>(Map.of("type", "hnsw", "m", 32, "ef_construction", 200)))
980+
);
981+
982+
for (DenseVectorFieldMapper.IndexOptions indexOptions : indexOptionsList) {
983+
BiConsumer<MapperService, DenseVectorFieldMapper.IndexOptions> assertMapperService = (m, e) -> {
984+
Mapper mapper = m.mappingLookup().getMapper(fieldName);
985+
assertThat(mapper, instanceOf(SemanticTextFieldMapper.class));
986+
SemanticTextFieldMapper semanticTextFieldMapper = (SemanticTextFieldMapper) mapper;
987+
988+
FieldMapper fieldMapper = semanticTextFieldMapper.fieldType().getEmbeddingsField();
989+
assertThat(fieldMapper, instanceOf(DenseVectorFieldMapper.class));
990+
DenseVectorFieldMapper denseVectorFieldMapper = (DenseVectorFieldMapper) fieldMapper;
991+
992+
assertThat(denseVectorFieldMapper.indexOptions(), equalTo(e));
993+
};
994+
995+
MapperService floatMapperService = mapperServiceForFieldWithModelSettingsAndIndexOptions(
996+
fieldName,
997+
inferenceId,
998+
inferenceId,
999+
new MinimalServiceSettings(
1000+
TaskType.TEXT_EMBEDDING,
1001+
1024,
1002+
SimilarityMeasure.COSINE,
1003+
DenseVectorFieldMapper.ElementType.FLOAT
1004+
),
1005+
indexOptions
1006+
);
1007+
assertMapperService.accept(floatMapperService, indexOptions);
1008+
}
1009+
}
1010+
1011+
public void testDenseVectorIndexOptionsValidation() {
1012+
final String fieldName = "field";
1013+
final String inferenceId = "test_service";
1014+
1015+
{
1016+
DenseVectorFieldMapper.IndexOptions indexOptions = DenseVectorFieldMapper.parseIndexOptions(
1017+
fieldName,
1018+
new HashMap<>(Map.of("type", "int8_hnsw"))
1019+
);
1020+
MinimalServiceSettings invalidSettings = new MinimalServiceSettings(
1021+
TaskType.TEXT_EMBEDDING,
1022+
1024,
1023+
SimilarityMeasure.L2_NORM,
1024+
DenseVectorFieldMapper.ElementType.BYTE
1025+
);
1026+
1027+
Exception e = expectThrows(
1028+
DocumentParsingException.class,
1029+
() -> mapperServiceForFieldWithModelSettingsAndIndexOptions(
1030+
fieldName,
1031+
inferenceId,
1032+
inferenceId,
1033+
invalidSettings,
1034+
indexOptions
1035+
)
1036+
);
1037+
assertThat(e.getCause().getMessage(), containsString("cannot be [byte] when using index type [int8_hnsw]"));
1038+
}
1039+
1040+
{
1041+
DenseVectorFieldMapper.IndexOptions indexOptions = DenseVectorFieldMapper.parseIndexOptions(
1042+
fieldName,
1043+
new HashMap<>(Map.of("type", "bbq_hnsw"))
1044+
);
1045+
MinimalServiceSettings invalidSettings = new MinimalServiceSettings(
1046+
TaskType.TEXT_EMBEDDING,
1047+
10,
1048+
SimilarityMeasure.COSINE,
1049+
DenseVectorFieldMapper.ElementType.BYTE
1050+
);
1051+
Exception e = expectThrows(
1052+
DocumentParsingException.class,
1053+
() -> mapperServiceForFieldWithModelSettingsAndIndexOptions(
1054+
fieldName,
1055+
inferenceId,
1056+
inferenceId,
1057+
invalidSettings,
1058+
indexOptions
1059+
)
1060+
);
1061+
assertThat(e.getCause().getMessage(), containsString("bbq_hnsw does not support dimensions fewer than 64"));
1062+
}
1063+
}
1064+
9541065
@Override
9551066
protected void assertExistsQuery(MappedFieldType fieldType, Query query, LuceneDocument fields) {
9561067
// Until a doc is indexed, the query is rewritten as match no docs

x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/10_semantic_text_field_mapping.yml

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,61 @@ setup:
192192
- match: { "test-index.mappings.properties.dense_field.model_settings.task_type": text_embedding }
193193
- length: { "test-index.mappings.properties.dense_field": 3 }
194194

195+
---
196+
"Indexes dense vector document with index_options":
197+
198+
- do:
199+
indices.create:
200+
index: test-index-options
201+
body:
202+
mappings:
203+
properties:
204+
dense_field:
205+
type: semantic_text
206+
inference_id: dense-inference-id
207+
index_options:
208+
type: "hnsw"
209+
m: 24
210+
ef_construction: 200
211+
212+
- do:
213+
index:
214+
index: test-index-options
215+
id: doc_2
216+
body:
217+
dense_field:
218+
text: "these are not the droids you're looking for. He's free to go around"
219+
inference:
220+
inference_id: "dense-inference-id"
221+
model_settings:
222+
task_type: "text_embedding"
223+
dimensions: 4
224+
similarity: "cosine"
225+
element_type: "float"
226+
index_options:
227+
type: "int8_hnsw"
228+
m: 24
229+
ef_construction: 100
230+
confidence_interval: 0.9
231+
chunks:
232+
- text: "these are not the droids you're looking for"
233+
embeddings: [0.04673296958208084, -0.03237321600317955, -0.02543032355606556, 0.056035321205854416]
234+
- text: "He's free to go around"
235+
embeddings: [0.00641461368650198, -0.0016253676731139421, -0.05126338079571724, 0.053438711911439896]
236+
237+
# Checks mapping is updated when first doc arrives
238+
- do:
239+
indices.get_mapping:
240+
index: test-index-options
241+
242+
- match: { "test-index-options.mappings.properties.dense_field.type": "semantic_text" }
243+
- match: { "test-index-options.mappings.properties.dense_field.inference_id": "dense-inference-id" }
244+
- match: { "test-index-options.mappings.properties.dense_field.model_settings.task_type": "text_embedding" }
245+
- match: { "test-index-options.mappings.properties.dense_field.index_options.type": "hnsw" }
246+
- match: { "test-index-options.mappings.properties.dense_field.index_options.m": 24 }
247+
- match: { "test-index-options.mappings.properties.dense_field.index_options.ef_construction": 200 }
248+
- length: { "test-index-options.mappings.properties.dense_field": 4 }
249+
195250
---
196251
"Field caps with text embedding":
197252
- requires:

0 commit comments

Comments
 (0)