Skip to content

Commit 4da3c7e

Browse files
committed
Merge remote-tracking branch 'upstream/main' into fix-match-only-text
2 parents 41ed0f0 + d6e2b57 commit 4da3c7e

File tree

11 files changed

+427
-71
lines changed

11 files changed

+427
-71
lines changed

docs/changelog/129967.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 129967
2+
summary: Support returning default `index_options` for `semantic_text` fields when
3+
`include_defaults` is true
4+
area: Search
5+
type: bug
6+
issues: []

muted-tests.yml

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -524,9 +524,6 @@ tests:
524524
- class: org.elasticsearch.search.query.VectorIT
525525
method: testFilteredQueryStrategy
526526
issue: https://github.com/elastic/elasticsearch/issues/129517
527-
- class: org.elasticsearch.test.apmintegration.TracesApmIT
528-
method: testApmIntegration
529-
issue: https://github.com/elastic/elasticsearch/issues/129651
530527
- class: org.elasticsearch.snapshots.SnapshotShutdownIT
531528
method: testSnapshotShutdownProgressTracker
532529
issue: https://github.com/elastic/elasticsearch/issues/129752
@@ -555,8 +552,6 @@ tests:
555552
- class: org.elasticsearch.xpack.inference.qa.mixed.CohereServiceMixedIT
556553
method: testCohereEmbeddings
557554
issue: https://github.com/elastic/elasticsearch/issues/130010
558-
- class: org.elasticsearch.xpack.esql.qa.mixed.MixedClusterEsqlSpecIT
559-
issue: https://github.com/elastic/elasticsearch/issues/128224
560555
- class: org.elasticsearch.xpack.esql.qa.multi_node.GenerativeIT
561556
method: test
562557
issue: https://github.com/elastic/elasticsearch/issues/130067

test/external-modules/apm-integration/src/javaRestTest/java/org/elasticsearch/test/apmintegration/RecordingApmServer.java

Lines changed: 22 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
import org.apache.logging.log4j.LogManager;
1616
import org.apache.logging.log4j.Logger;
1717
import org.elasticsearch.core.SuppressForbidden;
18-
import org.elasticsearch.xcontent.spi.XContentProvider;
1918
import org.junit.rules.ExternalResource;
2019

2120
import java.io.BufferedReader;
@@ -25,7 +24,6 @@
2524
import java.net.InetAddress;
2625
import java.net.InetSocketAddress;
2726
import java.nio.charset.StandardCharsets;
28-
import java.util.ArrayList;
2927
import java.util.List;
3028
import java.util.concurrent.ArrayBlockingQueue;
3129
import java.util.concurrent.TimeUnit;
@@ -35,14 +33,12 @@
3533
public class RecordingApmServer extends ExternalResource {
3634
private static final Logger logger = LogManager.getLogger(RecordingApmServer.class);
3735

38-
private static final XContentProvider.FormatProvider XCONTENT = XContentProvider.provider().getJsonXContent();
39-
4036
final ArrayBlockingQueue<String> received = new ArrayBlockingQueue<>(1000);
4137

4238
private static HttpServer server;
4339
private final Thread messageConsumerThread = consumerThread();
4440
private volatile Consumer<String> consumer;
45-
private volatile boolean consumerRunning = true;
41+
private volatile boolean running = true;
4642

4743
@Override
4844
protected void before() throws Throwable {
@@ -56,7 +52,7 @@ protected void before() throws Throwable {
5652

5753
private Thread consumerThread() {
5854
return new Thread(() -> {
59-
while (consumerRunning) {
55+
while (running) {
6056
if (consumer != null) {
6157
try {
6258
String msg = received.poll(1L, TimeUnit.SECONDS);
@@ -74,28 +70,38 @@ private Thread consumerThread() {
7470

7571
@Override
7672
protected void after() {
73+
running = false;
7774
server.stop(1);
78-
consumerRunning = false;
75+
consumer = null;
7976
}
8077

8178
private void handle(HttpExchange exchange) throws IOException {
8279
try (exchange) {
83-
try {
84-
try (InputStream requestBody = exchange.getRequestBody()) {
85-
if (requestBody != null) {
86-
var read = readJsonMessages(requestBody);
87-
received.addAll(read);
80+
if (running) {
81+
try {
82+
try (InputStream requestBody = exchange.getRequestBody()) {
83+
if (requestBody != null) {
84+
var read = readJsonMessages(requestBody);
85+
received.addAll(read);
86+
}
8887
}
89-
}
9088

91-
} catch (RuntimeException e) {
92-
logger.warn("failed to parse request", e);
89+
} catch (Throwable t) {
90+
// The lifetime of HttpServer makes message handling "brittle": we need to start handling and recording received
91+
// messages before the test starts running. We should also stop handling them before the test ends (and the test
92+
// cluster is torn down), or we may run into IOException as the communication channel is interrupted.
93+
// Coordinating the lifecycle of the mock HttpServer and of the test ES cluster is difficult and error-prone, so
94+
// we just handle Throwable and don't care (log, but don't care): if we have an error in communicating to/from
95+
// the mock server while the test is running, the test would fail anyway as the expected messages will not arrive, and
96+
// if we have an error outside the test scope (before or after) that is OK.
97+
logger.warn("failed to parse request", t);
98+
}
9399
}
94100
exchange.sendResponseHeaders(201, 0);
95101
}
96102
}
97103

98-
private List<String> readJsonMessages(InputStream input) throws IOException {
104+
private List<String> readJsonMessages(InputStream input) {
99105
// parse NDJSON
100106
return new BufferedReader(new InputStreamReader(input, StandardCharsets.UTF_8)).lines().toList();
101107
}
@@ -104,14 +110,7 @@ public int getPort() {
104110
return server.getAddress().getPort();
105111
}
106112

107-
public List<String> getMessages() {
108-
List<String> list = new ArrayList<>(received.size());
109-
received.drainTo(list);
110-
return list;
111-
}
112-
113113
public void addMessageConsumer(Consumer<String> messageConsumer) {
114114
this.consumer = messageConsumer;
115115
}
116-
117116
}

test/external-modules/apm-integration/src/javaRestTest/java/org/elasticsearch/test/apmintegration/TracesApmIT.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,8 @@ public void testApmIntegration() throws Exception {
9191

9292
client().performRequest(nodeStatsRequest);
9393

94-
finished.await(30, TimeUnit.SECONDS);
94+
var completed = finished.await(30, TimeUnit.SECONDS);
95+
assertTrue("Timeout when waiting for assertions to complete", completed);
9596
assertThat(assertions, equalTo(Collections.emptySet()));
9697
}
9798

@@ -143,5 +144,4 @@ private Map<String, Object> parseMap(String message) {
143144
return Collections.emptyMap();
144145
}
145146
}
146-
147147
}

x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-colors.json

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,9 @@
1313
"type": "dense_vector",
1414
"similarity": "l2_norm",
1515
"index_options": {
16-
"type": "hnsw"
16+
"type": "hnsw",
17+
"m": 16,
18+
"ef_construction": 100
1719
}
1820
}
1921
}

x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-dense_vector.json

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,12 @@
55
},
66
"vector": {
77
"type": "dense_vector",
8-
"similarity": "l2_norm"
8+
"similarity": "l2_norm",
9+
"index_options": {
10+
"type": "hnsw",
11+
"m": 16,
12+
"ef_construction": 100
13+
}
914
}
1015
}
1116
}

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper.SEMANTIC_TEXT_EXCLUDE_SUB_FIELDS_FROM_FIELD_CAPS;
1919
import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper.SEMANTIC_TEXT_INDEX_OPTIONS;
20+
import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper.SEMANTIC_TEXT_INDEX_OPTIONS_WITH_DEFAULTS;
2021
import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper.SEMANTIC_TEXT_SUPPORT_CHUNKING_CONFIG;
2122
import static org.elasticsearch.xpack.inference.queries.SemanticKnnVectorQueryRewriteInterceptor.SEMANTIC_KNN_FILTER_FIX;
2223
import static org.elasticsearch.xpack.inference.queries.SemanticKnnVectorQueryRewriteInterceptor.SEMANTIC_KNN_VECTOR_QUERY_REWRITE_INTERCEPTION_SUPPORTED;
@@ -66,7 +67,8 @@ public Set<NodeFeature> getTestFeatures() {
6667
SEMANTIC_TEXT_MATCH_ALL_HIGHLIGHTER,
6768
SEMANTIC_TEXT_EXCLUDE_SUB_FIELDS_FROM_FIELD_CAPS,
6869
SEMANTIC_TEXT_INDEX_OPTIONS,
69-
COHERE_V2_API
70+
COHERE_V2_API,
71+
SEMANTIC_TEXT_INDEX_OPTIONS_WITH_DEFAULTS
7072
);
7173
}
7274
}

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java

Lines changed: 78 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@
6969
import org.elasticsearch.inference.InferenceResults;
7070
import org.elasticsearch.inference.MinimalServiceSettings;
7171
import org.elasticsearch.inference.SimilarityMeasure;
72+
import org.elasticsearch.inference.TaskType;
7273
import org.elasticsearch.search.fetch.StoredFieldsSpec;
7374
import org.elasticsearch.search.lookup.Source;
7475
import org.elasticsearch.search.vectors.KnnVectorQueryBuilder;
@@ -139,6 +140,9 @@ public class SemanticTextFieldMapper extends FieldMapper implements InferenceFie
139140
"semantic_text.exclude_sub_fields_from_field_caps"
140141
);
141142
public static final NodeFeature SEMANTIC_TEXT_INDEX_OPTIONS = new NodeFeature("semantic_text.index_options");
143+
public static final NodeFeature SEMANTIC_TEXT_INDEX_OPTIONS_WITH_DEFAULTS = new NodeFeature(
144+
"semantic_text.index_options_with_defaults"
145+
);
142146

143147
public static final String CONTENT_TYPE = "semantic_text";
144148
public static final String DEFAULT_ELSER_2_INFERENCE_ID = DEFAULT_ELSER_ID;
@@ -166,19 +170,9 @@ public static BiConsumer<String, MappingParserContext> validateParserContext(Str
166170
public static class Builder extends FieldMapper.Builder {
167171
private final ModelRegistry modelRegistry;
168172
private final boolean useLegacyFormat;
173+
private final IndexVersion indexVersionCreated;
169174

170-
private final Parameter<String> inferenceId = Parameter.stringParam(
171-
INFERENCE_ID_FIELD,
172-
false,
173-
mapper -> ((SemanticTextFieldType) mapper.fieldType()).inferenceId,
174-
DEFAULT_ELSER_2_INFERENCE_ID
175-
).addValidator(v -> {
176-
if (Strings.isEmpty(v)) {
177-
throw new IllegalArgumentException(
178-
"[" + INFERENCE_ID_FIELD + "] on mapper [" + leafName() + "] of type [" + CONTENT_TYPE + "] must not be empty"
179-
);
180-
}
181-
}).alwaysSerialize();
175+
private final Parameter<String> inferenceId;
182176

183177
private final Parameter<String> searchInferenceId = Parameter.stringParam(
184178
SEARCH_INFERENCE_ID_FIELD,
@@ -193,25 +187,9 @@ public static class Builder extends FieldMapper.Builder {
193187
}
194188
});
195189

196-
private final Parameter<MinimalServiceSettings> modelSettings = new Parameter<>(
197-
MODEL_SETTINGS_FIELD,
198-
true,
199-
() -> null,
200-
(n, c, o) -> SemanticTextField.parseModelSettingsFromMap(o),
201-
mapper -> ((SemanticTextFieldType) mapper.fieldType()).modelSettings,
202-
XContentBuilder::field,
203-
Objects::toString
204-
).acceptsNull().setMergeValidator(SemanticTextFieldMapper::canMergeModelSettings);
190+
private final Parameter<MinimalServiceSettings> modelSettings;
205191

206-
private final Parameter<SemanticTextIndexOptions> indexOptions = new Parameter<>(
207-
INDEX_OPTIONS_FIELD,
208-
true,
209-
() -> null,
210-
(n, c, o) -> parseIndexOptionsFromMap(n, o, c.indexVersionCreated()),
211-
mapper -> ((SemanticTextFieldType) mapper.fieldType()).indexOptions,
212-
XContentBuilder::field,
213-
Objects::toString
214-
).acceptsNull();
192+
private final Parameter<SemanticTextIndexOptions> indexOptions;
215193

216194
@SuppressWarnings("unchecked")
217195
private final Parameter<ChunkingSettings> chunkingSettings = new Parameter<>(
@@ -248,6 +226,50 @@ public Builder(
248226
super(name);
249227
this.modelRegistry = modelRegistry;
250228
this.useLegacyFormat = InferenceMetadataFieldsMapper.isEnabled(indexSettings.getSettings()) == false;
229+
this.indexVersionCreated = indexSettings.getIndexVersionCreated();
230+
231+
this.inferenceId = Parameter.stringParam(
232+
INFERENCE_ID_FIELD,
233+
false,
234+
mapper -> ((SemanticTextFieldType) mapper.fieldType()).inferenceId,
235+
DEFAULT_ELSER_2_INFERENCE_ID
236+
).addValidator(v -> {
237+
if (Strings.isEmpty(v)) {
238+
throw new IllegalArgumentException(
239+
"[" + INFERENCE_ID_FIELD + "] on mapper [" + leafName() + "] of type [" + CONTENT_TYPE + "] must not be empty"
240+
);
241+
}
242+
}).alwaysSerialize();
243+
244+
this.modelSettings = new Parameter<>(
245+
MODEL_SETTINGS_FIELD,
246+
true,
247+
() -> null,
248+
(n, c, o) -> SemanticTextField.parseModelSettingsFromMap(o),
249+
mapper -> ((SemanticTextFieldType) mapper.fieldType()).modelSettings,
250+
XContentBuilder::field,
251+
Objects::toString
252+
).acceptsNull().setMergeValidator(SemanticTextFieldMapper::canMergeModelSettings);
253+
254+
this.indexOptions = new Parameter<>(
255+
INDEX_OPTIONS_FIELD,
256+
true,
257+
() -> null,
258+
(n, c, o) -> parseIndexOptionsFromMap(n, o, c.indexVersionCreated()),
259+
mapper -> ((SemanticTextFieldType) mapper.fieldType()).indexOptions,
260+
(b, n, v) -> {
261+
if (v == null) {
262+
MinimalServiceSettings resolvedModelSettings = modelSettings.get() != null
263+
? modelSettings.get()
264+
: modelRegistry.getMinimalServiceSettings(inferenceId.get());
265+
b.field(INDEX_OPTIONS_FIELD, defaultIndexOptions(indexVersionCreated, resolvedModelSettings));
266+
} else {
267+
b.field(INDEX_OPTIONS_FIELD, v);
268+
}
269+
},
270+
Objects::toString
271+
).acceptsNull();
272+
251273
this.inferenceFieldBuilder = c -> {
252274
// Resolve the model setting from the registry if it has not been set yet.
253275
var resolvedModelSettings = modelSettings.get() != null ? modelSettings.get() : getResolvedModelSettings(c, false);
@@ -365,8 +387,11 @@ public SemanticTextFieldMapper build(MapperBuilderContext context) {
365387
validateServiceSettings(modelSettings.get(), resolvedModelSettings);
366388
}
367389

368-
if (context.getMergeReason() != MapperService.MergeReason.MAPPING_RECOVERY && indexOptions.get() != null) {
369-
validateIndexOptions(indexOptions.get(), inferenceId.getValue(), resolvedModelSettings);
390+
// If index_options are specified by the user, we will validate them against the model settings to ensure compatibility.
391+
// We do not serialize or otherwise store model settings at this time, this happens when the underlying vector field is created.
392+
SemanticTextIndexOptions builderIndexOptions = indexOptions.get();
393+
if (context.getMergeReason() != MapperService.MergeReason.MAPPING_RECOVERY && builderIndexOptions != null) {
394+
validateIndexOptions(builderIndexOptions, inferenceId.getValue(), resolvedModelSettings);
370395
}
371396

372397
final String fullName = context.buildFullName(leafName());
@@ -1166,6 +1191,9 @@ private static Mapper.Builder createEmbeddingsField(
11661191
}
11671192
denseVectorMapperBuilder.dimensions(modelSettings.dimensions());
11681193
denseVectorMapperBuilder.elementType(modelSettings.elementType());
1194+
// Here is where we persist index_options. If they are specified by the user, we will use those index_options,
1195+
// otherwise we will determine if we can set default index options. If we can't, we won't persist any index_options
1196+
// and the field will use the defaults for the dense_vector field.
11691197
if (indexOptions != null) {
11701198
DenseVectorFieldMapper.DenseVectorIndexOptions denseVectorIndexOptions =
11711199
(DenseVectorFieldMapper.DenseVectorIndexOptions) indexOptions.indexOptions();
@@ -1208,7 +1236,6 @@ static DenseVectorFieldMapper.DenseVectorIndexOptions defaultDenseVectorIndexOpt
12081236
// As embedding models for text perform better with BBQ, we aggressively default semantic_text fields to use optimized index
12091237
// options
12101238
if (indexVersionDefaultsToBbqHnsw(indexVersionCreated)) {
1211-
12121239
DenseVectorFieldMapper.DenseVectorIndexOptions defaultBbqHnswIndexOptions = defaultBbqHnswDenseVectorIndexOptions();
12131240
return defaultBbqHnswIndexOptions.validate(modelSettings.elementType(), modelSettings.dimensions(), false)
12141241
? defaultBbqHnswIndexOptions
@@ -1230,11 +1257,24 @@ static DenseVectorFieldMapper.DenseVectorIndexOptions defaultBbqHnswDenseVectorI
12301257
return new DenseVectorFieldMapper.BBQHnswIndexOptions(m, efConstruction, rescoreVector);
12311258
}
12321259

1233-
static SemanticTextIndexOptions defaultBbqHnswSemanticTextIndexOptions() {
1234-
return new SemanticTextIndexOptions(
1235-
SemanticTextIndexOptions.SupportedIndexOptions.DENSE_VECTOR,
1236-
defaultBbqHnswDenseVectorIndexOptions()
1237-
);
1260+
static SemanticTextIndexOptions defaultIndexOptions(IndexVersion indexVersionCreated, MinimalServiceSettings modelSettings) {
1261+
1262+
if (modelSettings == null) {
1263+
return null;
1264+
}
1265+
1266+
SemanticTextIndexOptions defaultIndexOptions = null;
1267+
if (modelSettings.taskType() == TaskType.TEXT_EMBEDDING) {
1268+
DenseVectorFieldMapper.DenseVectorIndexOptions denseVectorIndexOptions = defaultDenseVectorIndexOptions(
1269+
indexVersionCreated,
1270+
modelSettings
1271+
);
1272+
defaultIndexOptions = denseVectorIndexOptions == null
1273+
? null
1274+
: new SemanticTextIndexOptions(SemanticTextIndexOptions.SupportedIndexOptions.DENSE_VECTOR, denseVectorIndexOptions);
1275+
}
1276+
1277+
return defaultIndexOptions;
12381278
}
12391279

12401280
private static boolean canMergeModelSettings(MinimalServiceSettings previous, MinimalServiceSettings current, Conflicts conflicts) {

0 commit comments

Comments
 (0)