Skip to content

Commit 37e47c5

Browse files
committed
Merge branch 'main' into lucene_snapshot
2 parents cb0a0f0 + 6d6fc66 commit 37e47c5

File tree

30 files changed

+1088
-524
lines changed

30 files changed

+1088
-524
lines changed

docs/reference/search/search-your-data/semantic-search-semantic-text.asciidoc

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,16 @@ PUT semantic-embeddings
8989
It will be used to generate the embeddings based on the input text.
9090
Every time you ingest data into the related `semantic_text` field, this endpoint will be used for creating the vector representation of the text.
9191

92+
[NOTE]
93+
====
94+
If you're using web crawlers or connectors to generate indices, you have to
95+
<<indices-put-mapping,update the index mappings>> for these indices to
96+
include the `semantic_text` field. Once the mapping is updated, you'll need to run
97+
a full web crawl or a full connector sync. This ensures that all existing
98+
documents are reprocessed and updated with the new semantic embeddings,
99+
enabling semantic search on the updated data.
100+
====
101+
92102

93103
[discrete]
94104
[[semantic-text-load-data]]
@@ -118,6 +128,13 @@ Create the embeddings from the text by reindexing the data from the `test-data`
118128
The data in the `content` field will be reindexed into the `content` semantic text field of the destination index.
119129
The reindexed data will be processed by the {infer} endpoint associated with the `content` semantic text field.
120130

131+
[NOTE]
132+
====
133+
This step uses the reindex API to simulate data ingestion. If you are working with data that has already been indexed,
134+
rather than using the `test-data` set, reindexing is still required to ensure that the data is processed by the {infer} endpoint
135+
and the necessary embeddings are generated.
136+
====
137+
121138
[source,console]
122139
------------------------------------------------------------
123140
POST _reindex?wait_for_completion=false

libs/simdvec/build.gradle

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
* License v3.0 only", or the "Server Side Public License, v 1".
88
*/
99

10+
import org.elasticsearch.gradle.internal.info.BuildParams
1011
import org.elasticsearch.gradle.internal.precommit.CheckForbiddenApisTask
1112

1213
apply plugin: 'elasticsearch.publish'
@@ -32,7 +33,7 @@ tasks.matching { it.name == "compileMain21Java" }.configureEach {
3233
}
3334

3435
tasks.named('test').configure {
35-
if (JavaVersion.current().majorVersion.toInteger() >= 21) {
36+
if (BuildParams.getRuntimeJavaVersion().majorVersion.toInteger() >= 21) {
3637
jvmArgs '--add-modules=jdk.incubator.vector'
3738
}
3839
}

modules/rest-root/src/main/java/org/elasticsearch/rest/root/TransportMainAction.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import org.elasticsearch.action.support.ActionFilters;
1515
import org.elasticsearch.action.support.TransportAction;
1616
import org.elasticsearch.cluster.ClusterState;
17+
import org.elasticsearch.cluster.metadata.Metadata;
1718
import org.elasticsearch.cluster.service.ClusterService;
1819
import org.elasticsearch.common.settings.Settings;
1920
import org.elasticsearch.common.util.concurrent.EsExecutors;
@@ -48,7 +49,7 @@ protected void doExecute(Task task, MainRequest request, ActionListener<MainResp
4849
nodeName,
4950
IndexVersion.current().luceneVersion().toString(),
5051
clusterState.getClusterName(),
51-
clusterState.metadata().clusterUUID(),
52+
clusterState.metadata().clusterUUIDCommitted() ? clusterState.metadata().clusterUUID() : Metadata.UNKNOWN_CLUSTER_UUID,
5253
Build.current()
5354
)
5455
);

modules/rest-root/src/test/java/org/elasticsearch/rest/root/MainActionTests.java

Lines changed: 24 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,14 @@
99

1010
package org.elasticsearch.rest.root;
1111

12-
import org.elasticsearch.action.ActionListener;
1312
import org.elasticsearch.action.support.ActionFilters;
13+
import org.elasticsearch.action.support.ActionTestUtils;
1414
import org.elasticsearch.cluster.ClusterName;
1515
import org.elasticsearch.cluster.ClusterState;
1616
import org.elasticsearch.cluster.block.ClusterBlock;
1717
import org.elasticsearch.cluster.block.ClusterBlockLevel;
1818
import org.elasticsearch.cluster.block.ClusterBlocks;
19+
import org.elasticsearch.cluster.metadata.Metadata;
1920
import org.elasticsearch.cluster.service.ClusterService;
2021
import org.elasticsearch.common.settings.Settings;
2122
import org.elasticsearch.rest.RestStatus;
@@ -26,7 +27,7 @@
2627
import org.elasticsearch.transport.TransportService;
2728

2829
import java.util.Collections;
29-
import java.util.concurrent.atomic.AtomicReference;
30+
import java.util.concurrent.atomic.AtomicBoolean;
3031

3132
import static org.mockito.Mockito.mock;
3233
import static org.mockito.Mockito.times;
@@ -39,7 +40,7 @@ public void testMainActionClusterAvailable() {
3940
final ClusterService clusterService = mock(ClusterService.class);
4041
final ClusterName clusterName = new ClusterName("elasticsearch");
4142
final Settings settings = Settings.builder().put("node.name", "my-node").build();
42-
ClusterBlocks blocks;
43+
final ClusterBlocks blocks;
4344
if (randomBoolean()) {
4445
if (randomBoolean()) {
4546
blocks = ClusterBlocks.EMPTY_CLUSTER_BLOCK;
@@ -73,7 +74,12 @@ public void testMainActionClusterAvailable() {
7374
)
7475
.build();
7576
}
76-
ClusterState state = ClusterState.builder(clusterName).blocks(blocks).build();
77+
final Metadata.Builder metadata = new Metadata.Builder();
78+
if (randomBoolean()) {
79+
metadata.clusterUUID(randomUUID());
80+
metadata.clusterUUIDCommitted(randomBoolean());
81+
}
82+
final ClusterState state = ClusterState.builder(clusterName).metadata(metadata).blocks(blocks).build();
7783
when(clusterService.state()).thenReturn(state);
7884

7985
TransportService transportService = new TransportService(
@@ -85,21 +91,21 @@ public void testMainActionClusterAvailable() {
8591
null,
8692
Collections.emptySet()
8793
);
88-
TransportMainAction action = new TransportMainAction(settings, transportService, mock(ActionFilters.class), clusterService);
89-
AtomicReference<MainResponse> responseRef = new AtomicReference<>();
90-
action.doExecute(mock(Task.class), new MainRequest(), new ActionListener<>() {
91-
@Override
92-
public void onResponse(MainResponse mainResponse) {
93-
responseRef.set(mainResponse);
94-
}
95-
96-
@Override
97-
public void onFailure(Exception e) {
98-
logger.error("unexpected error", e);
99-
}
100-
});
94+
final AtomicBoolean listenerCalled = new AtomicBoolean();
95+
new TransportMainAction(settings, transportService, mock(ActionFilters.class), clusterService).doExecute(
96+
mock(Task.class),
97+
new MainRequest(),
98+
ActionTestUtils.assertNoFailureListener(mainResponse -> {
99+
assertNotNull(mainResponse);
100+
assertEquals(
101+
state.metadata().clusterUUIDCommitted() ? state.metadata().clusterUUID() : Metadata.UNKNOWN_CLUSTER_UUID,
102+
mainResponse.getClusterUuid()
103+
);
104+
assertFalse(listenerCalled.getAndSet(true));
105+
})
106+
);
101107

102-
assertNotNull(responseRef.get());
108+
assertTrue(listenerCalled.get());
103109
verify(clusterService, times(1)).state();
104110
}
105111
}

renovate.json

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
{
2+
"$schema": "https://docs.renovatebot.com/renovate-schema.json",
3+
"extends": [
4+
"github>elastic/renovate-config:only-chainguard"
5+
],
6+
"customManagers": [
7+
{
8+
"description": "Extract Wolfi images from elasticsearch DockerBase configuration",
9+
"customType": "regex",
10+
"fileMatch": [
11+
"build\\-tools\\-internal\\/src\\/main\\/java\\/org\\/elasticsearch\\/gradle\\/internal\\/DockerBase\\.java$"
12+
],
13+
"matchStrings": [
14+
"\\s*\"?(?<depName>[^\\s:@\"]+)(?::(?<currentValue>[-a-zA-Z0-9.]+))?(?:@(?<currentDigest>sha256:[a-zA-Z0-9]+))?\"?"
15+
],
16+
"currentValueTemplate": "{{#if currentValue}}{{{currentValue}}}{{else}}latest{{/if}}",
17+
"autoReplaceStringTemplate": "\"{{{depName}}}{{#if newValue}}:{{{newValue}}}{{/if}}{{#if newDigest}}@{{{newDigest}}}{{/if}}\"",
18+
"datasourceTemplate": "docker"
19+
}
20+
]
21+
}

server/src/main/java/org/elasticsearch/TransportVersions.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,7 @@ static TransportVersion def(int id) {
236236
public static final TransportVersion INGEST_GEO_DATABASE_PROVIDERS = def(8_760_00_0);
237237
public static final TransportVersion DATE_TIME_DOC_VALUES_LOCALES = def(8_761_00_0);
238238
public static final TransportVersion FAST_REFRESH_RCO = def(8_762_00_0);
239+
public static final TransportVersion TEXT_SIMILARITY_RERANKER_QUERY_REWRITE = def(8_763_00_0);
239240

240241
/*
241242
* STOP! READ THIS FIRST! No, really,

server/src/main/java/org/elasticsearch/action/search/SearchPhaseController.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
import org.elasticsearch.search.SearchHits;
3737
import org.elasticsearch.search.SearchPhaseResult;
3838
import org.elasticsearch.search.SearchService;
39+
import org.elasticsearch.search.SearchSortValues;
3940
import org.elasticsearch.search.aggregations.AggregationReduceContext;
4041
import org.elasticsearch.search.aggregations.AggregatorFactories;
4142
import org.elasticsearch.search.aggregations.InternalAggregations;
@@ -51,6 +52,7 @@
5152
import org.elasticsearch.search.query.QuerySearchResult;
5253
import org.elasticsearch.search.rank.RankDoc;
5354
import org.elasticsearch.search.rank.context.QueryPhaseRankCoordinatorContext;
55+
import org.elasticsearch.search.sort.ShardDocSortField;
5456
import org.elasticsearch.search.suggest.Suggest;
5557
import org.elasticsearch.search.suggest.Suggest.Suggestion;
5658
import org.elasticsearch.search.suggest.completion.CompletionSuggestion;
@@ -464,6 +466,13 @@ private static SearchHits getHits(
464466
assert shardDoc instanceof RankDoc;
465467
searchHit.setRank(((RankDoc) shardDoc).rank);
466468
searchHit.score(shardDoc.score);
469+
long shardAndDoc = ShardDocSortField.encodeShardAndDoc(shardDoc.shardIndex, shardDoc.doc);
470+
searchHit.sortValues(
471+
new SearchSortValues(
472+
new Object[] { shardDoc.score, shardAndDoc },
473+
new DocValueFormat[] { DocValueFormat.RAW, DocValueFormat.RAW }
474+
)
475+
);
467476
} else if (sortedTopDocs.isSortedByField) {
468477
FieldDoc fieldDoc = (FieldDoc) shardDoc;
469478
searchHit.sortValues(fieldDoc.fields, reducedQueryPhase.sortValueFormats);

server/src/main/java/org/elasticsearch/cluster/metadata/Metadata.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -695,6 +695,11 @@ public long version() {
695695
return this.version;
696696
}
697697

698+
/**
699+
* @return A UUID which identifies this cluster. Nodes record on disk the UUID of the cluster they first join, and will then refuse to
700+
* join clusters with different UUIDs. Note that when the cluster is forming for the first time this value may not yet be committed,
701+
* and therefore it may change. Check {@link #clusterUUIDCommitted()} to verify that the value is committed if needed.
702+
*/
698703
public String clusterUUID() {
699704
return this.clusterUUID;
700705
}

server/src/main/java/org/elasticsearch/index/codec/vectors/ES815BitFlatVectorsFormat.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,10 @@
1919
import org.apache.lucene.index.SegmentReadState;
2020
import org.apache.lucene.index.SegmentWriteState;
2121
import org.apache.lucene.index.VectorSimilarityFunction;
22+
import org.apache.lucene.util.VectorUtil;
2223
import org.apache.lucene.util.hnsw.RandomVectorScorer;
2324
import org.apache.lucene.util.hnsw.RandomVectorScorerSupplier;
2425
import org.apache.lucene.util.quantization.QuantizedByteVectorValues;
25-
import org.elasticsearch.script.field.vectors.ESVectorUtil;
2626

2727
import java.io.IOException;
2828

@@ -103,7 +103,7 @@ public RandomVectorScorer getRandomVectorScorer(
103103
}
104104

105105
static float hammingScore(byte[] a, byte[] b) {
106-
return ((a.length * Byte.SIZE) - ESVectorUtil.xorBitCount(a, b)) / (float) (a.length * Byte.SIZE);
106+
return ((a.length * Byte.SIZE) - VectorUtil.xorBitCount(a, b)) / (float) (a.length * Byte.SIZE);
107107
}
108108

109109
static class HammingVectorScorer extends RandomVectorScorer.AbstractRandomVectorScorer {

server/src/main/java/org/elasticsearch/script/field/vectors/ByteBinaryDenseVector.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ public double l1Norm(List<Number> queryVector) {
103103

104104
@Override
105105
public int hamming(byte[] queryVector) {
106-
return ESVectorUtil.xorBitCount(queryVector, vectorValue);
106+
return VectorUtil.xorBitCount(queryVector, vectorValue);
107107
}
108108

109109
@Override

0 commit comments

Comments
 (0)