Skip to content

Commit b6c162d

Browse files
Merge branch 'main' into indexLike_v3
2 parents b3f65ed + ea2e7b4 commit b6c162d

File tree

76 files changed

+2310
-315
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

76 files changed

+2310
-315
lines changed

.buildkite/hooks/pre-command

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,14 @@ if [[ "${USE_LUCENE_SNAPSHOT_CREDS:-}" == "true" ]]; then
6464
unset data
6565
fi
6666

67+
if [[ "${USE_MAVEN_GPG:-}" == "true" ]]; then
68+
vault_path="kv/ci-shared/release-eng/team-release-secrets/es-delivery/gpg"
69+
ORG_GRADLE_PROJECT_signingKey=$(vault kv get --field="private_key" $vault_path)
70+
ORG_GRADLE_PROJECT_signingPassword=$(vault kv get --field="passphase" $vault_path)
71+
export ORG_GRADLE_PROJECT_signingKey
72+
export ORG_GRADLE_PROJECT_signingPassword
73+
fi
74+
6775
if [[ "${USE_DRA_CREDENTIALS:-}" == "true" ]]; then
6876
DRA_VAULT_ROLE_ID_SECRET=$(vault read -field=role-id secret/ci/elastic-elasticsearch/legacy-vault-credentials)
6977
export DRA_VAULT_ROLE_ID_SECRET

.buildkite/pipelines/dra-workflow.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ steps:
22
- command: .buildkite/scripts/dra-workflow.sh
33
env:
44
USE_DRA_CREDENTIALS: "true"
5+
USE_MAVEN_GPG: "true"
56
USE_PROD_DOCKER_CREDENTIALS: "true"
67
agents:
78
provider: gcp

.buildkite/scripts/run-bc-upgrade-tests.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ select(.active_release == true) |
2424
(.build_candidates | to_entries | sort_by(.value.completed_at))) |
2525
last | .value.manifest_url")"
2626

27-
if [[ -z "$MANIFEST_URL" ]]; then
27+
if [[ -z "$MANIFEST_URL" ]] || [[ "$MANIFEST_URL" == "null" ]]; then
2828
echo "No snapshots or build candidates for branch [$BUILDKITE_BRANCH]."
2929
echo "Skipping BC upgrade tests."
3030
exit 0

.buildkite/scripts/run-pr-upgrade-tests.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@ fi
1818

1919
# Identify the merge base of the current commit (branch) and the base branch of the pull request.
2020
# PR upgrade tests are run from the merge base to the current commit.
21-
BASE_COMMIT=$(git merge-base $BUILDKITE_PULL_REQUEST_BASE_BRANCH $BUILDKITE_COMMIT)
21+
git fetch origin $BUILDKITE_PULL_REQUEST_BASE_BRANCH
22+
BASE_COMMIT=$(git merge-base origin/$BUILDKITE_PULL_REQUEST_BASE_BRANCH $BUILDKITE_COMMIT)
2223

2324
VERSION=$(sed -n 's/^elasticsearch[[:space:]]*=[[:space:]]*\(.*\)/\1/p' build-tools-internal/version.properties)
2425

build-conventions/src/main/java/org/elasticsearch/gradle/internal/conventions/PublishPlugin.java

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,11 @@
1010
package org.elasticsearch.gradle.internal.conventions;
1111

1212
import groovy.util.Node;
13+
import nmcp.NmcpPlugin;
1314

1415
import com.github.jengelman.gradle.plugins.shadow.ShadowExtension;
1516
import com.github.jengelman.gradle.plugins.shadow.ShadowPlugin;
1617

17-
import nmcp.NmcpPlugin;
18-
1918
import org.elasticsearch.gradle.internal.conventions.info.GitInfo;
2019
import org.elasticsearch.gradle.internal.conventions.precommit.PomValidationPrecommitPlugin;
2120
import org.elasticsearch.gradle.internal.conventions.util.Util;
@@ -41,6 +40,8 @@
4140
import org.gradle.api.tasks.bundling.Jar;
4241
import org.gradle.initialization.layout.BuildLayout;
4342
import org.gradle.language.base.plugins.LifecycleBasePlugin;
43+
import org.gradle.plugins.signing.SigningExtension;
44+
import org.gradle.plugins.signing.SigningPlugin;
4445
import org.w3c.dom.Element;
4546

4647
import java.io.File;
@@ -69,6 +70,7 @@ public void apply(Project project) {
6970
project.getPluginManager().apply(PomValidationPrecommitPlugin.class);
7071
project.getPluginManager().apply(LicensingPlugin.class);
7172
project.getPluginManager().apply(NmcpPlugin.class);
73+
project.getPluginManager().apply(SigningPlugin.class);
7274
configureJavadocJar(project);
7375
configureSourcesJar(project);
7476
configurePomGeneration(project);
@@ -79,6 +81,13 @@ public void apply(Project project) {
7981
private void configurePublications(Project project) {
8082
var publishingExtension = project.getExtensions().getByType(PublishingExtension.class);
8183
var publication = publishingExtension.getPublications().create("elastic", MavenPublication.class);
84+
Provider<String> signingKey = project.getProviders().gradleProperty("signingKey");
85+
if (signingKey.isPresent()) {
86+
SigningExtension signing = project.getExtensions().getByType(SigningExtension.class);
87+
signing.useInMemoryPgpKeys(signingKey.get(), project.getProviders().gradleProperty("signingPassword").get());
88+
signing.sign(publication);
89+
}
90+
8291
project.afterEvaluate(project1 -> {
8392
if (project1.getPlugins().hasPlugin(ShadowPlugin.class)) {
8493
configureWithShadowPlugin(project1, publication);

docs/changelog/125921.yaml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
pr: 125921
2+
summary: Allow direct IO for BBQ rescoring
3+
area: Vector Search
4+
type: feature
5+
highlight:
6+
title: Allow direct IO for BBQ rescoring
7+
body: |-
8+
BBQ rescoring performance can be drastically affected by the amount of available
9+
off-heap RAM for use by the system page cache. When there is not enough off-heap RAM
10+
to fit all the vector data in memory, BBQ search latencies can be affected by as much as 5000x.
11+
Specifying the `vector.rescoring.directio=true` Java option on all vector search
12+
nodes modifies rescoring to use direct IO, which eliminates these very high latencies
13+
from searches in low-memory scenarios, at a cost of a reduction
14+
in vector search performance for BBQ indices when the vectors do all fit in memory.
15+
16+
This option is released in 9.1 as a tech preview whilst we analyse its effect
17+
for a variety of use cases.
18+
issues: []

docs/changelog/129990.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 129990
2+
summary: Make forecast write load accurate when shard numbers change
3+
area: Allocation
4+
type: bug
5+
issues: []

docs/reference/elasticsearch/configuration-reference/thread-pool-settings.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,9 @@ $$$search-throttled$$$`search_throttled`
7171
`system_write`
7272
: For write operations on system indices. Thread pool type is `fixed` with a default maximum size of `min(5, (`[`# of allocated processors`](#node.processors)`) / 2)`.
7373

74+
`system_write_coordination`
75+
: For bulk request coordination operations on system indices. Thread pool type is `fixed` with a default maximum size of `min(5, (`[`# of allocated processors`](#node.processors)`) / 2)`.
76+
7477
`system_critical_read`
7578
: For critical read operations on system indices. Thread pool type is `fixed` with a default maximum size of `min(5, (`[`# of allocated processors`](#node.processors)`) / 2)`.
7679

docs/reference/query-languages/query-dsl/query-dsl-knn-query.md

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,36 @@ A sample query can look like below:
229229

230230
Note that nested `knn` only supports `score_mode=max`.
231231

232+
## Knn query on a semantic_text field [knn-query-with-semantic-text]
233+
234+
Elasticsearch supports knn queries over a [
235+
`semantic_text` field](/reference/elasticsearch/mapping-reference/semantic-text.md).
236+
237+
Here is an example using the `query_vector_builder`:
238+
239+
```json
240+
{
241+
"query": {
242+
"knn": {
243+
"field": "inference_field",
244+
"k": 10,
245+
"num_candidates": 100,
246+
"query_vector_builder": {
247+
"text_embedding": {
248+
"model_text": "test"
249+
}
250+
}
251+
}
252+
}
253+
}
254+
```
255+
256+
Note that for `semantic_text` fields, the `model_id` does not have to be
257+
provided as it can be inferred from the `semantic_text` field mapping.
258+
259+
Knn search using query vectors over `semantic_text` fields is also supported,
260+
with no change to the API.
261+
232262
## Knn query with aggregations [knn-query-aggregations]
233263

234264
`knn` query calculates aggregations on top `k` documents from each shard. Thus, the final results from aggregations contain `k * number_of_shards` documents. This is different from the [top level knn section](docs-content://solutions/search/vector/knn.md) where aggregations are calculated on the global top `k` nearest documents.

docs/reference/search-connectors/es-connectors-mongodb.md

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -249,8 +249,23 @@ The full host in this example will look like this:
249249

250250
A bug introduced in **8.12.0** causes the Connectors Docker image to error out if run using MongoDB as its source. The command line will output the error `cannot import name 'coroutine' from 'asyncio'`. This issue is fixed in versions **8.12.2** and **8.13.0**. This bug does not affect Elastic managed connectors.
251251

252-
See [Known issues](/release-notes/known-issues.md) for any issues affecting all connectors.
252+
#### UUIDs are not correctly deserialized, causing problems with ingesting documents into Elasticsearch
253+
254+
MongoDB has special handling for the UUID type: there is a legacy representation and a modern one. See the [official docs](https://pymongo.readthedocs.io/en/stable/examples/uuid.html) for details.
255+
256+
With connector framework version 9.0.3, we improved how standard UUIDs are handled. Now, the MongoDB connector can correctly deserialize UUIDs into valid Elasticsearch values. However, for legacy UUIDs or older connector versions, you might need to adjust the connection string to specify the UUID representation.
257+
258+
For example, if you are using the modern UUID representation, adding the `uuidRepresentation=standard` query parameter to the MongoDB connection URI in the `host` Rich Configurable Field will allow the connector to properly handle UUIDs. With this change, the full `host` Rich Configurable Field value could look like this: `mongodb+srv://my_username:my_password@cluster0.mongodb.net/mydb?w=majority&uuidRepresentation=standard`
253259

260+
If you’re using a legacy UUID representation, you should adjust the connection URI accordingly. For example:
261+
262+
- C#: `uuidRepresentation=csharpLegacy`
263+
- Java: `uuidRepresentation=javaLegacy`
264+
- Python: `uuidRepresentation=pythonLegacy`
265+
266+
You can find a full explanation in the [official docs](https://pymongo.readthedocs.io/en/stable/examples/uuid.html#configuring-a-uuid-representation).
267+
268+
See [Known issues](/release-notes/known-issues.md) for any issues affecting all connectors.
254269

255270
### Troubleshooting [es-connectors-mongodb-client-troubleshooting]
256271

0 commit comments

Comments
 (0)