Skip to content

Commit 938c578

Browse files
committed
Merge remote-tracking branch 'upstream/main' into bc_upgrades/expand_bcUpgradeTask_test
2 parents d15c863 + 32d0546 commit 938c578

File tree

30 files changed

+1416
-87
lines changed

30 files changed

+1416
-87
lines changed

.buildkite/hooks/pre-command

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -95,13 +95,11 @@ if [[ "${USE_PROD_DOCKER_CREDENTIALS:-}" == "true" ]]; then
9595
fi
9696

9797
if [[ "${USE_PERF_CREDENTIALS:-}" == "true" ]]; then
98-
PERF_METRICS_HOST=$(vault read -field=es_host /secret/ci/elastic-elasticsearch/esbench-metics)
99-
PERF_METRICS_INDEX="dummy-micro-benchmarks"
100-
PERF_METRICS_USERNAME=$(vault read -field=es_username /secret/ci/elastic-elasticsearch/esbench-metics)
101-
PERF_METRICS_PASSWORD=$(vault read -field=es_password /secret/ci/elastic-elasticsearch/esbench-metics)
98+
PERF_METRICS_HOST=$(vault read -field=es_host /secret/ci/elastic-elasticsearch/microbenchmarks-metrics)
99+
PERF_METRICS_USERNAME=$(vault read -field=es_username /secret/ci/elastic-elasticsearch/microbenchmarks-metrics)
100+
PERF_METRICS_PASSWORD=$(vault read -field=es_password /secret/ci/elastic-elasticsearch/microbenchmarks-metrics)
102101

103102
export PERF_METRICS_HOST
104-
export PERF_METRICS_INDEX
105103
export PERF_METRICS_USERNAME
106104
export PERF_METRICS_PASSWORD
107105
fi

.buildkite/scripts/index-micro-benchmark-results.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
jq -c '.[]' "benchmarks/build/result.json" | while read -r doc; do
44
doc=$(echo "$doc" | jq --argjson timestamp "$(date +%s000)" '. + {"@timestamp": $timestamp}')
55
echo "Indexing $(echo "$doc" | jq -r '.benchmark')"
6-
curl -s -X POST "https://$PERF_METRICS_HOST/$PERF_METRICS_INDEX/_doc" \
6+
curl -s -X POST "https://$PERF_METRICS_HOST/metrics-microbenchmarks-default/_doc" \
77
-u "$PERF_METRICS_USERNAME:$PERF_METRICS_PASSWORD" \
88
-H 'Content-Type: application/json' \
99
-d "$doc"

distribution/tools/server-cli/src/main/java/org/elasticsearch/server/cli/JvmErgonomics.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@
2828
*/
2929
final class JvmErgonomics {
3030

31+
static final double DIRECT_MEMORY_TO_HEAP_FACTOR = 0.5;
32+
3133
private JvmErgonomics() {
3234
throw new AssertionError("No instances intended");
3335
}
@@ -44,7 +46,7 @@ static List<String> choose(final List<String> userDefinedJvmOptions, Settings no
4446
final long heapSize = JvmOption.extractMaxHeapSize(finalJvmOptions);
4547
final long maxDirectMemorySize = JvmOption.extractMaxDirectMemorySize(finalJvmOptions);
4648
if (maxDirectMemorySize == 0) {
47-
ergonomicChoices.add("-XX:MaxDirectMemorySize=" + heapSize / 2);
49+
ergonomicChoices.add("-XX:MaxDirectMemorySize=" + (long) (DIRECT_MEMORY_TO_HEAP_FACTOR * heapSize));
4850
}
4951

5052
final boolean tuneG1GCForSmallHeap = tuneG1GCForSmallHeap(heapSize);

distribution/tools/server-cli/src/main/java/org/elasticsearch/server/cli/MachineDependentHeap.java

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
import org.elasticsearch.cluster.node.DiscoveryNodeRole;
1313
import org.elasticsearch.common.settings.Settings;
14+
import org.elasticsearch.common.util.FeatureFlag;
1415
import org.elasticsearch.node.NodeRoleSettings;
1516

1617
import java.io.IOException;
@@ -37,6 +38,8 @@ public class MachineDependentHeap {
3738
protected static final long MAX_HEAP_SIZE = GB * 31; // 31GB
3839
protected static final long MIN_HEAP_SIZE = 1024 * 1024 * 128; // 128MB
3940

41+
private static final FeatureFlag NEW_ML_MEMORY_COMPUTATION_FEATURE_FLAG = new FeatureFlag("new_ml_memory_computation");
42+
4043
public MachineDependentHeap() {}
4144

4245
/**
@@ -76,12 +79,16 @@ protected int getHeapSizeMb(Settings nodeSettings, MachineNodeRole role, long av
7679
/*
7780
* Machine learning only node.
7881
*
79-
* <p>Heap is computed as:
80-
* <ul>
81-
* <li>40% of total system memory when total system memory 16 gigabytes or less.</li>
82-
* <li>40% of the first 16 gigabytes plus 10% of memory above that when total system memory is more than 16 gigabytes.</li>
83-
* <li>The absolute maximum heap size is 31 gigabytes.</li>
84-
* </ul>
82+
* The memory reserved for Java is computed as:
83+
* - 40% of total system memory when total system memory 16 gigabytes or less.
84+
* - 40% of the first 16 gigabytes plus 10% of memory above that when total system memory is more than 16 gigabytes.
85+
* - The absolute maximum heap size is 31 gigabytes.
86+
*
87+
* This Java memory is divided as follows:
88+
* - 2/3 of the Java memory is reserved for the Java heap.
89+
* - 1/3 of the Java memory is reserved for the Java direct memory.
90+
*
91+
* The direct memory being half of the heap is set by the JvmErgonomics class.
8592
*
8693
* In all cases the result is rounded down to the next whole multiple of 4 megabytes.
8794
* The reason for doing this is that Java will round requested heap sizes to a multiple
@@ -95,13 +102,22 @@ protected int getHeapSizeMb(Settings nodeSettings, MachineNodeRole role, long av
95102
*
96103
* If this formula is changed then corresponding changes must be made to the {@code NativeMemoryCalculator} and
97104
* {@code MlAutoscalingDeciderServiceTests} classes in the ML plugin code. Failure to keep the logic synchronized
98-
* could result in repeated autoscaling up and down.
105+
* could result in ML processes crashing with OOM errors or repeated autoscaling up and down.
99106
*/
100107
case ML_ONLY -> {
101-
if (availableMemory <= (GB * 16)) {
102-
yield mb((long) (availableMemory * .4), 4);
108+
double heapFractionBelow16GB = 0.4;
109+
double heapFractionAbove16GB = 0.1;
110+
if (NEW_ML_MEMORY_COMPUTATION_FEATURE_FLAG.isEnabled()) {
111+
heapFractionBelow16GB = 0.4 / (1.0 + JvmErgonomics.DIRECT_MEMORY_TO_HEAP_FACTOR);
112+
heapFractionAbove16GB = 0.1 / (1.0 + JvmErgonomics.DIRECT_MEMORY_TO_HEAP_FACTOR);
113+
}
114+
if (availableMemory <= GB * 16) {
115+
yield mb((long) (availableMemory * heapFractionBelow16GB), 4);
103116
} else {
104-
yield mb((long) min((GB * 16) * .4 + (availableMemory - GB * 16) * .1, MAX_HEAP_SIZE), 4);
117+
yield mb(
118+
(long) min(GB * 16 * heapFractionBelow16GB + (availableMemory - GB * 16) * heapFractionAbove16GB, MAX_HEAP_SIZE),
119+
4
120+
);
105121
}
106122
}
107123
/*

distribution/tools/server-cli/src/test/java/org/elasticsearch/server/cli/MachineDependentHeapTests.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -56,13 +56,13 @@ public void testMasterOnlyOptions() throws Exception {
5656
}
5757

5858
public void testMlOnlyOptions() throws Exception {
59-
assertHeapOptions(1, containsInAnyOrder("-Xmx408m", "-Xms408m"), "ml");
60-
assertHeapOptions(4, containsInAnyOrder("-Xmx1636m", "-Xms1636m"), "ml");
61-
assertHeapOptions(32, containsInAnyOrder("-Xmx8192m", "-Xms8192m"), "ml");
62-
assertHeapOptions(64, containsInAnyOrder("-Xmx11468m", "-Xms11468m"), "ml");
59+
assertHeapOptions(1, containsInAnyOrder("-Xmx272m", "-Xms272m"), "ml");
60+
assertHeapOptions(4, containsInAnyOrder("-Xmx1092m", "-Xms1092m"), "ml");
61+
assertHeapOptions(32, containsInAnyOrder("-Xmx5460m", "-Xms5460m"), "ml");
62+
assertHeapOptions(64, containsInAnyOrder("-Xmx7644m", "-Xms7644m"), "ml");
6363
// We'd never see a node this big in Cloud, but this assertion proves that the 31GB absolute maximum
6464
// eventually kicks in (because 0.4 * 16 + 0.1 * (263 - 16) > 31)
65-
assertHeapOptions(263, containsInAnyOrder("-Xmx31744m", "-Xms31744m"), "ml");
65+
assertHeapOptions(263, containsInAnyOrder("-Xmx21228m", "-Xms21228m"), "ml");
6666
}
6767

6868
public void testDataNodeOptions() throws Exception {

docs/changelog/128742.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 128742
2+
summary: "Account for Java direct memory on machine learning nodes to prevent out-of-memory crashes."
3+
area: Machine Learning
4+
type: bug
5+
issues: []

server/src/main/java/org/elasticsearch/TransportVersions.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,7 @@ static TransportVersion def(int id) {
193193
public static final TransportVersion ESQL_QUERY_PLANNING_DURATION_8_19 = def(8_841_0_45);
194194
public static final TransportVersion SEARCH_SOURCE_EXCLUDE_VECTORS_PARAM_8_19 = def(8_841_0_46);
195195
public static final TransportVersion ML_INFERENCE_MISTRAL_CHAT_COMPLETION_ADDED_8_19 = def(8_841_0_47);
196+
public static final TransportVersion ML_INFERENCE_ELASTIC_RERANK_ADDED_8_19 = def(8_841_0_48);
196197
public static final TransportVersion V_9_0_0 = def(9_000_0_09);
197198
public static final TransportVersion INITIAL_ELASTICSEARCH_9_0_1 = def(9_000_0_10);
198199
public static final TransportVersion INITIAL_ELASTICSEARCH_9_0_2 = def(9_000_0_11);
@@ -290,6 +291,7 @@ static TransportVersion def(int id) {
290291
public static final TransportVersion IDP_CUSTOM_SAML_ATTRIBUTES_ALLOW_LIST = def(9_091_0_00);
291292
public static final TransportVersion SEARCH_SOURCE_EXCLUDE_VECTORS_PARAM = def(9_092_0_00);
292293
public static final TransportVersion SNAPSHOT_INDEX_SHARD_STATUS_MISSING_STATS = def(9_093_0_00);
294+
public static final TransportVersion ML_INFERENCE_ELASTIC_RERANK = def(9_094_0_00);
293295

294296
/*
295297
* STOP! READ THIS FIRST! No, really,

server/src/main/java/org/elasticsearch/monitor/jvm/JvmInfo.java

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -43,14 +43,7 @@ public class JvmInfo implements ReportingService.Info {
4343
long nonHeapInit = memoryMXBean.getNonHeapMemoryUsage().getInit() < 0 ? 0 : memoryMXBean.getNonHeapMemoryUsage().getInit();
4444
long nonHeapMax = memoryMXBean.getNonHeapMemoryUsage().getMax() < 0 ? 0 : memoryMXBean.getNonHeapMemoryUsage().getMax();
4545
long directMemoryMax = 0;
46-
try {
47-
Class<?> vmClass = Class.forName("sun.misc.VM");
48-
directMemoryMax = (Long) vmClass.getMethod("maxDirectMemory").invoke(null);
49-
} catch (Exception t) {
50-
// ignore
51-
}
5246
String[] inputArguments = runtimeMXBean.getInputArguments().toArray(new String[runtimeMXBean.getInputArguments().size()]);
53-
Mem mem = new Mem(heapInit, heapMax, nonHeapInit, nonHeapMax, directMemoryMax);
5447

5548
String bootClassPath;
5649
try {
@@ -130,6 +123,11 @@ public class JvmInfo implements ReportingService.Info {
130123
configuredMaxHeapSize = Long.parseLong((String) valueMethod.invoke(maxHeapSizeVmOptionObject));
131124
} catch (Exception ignored) {}
132125

126+
try {
127+
Object maxDirectMemorySizeVmOptionObject = vmOptionMethod.invoke(hotSpotDiagnosticMXBean, "MaxDirectMemorySize");
128+
directMemoryMax = Long.parseLong((String) valueMethod.invoke(maxDirectMemorySizeVmOptionObject));
129+
} catch (Exception ignored) {}
130+
133131
try {
134132
Object useSerialGCVmOptionObject = vmOptionMethod.invoke(hotSpotDiagnosticMXBean, "UseSerialGC");
135133
useSerialGC = (String) valueMethod.invoke(useSerialGCVmOptionObject);
@@ -139,6 +137,8 @@ public class JvmInfo implements ReportingService.Info {
139137

140138
}
141139

140+
Mem mem = new Mem(heapInit, heapMax, nonHeapInit, nonHeapMax, directMemoryMax);
141+
142142
INSTANCE = new JvmInfo(
143143
ProcessHandle.current().pid(),
144144
System.getProperty("java.version"),
@@ -496,5 +496,8 @@ public ByteSizeValue getHeapMax() {
496496
return ByteSizeValue.ofBytes(heapMax);
497497
}
498498

499+
public ByteSizeValue getTotalMax() {
500+
return ByteSizeValue.ofBytes(heapMax + nonHeapMax + directMemoryMax);
501+
}
499502
}
500503
}

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceNamedWriteablesProvider.java

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@
7070
import org.elasticsearch.xpack.inference.services.custom.response.TextEmbeddingResponseParser;
7171
import org.elasticsearch.xpack.inference.services.deepseek.DeepSeekChatCompletionModel;
7272
import org.elasticsearch.xpack.inference.services.elastic.completion.ElasticInferenceServiceCompletionServiceSettings;
73+
import org.elasticsearch.xpack.inference.services.elastic.rerank.ElasticInferenceServiceRerankServiceSettings;
7374
import org.elasticsearch.xpack.inference.services.elastic.sparseembeddings.ElasticInferenceServiceSparseEmbeddingsServiceSettings;
7475
import org.elasticsearch.xpack.inference.services.elasticsearch.CustomElandInternalServiceSettings;
7576
import org.elasticsearch.xpack.inference.services.elasticsearch.CustomElandInternalTextEmbeddingServiceSettings;
@@ -166,7 +167,7 @@ public static List<NamedWriteableRegistry.Entry> getNamedWriteables() {
166167
addAnthropicNamedWritables(namedWriteables);
167168
addAmazonBedrockNamedWriteables(namedWriteables);
168169
addAwsNamedWriteables(namedWriteables);
169-
addEisNamedWriteables(namedWriteables);
170+
addElasticNamedWriteables(namedWriteables);
170171
addAlibabaCloudSearchNamedWriteables(namedWriteables);
171172
addJinaAINamedWriteables(namedWriteables);
172173
addVoyageAINamedWriteables(namedWriteables);
@@ -742,20 +743,32 @@ private static void addVoyageAINamedWriteables(List<NamedWriteableRegistry.Entry
742743
);
743744
}
744745

745-
private static void addEisNamedWriteables(List<NamedWriteableRegistry.Entry> namedWriteables) {
746+
private static void addElasticNamedWriteables(List<NamedWriteableRegistry.Entry> namedWriteables) {
747+
// Sparse Text Embeddings
746748
namedWriteables.add(
747749
new NamedWriteableRegistry.Entry(
748750
ServiceSettings.class,
749751
ElasticInferenceServiceSparseEmbeddingsServiceSettings.NAME,
750752
ElasticInferenceServiceSparseEmbeddingsServiceSettings::new
751753
)
752754
);
755+
756+
// Completion
753757
namedWriteables.add(
754758
new NamedWriteableRegistry.Entry(
755759
ServiceSettings.class,
756760
ElasticInferenceServiceCompletionServiceSettings.NAME,
757761
ElasticInferenceServiceCompletionServiceSettings::new
758762
)
759763
);
764+
765+
// Rerank
766+
namedWriteables.add(
767+
new NamedWriteableRegistry.Entry(
768+
ServiceSettings.class,
769+
ElasticInferenceServiceRerankServiceSettings.NAME,
770+
ElasticInferenceServiceRerankServiceSettings::new
771+
)
772+
);
760773
}
761774
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0; you may not use this file except in compliance with the Elastic License
5+
* 2.0.
6+
*/
7+
8+
package org.elasticsearch.xpack.inference.external.request.elastic.rerank;
9+
10+
import org.apache.http.HttpHeaders;
11+
import org.apache.http.client.methods.HttpPost;
12+
import org.apache.http.client.methods.HttpRequestBase;
13+
import org.apache.http.entity.ByteArrayEntity;
14+
import org.apache.http.message.BasicHeader;
15+
import org.elasticsearch.common.Strings;
16+
import org.elasticsearch.xcontent.XContentType;
17+
import org.elasticsearch.xpack.inference.external.request.Request;
18+
import org.elasticsearch.xpack.inference.services.elastic.request.ElasticInferenceServiceRequest;
19+
import org.elasticsearch.xpack.inference.services.elastic.request.ElasticInferenceServiceRequestMetadata;
20+
import org.elasticsearch.xpack.inference.services.elastic.rerank.ElasticInferenceServiceRerankModel;
21+
import org.elasticsearch.xpack.inference.telemetry.TraceContext;
22+
import org.elasticsearch.xpack.inference.telemetry.TraceContextHandler;
23+
24+
import java.net.URI;
25+
import java.nio.charset.StandardCharsets;
26+
import java.util.List;
27+
import java.util.Objects;
28+
29+
public class ElasticInferenceServiceRerankRequest extends ElasticInferenceServiceRequest {
30+
31+
private final String query;
32+
private final List<String> documents;
33+
private final Integer topN;
34+
private final TraceContextHandler traceContextHandler;
35+
private final ElasticInferenceServiceRerankModel model;
36+
37+
public ElasticInferenceServiceRerankRequest(
38+
String query,
39+
List<String> documents,
40+
Integer topN,
41+
ElasticInferenceServiceRerankModel model,
42+
TraceContext traceContext,
43+
ElasticInferenceServiceRequestMetadata metadata
44+
) {
45+
super(metadata);
46+
this.query = query;
47+
this.documents = documents;
48+
this.topN = topN;
49+
this.model = Objects.requireNonNull(model);
50+
this.traceContextHandler = new TraceContextHandler(traceContext);
51+
}
52+
53+
@Override
54+
public HttpRequestBase createHttpRequestBase() {
55+
var httpPost = new HttpPost(getURI());
56+
var requestEntity = Strings.toString(
57+
new ElasticInferenceServiceRerankRequestEntity(query, documents, model.getServiceSettings().modelId(), topN)
58+
);
59+
60+
ByteArrayEntity byteEntity = new ByteArrayEntity(requestEntity.getBytes(StandardCharsets.UTF_8));
61+
httpPost.setEntity(byteEntity);
62+
63+
traceContextHandler.propagateTraceContext(httpPost);
64+
httpPost.setHeader(new BasicHeader(HttpHeaders.CONTENT_TYPE, XContentType.JSON.mediaType()));
65+
66+
return httpPost;
67+
}
68+
69+
public TraceContext getTraceContext() {
70+
return traceContextHandler.traceContext();
71+
}
72+
73+
@Override
74+
public String getInferenceEntityId() {
75+
return model.getInferenceEntityId();
76+
}
77+
78+
@Override
79+
public URI getURI() {
80+
return model.uri();
81+
}
82+
83+
@Override
84+
public Request truncate() {
85+
// no truncation
86+
return this;
87+
}
88+
89+
@Override
90+
public boolean[] getTruncationInfo() {
91+
// no truncation
92+
return null;
93+
}
94+
}

0 commit comments

Comments
 (0)