elastic
diff --git a/‎.buildkite/hooks/pre-command‎
Lines changed: 3 additions & 5 deletions b/‎.buildkite/hooks/pre-command‎
Lines changed: 3 additions & 5 deletions
diff --git a/‎.buildkite/scripts/index-micro-benchmark-results.sh‎
Lines changed: 1 addition & 1 deletion b/‎.buildkite/scripts/index-micro-benchmark-results.sh‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎distribution/tools/server-cli/src/main/java/org/elasticsearch/server/cli/JvmErgonomics.java‎
Lines changed: 3 additions & 1 deletion b/‎distribution/tools/server-cli/src/main/java/org/elasticsearch/server/cli/JvmErgonomics.java‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎distribution/tools/server-cli/src/main/java/org/elasticsearch/server/cli/MachineDependentHeap.java‎
Lines changed: 26 additions & 10 deletions b/‎distribution/tools/server-cli/src/main/java/org/elasticsearch/server/cli/MachineDependentHeap.java‎
Lines changed: 26 additions & 10 deletions
diff --git a/‎distribution/tools/server-cli/src/test/java/org/elasticsearch/server/cli/MachineDependentHeapTests.java‎
Lines changed: 5 additions & 5 deletions b/‎distribution/tools/server-cli/src/test/java/org/elasticsearch/server/cli/MachineDependentHeapTests.java‎
Lines changed: 5 additions & 5 deletions
diff --git a/‎docs/changelog/128742.yaml‎
Lines changed: 5 additions & 0 deletions b/‎docs/changelog/128742.yaml‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎server/src/main/java/org/elasticsearch/TransportVersions.java‎
Lines changed: 2 additions & 0 deletions b/‎server/src/main/java/org/elasticsearch/TransportVersions.java‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎server/src/main/java/org/elasticsearch/monitor/jvm/JvmInfo.java‎
Lines changed: 10 additions & 7 deletions b/‎server/src/main/java/org/elasticsearch/monitor/jvm/JvmInfo.java‎
Lines changed: 10 additions & 7 deletions
diff --git a/‎x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceNamedWriteablesProvider.java‎
Lines changed: 15 additions & 2 deletions b/‎x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceNamedWriteablesProvider.java‎
Lines changed: 15 additions & 2 deletions
diff --git a/‎x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/elastic/rerank/ElasticInferenceServiceRerankRequest.java‎
Lines changed: 94 additions & 0 deletions b/‎x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/elastic/rerank/ElasticInferenceServiceRerankRequest.java‎
Lines changed: 94 additions & 0 deletions
@@ -95,13 +95,11 @@ if [[ "${USE_PROD_DOCKER_CREDENTIALS:-}" == "true" ]]; then
 fi
 
 if [[ "${USE_PERF_CREDENTIALS:-}" == "true" ]]; then
-  PERF_METRICS_HOST=$(vault read -field=es_host /secret/ci/elastic-elasticsearch/esbench-metics)
-  PERF_METRICS_INDEX="dummy-micro-benchmarks"
-  PERF_METRICS_USERNAME=$(vault read -field=es_username /secret/ci/elastic-elasticsearch/esbench-metics)
-  PERF_METRICS_PASSWORD=$(vault read -field=es_password /secret/ci/elastic-elasticsearch/esbench-metics)
+  PERF_METRICS_HOST=$(vault read -field=es_host /secret/ci/elastic-elasticsearch/microbenchmarks-metrics)
+  PERF_METRICS_USERNAME=$(vault read -field=es_username /secret/ci/elastic-elasticsearch/microbenchmarks-metrics)
+  PERF_METRICS_PASSWORD=$(vault read -field=es_password /secret/ci/elastic-elasticsearch/microbenchmarks-metrics)
 
   export PERF_METRICS_HOST
-  export PERF_METRICS_INDEX
   export PERF_METRICS_USERNAME
   export PERF_METRICS_PASSWORD
 fi
 
@@ -3,7 +3,7 @@
 jq -c '.[]' "benchmarks/build/result.json" | while read -r doc; do
   doc=$(echo "$doc" | jq --argjson timestamp "$(date +%s000)" '. + {"@timestamp": $timestamp}')
   echo "Indexing $(echo "$doc" | jq -r '.benchmark')"
-  curl -s -X POST "https://$PERF_METRICS_HOST/$PERF_METRICS_INDEX/_doc" \
+  curl -s -X POST "https://$PERF_METRICS_HOST/metrics-microbenchmarks-default/_doc" \
     -u "$PERF_METRICS_USERNAME:$PERF_METRICS_PASSWORD" \
     -H 'Content-Type: application/json' \
     -d "$doc"
 
@@ -28,6 +28,8 @@
  */
 final class JvmErgonomics {
 
+    static final double DIRECT_MEMORY_TO_HEAP_FACTOR = 0.5;
+
     private JvmErgonomics() {
         throw new AssertionError("No instances intended");
     }
@@ -44,7 +46,7 @@ static List<String> choose(final List<String> userDefinedJvmOptions, Settings no
         final long heapSize = JvmOption.extractMaxHeapSize(finalJvmOptions);
         final long maxDirectMemorySize = JvmOption.extractMaxDirectMemorySize(finalJvmOptions);
         if (maxDirectMemorySize == 0) {
-            ergonomicChoices.add("-XX:MaxDirectMemorySize=" + heapSize / 2);
+            ergonomicChoices.add("-XX:MaxDirectMemorySize=" + (long) (DIRECT_MEMORY_TO_HEAP_FACTOR * heapSize));
         }
 
         final boolean tuneG1GCForSmallHeap = tuneG1GCForSmallHeap(heapSize);
 
@@ -11,6 +11,7 @@
 
 import org.elasticsearch.cluster.node.DiscoveryNodeRole;
 import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.util.FeatureFlag;
 import org.elasticsearch.node.NodeRoleSettings;
 
 import java.io.IOException;
@@ -37,6 +38,8 @@ public class MachineDependentHeap {
     protected static final long MAX_HEAP_SIZE = GB * 31; // 31GB
     protected static final long MIN_HEAP_SIZE = 1024 * 1024 * 128; // 128MB
 
+    private static final FeatureFlag NEW_ML_MEMORY_COMPUTATION_FEATURE_FLAG = new FeatureFlag("new_ml_memory_computation");
+
     public MachineDependentHeap() {}
 
     /**
@@ -76,12 +79,16 @@ protected int getHeapSizeMb(Settings nodeSettings, MachineNodeRole role, long av
             /*
              * Machine learning only node.
              *
-             * <p>Heap is computed as:
-             * <ul>
-             *     <li>40% of total system memory when total system memory 16 gigabytes or less.</li>
-             *     <li>40% of the first 16 gigabytes plus 10% of memory above that when total system memory is more than 16 gigabytes.</li>
-             *     <li>The absolute maximum heap size is 31 gigabytes.</li>
-             * </ul>
+             * The memory reserved for Java is computed as:
+             *   - 40% of total system memory when total system memory is 16 gigabytes or less.
+             *   - 40% of the first 16 gigabytes plus 10% of memory above that when total system memory is more than 16 gigabytes.
+             *   - The absolute maximum heap size is 31 gigabytes.
+             *
+             * When NEW_ML_MEMORY_COMPUTATION_FEATURE_FLAG is enabled, this Java memory is divided as follows:
+             *     - 2/3 of the Java memory is reserved for the Java heap.
+             *     - 1/3 of the Java memory is reserved for the Java direct memory.
+             *
+             * The direct memory being half of the heap is set by the JvmErgonomics class.
              *
              * In all cases the result is rounded down to the next whole multiple of 4 megabytes.
              * The reason for doing this is that Java will round requested heap sizes to a multiple
@@ -95,13 +102,22 @@ protected int getHeapSizeMb(Settings nodeSettings, MachineNodeRole role, long av
              *
              * If this formula is changed then corresponding changes must be made to the {@code NativeMemoryCalculator} and
              * {@code MlAutoscalingDeciderServiceTests} classes in the ML plugin code. Failure to keep the logic synchronized
-             * could result in repeated autoscaling up and down.
+             * could result in ML processes crashing with OOM errors or repeated autoscaling up and down.
              */
             case ML_ONLY -> {
-                if (availableMemory <= (GB * 16)) {
-                    yield mb((long) (availableMemory * .4), 4);
+                double heapFractionBelow16GB = 0.4;
+                double heapFractionAbove16GB = 0.1;
+                if (NEW_ML_MEMORY_COMPUTATION_FEATURE_FLAG.isEnabled()) {
+                    heapFractionBelow16GB = 0.4 / (1.0 + JvmErgonomics.DIRECT_MEMORY_TO_HEAP_FACTOR);
+                    heapFractionAbove16GB = 0.1 / (1.0 + JvmErgonomics.DIRECT_MEMORY_TO_HEAP_FACTOR);
+                }
+                if (availableMemory <= GB * 16) {
+                    yield mb((long) (availableMemory * heapFractionBelow16GB), 4);
                 } else {
-                    yield mb((long) min((GB * 16) * .4 + (availableMemory - GB * 16) * .1, MAX_HEAP_SIZE), 4);
+                    yield mb(
+                        (long) min(GB * 16 * heapFractionBelow16GB + (availableMemory - GB * 16) * heapFractionAbove16GB, MAX_HEAP_SIZE),
+                        4
+                    );
                 }
             }
             /*
 
@@ -56,13 +56,13 @@ public void testMasterOnlyOptions() throws Exception {
     }
 
     public void testMlOnlyOptions() throws Exception {
-        assertHeapOptions(1, containsInAnyOrder("-Xmx408m", "-Xms408m"), "ml");
-        assertHeapOptions(4, containsInAnyOrder("-Xmx1636m", "-Xms1636m"), "ml");
-        assertHeapOptions(32, containsInAnyOrder("-Xmx8192m", "-Xms8192m"), "ml");
-        assertHeapOptions(64, containsInAnyOrder("-Xmx11468m", "-Xms11468m"), "ml");
+        assertHeapOptions(1, containsInAnyOrder("-Xmx272m", "-Xms272m"), "ml");
+        assertHeapOptions(4, containsInAnyOrder("-Xmx1092m", "-Xms1092m"), "ml");
+        assertHeapOptions(32, containsInAnyOrder("-Xmx5460m", "-Xms5460m"), "ml");
+        assertHeapOptions(64, containsInAnyOrder("-Xmx7644m", "-Xms7644m"), "ml");
         // We'd never see a node this big in Cloud. With direct memory accounted for, the heap here is
         // (0.4 * 16 + 0.1 * (263 - 16)) / 1.5, about 20.7GB, so the 31GB absolute maximum does not kick in
-        assertHeapOptions(263, containsInAnyOrder("-Xmx31744m", "-Xms31744m"), "ml");
+        assertHeapOptions(263, containsInAnyOrder("-Xmx21228m", "-Xms21228m"), "ml");
     }
 
     public void testDataNodeOptions() throws Exception {
 
@@ -0,0 +1,5 @@
+pr: 128742
+summary: "Account for Java direct memory on machine learning nodes to prevent out-of-memory crashes."
+area: Machine Learning
+type: bug
+issues: []
@@ -193,6 +193,7 @@ static TransportVersion def(int id) {
     public static final TransportVersion ESQL_QUERY_PLANNING_DURATION_8_19 = def(8_841_0_45);
     public static final TransportVersion SEARCH_SOURCE_EXCLUDE_VECTORS_PARAM_8_19 = def(8_841_0_46);
     public static final TransportVersion ML_INFERENCE_MISTRAL_CHAT_COMPLETION_ADDED_8_19 = def(8_841_0_47);
+    public static final TransportVersion ML_INFERENCE_ELASTIC_RERANK_ADDED_8_19 = def(8_841_0_48);
     public static final TransportVersion V_9_0_0 = def(9_000_0_09);
     public static final TransportVersion INITIAL_ELASTICSEARCH_9_0_1 = def(9_000_0_10);
     public static final TransportVersion INITIAL_ELASTICSEARCH_9_0_2 = def(9_000_0_11);
@@ -290,6 +291,7 @@ static TransportVersion def(int id) {
     public static final TransportVersion IDP_CUSTOM_SAML_ATTRIBUTES_ALLOW_LIST = def(9_091_0_00);
     public static final TransportVersion SEARCH_SOURCE_EXCLUDE_VECTORS_PARAM = def(9_092_0_00);
     public static final TransportVersion SNAPSHOT_INDEX_SHARD_STATUS_MISSING_STATS = def(9_093_0_00);
+    public static final TransportVersion ML_INFERENCE_ELASTIC_RERANK = def(9_094_0_00);
 
     /*
      * STOP! READ THIS FIRST! No, really,
 
@@ -43,14 +43,7 @@ public class JvmInfo implements ReportingService.Info {
         long nonHeapInit = memoryMXBean.getNonHeapMemoryUsage().getInit() < 0 ? 0 : memoryMXBean.getNonHeapMemoryUsage().getInit();
         long nonHeapMax = memoryMXBean.getNonHeapMemoryUsage().getMax() < 0 ? 0 : memoryMXBean.getNonHeapMemoryUsage().getMax();
         long directMemoryMax = 0;
-        try {
-            Class<?> vmClass = Class.forName("sun.misc.VM");
-            directMemoryMax = (Long) vmClass.getMethod("maxDirectMemory").invoke(null);
-        } catch (Exception t) {
-            // ignore
-        }
         String[] inputArguments = runtimeMXBean.getInputArguments().toArray(new String[runtimeMXBean.getInputArguments().size()]);
-        Mem mem = new Mem(heapInit, heapMax, nonHeapInit, nonHeapMax, directMemoryMax);
 
         String bootClassPath;
         try {
@@ -130,6 +123,11 @@ public class JvmInfo implements ReportingService.Info {
                 configuredMaxHeapSize = Long.parseLong((String) valueMethod.invoke(maxHeapSizeVmOptionObject));
             } catch (Exception ignored) {}
 
+            try {
+                Object maxDirectMemorySizeVmOptionObject = vmOptionMethod.invoke(hotSpotDiagnosticMXBean, "MaxDirectMemorySize");
+                directMemoryMax = Long.parseLong((String) valueMethod.invoke(maxDirectMemorySizeVmOptionObject));
+            } catch (Exception ignored) {}
+
             try {
                 Object useSerialGCVmOptionObject = vmOptionMethod.invoke(hotSpotDiagnosticMXBean, "UseSerialGC");
                 useSerialGC = (String) valueMethod.invoke(useSerialGCVmOptionObject);
@@ -139,6 +137,8 @@ public class JvmInfo implements ReportingService.Info {
 
         }
 
+        Mem mem = new Mem(heapInit, heapMax, nonHeapInit, nonHeapMax, directMemoryMax);
+
         INSTANCE = new JvmInfo(
             ProcessHandle.current().pid(),
             System.getProperty("java.version"),
@@ -496,5 +496,8 @@ public ByteSizeValue getHeapMax() {
             return ByteSizeValue.ofBytes(heapMax);
         }
 
+        public ByteSizeValue getTotalMax() {
+            return ByteSizeValue.ofBytes(heapMax + nonHeapMax + directMemoryMax);
+        }
     }
 }
@@ -70,6 +70,7 @@
 import org.elasticsearch.xpack.inference.services.custom.response.TextEmbeddingResponseParser;
 import org.elasticsearch.xpack.inference.services.deepseek.DeepSeekChatCompletionModel;
 import org.elasticsearch.xpack.inference.services.elastic.completion.ElasticInferenceServiceCompletionServiceSettings;
+import org.elasticsearch.xpack.inference.services.elastic.rerank.ElasticInferenceServiceRerankServiceSettings;
 import org.elasticsearch.xpack.inference.services.elastic.sparseembeddings.ElasticInferenceServiceSparseEmbeddingsServiceSettings;
 import org.elasticsearch.xpack.inference.services.elasticsearch.CustomElandInternalServiceSettings;
 import org.elasticsearch.xpack.inference.services.elasticsearch.CustomElandInternalTextEmbeddingServiceSettings;
@@ -166,7 +167,7 @@ public static List<NamedWriteableRegistry.Entry> getNamedWriteables() {
         addAnthropicNamedWritables(namedWriteables);
         addAmazonBedrockNamedWriteables(namedWriteables);
         addAwsNamedWriteables(namedWriteables);
-        addEisNamedWriteables(namedWriteables);
+        addElasticNamedWriteables(namedWriteables);
         addAlibabaCloudSearchNamedWriteables(namedWriteables);
         addJinaAINamedWriteables(namedWriteables);
         addVoyageAINamedWriteables(namedWriteables);
@@ -742,20 +743,32 @@ private static void addVoyageAINamedWriteables(List<NamedWriteableRegistry.Entry
         );
     }
 
-    private static void addEisNamedWriteables(List<NamedWriteableRegistry.Entry> namedWriteables) {
+    private static void addElasticNamedWriteables(List<NamedWriteableRegistry.Entry> namedWriteables) {
+        // Sparse Text Embeddings
         namedWriteables.add(
             new NamedWriteableRegistry.Entry(
                 ServiceSettings.class,
                 ElasticInferenceServiceSparseEmbeddingsServiceSettings.NAME,
                 ElasticInferenceServiceSparseEmbeddingsServiceSettings::new
             )
         );
+
+        // Completion
         namedWriteables.add(
             new NamedWriteableRegistry.Entry(
                 ServiceSettings.class,
                 ElasticInferenceServiceCompletionServiceSettings.NAME,
                 ElasticInferenceServiceCompletionServiceSettings::new
             )
         );
+
+        // Rerank
+        namedWriteables.add(
+            new NamedWriteableRegistry.Entry(
+                ServiceSettings.class,
+                ElasticInferenceServiceRerankServiceSettings.NAME,
+                ElasticInferenceServiceRerankServiceSettings::new
+            )
+        );
     }
 }
@@ -0,0 +1,94 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.inference.external.request.elastic.rerank;
+
+import org.apache.http.HttpHeaders;
+import org.apache.http.client.methods.HttpPost;
+import org.apache.http.client.methods.HttpRequestBase;
+import org.apache.http.entity.ByteArrayEntity;
+import org.apache.http.message.BasicHeader;
+import org.elasticsearch.common.Strings;
+import org.elasticsearch.xcontent.XContentType;
+import org.elasticsearch.xpack.inference.external.request.Request;
+import org.elasticsearch.xpack.inference.services.elastic.request.ElasticInferenceServiceRequest;
+import org.elasticsearch.xpack.inference.services.elastic.request.ElasticInferenceServiceRequestMetadata;
+import org.elasticsearch.xpack.inference.services.elastic.rerank.ElasticInferenceServiceRerankModel;
+import org.elasticsearch.xpack.inference.telemetry.TraceContext;
+import org.elasticsearch.xpack.inference.telemetry.TraceContextHandler;
+
+import java.net.URI;
+import java.nio.charset.StandardCharsets;
+import java.util.List;
+import java.util.Objects;
+
+public class ElasticInferenceServiceRerankRequest extends ElasticInferenceServiceRequest {
+
+    private final String query;
+    private final List<String> documents;
+    private final Integer topN;
+    private final TraceContextHandler traceContextHandler;
+    private final ElasticInferenceServiceRerankModel model;
+
+    public ElasticInferenceServiceRerankRequest(
+        String query,
+        List<String> documents,
+        Integer topN,
+        ElasticInferenceServiceRerankModel model,
+        TraceContext traceContext,
+        ElasticInferenceServiceRequestMetadata metadata
+    ) {
+        super(metadata);
+        this.query = query;
+        this.documents = documents;
+        this.topN = topN;
+        this.model = Objects.requireNonNull(model);
+        this.traceContextHandler = new TraceContextHandler(traceContext);
+    }
+
+    @Override
+    public HttpRequestBase createHttpRequestBase() {
+        var httpPost = new HttpPost(getURI());
+        var requestEntity = Strings.toString(
+            new ElasticInferenceServiceRerankRequestEntity(query, documents, model.getServiceSettings().modelId(), topN)
+        );
+
+        ByteArrayEntity byteEntity = new ByteArrayEntity(requestEntity.getBytes(StandardCharsets.UTF_8));
+        httpPost.setEntity(byteEntity);
+
+        traceContextHandler.propagateTraceContext(httpPost);
+        httpPost.setHeader(new BasicHeader(HttpHeaders.CONTENT_TYPE, XContentType.JSON.mediaType()));
+
+        return httpPost;
+    }
+
+    public TraceContext getTraceContext() {
+        return traceContextHandler.traceContext();
+    }
+
+    @Override
+    public String getInferenceEntityId() {
+        return model.getInferenceEntityId();
+    }
+
+    @Override
+    public URI getURI() {
+        return model.uri();
+    }
+
+    @Override
+    public Request truncate() {
+        // This request is never truncated; it is returned unchanged.
+        return this;
+    }
+
+    @Override
+    public boolean[] getTruncationInfo() {
+        // No truncation is ever performed (see truncate()), so there is no truncation info; callers get null.
+        return null;
+    }
+}