elastic · elasticsearchmachine · Jan 22, 2025 · Jan 22, 2025 · Jan 22, 2025 · Jan 22, 2025
diff --git a/muted-tests.yml b/muted-tests.yml
@@ -199,12 +199,6 @@ tests:
 - class: org.elasticsearch.xpack.esql.qa.mixed.MixedClusterEsqlSpecIT
   method: test {categorize.Categorize ASYNC}
   issue: https://github.com/elastic/elasticsearch/issues/116373
-- class: org.elasticsearch.xpack.inference.TextEmbeddingCrudIT
-  method: testPutE5WithTrainedModelAndInference
-  issue: https://github.com/elastic/elasticsearch/issues/114023
-- class: org.elasticsearch.xpack.inference.TextEmbeddingCrudIT
-  method: testPutE5Small_withPlatformAgnosticVariant
-  issue: https://github.com/elastic/elasticsearch/issues/113983
 - class: org.elasticsearch.datastreams.LazyRolloverDuringDisruptionIT
   method: testRolloverIsExecutedOnce
   issue: https://github.com/elastic/elasticsearch/issues/112634
@@ -214,9 +208,6 @@ tests:
 - class: org.elasticsearch.xpack.remotecluster.RemoteClusterSecurityWithApmTracingRestIT
   method: testTracingCrossCluster
   issue: https://github.com/elastic/elasticsearch/issues/112731
-- class: org.elasticsearch.xpack.inference.TextEmbeddingCrudIT
-  method: testPutE5Small_withPlatformSpecificVariant
-  issue: https://github.com/elastic/elasticsearch/issues/113950
 - class: org.elasticsearch.smoketest.DocsClientYamlTestSuiteIT
   method: test {yaml=reference/rest-api/usage/line_38}
   issue: https://github.com/elastic/elasticsearch/issues/113694
@@ -226,9 +217,6 @@ tests:
 - class: org.elasticsearch.reservedstate.service.FileSettingsServiceTests
   method: testProcessFileChanges
   issue: https://github.com/elastic/elasticsearch/issues/115280
-- class: org.elasticsearch.xpack.inference.DefaultEndPointsIT
-  method: testInferDeploysDefaultE5
-  issue: https://github.com/elastic/elasticsearch/issues/115361
 - class: org.elasticsearch.xpack.inference.InferenceCrudIT
   method: testSupportedStream
   issue: https://github.com/elastic/elasticsearch/issues/113430
@@ -285,9 +273,6 @@ tests:
 - class: org.elasticsearch.xpack.esql.qa.mixed.EsqlClientYamlIT
   method: test {p0=esql/61_enrich_ip/IP strings}
   issue: https://github.com/elastic/elasticsearch/issues/116529
-- class: org.elasticsearch.xpack.inference.DefaultEndPointsIT
-  method: testInferDeploysDefaultElser
-  issue: https://github.com/elastic/elasticsearch/issues/114913
 - class: org.elasticsearch.threadpool.SimpleThreadPoolIT
   method: testThreadPoolMetrics
   issue: https://github.com/elastic/elasticsearch/issues/108320
@@ -336,9 +321,6 @@ tests:
 - class: org.elasticsearch.xpack.searchablesnapshots.RetrySearchIntegTests
   method: testRetryPointInTime
   issue: https://github.com/elastic/elasticsearch/issues/117116
-- class: org.elasticsearch.xpack.inference.DefaultEndPointsIT
-  method: testMultipleInferencesTriggeringDownloadAndDeploy
-  issue: https://github.com/elastic/elasticsearch/issues/117208
 - class: org.elasticsearch.xpack.spatial.search.GeoGridAggAndQueryConsistencyIT
   method: testGeoPointGeoTile
   issue: https://github.com/elastic/elasticsearch/issues/115818

diff --git a/test/framework/src/main/java/org/elasticsearch/test/fixture/HttpHeaderParser.java b/test/framework/src/main/java/org/elasticsearch/test/fixture/HttpHeaderParser.java
@@ -0,0 +1,42 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.test.fixture;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+public enum HttpHeaderParser {
+    ;
+
+    private static final Pattern RANGE_HEADER_PATTERN = Pattern.compile("bytes=([0-9]+)-([0-9]+)");
+
+    /**
+     * Parses the value of an HTTP "Range" request header.
+     * <p>
+     * Only one bounded byte range of the form <code>bytes={range_start}-{range_end}</code> is recognised. Open-ended ranges,
+     * suffix lengths and multi-range values do not match the pattern and yield {@code null}. The two bounds are not compared
+     * with each other, so a value whose start exceeds its end is still returned as a {@link Range}.
+     *
+     * @see <a href="https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Range">MDN: Range header</a>
+     * @param rangeHeaderValue the raw header value
+     * @return the parsed {@link Range}, or {@code null} if the value does not match the supported form or one of the bounds
+     *         does not fit into a {@code long}
+     */
+    public static Range parseRangeHeader(String rangeHeaderValue) {
+        final Matcher rangeMatcher = RANGE_HEADER_PATTERN.matcher(rangeHeaderValue);
+        if (rangeMatcher.matches() == false) {
+            return null;
+        }
+        try {
+            final long start = Long.parseLong(rangeMatcher.group(1));
+            final long end = Long.parseLong(rangeMatcher.group(2));
+            return new Range(start, end);
+        } catch (NumberFormatException e) {
+            // The digits matched the pattern but are too large for a long
+            return null;
+        }
+    }
+
+    /**
+     * The two bounds of a parsed "Range" header, exactly as they appeared in the header value.
+     */
+    public record Range(long start, long end) {}
+}
diff --git a/test/framework/src/test/java/org/elasticsearch/http/HttpHeaderParserTests.java b/test/framework/src/test/java/org/elasticsearch/http/HttpHeaderParserTests.java
@@ -0,0 +1,53 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.http;
+
+import org.elasticsearch.common.Strings;
+import org.elasticsearch.test.ESTestCase;
+import org.elasticsearch.test.fixture.HttpHeaderParser;
+
+import java.math.BigInteger;
+
+public class HttpHeaderParserTests extends ESTestCase {
+
+    /** A single bounded range is parsed into a {@code Range} carrying the same start and end. */
+    public void testParseRangeHeader() {
+        final long rangeStart = randomLongBetween(0, 10_000);
+        final long rangeEnd = randomLongBetween(rangeStart, rangeStart + 10_000);
+        final HttpHeaderParser.Range expected = new HttpHeaderParser.Range(rangeStart, rangeEnd);
+        assertEquals(expected, HttpHeaderParser.parseRangeHeader("bytes=" + rangeStart + "-" + rangeEnd));
+    }
+
+    /** A bound built on top of {@code Long.MAX_VALUE + 1} is rejected, whether it is used as the start or as the end. */
+    public void testParseRangeHeaderInvalidLong() {
+        final BigInteger beyondLong = BigInteger.valueOf(Long.MAX_VALUE).add(BigInteger.ONE).add(randomBigInteger());
+        final String overflowingEnd = "bytes=123-" + beyondLong;
+        final String overflowingStart = "bytes=" + beyondLong + "-123";
+        assertNull(HttpHeaderParser.parseRangeHeader(overflowingEnd));
+        assertNull(HttpHeaderParser.parseRangeHeader(overflowingStart));
+    }
+
+    /** A header listing two comma-separated ranges is rejected. */
+    public void testParseRangeHeaderMultipleRangesNotMatched() {
+        // Drawn in the same order as the format arguments are consumed
+        final int firstStart = randomIntBetween(0, 99);
+        final int firstEnd = randomIntBetween(100, 199);
+        final int secondStart = randomIntBetween(200, 299);
+        final int secondEnd = randomIntBetween(300, 399);
+        final String multiRange = Strings.format("bytes=%d-%d,%d-%d", firstStart, firstEnd, secondStart, secondEnd);
+        assertNull(HttpHeaderParser.parseRangeHeader(multiRange));
+    }
+
+    /** A range with a start but no end is rejected. */
+    public void testParseRangeHeaderEndlessRangeNotMatched() {
+        final String openEnded = Strings.format("bytes=%d-", randomLongBetween(0, Long.MAX_VALUE));
+        assertNull(HttpHeaderParser.parseRangeHeader(openEnded));
+    }
+
+    /** A suffix-length range (no start) is rejected. */
+    public void testParseRangeHeaderSuffixLengthNotMatched() {
+        final String suffixLength = Strings.format("bytes=-%d", randomLongBetween(0, Long.MAX_VALUE));
+        assertNull(HttpHeaderParser.parseRangeHeader(suffixLength));
+    }
+}
diff --git a/x-pack/plugin/inference/qa/inference-service-tests/build.gradle b/x-pack/plugin/inference/qa/inference-service-tests/build.gradle
@@ -1,6 +1,7 @@
 apply plugin: 'elasticsearch.internal-java-rest-test'
 
 dependencies {
+  javaRestTestImplementation project(path: xpackModule('core'))
   javaRestTestImplementation project(path: xpackModule('inference'))
   clusterPlugins project(':x-pack:plugin:inference:qa:test-service-plugin')
 }

diff --git a/...-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceBaseRestTest.java b/...-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceBaseRestTest.java
@@ -22,6 +22,7 @@
 import org.elasticsearch.test.cluster.local.distribution.DistributionType;
 import org.elasticsearch.test.rest.ESRestTestCase;
 import org.elasticsearch.xpack.inference.external.response.streaming.ServerSentEvent;
+import org.junit.Before;
 import org.junit.ClassRule;
 
 import java.io.IOException;
@@ -37,6 +38,7 @@
 import static org.hamcrest.Matchers.hasSize;
 
 public class InferenceBaseRestTest extends ESRestTestCase {
+
     @ClassRule
     public static ElasticsearchCluster cluster = ElasticsearchCluster.local()
         .distribution(DistributionType.DEFAULT)
@@ -46,6 +48,22 @@ public class InferenceBaseRestTest extends ESRestTestCase {
         .user("x_pack_rest_user", "x-pack-test-password")
         .build();
 
+    // Local HTTP model server; setMlModelRepository() points the cluster's ML model repository setting at its URL.
+    @ClassRule
+    public static MlModelServer mlModelServer = new MlModelServer();
+
+    /**
+     * Before each test, stores the URL of {@link #mlModelServer} in the persistent cluster setting
+     * {@code xpack.ml.model_repository} and asserts that the settings update succeeded.
+     *
+     * @throws IOException if the settings request fails
+     */
+    @Before
+    public void setMlModelRepository() throws IOException {
+        final String repositoryUrl = mlModelServer.getUrl();
+        logger.info("setting ML model repository to: {}", repositoryUrl);
+        final String settingsBody = Strings.format("""
+            {
+              "persistent": {
+                "xpack.ml.model_repository": "%s"
+              }
+            }""", repositoryUrl);
+        final Request updateSettings = new Request("PUT", "/_cluster/settings");
+        updateSettings.setJsonEntity(settingsBody);
+        assertOK(client().performRequest(updateSettings));
+    }
+
     @Override
     protected String getTestRestCluster() {
         return cluster.getHttpAddresses();

diff --git a/...-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/MlModelServer.java b/...-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/MlModelServer.java
@@ -0,0 +1,142 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.inference;
+
+import com.sun.net.httpserver.HttpExchange;
+import com.sun.net.httpserver.HttpServer;
+
+import org.apache.http.HttpHeaders;
+import org.apache.http.HttpStatus;
+import org.apache.http.client.utils.URIBuilder;
+import org.elasticsearch.logging.LogManager;
+import org.elasticsearch.logging.Logger;
+import org.elasticsearch.test.fixture.HttpHeaderParser;
+import org.elasticsearch.xcontent.XContentParser;
+import org.elasticsearch.xcontent.XContentParserConfiguration;
+import org.elasticsearch.xcontent.XContentType;
+import org.elasticsearch.xpack.core.ml.inference.trainedmodel.ModelPackageConfig;
+import org.junit.rules.TestRule;
+import org.junit.runner.Description;
+import org.junit.runners.model.Statement;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.net.InetSocketAddress;
+import java.nio.charset.StandardCharsets;
+import java.util.Random;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+
+/**
+ * Simple HTTP server, usable as a JUnit {@link TestRule}, that serves ML model files.
+ * The URL path (without the leading slash) is looked up as a resource through the thread context class loader.
+ * If the resource is found, its content is returned, otherwise 404.
+ * Respects a single bounded range header to serve partial content.
+ * Requests for the <code>_linux-x86_64</code> variant of a model are answered with the default model's files.
+ */
+public class MlModelServer implements TestRule {
+
+    private static final String HOST = "localhost";
+    private static final Logger logger = LogManager.getLogger(MlModelServer.class);
+
+    /** Upper bound on random-port bind attempts, so that a persistent bind failure is reported instead of retried forever. */
+    private static final int MAX_BIND_ATTEMPTS = 100;
+
+    /** Maximum number of bytes copied from the resource to the response per read. */
+    private static final int COPY_CHUNK_BYTES = 1 << 20;
+
+    // Assigned while the rule is being evaluated; 0 until the server has been bound.
+    private int port;
+
+    /**
+     * @return the base URL (<code>http://localhost:{port}</code>) of this server, using the most recently chosen port
+     */
+    public String getUrl() {
+        return new URIBuilder().setScheme("http").setHost(HOST).setPort(port).toString();
+    }
+
+    /**
+     * Serves a single request: 404 if the resource does not exist, 200 with the full content if there is no usable
+     * range header, or 206 with the requested byte range otherwise.
+     *
+     * @param exchange the HTTP exchange to answer
+     * @throws IOException if reading the resource or writing the response fails, or if the resource ends before the
+     *                     announced number of bytes has been sent
+     */
+    private void handle(HttpExchange exchange) throws IOException {
+        String path = exchange.getRequestURI().getPath();
+        String rangeHeader = exchange.getRequestHeaders().getFirst(HttpHeaders.RANGE);
+        HttpHeaderParser.Range range = rangeHeader != null ? HttpHeaderParser.parseRangeHeader(rangeHeader) : null;
+        logger.info("request: {} range={}", path, range);
+
+        try (InputStream is = getInputStream(exchange)) {
+            int httpStatus;
+            long numBytes;
+            if (is == null) {
+                httpStatus = HttpStatus.SC_NOT_FOUND;
+                numBytes = 0;
+            } else if (range == null) {
+                httpStatus = HttpStatus.SC_OK;
+                // NOTE(review): available() is documented as an estimate only; this relies on the resource streams
+                // reporting their full remaining length - confirm for the class loaders used by the tests.
+                numBytes = is.available();
+            } else {
+                httpStatus = HttpStatus.SC_PARTIAL_CONTENT;
+                is.skipNBytes(range.start());
+                // Both range bounds are inclusive
+                numBytes = range.end() - range.start() + 1;
+            }
+            logger.info("response: {} {}", path, httpStatus);
+            exchange.sendResponseHeaders(httpStatus, numBytes);
+            try (OutputStream os = exchange.getResponseBody()) {
+                while (numBytes > 0) {
+                    byte[] bytes = is.readNBytes((int) Math.min(COPY_CHUNK_BYTES, numBytes));
+                    if (bytes.length == 0) {
+                        // An empty read means end of stream: the requested range reaches past the end of the resource.
+                        // Fail the exchange rather than spinning forever on a counter that can no longer decrease.
+                        throw new IOException("resource [" + path + "] ended with [" + numBytes + "] requested bytes still unsent");
+                    }
+                    os.write(bytes);
+                    numBytes -= bytes.length;
+                }
+            }
+        }
+    }
+
+    /**
+     * Opens the resource that backs the requested path.
+     *
+     * @param exchange the HTTP exchange whose request path names the resource
+     * @return a stream over the content to serve, or {@code null} if the path has no extension or no such resource exists
+     * @throws IOException if the default metadata cannot be read or parsed
+     */
+    private InputStream getInputStream(HttpExchange exchange) throws IOException {
+        String path = exchange.getRequestURI().getPath().substring(1);  // Strip leading slash
+        int firstDot = path.indexOf('.');
+        if (firstDot < 0) {
+            // No "<modelId>.<extension>" structure, so this cannot name a model file: let the caller answer with 404
+            return null;
+        }
+        String modelId = path.substring(0, firstDot);
+        String extension = path.substring(firstDot + 1);
+
+        // If a model specifically optimized for some platform is requested,
+        // serve the default non-optimized model instead, which is compatible.
+        String defaultModelId = modelId.replace("_linux-x86_64", "");
+
+        ClassLoader classloader = Thread.currentThread().getContextClassLoader();
+        InputStream is = classloader.getResourceAsStream(defaultModelId + "." + extension);
+        if (is == null || modelId.equals(defaultModelId) || extension.equals("metadata.json") == false) {
+            return is;
+        }
+
+        // When an optimized version is requested, fix the default metadata,
+        // so that it contains the correct model ID.
+        final byte[] defaultMetadata;
+        try (InputStream resource = is) {
+            // Closed here on every path, including a failed read
+            defaultMetadata = resource.readAllBytes();
+        }
+        try (XContentParser parser = XContentType.JSON.xContent().createParser(XContentParserConfiguration.EMPTY, defaultMetadata)) {
+            ModelPackageConfig packageConfig = ModelPackageConfig.fromXContentLenient(parser);
+            packageConfig = new ModelPackageConfig.Builder(packageConfig).setPackedModelId(modelId).build();
+            return new ByteArrayInputStream(packageConfig.toString().getBytes(StandardCharsets.UTF_8));
+        }
+    }
+
+    /**
+     * Wraps the given statement so that the model server is started on a random port before the statement runs
+     * and stopped again afterwards, whether or not the statement fails.
+     */
+    @Override
+    public Statement apply(Statement statement, Description description) {
+        return new Statement() {
+            @Override
+            public void evaluate() throws Throwable {
+                logger.info("Starting ML model server");
+                HttpServer server = HttpServer.create();
+                bindToRandomPort(server);
+                logger.info("Bound ML model server to port {}", port);
+
+                ExecutorService executor = Executors.newCachedThreadPool();
+                server.setExecutor(executor);
+                server.createContext("/", MlModelServer.this::handle);
+                server.start();
+
+                try {
+                    statement.evaluate();
+                } finally {
+                    logger.info("Stopping ML model server on port {}", port);
+                    server.stop(1);
+                    executor.shutdown();
+                }
+            }
+        };
+    }
+
+    /**
+     * Binds the server to a randomly chosen port in [10000, 65535] and records that port.
+     * A port that cannot be bound is skipped in favour of another random one, up to {@link #MAX_BIND_ATTEMPTS} times.
+     *
+     * @throws IOException if no port could be bound; the failure of the last attempt is attached as the cause
+     */
+    private void bindToRandomPort(HttpServer server) throws IOException {
+        Random random = new Random();
+        IOException lastFailure = null;
+        for (int attempt = 0; attempt < MAX_BIND_ATTEMPTS; attempt++) {
+            port = random.nextInt(10000, 65536);
+            try {
+                server.bind(new InetSocketAddress(HOST, port), 1);
+                return;
+            } catch (IOException e) {
+                // Most likely the port is taken; remember the failure and try a different port
+                lastFailure = e;
+            }
+        }
+        throw new IOException(
+            "Unable to bind ML model server to a port on [" + HOST + "] after [" + MAX_BIND_ATTEMPTS + "] attempts",
+            lastFailure
+        );
+    }
+}
diff --git a/...ce-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/TextEmbeddingCrudIT.java b/...ce-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/TextEmbeddingCrudIT.java
@@ -18,8 +18,6 @@
 
 import static org.hamcrest.Matchers.containsString;
 
-// This test was previously disabled in CI due to the models being too large
-// See "https://github.com/elastic/elasticsearch/issues/105198".
 public class TextEmbeddingCrudIT extends InferenceBaseRestTest {
 
     public void testPutE5Small_withNoModelVariant() {

diff --git a/...ference/qa/inference-service-tests/src/javaRestTest/resources/elser_model_2.metadata.json b/...ference/qa/inference-service-tests/src/javaRestTest/resources/elser_model_2.metadata.json
@@ -0,0 +1,25 @@
+{
+  "packaged_model_id": "elser_model_2",
+  "minimum_version": "11.0.0",
+  "size": 1859242,
+  "sha256": "602dbccfb2746e5700bf65d8019b06fb2ec1e3c5bfb980eb2005fc17c1bfe0c0",
+  "description": "Elastic Learned Sparse EncodeR v2",
+  "model_type": "pytorch",
+  "tags": [
+    "elastic"
+  ],
+  "inference_config": {
+    "text_expansion": {
+      "tokenization": {
+        "bert": {
+          "do_lower_case": true,
+          "with_special_tokens": true,
+          "max_sequence_length": 512,
+          "truncate": "first",
+          "span": -1
+        }
+      }
+    }
+  },
+  "vocabulary_file": "elser_model_2.vocab.json"
+}
diff --git a/...k/plugin/inference/qa/inference-service-tests/src/javaRestTest/resources/elser_model_2.pt b/...k/plugin/inference/qa/inference-service-tests/src/javaRestTest/resources/elser_model_2.pt
diff --git a/.../inference/qa/inference-service-tests/src/javaRestTest/resources/elser_model_2.vocab.json b/.../inference/qa/inference-service-tests/src/javaRestTest/resources/elser_model_2.vocab.json