diff --git a/docs/changelog/132646.yaml b/docs/changelog/132646.yaml new file mode 100644 index 0000000000000..83c31282619a8 --- /dev/null +++ b/docs/changelog/132646.yaml @@ -0,0 +1,5 @@ +pr: 132646 +summary: Update EIS sparse and dense embedding max batch size to 16 +area: Machine Learning +type: bug +issues: [] diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceService.java index 640929b058760..c3c3d81be65c4 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceService.java @@ -89,7 +89,10 @@ public class ElasticInferenceService extends SenderService { public static final String NAME = "elastic"; public static final String ELASTIC_INFERENCE_SERVICE_IDENTIFIER = "Elastic Inference Service"; public static final Integer DENSE_TEXT_EMBEDDINGS_DIMENSIONS = 1024; - public static final Integer SPARSE_TEXT_EMBEDDING_MAX_BATCH_SIZE = 512; + // The maximum batch size for sparse text embeddings is set to 16. + // This value was reduced from 512 due to memory constraints; batch sizes above 32 can cause GPU out-of-memory errors. + // A batch size of 16 provides optimal throughput and stability, especially on lower-tier instance types. + public static final Integer SPARSE_TEXT_EMBEDDING_MAX_BATCH_SIZE = 16; private static final EnumSet IMPLEMENTED_TASK_TYPES = EnumSet.of( TaskType.SPARSE_EMBEDDING, @@ -99,8 +102,10 @@ public class ElasticInferenceService extends SenderService { ); private static final String SERVICE_NAME = "Elastic"; - // TODO: check with team, what makes the most sense - private static final Integer DENSE_TEXT_EMBEDDINGS_MAX_BATCH_SIZE = 32; + // TODO: revisit this value once EIS supports dense models + // The maximum batch size for dense text embeddings is proactively set to 16. + // This mirrors the memory constraints observed with sparse embeddings + private static final Integer DENSE_TEXT_EMBEDDINGS_MAX_BATCH_SIZE = 16; // rainbow-sprinkles static final String DEFAULT_CHAT_COMPLETION_MODEL_ID_V1 = "rainbow-sprinkles";