Skip to content

Commit 51e3249

Browse files
Update EIS sparse and dense embedding max batch size to 16 (#132646) (#132855)
(cherry picked from commit 81b4cce)
Conflicts:
  x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceService.java
Co-authored-by: Jonathan Buttner <[email protected]>
1 parent 0dc6305 commit 51e3249

File tree

2 files changed

+13
-2
lines changed

2 files changed

+13
-2
lines changed

docs/changelog/132646.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 132646
2+
summary: Update EIS sparse and dense embedding max batch size to 16
3+
area: Machine Learning
4+
type: bug
5+
issues: []

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceService.java

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,10 @@ public class ElasticInferenceService extends SenderService {
8787
public static final String NAME = "elastic";
8888
public static final String ELASTIC_INFERENCE_SERVICE_IDENTIFIER = "Elastic Inference Service";
8989
public static final Integer DENSE_TEXT_EMBEDDINGS_DIMENSIONS = 1024;
90-
public static final Integer SPARSE_TEXT_EMBEDDING_MAX_BATCH_SIZE = 512;
90+
// The maximum batch size for sparse text embeddings is set to 16.
91+
// This value was reduced from 512 due to memory constraints; batch sizes above 32 can cause GPU out-of-memory errors.
92+
// A batch size of 16 provides optimal throughput and stability, especially on lower-tier instance types.
93+
public static final Integer SPARSE_TEXT_EMBEDDING_MAX_BATCH_SIZE = 16;
9194

9295
private static final EnumSet<TaskType> IMPLEMENTED_TASK_TYPES = EnumSet.of(
9396
TaskType.SPARSE_EMBEDDING,
@@ -97,7 +100,10 @@ public class ElasticInferenceService extends SenderService {
97100
);
98101
private static final String SERVICE_NAME = "Elastic";
99102

100-
private static final Integer DENSE_TEXT_EMBEDDINGS_MAX_BATCH_SIZE = 32;
103+
// TODO: revisit this value once EIS supports dense models
104+
// The maximum batch size for dense text embeddings is proactively set to 16.
105+
// This mirrors the memory constraints observed with sparse embeddings
106+
private static final Integer DENSE_TEXT_EMBEDDINGS_MAX_BATCH_SIZE = 16;
101107

102108
// rainbow-sprinkles
103109
static final String DEFAULT_CHAT_COMPLETION_MODEL_ID_V1 = "rainbow-sprinkles";

0 commit comments

Comments (0)