Skip to content

Commit 258443d

Browse files
committed
Mirror for dense embeddings and add comments
1 parent e3c9927 commit 258443d

File tree

1 file changed

+7
-2
lines changed

1 file changed

+7
-2
lines changed

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceService.java

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,9 @@ public class ElasticInferenceService extends SenderService {
 91  91      public static final String NAME = "elastic";
 92  92      public static final String ELASTIC_INFERENCE_SERVICE_IDENTIFIER = "Elastic Inference Service";
 93  93      public static final Integer DENSE_TEXT_EMBEDDINGS_DIMENSIONS = 1024;
     94 +    // The maximum batch size for sparse text embeddings is set to 16.
     95 +    // This value was reduced from 512 due to memory constraints; batch sizes above 32 can cause GPU out-of-memory errors.
     96 +    // A batch size of 16 provides optimal throughput and stability, especially on lower-tier instance types.
 94  97      public static final Integer SPARSE_TEXT_EMBEDDING_MAX_BATCH_SIZE = 16;
 95  98
 96  99      private static final EnumSet<TaskType> IMPLEMENTED_TASK_TYPES = EnumSet.of(
@@ -101,8 +104,10 @@ public class ElasticInferenceService extends SenderService {
101 104      );
102 105      private static final String SERVICE_NAME = "Elastic";
103 106
104     -    // TODO: check with team, what makes the most sense
105     -    private static final Integer DENSE_TEXT_EMBEDDINGS_MAX_BATCH_SIZE = 32;
    107 +    // TODO: revisit this value once EIS supports dense models
    108 +    // The maximum batch size for dense text embeddings is proactively set to 16.
    109 +    // This mirrors the memory constraints observed with sparse embeddings
    110 +    private static final Integer DENSE_TEXT_EMBEDDINGS_MAX_BATCH_SIZE = 16;
106 111
107 112      // rainbow-sprinkles
108 113      static final String DEFAULT_CHAT_COMPLETION_MODEL_ID_V1 = "rainbow-sprinkles";

0 commit comments

Comments
 (0)