Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/132646.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 132646
summary: Update EIS sparse and dense embedding max batch size to 16
area: Machine Learning
type: bug
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,10 @@ public class ElasticInferenceService extends SenderService {
public static final String NAME = "elastic";
public static final String ELASTIC_INFERENCE_SERVICE_IDENTIFIER = "Elastic Inference Service";
public static final Integer DENSE_TEXT_EMBEDDINGS_DIMENSIONS = 1024;
public static final Integer SPARSE_TEXT_EMBEDDING_MAX_BATCH_SIZE = 512;
// The maximum batch size for sparse text embeddings is set to 16.
// This value was reduced from 512 due to memory constraints; batch sizes above 32 can cause GPU out-of-memory errors.
// A batch size of 16 provides optimal throughput and stability, especially on lower-tier instance types.
public static final Integer SPARSE_TEXT_EMBEDDING_MAX_BATCH_SIZE = 16;

private static final EnumSet<TaskType> IMPLEMENTED_TASK_TYPES = EnumSet.of(
TaskType.SPARSE_EMBEDDING,
Expand All @@ -99,8 +102,10 @@ public class ElasticInferenceService extends SenderService {
);
private static final String SERVICE_NAME = "Elastic";

// TODO: check with team, what makes the most sense
private static final Integer DENSE_TEXT_EMBEDDINGS_MAX_BATCH_SIZE = 32;
// TODO: revisit this value once EIS supports dense models
// The maximum batch size for dense text embeddings is proactively set to 16.
// This mirrors the limit chosen for sparse embeddings, which was lowered because of memory constraints.
private static final Integer DENSE_TEXT_EMBEDDINGS_MAX_BATCH_SIZE = 16;

// rainbow-sprinkles
static final String DEFAULT_CHAT_COMPLETION_MODEL_ID_V1 = "rainbow-sprinkles";
Expand Down
Loading