Commit 5b5938e

Improve log messages around the max sequence length (#103)
#### Motivation

The existing messages were confusing to users.

#### Modifications

In the router, the error message was rephrased to make it more understandable for users who aren't familiar with the internals. In the server, we now print the maximum possible sequence length limited by the model sequence length. The existing print was showing how many output tokens can fit into memory if you pass max_sequence_length input tokens, and vice versa. I don't know what I was thinking when I wrote that.

#### Related Issues

https://github.ibm.com/ai-foundation/watson-fm-stack-tracker/issues/958

---------

Signed-off-by: Max de Bayser <[email protected]>
Signed-off-by: Maximilien de Bayser <[email protected]>
1 parent 009a2ba commit 5b5938e

2 files changed: +10 −8 lines changed


router/src/server.rs

Lines changed: 4 additions & 4 deletions
@@ -222,8 +222,8 @@ impl<'a, B: BatchType> BatchConfigValidator<'a, B> {
             self.batch_type.prefill_weight(&single_request_stats, 1);
         if max_batch_weight < single_request_prefill_weight {
             panic!(
-                "max_batch_weight ({}) not large enough for (prefill) max_sequence_length ({})",
-                max_batch_weight, max_sequence_length
+                "The provided max_sequence length ({}) results in a prefill batch weight that exceeds the estimated capacity ({})",
+                max_sequence_length, max_batch_weight
             )
         }
 
@@ -232,8 +232,8 @@ impl<'a, B: BatchType> BatchConfigValidator<'a, B> {
             .batch_initial_weight(&single_request_stats, 1);
         if max_batch_weight < single_request_nexttoken_weight {
             panic!(
-                "max_batch_weight ({}) not large enough for (next-token) max_sequence_length ({})",
-                max_batch_weight, max_sequence_length
+                "The provided max_sequence length ({}) results in a next-token batch weight that exceeds the estimated capacity ({})",
+                max_sequence_length, max_batch_weight
             )
         }
     }

server/text_generation_server/server.py

Lines changed: 6 additions & 4 deletions
@@ -409,13 +409,15 @@ def estimate_memory():
         memory_scaling_model = estimate_memory()
         compile()
 
-        max_input = memory_scaling_model.max_input_len_for_nt(1, max_sequence_length-1, sys.maxsize)
-        max_output = memory_scaling_model.max_output_len_for_nt(1, max_sequence_length-1, sys.maxsize)
-
         if local_rank == 0:
+            # For a batch of size 1 and an output of 1, get max input limited by max_sequence_length
+            max_input = memory_scaling_model.max_input_len_for_nt(1, 1, max_sequence_length)
+            # For a batch of size 1 and an input of 1, get max output limited by max_sequence_length
+            max_output = memory_scaling_model.max_output_len_for_nt(1, 1, max_sequence_length)
+            max_theoretical_len = min(max_input, max_output) + 1
             print(
                 "Maximum possible sequence length given available memory (for batch size 1): "
-                f"{min(max_input, max_output)}"
+                f"{max_theoretical_len}"
             )
 
     elif ESTIMATE_MEMORY == "manual":
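For context, the sketch below mimics the corrected computation outside the server. The `weight()` cost function, the `MEMORY_BUDGET` constant, and the standalone `max_input_len_for_nt` / `max_output_len_for_nt` helpers are made-up stand-ins for the fitted memory scaling model; only the call pattern (batch size 1, the other side fixed at 1 token, search capped at `max_sequence_length`) and the final `min(...) + 1` mirror the diff above.

```python
# Toy stand-in for the server's memory scaling model, NOT the real implementation.
MEMORY_BUDGET = 1_000_000  # hypothetical weight budget for a batch of size 1


def weight(batch_size: int, input_len: int, output_len: int) -> int:
    """Made-up cost function standing in for the fitted memory model."""
    return batch_size * (100 * input_len + 300 * output_len)


def max_input_len_for_nt(batch_size: int, output_len: int, max_len: int) -> int:
    """Largest input length that still fits in the budget, capped at max_len (assumed signature)."""
    best = 0
    for candidate in range(1, max_len + 1):
        if weight(batch_size, candidate, output_len) > MEMORY_BUDGET:
            break
        best = candidate
    return best


def max_output_len_for_nt(batch_size: int, input_len: int, max_len: int) -> int:
    """Largest output length that still fits in the budget, capped at max_len (assumed signature)."""
    best = 0
    for candidate in range(1, max_len + 1):
        if weight(batch_size, input_len, candidate) > MEMORY_BUDGET:
            break
        best = candidate
    return best


max_sequence_length = 4096

# New behaviour: hold the other side at 1 token and cap the search at
# max_sequence_length, so the printed number is itself a sequence length.
max_input = max_input_len_for_nt(1, 1, max_sequence_length)
max_output = max_output_len_for_nt(1, 1, max_sequence_length)
max_theoretical_len = min(max_input, max_output) + 1
print(
    "Maximum possible sequence length given available memory (for batch size 1): "
    f"{max_theoretical_len}"
)
```

With the old arguments (the other side fixed at max_sequence_length-1 and an unbounded cap), the same sketch would instead report how many extra tokens fit on top of an already maximal request, which is the confusing number this commit removes.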
