
Commit 8f88e44

dcaox authored and fredricz-20070104 committed
[None][fix] Fix bug of undefined py_topk_logprobs_vals (NVIDIA#8789)
Signed-off-by: Dong Cao <[email protected]>
Signed-off-by: FredricZ-2007 <[email protected]>
1 parent 331f062 commit 8f88e44

File tree

3 files changed: +25 −3 lines changed

tensorrt_llm/_torch/pyexecutor/executor_request_queue.py

Lines changed: 3 additions & 1 deletion
@@ -441,10 +441,12 @@ def _handle_request_broadcasting(self,
                 new_requests, "py_multimodal_data")
             py_scheduling_params = self._collect_py_objects_from_requests(
                 new_requests, "py_scheduling_params")
+            py_num_logprobs = self._collect_py_objects_from_requests(
+                new_requests, "py_num_logprobs")
             py_request_objects = tuple(
                 filter(None, [
                     py_logits_post_processors, py_multimodal_data,
-                    py_scheduling_params
+                    py_scheduling_params, py_num_logprobs
                 ]))
         else:
             py_request_objects = None
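
Why this hunk fixes the bug: only Python-side request attributes that are explicitly collected here survive the broadcast to non-leader ranks, so py_num_logprobs was silently dropped, which is presumably how py_topk_logprobs_vals ended up undefined downstream. Below is a minimal, self-contained sketch of the collect-and-filter pattern; the simplified collect_py_objects_from_requests body and the FakeRequest class are assumptions for illustration, not the real implementation.

# Sketch of the collect-and-filter broadcast pattern (assumed internals).
from typing import Any, Optional


def collect_py_objects_from_requests(requests: list,
                                     attr_name: str) -> Optional[tuple]:
    """Return (attr_name, {request_id: value}) when any request carries the
    attribute; return None so filter(None, ...) drops it from the payload."""
    values = {
        req.id: getattr(req, attr_name)
        for req in requests if getattr(req, attr_name, None) is not None
    }
    return (attr_name, values) if values else None


class FakeRequest:
    """Stand-in for an executor request; hypothetical, for this sketch only."""

    def __init__(self, id: int, **attrs: Any):
        self.id = id
        for key, value in attrs.items():
            setattr(self, key, value)


requests = [FakeRequest(0, py_num_logprobs=2), FakeRequest(1)]
py_request_objects = tuple(
    filter(None, [
        collect_py_objects_from_requests(requests, "py_num_logprobs"),
        collect_py_objects_from_requests(requests, "py_scheduling_params"),
    ]))
print(py_request_objects)  # (('py_num_logprobs', {0: 2}),)

Attributes absent from every request collapse to None and are filtered out, which is exactly why an attribute missing from this list never reaches the other ranks.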

tensorrt_llm/_torch/pyexecutor/llm_request.py

Lines changed: 1 addition & 2 deletions
@@ -733,8 +733,7 @@ def executor_request_to_llm_request(
         mrope_position_deltas=mrope_position_deltas,
         lookahead_config=None,
         return_log_probs=executor_request.output_config.return_log_probs,
-        num_logprobs=executor_request.py_num_logprobs if hasattr(
-            executor_request, "py_num_logprobs") else 0,
+        num_logprobs=getattr(executor_request, "py_num_logprobs", 0),
         return_context_logits=executor_request.output_config.
         return_context_logits,
         return_perf_metrics=executor_request.output_config.return_perf_metrics,
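
This hunk is a pure simplification: getattr with a default is the idiomatic one-expression equivalent of the two-line hasattr ternary, and it looks the attribute up once instead of twice. A quick standalone illustration (Req is a placeholder class, not the real executor request):

# Both forms yield the same value; the second is shorter and single-lookup.
class Req:
    pass


r = Req()

# Before: hasattr ternary.
num_logprobs = r.py_num_logprobs if hasattr(r, "py_num_logprobs") else 0

# After: getattr with a default.
num_logprobs = getattr(r, "py_num_logprobs", 0)
assert num_logprobs == 0

r.py_num_logprobs = 5
assert getattr(r, "py_num_logprobs", 0) == 5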

tests/unittest/llmapi/test_llm_multi_gpu_pytorch.py

Lines changed: 21 additions & 0 deletions
@@ -11,6 +11,7 @@
     check_llama_7b_multi_lora_from_request_test_harness,
     check_phi3_lora_fused_modules_output_tp2_identical_to_tp1)
 from .test_llm import (_test_llm_capture_request_error, llama_model_path,
+                       llm_return_logprobs_test_harness,
                        tinyllama_logits_processor_test_harness)
 from .test_llm_pytorch import llama_7b_lora_from_dir_test_harness

@@ -104,3 +105,23 @@ async def test_llm_rpc_streaming_tp2():
                                      sampling_params=SamplingParams(
                                          max_tokens=10, end_id=-1)):
         print(f"get result: {output}")
+
+
+@skip_ray
+@pytest.mark.gpu2
+@pytest.mark.parametrize(
+    "prompt_logprobs, logprobs, return_context_logits, return_generation_logits",
+    [
+        (None, 1, False,
+         False),  # generation logprobs only (top-1, PyTorch limit)
+    ])
+def test_llm_return_logprobs_streaming_tp2(prompt_logprobs, logprobs,
+                                           return_context_logits,
+                                           return_generation_logits):
+    llm_return_logprobs_test_harness(prompt_logprobs,
+                                     logprobs,
+                                     return_context_logits,
+                                     return_generation_logits,
+                                     streaming=True,
+                                     backend="pytorch",
+                                     tp_size=2)
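
The new test exercises the fixed path end to end with streaming at tensor-parallel size 2, where the broadcast actually matters. For orientation, here is a hedged sketch of how a caller would request top-1 generation logprobs through the LLM API; the model name and exact constructor arguments are assumptions for illustration, and the real coverage lives in llm_return_logprobs_test_harness.

# Hedged usage sketch, not the tested harness: model name and kwargs are
# assumptions for illustration only.
from tensorrt_llm import LLM, SamplingParams

llm = LLM(model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
          tensor_parallel_size=2)

# logprobs=1 requests top-1 generation logprobs, the PyTorch backend's
# current limit per the parametrization comment in the test above.
params = SamplingParams(max_tokens=10, logprobs=1)

for output in llm.generate(["Hello, my name is"], params):
    # Each generated token carries its log-probability once
    # py_num_logprobs reaches every rank.
    print(output.outputs[0].logprobs)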
