We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 902fb85 commit a8d3481 (Copy full SHA for a8d3481)
tensorrt_llm/_torch/pyexecutor/model_engine.py
@@ -1218,9 +1218,7 @@ def _prepare_tp_inputs(
1218
dtype=torch.int32,
1219
pin_memory=True)
1220
mrope_config['mrope_position_deltas'].append(
1221
- torch.tensor([mrope_position_deltas],
1222
- dtype=torch.int32).to('cuda',
1223
- non_blocking=True))
+ mrope_position_deltas.to('cuda', non_blocking=True))
1224
1225
extend_requests = extend_cuda_graph_dummy_requests + extend_requests
1226
if not self._disable_overlap_scheduler and self.is_spec_decode:
0 commit comments