[None][fix] change allreduce workspace dtype to torch.int64 to avoid overflow (NVIDIA#9479)

dc3671 · web-flow · commit e47927e8473b · 2025-11-27T17:08:41.000+08:00
Signed-off-by: Zhenhuan Chen <zhenhuanc@nvidia.com>
diff --git a/tensorrt_llm/plugin/plugin.py b/tensorrt_llm/plugin/plugin.py
@@ -738,7 +738,7 @@ def allocate_allreduce_fusion_workspace(
                 3 * lamport_buffers_size,
             )
         flag_buffer = torch.tensor([0, 0, 0, lamport_buffers_size, 0],
-                                   dtype=torch.int,
+                                   dtype=torch.int64,
                                    device="cuda")
         buffers = [ipc_buffers, ipc_barriers, lamport_buffers, flag_buffer]
 

Original file line number	Diff line number	Diff line change
`@@ -738,7 +738,7 @@ def allocate_allreduce_fusion_workspace(`
`738`	`738`	`3 * lamport_buffers_size,`
`739`	`739`	`)`
`740`	`740`	`flag_buffer = torch.tensor([0, 0, 0, lamport_buffers_size, 0],`
`741`		`- dtype=torch.int,`
	`741`	`+ dtype=torch.int64,`
`742`	`742`	`device="cuda")`
`743`	`743`	`buffers = [ipc_buffers, ipc_barriers, lamport_buffers, flag_buffer]`
`744`	`744`