addressing cat empty tensor case.Fixes gpt2 data distributed example

apbose · apbose · commit 0f15e7da5794 · 2025-11-20T16:56:38.000-08:00
diff --git a/examples/distributed_inference/data_parallel_stable_diffusion.py b/examples/distributed_inference/data_parallel_stable_diffusion.py
@@ -53,7 +53,5 @@
 
 # Assume there are 2 processes (2 devices)
 with distributed_state.split_between_processes(["a dog", "a cat"]) as prompt:
-    print("before \n")
     result = pipe(prompt).images[0]
-    print("after ")
     result.save(f"result_{distributed_state.process_index}.png")
diff --git a/py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py b/py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py
@@ -218,7 +218,17 @@ def aten_ops_native_group_norm(
     )
 
 
-@dynamo_tensorrt_converter(torch.ops.aten.cat.default, supports_dynamic_shapes=True)
+def cat_validator(node: Node, settings: Optional[CompilationSettings] = None) -> bool:
+    # Validate only one user, which is a getitem node that accesses the first element in the list
+    for each_input in node.args[0]:
+        if isinstance(each_input, TRTTensor) and any(s == 0 for s in each_input.shape):
+            return False
+    return True
+
+
+@dynamo_tensorrt_converter(
+    torch.ops.aten.cat.default, supports_dynamic_shapes=True, validator=cat_validator
+)
 def aten_ops_cat(
     ctx: ConversionContext,
     target: Target,
diff --git a/py/torch_tensorrt/dynamo/conversion/impl/cat.py b/py/torch_tensorrt/dynamo/conversion/impl/cat.py
@@ -14,6 +14,8 @@
     set_layer_name,
 )
 
+logger = logging.getLogger(__name__)
+
 
 def unify_and_concat_trt_tensors(
     ctx: ConversionContext,
diff --git a/tests/py/dynamo/conversion/test_cat_aten.py b/tests/py/dynamo/conversion/test_cat_aten.py
@@ -25,6 +25,60 @@ def forward(self, x, y, z):
             inputs,
         )
 
+    @parameterized.expand(
+        [
+            ("pos", 0),
+            ("neg", -3),
+        ]
+    )
+    def test_cat_with_scalar_inputs(self, _, dim):
+        # Ensure scalar tensor wrap works
+        class Cat(nn.Module):
+            def forward(self, x, y):
+                # y is a scalar, x is a tensor
+                return torch.ops.aten.cat.default((x, y), dim)
+
+        x = torch.randn(1, 2, 3, device="cuda")
+        y = torch.ones_like(x) * 5.0  # simulate scalar broadcast
+        inputs = [x, y]
+        self.run_test(Cat(), inputs)
+
+    @parameterized.expand(
+        [
+            ("pos", 0),
+            ("neg", -3),
+        ]
+    )
+    def test_cat_with_empty_tensor(self, _, dim):
+        # Handle empty tensor in concat
+        class Cat(nn.Module):
+            def forward(self, x):
+                y = torch.empty(0, 2, 3, device="cuda")
+                return torch.ops.aten.cat.default((x, y), dim)
+
+        inputs = [
+            torch.randn(1, 2, 3, device="cuda"),
+        ]
+        self.run_test(Cat(), inputs)
+
+    @parameterized.expand(
+        [
+            ("pos", 2),
+            ("neg", -1),
+        ]
+    )
+    def test_cat_with_different_dtypes(self, _, dim):
+        # check dtype promotion path in concat
+        class Cat(nn.Module):
+            def forward(self, x, y):
+                return torch.ops.aten.cat.default((x, y), dim)
+
+        inputs = [
+            torch.ones(1, 2, 3, dtype=torch.float32, device="cuda"),
+            torch.ones(1, 2, 3, dtype=torch.float16, device="cuda"),
+        ]
+        self.run_test(Cat(), inputs)
+
     @parameterized.expand(
         [
             ("pos", 1),

Original file line number	Diff line number	Diff line change
`@@ -14,6 +14,8 @@`
`14`	`14`	`set_layer_name,`
`15`	`15`	`)`
`16`	`16`
	`17`	`+logger = logging.getLogger(__name__)`
	`18`	`+`
`17`	`19`
`18`	`20`	`def unify_and_concat_trt_tensors(`
`19`	`21`	`ctx: ConversionContext,`