Commit fb52a66 (2 parents: 2732b35 + cf229c9)

Merge pull request #691 from jaybdub/einsum_converter_native

Explicit batch and Einsum converter

38 files changed: 154 additions & 79 deletions
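
This merge moves torch2trt's converters from TensorRT's implicit-batch mode to explicit batch: network tensor shapes now carry the batch dimension, which is why the per-file changes below stop subtracting 1 from ranks and axes. A minimal sketch of the two network-creation modes in the TensorRT Python API (illustrative only, not code from this commit):

import tensorrt as trt

logger = trt.Logger(trt.Logger.WARNING)
builder = trt.Builder(logger)

# Legacy implicit batch: network tensors omit the batch dimension.
# network = builder.create_network()

# Explicit batch: shapes include the batch dimension, so PyTorch dims
# map one-to-one onto TensorRT axes.
flag = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
network = builder.create_network(flag)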

.gitignore

Lines changed: 3 additions & 1 deletion
@@ -16,4 +16,6 @@ __pycache__/
 *.ipynb_checkpoints
 *.pth
 docs/converters.md
-site
+site
+ToJetsonGrp
+.vscode

torch2trt/converters/BatchNorm1d.py

Lines changed: 4 additions & 3 deletions
@@ -17,20 +17,21 @@ def convert_BatchNorm2d(ctx):
     layer = ctx.network.add_shuffle(input_trt)

     if len(input.shape) == 2:
-        layer.reshape_dims = (input.shape[1], 1, 1)
+        layer.reshape_dims = (input.shape[0], input.shape[1], 1, 1)
     else:
-        layer.reshape_dims = (input.shape[1], input.shape[2], 1)
+        layer.reshape_dims = (input.shape[0], input.shape[1], input.shape[2], 1)

     layer = ctx.network.add_scale(layer.get_output(0), trt.ScaleMode.CHANNEL, bias, scale, power)

     # reshape back to 1D
     layer = ctx.network.add_shuffle(layer.get_output(0))
-    layer.reshape_dims = tuple(output.shape[1:])
+    layer.reshape_dims = tuple(output.shape)

     output._trt = layer.get_output(0)


 @add_module_test(torch.float32, torch.device('cuda'), [(1, 10)])
 @add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 3)])
+@add_module_test(torch.float32, torch.device('cuda'), [(2, 10, 3)], max_batch_size=2)
 def test_BatchNorm1d_basic():
     return torch.nn.BatchNorm1d(10)
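
The converter lowers BatchNorm1d onto TensorRT's channel-wise scale layer by padding the input to 4D, and with explicit batch the leading N must now appear in reshape_dims. A hypothetical PyTorch equivalent of the reshape (not from the commit):

import torch

x = torch.randn(2, 10, 3)        # BatchNorm1d input: (N, C, L)
x4d = x.reshape(2, 10, 3, 1)     # (N, C, L, 1): batch dimension kept explicit
assert x4d.shape == (2, 10, 3, 1)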

torch2trt/converters/Conv1d.py

Lines changed: 6 additions & 2 deletions
@@ -22,7 +22,7 @@ def convert_Conv1d(ctx):

     # reshape to 2D
     layer = ctx.network.add_shuffle(input_trt)
-    layer.reshape_dims = (-1, input.shape[-1], 1)
+    layer.reshape_dims = (input.shape[0], -1, input.shape[-1], 1)

     layer = ctx.network.add_convolution(
         input=layer.get_output(0),
@@ -39,26 +39,30 @@ def convert_Conv1d(ctx):

     # reshape back to 1D
     layer = ctx.network.add_shuffle(layer.get_output(0))
-    layer.reshape_dims = (-1, output.shape[-1])
+    layer.reshape_dims = (input.shape[0], -1, output.shape[-1])

     output._trt = layer.get_output(0)


 @add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 224)])
+@add_module_test(torch.float32, torch.device('cuda'), [(2, 10, 224)], max_batch_size=2)
 def test_Conv1d_basic():
     return torch.nn.Conv1d(10, 5, kernel_size=1, stride=1, padding=0)


 @add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 224)])
+@add_module_test(torch.float32, torch.device('cuda'), [(2, 10, 224)], max_batch_size=2)
 def test_Conv1d_stride2():
     return torch.nn.Conv1d(10, 5, kernel_size=1, stride=2, padding=0)


 @add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 224)])
+@add_module_test(torch.float32, torch.device('cuda'), [(2, 10, 224)], max_batch_size=2)
 def test_Conv1d_kernel3():
     return torch.nn.Conv1d(10, 5, kernel_size=3, stride=2, padding=1)


 @add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 224)])
+@add_module_test(torch.float32, torch.device('cuda'), [(2, 10, 224)], max_batch_size=2)
 def test_Conv1d_dilation2():
     return torch.nn.Conv1d(10, 5, kernel_size=3, stride=1, padding=1, dilation=2)
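
The Conv1d converter uses the same trick: a 1D convolution is expressed as a 2D convolution over a unit trailing dimension. A self-contained sketch checking that equivalence in PyTorch (illustrative, not commit code):

import torch

conv1d = torch.nn.Conv1d(10, 5, kernel_size=3, stride=1, padding=1)
conv2d = torch.nn.Conv2d(10, 5, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0))
with torch.no_grad():
    conv2d.weight.copy_(conv1d.weight.unsqueeze(-1))  # (5, 10, 3) -> (5, 10, 3, 1)
    conv2d.bias.copy_(conv1d.bias)

x = torch.randn(2, 10, 224)
y1d = conv1d(x)
y2d = conv2d(x.unsqueeze(-1)).squeeze(-1)  # (N, C, L) -> (N, C, L, 1) and back
assert torch.allclose(y1d, y2d, atol=1e-5)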

torch2trt/converters/Linear.py

Lines changed: 1 addition & 1 deletion
@@ -27,7 +27,7 @@ def convert_Linear(ctx):

     # reshape back to N
     layer = ctx.network.add_shuffle(layer.get_output(0))
-    layer.reshape_dims = tuple(output.shape[1:])
+    layer.reshape_dims = tuple(output.shape)

     output._trt = layer.get_output(0)

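Same pattern as BatchNorm1d: the final shuffle now restores the full output shape rather than output.shape[1:]. A trivial sketch of the shape the converter must reproduce (assumes standard nn.Linear semantics):

import torch

linear = torch.nn.Linear(10, 5)
x = torch.randn(2, 10)              # (N, in_features)
out = linear(x)
assert tuple(out.shape) == (2, 5)   # reshape_dims == tuple(output.shape)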

torch2trt/converters/__init__.py

Lines changed: 1 addition & 0 deletions
@@ -29,6 +29,7 @@
 from .clamp import *
 from .compare import *
 from .div import *
+from .einsum import *
 from .expand import *
 from .floordiv import *
 from .gelu import *
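
This import registers the Einsum converter named in the PR title; the converter's source (einsum.py) is not shown in this diff. For illustration only, the PyTorch op it targets:

import torch

q = torch.randn(2, 8, 16)
k = torch.randn(2, 8, 16)
attn = torch.einsum('bqd,bkd->bqk', q, k)  # batched dot products over d
assert attn.shape == (2, 8, 8)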

torch2trt/converters/add.py

Lines changed: 1 addition & 1 deletion
@@ -11,7 +11,7 @@ def convert_add(ctx):
     input_b = ctx.method_args[1]
     output = ctx.method_return
     input_a_trt, input_b_trt = add_missing_trt_tensors(ctx.network, [input_a, input_b])
-    input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], len(output.shape) - 1)
+    input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], len(output.shape))
     layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.SUM)
     output._trt = layer.get_output(0)

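With explicit batch, operands are broadcast to the full output rank (len(output.shape)) rather than the rank minus the batch dimension. The PyTorch semantics being matched (sketch, not commit code):

import torch

a = torch.randn(2, 3, 4)
b = torch.randn(4)        # lower rank; broadcast as if shaped (1, 1, 4)
out = a + b
assert out.shape == (2, 3, 4)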

torch2trt/converters/batch_norm.py

Lines changed: 5 additions & 2 deletions
@@ -18,23 +18,26 @@ def convert_batch_norm_trt7(ctx):
     scale = weight.detach().cpu().numpy() / np.sqrt(running_var.detach().cpu().numpy() + eps)
     bias = bias.detach().cpu().numpy() - running_mean.detach().cpu().numpy() * scale
     power = np.ones_like(scale)
-
-    layer = ctx.network.add_scale_nd(input_trt, trt.ScaleMode.CHANNEL, bias, scale, power, 0)
+
+    layer = ctx.network.add_scale_nd(input_trt, trt.ScaleMode.CHANNEL, bias, scale, power, 1)
     output._trt = layer.get_output(0)



 @add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 3, 3)], enabled=trt_version() >= '7.0')
+@add_module_test(torch.float32, torch.device('cuda'), [(2, 10, 3, 3)], enabled=trt_version() >= '7.0', max_batch_size=2)
 def test_batch_norm_2d_trt7():
     return torch.nn.BatchNorm2d(10)


 @add_module_test(torch.float32, torch.device('cuda'), [(1, 10, 3, 3, 3)], enabled=trt_version() >= '7.0')
+@add_module_test(torch.float32, torch.device('cuda'), [(2, 10, 3, 3, 3)], enabled=trt_version() >= '7.0', max_batch_size=2)
 def test_batch_norm_3d_2_trt7():
     return torch.nn.BatchNorm3d(10)


 @add_module_test(torch.float32, torch.device('cuda'), [(1, 32, 2, 36, 47)], enabled=trt_version() >= '7.0')
+@add_module_test(torch.float32, torch.device('cuda'), [(2, 32, 2, 36, 47)], enabled=trt_version() >= '7.0', max_batch_size=2)
 def test_batch_norm_3d_trt7():
     return torch.nn.BatchNorm3d(32)

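The channel_axis argument of add_scale_nd moves from 0 to 1 because axis 0 is now the batch dimension. The scale/bias folding itself is unchanged; a sketch verifying it against eval-mode BatchNorm (illustrative):

import torch

bn = torch.nn.BatchNorm2d(10).eval()
x = torch.randn(2, 10, 3, 3)
# y = scale * x + bias, with scale = w / sqrt(var + eps), bias = b - mean * scale
scale = bn.weight / torch.sqrt(bn.running_var + bn.eps)
bias = bn.bias - bn.running_mean * scale
y = scale[None, :, None, None] * x + bias[None, :, None, None]
assert torch.allclose(y, bn(x), atol=1e-5)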

torch2trt/converters/cat.py

Lines changed: 5 additions & 2 deletions
@@ -13,10 +13,10 @@ def convert_cat(ctx):

     output = ctx.method_return
     trt_inputs = add_missing_trt_tensors(ctx.network, inputs)
-    trt_inputs = broadcast_trt_tensors(ctx.network, trt_inputs, len(output.shape) - 1)
+    trt_inputs = broadcast_trt_tensors(ctx.network, trt_inputs, len(output.shape))

     layer = ctx.network.add_concatenation(inputs=trt_inputs)
-    layer.axis = dim - 1
+    layer.axis = dim
     output._trt = layer.get_output(0)


@@ -30,15 +30,18 @@ def forward(self, *x):


 @add_module_test(torch.float32, torch.device('cuda'), [(1, 4, 4), (1, 3, 4), (1, 17, 4)])
+@add_module_test(torch.float32, torch.device('cuda'), [(2, 4, 4), (2, 3, 4), (2, 17, 4)], max_batch_size=2)
 def test_Cat_basic():
     return Cat(1)


 @add_module_test(torch.float32, torch.device('cuda'), [(1, 4, 4), (1, 4, 4), (1, 4, 4)])
+@add_module_test(torch.float32, torch.device('cuda'), [(2, 4, 4), (2, 4, 4), (2, 4, 4)], max_batch_size=2)
 def test_Cat_neg1_dim():
     return Cat(-1)


 @add_module_test(torch.float32, torch.device('cuda'), [(1, 4, 4), (1, 4, 4), (1, 4, 4)])
+@add_module_test(torch.float32, torch.device('cuda'), [(2, 4, 4), (2, 4, 4), (2, 4, 4)], max_batch_size=2)
 def test_Cat_neg2_dim():
     return Cat(-2)
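
Concatenation axes now map one-to-one: layer.axis = dim, with no - 1 correction. The PyTorch behavior the converter reproduces (sketch, matching the test_Cat_basic shapes):

import torch

xs = [torch.randn(2, 4, 4), torch.randn(2, 3, 4), torch.randn(2, 17, 4)]
out = torch.cat(xs, dim=1)   # dim 1 == TensorRT axis 1 under explicit batch
assert out.shape == (2, 24, 4)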

torch2trt/converters/chunk.py

Lines changed: 5 additions & 0 deletions
@@ -33,28 +33,33 @@ def forward(self, x):

 @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)])
 @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)])
+@add_module_test(torch.float32, torch.device('cuda'), [(2, 3, 3, 3)], max_batch_size=2)
 def test_torch_chunk_1_1():
     return TorchChunk(1, 1)


 @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)])
 @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)])
+@add_module_test(torch.float32, torch.device('cuda'), [(2, 3, 3, 3)], max_batch_size=2)
 def test_torch_chunk_2_1():
     return TorchChunk(2, 1)


 @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)])
 @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)])
+@add_module_test(torch.float32, torch.device('cuda'), [(2, 3, 3, 3)], max_batch_size=2)
 def test_torch_chunk_3_1():
     return TorchChunk(3, 1)


 @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3)])
 @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)])
+@add_module_test(torch.float32, torch.device('cuda'), [(2, 3, 3, 3)], max_batch_size=2)
 def test_torch_chunk_3_2():
     return TorchChunk(3, 2)


 @add_module_test(torch.float32, torch.device('cuda'), [(1, 3, 3, 3)])
+@add_module_test(torch.float32, torch.device('cuda'), [(2, 3, 3, 3)], max_batch_size=2)
 def test_tensor_chunk_3_2():
     return TensorChunk(3, 2)
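
Only tests change here; the new cases exercise batch sizes above 1. For reference, the torch.chunk behavior under test (sketch; the TorchChunk(3, 2) arguments are assumed to be chunks=3, dim=2 based on the test names):

import torch

x = torch.randn(2, 3, 3, 3)
parts = torch.chunk(x, 3, dim=2)   # three pieces along dim 2
assert len(parts) == 3 and parts[0].shape == (2, 3, 1, 3)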

torch2trt/converters/div.py

Lines changed: 2 additions & 2 deletions
@@ -12,7 +12,7 @@ def convert_div(ctx):
     input_b = ctx.method_args[1]
     output = ctx.method_return
     input_a_trt, input_b_trt = add_missing_trt_tensors(ctx.network, [input_a, input_b])
-    input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], len(output.shape) - 1)
+    input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], len(output.shape))
     layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.DIV)
     output._trt = layer.get_output(0)

@@ -24,7 +24,7 @@ def convert_rdiv(ctx):
     input_b = ctx.method_args[0]
     output = ctx.method_return
     input_a_trt, input_b_trt = add_missing_trt_tensors(ctx.network, [input_a, input_b])
-    input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], len(output.shape) - 1)
+    input_a_trt, input_b_trt = broadcast_trt_tensors(ctx.network, [input_a_trt, input_b_trt], len(output.shape))
     layer = ctx.network.add_elementwise(input_a_trt, input_b_trt, trt.ElementWiseOperation.DIV)
     output._trt = layer.get_output(0)

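convert_div and convert_rdiv receive the same rank fix as add. The reflected form that convert_rdiv handles, where the tensor is the right-hand operand (sketch, not commit code):

import torch

x = torch.full((2, 3), 2.0)
y = 6.0 / x                  # scalar / tensor dispatches to the rdiv path
assert torch.equal(y, torch.full((2, 3), 3.0))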
