
Commit 0edba55

khabinov authored and Wei Wei committed
[fx2trt] Handle shapes like [batch_size] and scalars for binary ops properly (#74)
Summary: Pull Request resolved: https://github.com/pytorch/fx2trt/pull/74

## Root cause

We have code like:

```
x = ...                            # result shape is [batch_size, N]
y = mean(x, dim=1, keepdim=False)  # result shape is [batch_size]
z = y + 0.5                        # result shape is [batch_size]
```

For TRT with implicit batch dimension it should look like:

```
x = ...                            # result shape is [N]
y = mean(x, dim=1, keepdim=False)  # result shape is []
z = y + 0.5                        # result shape is []
```

However, because we convert the scalar to a `TRTTensor` and don't squeeze its dimensions, the resulting tensor `z` would have shape `[1]`, and this breaks the rest of the net.

## Solution

Convert the scalar value to a `torch.Tensor` instead, because we already have dimension-squeeze logic implemented for those.

## P.S.

Also added support for `sqrt` tracing.

Reviewed By: yinghai, houseroad

Differential Revision: D36336816

fbshipit-source-id: 412e44e99f25ab3549df540a87bd005e6b3fe08a
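The eager-mode shapes described in the root cause can be checked with plain PyTorch (a minimal sketch; the `batch_size` and `N` values are arbitrary):

```python
import torch

batch_size, N = 8, 4
x = torch.randn(batch_size, N)           # shape [batch_size, N]
y = torch.mean(x, dim=1, keepdim=False)  # keepdim=False drops dim 1 -> shape [batch_size]
z = y + 0.5                              # scalar broadcast keeps shape [batch_size]

assert y.shape == (batch_size,)
assert z.shape == (batch_size,)
```

Under TRT's implicit batch mode the leading `batch_size` dimension is stripped from every shape above, which is why `y` and `z` become 0-d there.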
1 parent 5d80f41 commit 0edba55

File tree

2 files changed: +16 −0 lines changed

fx/converters/converter_utils.py

Lines changed: 15 additions & 0 deletions

```diff
@@ -452,6 +452,21 @@ def add_binary_elementwise_layer(
         )
         return get_python_op_from_trt_elementwise_op(op_type)(lhs_val, rhs_val)
 
+    # If the following conditions are true:
+    # 1. the network has implicit batch dimension,
+    # 2. one operand has shape [] (real shape is [batch_size]),
+    # 3. another operand is a scalar,
+    # then the result should also have shape [] (real shape is [batch_size]).
+    #
+    # In such case, we need to convert the scalar operand to tensor, because
+    # this way the shape will become [1], and then will be properly squeezed
+    # into [], meaning that the result will have shape [], which is what we
+    # expect.
+    if is_lhs_trt_tensor and isinstance(rhs_val, (float, int)):
+        rhs_val = torch.tensor([rhs_val], dtype=dtype)
+    if is_rhs_trt_tensor and isinstance(lhs_val, (float, int)):
+        lhs_val = torch.tensor([lhs_val], dtype=dtype)
 
     # When lhs is scalar, and rhs has shape [1,], then currently the assert
     # will fail because lhs shape has fewer dimensions than rhs shape. This
     # happens when using implicit batch dimension, when we removed the 1st
```
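The squeeze behavior that the added comment relies on can be illustrated in eager PyTorch (a sketch; in fx2trt the actual squeeze happens when the `torch.Tensor` is lowered to a TRT constant):

```python
import torch

# A Python scalar wrapped as a 1-element tensor has shape [1] ...
rhs_val = torch.tensor([0.5], dtype=torch.float32)
assert rhs_val.shape == (1,)

# ... and squeezing that dimension yields a 0-d tensor, shape [].
# That matches the shape of a [batch_size] operand once the implicit
# batch dimension is stripped, so the elementwise result stays 0-d
# instead of picking up a spurious [1] dimension.
squeezed = rhs_val.squeeze(0)
assert squeezed.shape == ()
```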

tracer/acc_tracer/acc_ops.py

Lines changed: 1 addition & 0 deletions

```diff
@@ -1482,6 +1482,7 @@ def log(*, input):
 
 
 @register_acc_op_properties(AccOpProperty.pointwise, AccOpProperty.unary)
 @register_acc_op_mapping(op_and_target=("call_function", torch.sqrt))
+@register_acc_op_mapping(op_and_target=("call_method", "sqrt"))
 @register_acc_op
 def sqrt(*, input):
     return torch.sqrt(input=input)
```
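The new `call_method` mapping lets the tracer handle the method form `x.sqrt()` in addition to the function form `torch.sqrt(x)`. Both compute the same thing in eager mode (a minimal sketch; the module `M` is a hypothetical example, not part of the commit):

```python
import torch

class M(torch.nn.Module):
    def forward(self, x):
        # Method-call form of sqrt; with this commit, the tracer maps
        # ("call_method", "sqrt") nodes to acc_ops.sqrt, same as torch.sqrt.
        return x.sqrt()

m = M()
x = torch.tensor([4.0, 9.0])
assert torch.equal(m(x), torch.sqrt(x))  # both give tensor([2., 3.])
```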
