Commit 732a76f

Make broadcasting result shape more static

This involves the following two parts:
- Change type refinement (RefineTypes) to propagate more static shape info.
- Get as much static shape info as possible when creating the result tensor during conversion to linalg.
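For context, here is a minimal Python sketch (not part of the commit; names are illustrative) of the broadcast-shape rule that the new fillInSizesForBinaryBroadcastingOp helper in RefineTypes.cpp implements: align ranks from the right, keep a dimension dynamic if either input dimension is dynamic, give up on shape info if two static non-1 sizes disagree, and otherwise take the larger size.

kUnknownSize = -1  # marker for a dynamic dimension, as in RefineTypes

def broadcast_result_shape(lhs, rhs):
    # Returns the broadcast result shape, or None when the static sizes are
    # incompatible (mirroring how the helper clears the shape info).
    rank = max(len(lhs), len(rhs))
    # Left-pad the shorter shape with 1s so both operands have the result rank.
    lhs = [1] * (rank - len(lhs)) + list(lhs)
    rhs = [1] * (rank - len(rhs)) + list(rhs)
    result = []
    for l, r in zip(lhs, rhs):
        if l == kUnknownSize or r == kUnknownSize:
            result.append(kUnknownSize)  # dynamic stays dynamic
        elif l != r and l != 1 and r != 1:
            return None  # incompatible broadcast; no static shape info
        else:
            result.append(max(l, r))
    return result

# The shapes from the new e2e test below broadcast to a fully static result:
# broadcast_result_shape([5, 4, 3, 3, 1], [4, 3, 1, 2]) == [5, 4, 3, 3, 2]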
6 files changed (+134 −62 lines)
e2e_testing/torchscript/elementwise.py

Lines changed: 65 additions & 41 deletions
@@ -62,6 +62,28 @@ def ElementwiseBinaryModule_basic(module, tu: TestUtils):
 # ==============================================================================
 
 
+class ElementwiseBinaryStaticShapeModule(torch.nn.Module):
+    def __init__(self):
+        super().__init__()
+
+    @export
+    @annotate_args([
+        None,
+        ([5, 4, 3, 3, 1], torch.float32, True),
+        ([4, 3, 1, 2], torch.float32, True),
+    ])
+    def forward(self, a, b):
+        return a * b
+
+@register_test_case(
+    module_factory=lambda: ElementwiseBinaryStaticShapeModule())
+def ElementwiseBinaryStaticShapeModule_basic(module, tu: TestUtils):
+    module.forward(tu.rand(5, 4, 3, 3, 1), tu.rand(4, 3, 1, 2))
+
+
+# ==============================================================================
+
+
 class ElementwiseTernaryModule(torch.nn.Module):
     def __init__(self):
         super().__init__()
@@ -171,8 +193,7 @@ def forward(self, a):
         return torch.unsqueeze(a, -3)
 
 
-@register_test_case(
-    module_factory=lambda: ElementwiseUnsqueezeNegDimsModule())
+@register_test_case(module_factory=lambda: ElementwiseUnsqueezeNegDimsModule())
 def ElementwiseUnsqueezeNegDimsModule_basic(module, tu: TestUtils):
     module.forward(tu.rand(4, 3))
 
@@ -255,7 +276,7 @@ def forward(self, x):
 
 @register_test_case(module_factory=lambda: ElementwiseGeluModule())
 def ElementwiseGeluModule_basic(module, tu: TestUtils):
-    module.forward(2*tu.rand(5, 3) - 0.5)
+    module.forward(2 * tu.rand(5, 3) - 0.5)
 
 
 # ==============================================================================
@@ -359,7 +380,7 @@ def forward(self, x):
 
 @register_test_case(module_factory=lambda: ElementwiseGtIntScalarModule())
 def ElementwiseGtIntScalarModule_basic(module, tu: TestUtils):
-    module.forward(torch.randint(-10, 15, (3,4)))
+    module.forward(torch.randint(-10, 15, (3, 4)))
 
 
 class ElementwiseGtMixed2ScalarModule(torch.nn.Module):
@@ -377,7 +398,7 @@ def forward(self, x):
 
 @register_test_case(module_factory=lambda: ElementwiseGtMixed2ScalarModule())
 def ElementwiseGtMixed2ScalarModule_basic(module, tu: TestUtils):
-    module.forward(torch.randint(-10, 15, (3,4)).to(torch.int32))
+    module.forward(torch.randint(-10, 15, (3, 4)).to(torch.int32))
 
 
 class ElementwiseGtFloatTensorModule(torch.nn.Module):
@@ -415,10 +436,12 @@ def forward(self, x, y):
 
 @register_test_case(module_factory=lambda: ElementwiseGtIntTensorModule())
 def ElementwiseGtIntTensorModule_basic(module, tu: TestUtils):
-    module.forward(torch.randint(10, (3, 5)), torch.randint(10, (5,)))
+    module.forward(torch.randint(10, (3, 5)), torch.randint(10, (5, )))
+
 
 # ==============================================================================
 
+
 class ElementwiseLtFloatScalarModule(torch.nn.Module):
     def __init__(self):
         super().__init__()
@@ -452,7 +475,7 @@ def forward(self, x):
 
 @register_test_case(module_factory=lambda: ElementwiseLtIntScalarModule())
 def ElementwiseLtIntScalarModule_basic(module, tu: TestUtils):
-    module.forward(torch.randint(-10, 15, (3,4)))
+    module.forward(torch.randint(-10, 15, (3, 4)))
 
 
 class ElementwiseLtDiffWidthScalarModule(torch.nn.Module):
@@ -468,9 +491,10 @@ def forward(self, x):
         return torch.lt(x, 2)
 
 
-@register_test_case(module_factory=lambda: ElementwiseLtDiffWidthScalarModule())
+@register_test_case(
+    module_factory=lambda: ElementwiseLtDiffWidthScalarModule())
 def ElementwiseLtDiffWidthScalarModule_basic(module, tu: TestUtils):
-    module.forward(torch.randint(-10, 15, (3,4)).to(torch.int32))
+    module.forward(torch.randint(-10, 15, (3, 4)).to(torch.int32))
 
 
 class ElementwiseLtFloatTensorModule(torch.nn.Module):
@@ -508,10 +532,12 @@ def forward(self, x, y):
 
 @register_test_case(module_factory=lambda: ElementwiseLtIntTensorModule())
 def ElementwiseLtIntTensorModule_basic(module, tu: TestUtils):
-    module.forward(torch.randint(10, (3, 5)), torch.randint(10, (5,)))
+    module.forward(torch.randint(10, (3, 5)), torch.randint(10, (5, )))
+
 
 # ==============================================================================
 
+
 class ElementwiseEqFloatScalarModule(torch.nn.Module):
     def __init__(self):
         super().__init__()
@@ -527,8 +553,8 @@ def forward(self, x):
 
 @register_test_case(module_factory=lambda: ElementwiseEqFloatScalarModule())
 def ElementwiseEqFloatScalarModule_basic(module, tu: TestUtils):
-    module.forward(torch.tensor([[1.0, 2.2, 6.0], [6.0, 2.0, 3.1]])
-                   .to(torch.float32))
+    module.forward(
+        torch.tensor([[1.0, 2.2, 6.0], [6.0, 2.0, 3.1]]).to(torch.float32))
 
 
 class ElementwiseEqIntScalarModule(torch.nn.Module):
@@ -546,7 +572,7 @@ def forward(self, x):
 
 @register_test_case(module_factory=lambda: ElementwiseEqIntScalarModule())
 def ElementwiseEqIntScalarModule_basic(module, tu: TestUtils):
-    module.forward(torch.randint(2, 4, (5,8)))
+    module.forward(torch.randint(2, 4, (5, 8)))
 
 
 class ElementwiseEqDiffWidthScalarModule(torch.nn.Module):
@@ -562,9 +588,10 @@ def forward(self, x):
         return torch.eq(x, 2)
 
 
-@register_test_case(module_factory=lambda: ElementwiseEqDiffWidthScalarModule())
+@register_test_case(
+    module_factory=lambda: ElementwiseEqDiffWidthScalarModule())
 def ElementwiseEqDiffWidthScalarModule_basic(module, tu: TestUtils):
-    module.forward(torch.randint(2, 4, (5,8)).to(torch.int32))
+    module.forward(torch.randint(2, 4, (5, 8)).to(torch.int32))
 
 
 class ElementwiseEqFloatTensorModule(torch.nn.Module):
@@ -583,9 +610,9 @@ def forward(self, x, y):
 
 @register_test_case(module_factory=lambda: ElementwiseEqFloatTensorModule())
 def ElementwiseEqFloatTensorModule_basic(module, tu: TestUtils):
-    module.forward(torch.tensor([[1.0, 2.2, 6.0], [6.0, 2.0, 3.1]])
-                   .to(torch.float32),
-                   torch.tensor([1.0, 2.4, 6.0]).to(torch.float32))
+    module.forward(
+        torch.tensor([[1.0, 2.2, 6.0], [6.0, 2.0, 3.1]]).to(torch.float32),
+        torch.tensor([1.0, 2.4, 6.0]).to(torch.float32))
 
 
 class ElementwiseEqIntTensorModule(torch.nn.Module):
@@ -604,10 +631,12 @@ def forward(self, x, y):
 
 @register_test_case(module_factory=lambda: ElementwiseEqIntTensorModule())
 def ElementwiseEqIntTensorModule_basic(module, tu: TestUtils):
-    module.forward(torch.randint(2, 4, (8, 5)), torch.randint(2, 4, (5,)))
+    module.forward(torch.randint(2, 4, (8, 5)), torch.randint(2, 4, (5, )))
+
 
 # ==============================================================================
 
+
 class ElementwiseClampModule(torch.nn.Module):
     def __init__(self):
         super().__init__()
@@ -666,7 +695,7 @@ def forward(self, x):
 @register_test_case(module_factory=lambda: RsubModule_noalpha())
 def RsubModule_noalpha_basic(module, tu: TestUtils):
     module.forward(tu.rand(3, 4))
-
+
 # ==============================================================================
 
 class ElementwiseMulScalarIntModule(torch.nn.Module):
@@ -734,12 +763,10 @@ def forward(self, a, b):
         return torch.mul(a, b)
 
 
-@register_test_case(
-    module_factory=lambda: ElementwiseMulTensorFloatModule())
+@register_test_case(module_factory=lambda: ElementwiseMulTensorFloatModule())
 def ElementwiseMulTensorFloatModule_basic(module, tu: TestUtils):
-    module.forward(
-        tu.rand(4),
-        tu.rand(4).type(torch.float64))
+    module.forward(tu.rand(4), tu.rand(4).type(torch.float64))
+
 
 class ElementwiseMulTensorIntModule(torch.nn.Module):
     def __init__(self):
@@ -755,12 +782,10 @@ def forward(self, a, b):
         return torch.mul(a, b)
 
 
-@register_test_case(
-    module_factory=lambda: ElementwiseMulTensorIntModule())
+@register_test_case(module_factory=lambda: ElementwiseMulTensorIntModule())
 def ElementwiseMulTensorIntModule_basic(module, tu: TestUtils):
     module.forward(
-        torch.randint(10, [4]).type(torch.int32),
-        torch.randint(10, [4]))
+        torch.randint(10, [4]).type(torch.int32), torch.randint(10, [4]))
 
 
 # ==============================================================================
@@ -783,7 +808,7 @@ def ElementwiseLogModule_basic(module, tu: TestUtils):
 
 
 class ElementwiseSqrtModule(torch.nn.Module):
-    def __init__(self):
+    def __init__(self):
         super().__init__()
 
     @export
@@ -898,7 +923,7 @@ def ElementwiseLog2Module_basic(module, tu: TestUtils):
     module.forward(tu.rand(3, 4))
 
 class ElementwiseRsqrtModule(torch.nn.Module):
-    def __init__(self):
+    def __init__(self):
         super().__init__()
 
     @export
@@ -984,12 +1009,9 @@ def forward(self, a, b):
         return torch.div(a, b)
 
 
-@register_test_case(
-    module_factory=lambda: ElementwiseDivTensorFloatModule())
+@register_test_case(module_factory=lambda: ElementwiseDivTensorFloatModule())
 def ElementwiseDivTensorFloatModule_basic(module, tu: TestUtils):
-    module.forward(
-        tu.rand(4),
-        tu.rand(4).type(torch.float64))
+    module.forward(tu.rand(4), tu.rand(4).type(torch.float64))
 
 
 # ==============================================================================
@@ -1005,15 +1027,15 @@ def __init__(self):
         ([-1, -1], torch.int32, True),
         ([-1, -1], torch.int64, True),
     ])
-
     def forward(self, x, y):
         return torch.bitwise_and(x, y)
 
 
 @register_test_case(module_factory=lambda: ElementwiseAndIntegerModule())
 def ElementwiseAndIntegerModule_basic(module, tu: TestUtils):
-    module.forward(torch.randint(-10, 10, (3, 4)).to(torch.int32),
-                   torch.randint(-10, 10, (3, 4)))
+    module.forward(
+        torch.randint(-10, 10, (3, 4)).to(torch.int32),
+        torch.randint(-10, 10, (3, 4)))
 
 
 class ElementwiseSubScalarIntModule(torch.nn.Module):
@@ -1026,7 +1048,8 @@ def __init__(self):
         ([-1, -1], torch.int64, True),
     ])
     def forward(self, x):
-        return torch.sub(x, 2.1, alpha = 2)
+        return torch.sub(x, 2.1, alpha=2)
+
 
 @register_test_case(module_factory=lambda: ElementwiseSubScalarIntModule())
 def ElementwiseSubScalarIntModule_basic(module, tu: TestUtils):
@@ -1077,7 +1100,8 @@ def __init__(self):
         ([-1, -1], torch.float32, True),
     ])
     def forward(self, x):
-        return torch.add(x, 3.0, alpha = 2)
+        return torch.add(x, 3.0, alpha=2)
+
 
 @register_test_case(module_factory=lambda: ElementwiseAddScalarFloatModule())
 def ElementwiseAddScalarFloatModule_basic(module, tu: TestUtils):

e2e_testing/torchscript/xfail_sets.py

Lines changed: 1 addition & 0 deletions
@@ -28,6 +28,7 @@
     "ElementwiseReluModule_basic",
    "ElementwiseFloorModule_basic",
     "ElementwiseLogModule_basic",
+    "ElementwiseBinaryStaticShapeModule_basic",
     "TanhBackward_basic",
     "ElementwiseAddModule_basic",
     "ReturnThreeTensorFloat32_basic",

lib/Conversion/TorchToLinalg/TorchToLinalg.cpp

Lines changed: 4 additions & 3 deletions
@@ -2345,7 +2345,7 @@ struct ConvertElementwiseOp : ConversionPattern {
         // undefined behavior, by doing appropriate checks against the current
         // dimension size.
         auto currentDimSize =
-            rewriter.create<tensor::DimOp>(loc, tensorOperand, size.index());
+            getDimOp(rewriter, loc, tensorOperand, size.index());
 
         // If the result size of this dimension has so far only hit the
         // statically-known-to-be-1 case above (i.e., we have not yet assigned a
@@ -2372,12 +2372,13 @@ struct ConvertElementwiseOp : ConversionPattern {
           /*dimCount=*/resultRank, /*symbolCount=*/0, exprs, getContext()));
     }
 
-    SmallVector<StringRef> iteratorTypes(resultRank, "parallel");
+    SmallVector<StringRef> iteratorTypes(resultRank,
+                                         getParallelIteratorTypeName());
     // Add the indexing map for the outs init tensor.
     indexingMaps.push_back(rewriter.getMultiDimIdentityMap(resultRank));
 
     Value initTensor = rewriter.create<linalg::InitTensorOp>(
-        loc, resultShape, resultType.getElementType());
+        loc, getAsOpFoldResult(resultShape), resultType.getElementType());
     bool hadErrorCreatingPayload = false;
     auto generic = rewriter.create<linalg::GenericOp>(
         loc, /*resultTensorTypes=*/initTensor.getType(),

lib/Dialect/Torch/Transforms/RefineTypes.cpp

Lines changed: 44 additions & 11 deletions
@@ -759,6 +759,47 @@ static void fillInSizesGivenSizesList(ValueKnowledge &knowledge, Value sizes) {
   }
 }
 
+static void fillInSizesForBinaryBroadcastingOp(ValueKnowledge &lhs,
+                                               ValueKnowledge &rhs,
+                                               ValueKnowledge &knowledge) {
+  if (lhs.hasSizes && rhs.hasSizes) {
+    knowledge.hasSizes = true;
+    knowledge.sizes.resize(std::max(lhs.sizes.size(), rhs.sizes.size()),
+                           kUnknownSize);
+
+    int64_t resultRank = knowledge.sizes.size();
+    auto increaseRankToResultRank =
+        [&](const std::vector<int64_t> &sizes) -> std::vector<int64_t> {
+      int offset = resultRank - sizes.size();
+      std::vector<int64_t> newSizes(std::max(offset, 0), 1);
+      newSizes.insert(newSizes.end(), sizes.begin(), sizes.end());
+      return newSizes;
+    };
+
+    std::vector<int64_t> rankAdjustedSizesLhs =
+        increaseRankToResultRank(lhs.sizes);
+    std::vector<int64_t> rankAdjustedSizesRhs =
+        increaseRankToResultRank(rhs.sizes);
+
+    for (int64_t i = 0; i < resultRank; i++) {
+      int64_t lhsDimSize = rankAdjustedSizesLhs[i];
+      int64_t rhsDimSize = rankAdjustedSizesRhs[i];
+      // Dynamic shape can't be decided at compilation.
+      if (lhsDimSize == kUnknownSize || rhsDimSize == kUnknownSize)
+        continue;
+
+      // Incompatible broadcasting shape.
+      if (lhsDimSize != rhsDimSize && lhsDimSize != 1 && rhsDimSize != 1) {
+        knowledge.hasSizes = false;
+        knowledge.sizes.clear();
+        return;
+      }
+
+      knowledge.sizes[i] = std::max(lhsDimSize, rhsDimSize);
+    }
+  }
+}
+
 ChangeResult TypeAnalyzer::visitAtenMmOp(
     AtenMmOp op, ArrayRef<LatticeElement<ValueKnowledge> *> operands) {
   auto &lhs = operands[0]->getValue();
@@ -950,11 +991,7 @@ ChangeResult TypeAnalyzer::visitBinaryBroadcastingOp(
   auto rhs = operands[1]->getValue();
   auto knowledge =
       ValueKnowledge::getNotNonePessimisticValueState(getContext());
-  if (lhs.hasSizes && rhs.hasSizes) {
-    knowledge.hasSizes = true;
-    knowledge.sizes.resize(std::max(lhs.sizes.size(), rhs.sizes.size()),
-                           kUnknownSize);
-  }
+  fillInSizesForBinaryBroadcastingOp(lhs, rhs, knowledge);
 
   // The alpha in `aten.add.Tensor` and `aten.sub.Tensor` has to be lower type
   // category than the lhs and rhs and therefore doesn't really contribute to
@@ -969,12 +1006,8 @@ ChangeResult TypeAnalyzer::visitBinaryBroadcastingComparisonOp(
   auto rhs = operands[1]->getValue();
   auto knowledge =
      ValueKnowledge::getNotNonePessimisticValueState(getContext());
-  if (lhs.hasSizes && rhs.hasSizes) {
-    knowledge.hasSizes = true;
-    knowledge.sizes.resize(std::max(lhs.sizes.size(), rhs.sizes.size()),
-                           kUnknownSize);
-  }
-  knowledge.dtype = IntegerType::get(op->getContext(), 1);
+  fillInSizesForBinaryBroadcastingOp(lhs, rhs, knowledge);
+  knowledge.dtype = IntegerType::get(op->getContext(), 1);
   return getLatticeElement(op->getResult(0)).join(knowledge);
 }
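Worked example (using the sketch near the top of this page, with the shapes from the new e2e test): broadcasting a [5, 4, 3, 3, 1] operand against a [4, 3, 1, 2] operand aligns the ranks from the right and gives a fully static result shape of [5, 4, 3, 3, 2], which is what lets the linalg lowering above build its init tensor from static sizes instead of tensor.dim values.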
