Restrict tosa.cast double-cast folding to bf16 -> f32 -> bf16

mgehre-amd · mgehre-amd · commit 729187cc2d68 · 2024-12-11T11:19:01.000+01:00
diff --git a/mlir/lib/Dialect/Tosa/IR/TosaCanonicalizations.cpp b/mlir/lib/Dialect/Tosa/IR/TosaCanonicalizations.cpp
@@ -1008,11 +1008,13 @@ OpFoldResult CastOp::fold(FoldAdaptor adaptor) {
     }
   }
 
-  // cast-to-bf16(cast-to-f32(x)) -> cast-to-bf16(x)
+  // Fold cast from bf16 -> f32 -> bf16 into no-op.
   if (auto cast = getInput().getDefiningOp<CastOp>()) {
+    auto sourceElTy = cast.getInput().getType().getElementType();
     auto intermediateElTy = cast.getType().getElementType();
     auto finalElTy = getType().getElementType();
-    if (isa<Float32Type>(intermediateElTy) && isa<BFloat16Type>(finalElTy)) {
+    if (isa<BFloat16Type>(sourceElTy) && isa<Float32Type>(intermediateElTy) &&
+        isa<BFloat16Type>(finalElTy)) {
       getInputMutable().assign(cast.getInput());
       return getResult();
     }
diff --git a/mlir/test/Dialect/Tosa/canonicalize.mlir b/mlir/test/Dialect/Tosa/canonicalize.mlir
@@ -56,9 +56,9 @@ func.func @cast_fold_double(%arg0: tensor<?x1xf32>) -> tensor<?x1xi8> {
 }
 
 // CHECK-LABEL: @cast_fold_double
-func.func @cast_fold_double2(%arg0: tensor<?x1xf16>) -> tensor<?x1xbf16> {
-  // CHECK: tosa.cast{{.*}} (tensor<?x1xf16>) -> tensor<?x1xbf16>
-  %0 = tosa.cast %arg0 : (tensor<?x1xf16>) -> tensor<?x1xf32>
+func.func @cast_fold_double2(%arg0: tensor<?x1xbf16>) -> tensor<?x1xbf16> {
+  // CHECK: return %arg0
+  %0 = tosa.cast %arg0 : (tensor<?x1xbf16>) -> tensor<?x1xf32>
   %1 = tosa.cast %0 : (tensor<?x1xf32>) -> tensor<?x1xbf16>
   return %1 : tensor<?x1xbf16>
 }

Original file line number	Diff line number	Diff line change
`@@ -1008,11 +1008,13 @@ OpFoldResult CastOp::fold(FoldAdaptor adaptor) {`
`1008`	`1008`	`}`
`1009`	`1009`	`}`
`1010`	`1010`
`1011`		`- // cast-to-bf16(cast-to-f32(x)) -> cast-to-bf16(x)`
	`1011`	`+ // Fold cast from bf16 -> f32 -> bf16 into no-op.`
`1012`	`1012`	`if (auto cast = getInput().getDefiningOp<CastOp>()) {`
	`1013`	`+ auto sourceElTy = cast.getInput().getType().getElementType();`
`1013`	`1014`	`auto intermediateElTy = cast.getType().getElementType();`
`1014`	`1015`	`auto finalElTy = getType().getElementType();`
`1015`		`- if (isa<Float32Type>(intermediateElTy) && isa<BFloat16Type>(finalElTy)) {`
	`1016`	`+ if (isa<BFloat16Type>(sourceElTy) && isa<Float32Type>(intermediateElTy) &&`
	`1017`	`+ isa<BFloat16Type>(finalElTy)) {`
`1016`	`1018`	`getInputMutable().assign(cast.getInput());`
`1017`	`1019`	`return getResult();`
`1018`	`1020`	`}`
Original file line number	Diff line number	Diff line change
`@@ -56,9 +56,9 @@ func.func @cast_fold_double(%arg0: tensor<?x1xf32>) -> tensor<?x1xi8> {`
`56`	`56`	`}`
`57`	`57`
`58`	`58`	`// CHECK-LABEL: @cast_fold_double`
`59`		`-func.func @cast_fold_double2(%arg0: tensor<?x1xf16>) -> tensor<?x1xbf16> {`
`60`		`- // CHECK: tosa.cast{{.*}} (tensor<?x1xf16>) -> tensor<?x1xbf16>`
`61`		`- %0 = tosa.cast %arg0 : (tensor<?x1xf16>) -> tensor<?x1xf32>`
	`59`	`+func.func @cast_fold_double2(%arg0: tensor<?x1xbf16>) -> tensor<?x1xbf16> {`
	`60`	`+ // CHECK: return %arg0`
	`61`	`+ %0 = tosa.cast %arg0 : (tensor<?x1xbf16>) -> tensor<?x1xf32>`
`62`	`62`	`%1 = tosa.cast %0 : (tensor<?x1xf32>) -> tensor<?x1xbf16>`
`63`	`63`	`return %1 : tensor<?x1xbf16>`
`64`	`64`	`}`