
Commit 222940b

[Codegen] Test Cleanup 7/8: SPIRV tests (#22750)
Result of a scan over all tests in Codegen to clean up common issues in tests. A summary of the results, plus a preamble approximating the issues to look for, can be found here: https://gist.github.com/qedawkins/40f9e604fd83745bf1ac20fd63a7a61f
1 parent 1a66819 · commit 222940b

26 files changed: +52 −102 lines
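
Many of the diffs below are instances of a single pattern flagged in the gist: a FileCheck capture that is bound but never referenced again. Binding a name with %[[NAME:.+]] implies a later line depends on the value, so a dead capture misleads readers; where nothing uses the name, the commit substitutes the anonymous {{.+}} matcher. A minimal sketch of the pattern, using lines from the convert_to_spirv.mlir diff below:

// Before: WGIDX is bound, but no later CHECK line refers to it.
// CHECK: %[[WGIDX:.+]] = spirv.CompositeExtract %[[VAL1]][0 : i32]
// After: match the same output without binding a dead name.
// CHECK: {{.+}} = spirv.CompositeExtract %[[VAL1]][0 : i32]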

compiler/src/iree/compiler/Codegen/SPIRV/test/config_amd_matmul.mlir

Lines changed: 2 additions & 2 deletions
@@ -32,7 +32,7 @@ func.func @matmul_f16_64x640x320(%3: tensor<64x320xf16>, %4: tensor<320x640xf16>

 // -----

-func.func @batch_matmul_f32_16x4096x40x4096(%3: tensor<16x4096x4096xf32>, %4: tensor<16x4096x48xf32>) -> tensor<16x4096x48xf32> {
+func.func @batch_matmul_f32_16x4096x48x4096(%3: tensor<16x4096x4096xf32>, %4: tensor<16x4096x48xf32>) -> tensor<16x4096x48xf32> {
   %cst = arith.constant 0.000000e+00 : f32
   %5 = tensor.empty() : tensor<16x4096x48xf32>
   %6 = linalg.fill ins(%cst : f32) outs(%5 : tensor<16x4096x48xf32>) -> tensor<16x4096x48xf32>

@@ -42,7 +42,7 @@ func.func @batch_matmul_f32_16x4096x40x4096(%3: tensor<16x4096x4096xf32>, %4: te

 // CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[1, 128, 16, 32]{{\]}}>
 // CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<pipeline = SPIRVMatmulPromoteVectorize workgroup_size = [4, 16, 1], {pipeline_depth = 2 : i64, store_stage = 0 : i64}>
-// CHECK: func.func @batch_matmul_f32_16x4096x40x4096(
+// CHECK: func.func @batch_matmul_f32_16x4096x48x4096(
 // CHECK-SAME: translation_info = #[[TRANSLATION]]
 // CHECK: linalg.batch_matmul
 // CHECK-SAME: lowering_config = #[[CONFIG]]

compiler/src/iree/compiler/Codegen/SPIRV/test/config_amd_matvec.mlir

Lines changed: 1 addition & 1 deletion
@@ -217,7 +217,7 @@ func.func @i4_dequant_matvec(%23: index, %26: tensor<11008x32x128xi4>, %27: tensor

 // -----

-func.func @dynamic_batch_matvec(%11: index, %12: index, %15: tensor<32x1x?xf16>, %16: tensor<32x?x128xf16>) -> tensor<32x1x128xf16> {
+func.func @dynamic_batch_matvec(%15: tensor<32x1x?xf16>, %16: tensor<32x?x128xf16>) -> tensor<32x1x128xf16> {
   %cst = arith.constant 0.000000e+00 : f16
   %17 = tensor.empty() : tensor<32x1x128xf16>
   %18 = linalg.fill ins(%cst : f16) outs(%17 : tensor<32x1x128xf16>) -> tensor<32x1x128xf16>

compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_linalg_ops.mlir

Lines changed: 1 addition & 5 deletions
@@ -176,7 +176,6 @@ func.func @dwconv_elementwise(%3: tensor<1x21x20x1xf32>) -> tensor<1x19x18x1x4xf
   %cst = arith.constant dense_resource<__elided__> : tensor<3x3x1x4xf32>
   %cst_0 = arith.constant 1.001000e+00 : f32
   %cst_1 = arith.constant 0.000000e+00 : f32
-  %c4 = arith.constant 4 : index
   %2 = tensor.empty() : tensor<1x19x18x1x4xf32>
   %4 = tensor.empty() : tensor<1x19x18x1x4xf32>
   %5 = linalg.fill ins(%cst_1 : f32) outs(%4 : tensor<1x19x18x1x4xf32>) -> tensor<1x19x18x1x4xf32>

@@ -238,11 +237,8 @@ func.func @outermost_reduction(%2: tensor<4x2048x512xf32>) -> tensor<2048x512xf3
 }>
 #map = affine_map<(d0, d1) -> (d0, d1)>
 #map1 = affine_map<(d0, d1) -> (d0)>
-func.func @innermost_reduction(%0: i32, %1: i32, %2: i32, %9: tensor<128x384xf32>, %10: tensor<128xf32>) -> tensor<128xf32> attributes {hal.executable.target = #executable_target_vulkan_spirv_fb} {
+func.func @innermost_reduction(%9: tensor<128x384xf32>, %10: tensor<128xf32>) -> tensor<128xf32> attributes {hal.executable.target = #executable_target_vulkan_spirv_fb} {
   %cst = arith.constant -0.000000e+00 : f32
-  %3 = arith.index_cast %0 {stream.alignment = 512 : index, stream.values = [0 : index, 394752 : index, 984064 : index]} : i32 to index
-  %4 = arith.index_cast %1 {stream.alignment = 512 : index, stream.values = [0 : index, 196608 : index, 197120 : index]} : i32 to index
-  %5 = arith.index_cast %2 {stream.alignment = 512 : index, stream.values = [512 : index, 197120 : index, 197632 : index]} : i32 to index
   %11 = tensor.empty() : tensor<128xf32>
   %12 = linalg.fill ins(%cst : f32) outs(%11 : tensor<128xf32>) -> tensor<128xf32>
   %13 = linalg.generic {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "reduction"]} ins(%9, %10 : tensor<128x384xf32>, tensor<128xf32>) outs(%12 : tensor<128xf32>) {

compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_matmul.mlir

Lines changed: 2 additions & 2 deletions
@@ -62,9 +62,9 @@ func.func @matmul_64x16xi8(%3: tensor<64x32xi8>, %4: tensor<32x16xi8>) -> tensor
     max_workgroup_counts = [65535, 65535, 65535]>>
 }>
 func.func @matmul_64x16xi64(%3: tensor<64x32xi64>, %4: tensor<32x16xi64>) -> tensor<64x16xi64> attributes {hal.executable.target = #executable_target_vulkan_spirv_fb} {
-  %c0_i32 = arith.constant 0 : i32
+  %c0_i64 = arith.constant 0 : i64
   %5 = tensor.empty() : tensor<64x16xi64>
-  %6 = linalg.fill ins(%c0_i32 : i32) outs(%5 : tensor<64x16xi64>) -> tensor<64x16xi64>
+  %6 = linalg.fill ins(%c0_i64 : i64) outs(%5 : tensor<64x16xi64>) -> tensor<64x16xi64>
   %7 = linalg.matmul ins(%3, %4 : tensor<64x32xi64>, tensor<32x16xi64>) outs(%6 : tensor<64x16xi64>) -> tensor<64x16xi64>
   return %7 : tensor<64x16xi64>
 }
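
Unlike the pure renames elsewhere in this commit, the change above fixes a type inconsistency: the test is named @matmul_64x16xi64, yet it filled its accumulator with an i32 zero, so it did not exercise a pure i64 path. With the fill scalar matching the destination element type, the test's name and its IR agree. A minimal sketch of the corrected idiom (the %acc and %init names are illustrative, not from the test):

// Fill an i64 accumulator with an i64 zero: the scalar type and the
// destination element type now match.
%c0_i64 = arith.constant 0 : i64
%acc = tensor.empty() : tensor<64x16xi64>
%init = linalg.fill ins(%c0_i64 : i64) outs(%acc : tensor<64x16xi64>) -> tensor<64x16xi64>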

compiler/src/iree/compiler/Codegen/SPIRV/test/config_default_misc.mlir

Lines changed: 16 additions & 19 deletions
@@ -2,29 +2,26 @@

 #map = affine_map<(d0, d1, d2) -> (d1)>
 #map1 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
-func.func @complex_view_as_real(%4: tensor<1xi32>, %5: tensor<1x1x32x50x2xf32>, %9: tensor<50xcomplex<f32>>) -> tensor<32x50x2xf32> {
+func.func @complex_view_as_real(%arg0: tensor<1x1x32x50x2xf32>, %arg1: tensor<50xcomplex<f32>>) -> tensor<32x50x2xf32> {
   %c1 = arith.constant 1 : index
   %c0 = arith.constant 0 : index
-  %6 = tensor.empty() : tensor<32x50x2xf32>
-  %extracted = tensor.extract %4[%c0] : tensor<1xi32>
-  %7 = arith.extsi %extracted : i32 to i64
-  %8 = arith.index_cast %7 : i64 to index
-  %10 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%9 : tensor<50xcomplex<f32>>) outs(%6 : tensor<32x50x2xf32>) {
+  %0 = tensor.empty() : tensor<32x50x2xf32>
+  %1 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%arg1 : tensor<50xcomplex<f32>>) outs(%0 : tensor<32x50x2xf32>) {
   ^bb0(%in: complex<f32>, %out: f32):
-    %11 = linalg.index 0 : index
-    %12 = linalg.index 1 : index
-    %extracted_0 = tensor.extract %5[%c0, %c0, %11, %12, %c0] : tensor<1x1x32x50x2xf32>
-    %extracted_1 = tensor.extract %5[%c0, %c0, %11, %12, %c1] : tensor<1x1x32x50x2xf32>
-    %13 = complex.create %extracted_0, %extracted_1 : complex<f32>
-    %14 = complex.mul %13, %in : complex<f32>
-    %15 = complex.re %14 : complex<f32>
-    %16 = complex.im %14 : complex<f32>
-    %17 = linalg.index 2 : index
-    %18 = arith.cmpi eq, %17, %c0 : index
-    %19 = arith.select %18, %15, %16 : f32
-    linalg.yield %19 : f32
+    %2 = linalg.index 0 : index
+    %3 = linalg.index 1 : index
+    %extracted_0 = tensor.extract %arg0[%c0, %c0, %2, %3, %c0] : tensor<1x1x32x50x2xf32>
+    %extracted_1 = tensor.extract %arg0[%c0, %c0, %2, %3, %c1] : tensor<1x1x32x50x2xf32>
+    %4 = complex.create %extracted_0, %extracted_1 : complex<f32>
+    %5 = complex.mul %4, %in : complex<f32>
+    %6 = complex.re %5 : complex<f32>
+    %7 = complex.im %5 : complex<f32>
+    %8 = linalg.index 2 : index
+    %9 = arith.cmpi eq, %8, %c0 : index
+    %10 = arith.select %9, %6, %7 : f32
+    linalg.yield %10 : f32
   } -> tensor<32x50x2xf32>
-  return %10 : tensor<32x50x2xf32>
+  return %1 : tensor<32x50x2xf32>
 }

 // CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[16, 2, 2], [1, 1, 1]{{\]}}>

compiler/src/iree/compiler/Codegen/SPIRV/test/config_mali_matmul.mlir

Lines changed: 1 addition & 1 deletion
@@ -96,7 +96,7 @@ func.func @matmul_49x160x576(%3: tensor<49x576xf32>, %4: tensor<576x160xf32>) ->

 // Small matmul M to "shift" parallelism to N.

-func.func @matmul_2x1024x576(%4: tensor<2x576xf32>, %5: tensor<576x1024xf32>, %6: tensor<2x1024xf32>) -> tensor<2x1024xf32> {
+func.func @matmul_2x1024x576(%4: tensor<2x576xf32>, %5: tensor<576x1024xf32>) -> tensor<2x1024xf32> {
   %cst = arith.constant 0.000000e+00 : f32
   %7 = tensor.empty() : tensor<2x1024xf32>
   %8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<2x1024xf32>) -> tensor<2x1024xf32>

compiler/src/iree/compiler/Codegen/SPIRV/test/config_nvidia_matmul_cooperative_ops.mlir

Lines changed: 1 addition & 2 deletions
@@ -6,8 +6,7 @@
 func.func @matmul_256x1024x128_div_add(%5: tensor<256x1024xf16>, %6: tensor<256x1024xf16>, %8: tensor<256x128xf16>, %9: tensor<128x1024xf16>) -> tensor<256x1024xf16> {
   %cst = arith.constant 0.000000e+00 : f16
   %7 = tensor.empty() : tensor<256x1024xf16>
-  %10 = tensor.empty() : tensor<256x1024xf16>
-  %11 = linalg.fill ins(%cst : f16) outs(%10 : tensor<256x1024xf16>) -> tensor<256x1024xf16>
+  %11 = linalg.fill ins(%cst : f16) outs(%7 : tensor<256x1024xf16>) -> tensor<256x1024xf16>
   %12 = linalg.matmul ins(%8, %9 : tensor<256x128xf16>, tensor<128x1024xf16>) outs(%11 : tensor<256x1024xf16>) -> tensor<256x1024xf16>
   %13 = linalg.generic {indexing_maps = [#map, #map, #map, #map], iterator_types = ["parallel", "parallel"]} ins(%12, %5, %6 : tensor<256x1024xf16>, tensor<256x1024xf16>, tensor<256x1024xf16>) outs(%7 : tensor<256x1024xf16>) {
   ^bb0(%in: f16, %in_0: f16, %in_1: f16, %out: f16):

compiler/src/iree/compiler/Codegen/SPIRV/test/convert_gpu_target.mlir

Lines changed: 1 addition & 1 deletion
@@ -24,7 +24,7 @@ hal.executable.variant public @vulkan_spirv_fb target(<"vulkan-spirv", "vulkan-s
 // CHECK-SAME: spirv.target_env = #spirv.target_env<#spirv.vce<v1.6,
 // CHECK-SAME: [Shader, Float64, Float16, Int64, Int16, Int8,
 // CHECK-SAME: StorageBuffer16BitAccess, StorageUniform16, StoragePushConstant16,
-// CHECK-SMAE: StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8,
+// CHECK-SAME: StorageBuffer8BitAccess, UniformAndStorageBuffer8BitAccess, StoragePushConstant8,
 // CHECK-SAME: GroupNonUniformShuffle, GroupNonUniformShuffleRelative, GroupNonUniformArithmetic,
 // CHECK-SAME: DotProduct, DotProductInput4x8BitPacked, DotProductInputAll, DotProductInput4x8Bit,
 // CHECK-SAME: CooperativeMatrixKHR],
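
The one-character fix above is easy to undersell: FileCheck treats a comment whose prefix is not an exact directive match as plain text, so the misspelled CHECK-SMAE line was silently skipped and the 8-bit storage capabilities were never actually verified. Schematically:

// CHECK-SMAE: StorageBuffer8BitAccess, ...   <- not a recognized directive; checks nothing
// CHECK-SAME: StorageBuffer8BitAccess, ...   <- must match on the line of the preceding CHECK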

compiler/src/iree/compiler/Codegen/SPIRV/test/convert_to_spirv.mlir

Lines changed: 6 additions & 6 deletions
@@ -258,10 +258,10 @@ hal.executable private @interface_wg_id {
 // CHECK: spirv.func
 // CHECK: %[[ADDR1:.+]] = spirv.mlir.addressof @[[WGID]]
 // CHECK: %[[VAL1:.+]] = spirv.Load "Input" %[[ADDR1]]
-// CHECK: %[[WGIDX:.+]] = spirv.CompositeExtract %[[VAL1]][0 : i32]
+// CHECK: {{.+}} = spirv.CompositeExtract %[[VAL1]][0 : i32]
 // CHECK: %[[ADDR2:.+]] = spirv.mlir.addressof @[[WGID]]
 // CHECK: %[[VAL2:.+]] = spirv.Load "Input" %[[ADDR2]]
-// CHECK: %[[WGIDY:.+]] = spirv.CompositeExtract %[[VAL2]][1 : i32]
+// CHECK: {{.+}} = spirv.CompositeExtract %[[VAL2]][1 : i32]

 // -----

@@ -326,17 +326,17 @@ hal.executable private @interface_wg_count {
 // CHECK: spirv.func
 // CHECK: %[[ADDR1:.+]] = spirv.mlir.addressof @[[WGCOUNT]]
 // CHECK: %[[VAL1:.+]] = spirv.Load "Input" %[[ADDR1]]
-// CHECK: %[[WGIDX:.+]] = spirv.CompositeExtract %[[VAL1]][0 : i32]
+// CHECK: {{.+}} = spirv.CompositeExtract %[[VAL1]][0 : i32]
 // CHECK: %[[ADDR2:.+]] = spirv.mlir.addressof @[[WGCOUNT]]
 // CHECK: %[[VAL2:.+]] = spirv.Load "Input" %[[ADDR2]]
-// CHECK: %[[WGIDY:.+]] = spirv.CompositeExtract %[[VAL2]][1 : i32]
+// CHECK: {{.+}} = spirv.CompositeExtract %[[VAL2]][1 : i32]
 // INDEX64-DAG: spirv.GlobalVariable @[[WGCOUNT:.+]] built_in("NumWorkgroups")
 // INDEX64: spirv.func
 // INDEX64: %[[ADDR1:.+]] = spirv.mlir.addressof @[[WGCOUNT]]
 // INDEX64: %[[VAL1:.+]] = spirv.Load "Input" %[[ADDR1]]
 // INDEX64: %[[WGIDX:.+]] = spirv.CompositeExtract %[[VAL1]][0 : i32]
-// INDEX64: %[[WGXEXT:.+]] = spirv.UConvert %[[WGIDX]] : i32 to i64
+// INDEX64: {{.+}} = spirv.UConvert %[[WGIDX]] : i32 to i64
 // INDEX64: %[[ADDR2:.+]] = spirv.mlir.addressof @[[WGCOUNT]]
 // INDEX64: %[[VAL2:.+]] = spirv.Load "Input" %[[ADDR2]]
 // INDEX64: %[[WGIDY:.+]] = spirv.CompositeExtract %[[VAL2]][1 : i32]
-// INDEX64: %[[WGYEXT:.+]] = spirv.UConvert %[[WGIDY]] : i32 to i64
+// INDEX64: {{.+}} = spirv.UConvert %[[WGIDY]] : i32 to i64

compiler/src/iree/compiler/Codegen/SPIRV/test/emulate_i64.mlir

Lines changed: 0 additions & 1 deletion
@@ -52,7 +52,6 @@ func.func @buffer_types() attributes {hal.executable.target = #executable_target
     max_workgroup_counts = [65535, 65535, 65535]>>
 }>
 func.func @splat_i64_with_assume() attributes {hal.executable.target = #executable_target_vulkan_spirv_fb} {
-  %c64 = arith.constant 64 : index
   %c0 = arith.constant 0 : index
   %c1 = arith.constant 1 : index
   %0 = hal.interface.constant.load layout(<constants = 1, bindings = [#hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(0) : i32
