@@ -58,7 +58,7 @@ func.func @matmul(%arg0: memref<?x64xi16>, %arg1: memref<?x64xi16>, %arg2: memre
5858 // CHECK-SAME: {index = 0 : i8, offset = 0 : i32}
5959 // CHECK-SAME: : memref<?x64xi16>, vector<16xi16>
6060 // CHECK: %[[VC:.*]] = aievec.ups %[[ACCk]] {shift = 0 : i8} : vector<16xi16>, vector<16xi48>
61- %2 = vector.transfer_read %arg0 [%arg3 , %arg5 ], %c0_i16 {permutation_map = #map } : memref <?x64 xi16 >, vector <16 xi16 >
61+ %2 = vector.transfer_read %arg0 [%arg3 , %arg5 ], %c0_i16 {in_bounds = [ true ], permutation_map = #map } : memref <?x64 xi16 >, vector <16 xi16 >
6262 %3 = vector.transfer_read %arg1 [%arg5 , %arg4 ], %c0_i16 : memref <?x64 xi16 >, vector <16 xi16 >
6363 %4 = arith.muli %2 , %3 : vector <16 xi16 >
6464 %5 = arith.addi %arg6 , %4 : vector <16 xi16 >
@@ -72,7 +72,7 @@ func.func @matmul(%arg0: memref<?x64xi16>, %arg1: memref<?x64xi16>, %arg2: memre
7272 // CHECK-SAME: xstart = "0", zoffsets = "0", zoffsets_hi = "0", zstart = "0", zstep = "1"}
7373 // CHECK-SAME: : vector<32xi16>, vector<16xi16>, vector<16xi48>
7474 %6 = affine.apply #map1 (%arg5 )
75- %7 = vector.transfer_read %arg0 [%arg3 , %6 ], %c0_i16 {permutation_map = #map } : memref <?x64 xi16 >, vector <16 xi16 >
75+ %7 = vector.transfer_read %arg0 [%arg3 , %6 ], %c0_i16 {in_bounds = [ true ], permutation_map = #map } : memref <?x64 xi16 >, vector <16 xi16 >
7676 %8 = vector.transfer_read %arg1 [%6 , %arg4 ], %c0_i16 : memref <?x64 xi16 >, vector <16 xi16 >
7777 %9 = arith.muli %7 , %8 : vector <16 xi16 >
7878 %10 = arith.addi %5 , %9 : vector <16 xi16 >
@@ -81,7 +81,7 @@ func.func @matmul(%arg0: memref<?x64xi16>, %arg1: memref<?x64xi16>, %arg2: memre
8181 // CHECK-SAME: {index = 0 : i8, offset = 0 : i32}
8282 // CHECK-SAME: : memref<?x64xi16>, vector<16xi16>
8383 %11 = affine.apply #map2 (%arg5 )
84- %12 = vector.transfer_read %arg0 [%arg3 , %11 ], %c0_i16 {permutation_map = #map } : memref <?x64 xi16 >, vector <16 xi16 >
84+ %12 = vector.transfer_read %arg0 [%arg3 , %11 ], %c0_i16 {in_bounds = [ true ], permutation_map = #map } : memref <?x64 xi16 >, vector <16 xi16 >
8585 %13 = vector.transfer_read %arg1 [%11 , %arg4 ], %c0_i16 : memref <?x64 xi16 >, vector <16 xi16 >
8686 %14 = arith.muli %12 , %13 : vector <16 xi16 >
8787 %15 = arith.addi %10 , %14 : vector <16 xi16 >
@@ -94,7 +94,7 @@ func.func @matmul(%arg0: memref<?x64xi16>, %arg1: memref<?x64xi16>, %arg2: memre
9494 // CHECK-SAME: {xoffsets = "0x73727170", xoffsets_hi = "0x77767574", xsquare = "0x3120",
9595 // CHECK-SAME: xstart = "0", zoffsets = "0", zoffsets_hi = "0", zstart = "2", zstep = "1"}
9696 %16 = affine.apply #map3 (%arg5 )
97- %17 = vector.transfer_read %arg0 [%arg3 , %16 ], %c0_i16 {permutation_map = #map } : memref <?x64 xi16 >, vector <16 xi16 >
97+ %17 = vector.transfer_read %arg0 [%arg3 , %16 ], %c0_i16 {in_bounds = [ true ], permutation_map = #map } : memref <?x64 xi16 >, vector <16 xi16 >
9898 %18 = vector.transfer_read %arg1 [%16 , %arg4 ], %c0_i16 : memref <?x64 xi16 >, vector <16 xi16 >
9999 %19 = arith.muli %17 , %18 : vector <16 xi16 >
100100 %20 = arith.addi %15 , %19 : vector <16 xi16 >
@@ -103,7 +103,7 @@ func.func @matmul(%arg0: memref<?x64xi16>, %arg1: memref<?x64xi16>, %arg2: memre
103103 // CHECK-SAME: {index = 0 : i8, offset = 0 : i32}
104104 // CHECK-SAME: : memref<?x64xi16>, vector<16xi16>
105105 %21 = affine.apply #map4 (%arg5 )
106- %22 = vector.transfer_read %arg0 [%arg3 , %21 ], %c0_i16 {permutation_map = #map } : memref <?x64 xi16 >, vector <16 xi16 >
106+ %22 = vector.transfer_read %arg0 [%arg3 , %21 ], %c0_i16 {in_bounds = [ true ], permutation_map = #map } : memref <?x64 xi16 >, vector <16 xi16 >
107107 %23 = vector.transfer_read %arg1 [%21 , %arg4 ], %c0_i16 : memref <?x64 xi16 >, vector <16 xi16 >
108108 %24 = arith.muli %22 , %23 : vector <16 xi16 >
109109 %25 = arith.addi %20 , %24 : vector <16 xi16 >
@@ -116,7 +116,7 @@ func.func @matmul(%arg0: memref<?x64xi16>, %arg1: memref<?x64xi16>, %arg2: memre
116116 // CHECK-SAME: {xoffsets = "0x73727170", xoffsets_hi = "0x77767574", xsquare = "0x3120",
117117 // CHECK-SAME: xstart = "0", zoffsets = "0", zoffsets_hi = "0", zstart = "4", zstep = "1"}
118118 %26 = affine.apply #map5 (%arg5 )
119- %27 = vector.transfer_read %arg0 [%arg3 , %26 ], %c0_i16 {permutation_map = #map } : memref <?x64 xi16 >, vector <16 xi16 >
119+ %27 = vector.transfer_read %arg0 [%arg3 , %26 ], %c0_i16 {in_bounds = [ true ], permutation_map = #map } : memref <?x64 xi16 >, vector <16 xi16 >
120120 %28 = vector.transfer_read %arg1 [%26 , %arg4 ], %c0_i16 : memref <?x64 xi16 >, vector <16 xi16 >
121121 %29 = arith.muli %27 , %28 : vector <16 xi16 >
122122 %30 = arith.addi %25 , %29 : vector <16 xi16 >
@@ -125,7 +125,7 @@ func.func @matmul(%arg0: memref<?x64xi16>, %arg1: memref<?x64xi16>, %arg2: memre
125125 // CHECK-SAME: {index = 0 : i8, offset = 0 : i32}
126126 // CHECK-SAME: : memref<?x64xi16>, vector<16xi16>
127127 %31 = affine.apply #map6 (%arg5 )
128- %32 = vector.transfer_read %arg0 [%arg3 , %31 ], %c0_i16 {permutation_map = #map } : memref <?x64 xi16 >, vector <16 xi16 >
128+ %32 = vector.transfer_read %arg0 [%arg3 , %31 ], %c0_i16 {in_bounds = [ true ], permutation_map = #map } : memref <?x64 xi16 >, vector <16 xi16 >
129129 %33 = vector.transfer_read %arg1 [%31 , %arg4 ], %c0_i16 : memref <?x64 xi16 >, vector <16 xi16 >
130130 %34 = arith.muli %32 , %33 : vector <16 xi16 >
131131 %35 = arith.addi %30 , %34 : vector <16 xi16 >
@@ -138,7 +138,7 @@ func.func @matmul(%arg0: memref<?x64xi16>, %arg1: memref<?x64xi16>, %arg2: memre
138138 // CHECK-SAME: {xoffsets = "0x73727170", xoffsets_hi = "0x77767574", xsquare = "0x3120",
139139 // CHECK-SAME: xstart = "0", zoffsets = "0", zoffsets_hi = "0", zstart = "6", zstep = "1"}
140140 %36 = affine.apply #map7 (%arg5 )
141- %37 = vector.transfer_read %arg0 [%arg3 , %36 ], %c0_i16 {permutation_map = #map } : memref <?x64 xi16 >, vector <16 xi16 >
141+ %37 = vector.transfer_read %arg0 [%arg3 , %36 ], %c0_i16 {in_bounds = [ true ], permutation_map = #map } : memref <?x64 xi16 >, vector <16 xi16 >
142142 %38 = vector.transfer_read %arg1 [%36 , %arg4 ], %c0_i16 : memref <?x64 xi16 >, vector <16 xi16 >
143143 %39 = arith.muli %37 , %38 : vector <16 xi16 >
144144 %40 = arith.addi %35 , %39 : vector <16 xi16 >
@@ -147,7 +147,7 @@ func.func @matmul(%arg0: memref<?x64xi16>, %arg1: memref<?x64xi16>, %arg2: memre
147147 // CHECK-SAME: {index = 0 : i8, offset = 0 : i32}
148148 // CHECK-SAME: : memref<?x64xi16>, vector<16xi16>
149149 %41 = affine.apply #map8 (%arg5 )
150- %42 = vector.transfer_read %arg0 [%arg3 , %41 ], %c0_i16 {permutation_map = #map } : memref <?x64 xi16 >, vector <16 xi16 >
150+ %42 = vector.transfer_read %arg0 [%arg3 , %41 ], %c0_i16 {in_bounds = [ true ], permutation_map = #map } : memref <?x64 xi16 >, vector <16 xi16 >
151151 %43 = vector.transfer_read %arg1 [%41 , %arg4 ], %c0_i16 : memref <?x64 xi16 >, vector <16 xi16 >
152152 %44 = arith.muli %42 , %43 : vector <16 xi16 >
153153 %45 = arith.addi %40 , %44 : vector <16 xi16 >
@@ -160,7 +160,7 @@ func.func @matmul(%arg0: memref<?x64xi16>, %arg1: memref<?x64xi16>, %arg2: memre
160160 // CHECK-SAME: {xoffsets = "0x73727170", xoffsets_hi = "0x77767574", xsquare = "0x3120",
161161 // CHECK-SAME: xstart = "0", zoffsets = "0", zoffsets_hi = "0", zstart = "8", zstep = "1"}
162162 %46 = affine.apply #map9 (%arg5 )
163- %47 = vector.transfer_read %arg0 [%arg3 , %46 ], %c0_i16 {permutation_map = #map } : memref <?x64 xi16 >, vector <16 xi16 >
163+ %47 = vector.transfer_read %arg0 [%arg3 , %46 ], %c0_i16 {in_bounds = [ true ], permutation_map = #map } : memref <?x64 xi16 >, vector <16 xi16 >
164164 %48 = vector.transfer_read %arg1 [%46 , %arg4 ], %c0_i16 : memref <?x64 xi16 >, vector <16 xi16 >
165165 %49 = arith.muli %47 , %48 : vector <16 xi16 >
166166 %50 = arith.addi %45 , %49 : vector <16 xi16 >
@@ -169,7 +169,7 @@ func.func @matmul(%arg0: memref<?x64xi16>, %arg1: memref<?x64xi16>, %arg2: memre
169169 // CHECK-SAME: {index = 0 : i8, offset = 0 : i32}
170170 // CHECK-SAME: : memref<?x64xi16>, vector<16xi16>
171171 %51 = affine.apply #map10 (%arg5 )
172- %52 = vector.transfer_read %arg0 [%arg3 , %51 ], %c0_i16 {permutation_map = #map } : memref <?x64 xi16 >, vector <16 xi16 >
172+ %52 = vector.transfer_read %arg0 [%arg3 , %51 ], %c0_i16 {in_bounds = [ true ], permutation_map = #map } : memref <?x64 xi16 >, vector <16 xi16 >
173173 %53 = vector.transfer_read %arg1 [%51 , %arg4 ], %c0_i16 : memref <?x64 xi16 >, vector <16 xi16 >
174174 %54 = arith.muli %52 , %53 : vector <16 xi16 >
175175 %55 = arith.addi %50 , %54 : vector <16 xi16 >
@@ -182,7 +182,7 @@ func.func @matmul(%arg0: memref<?x64xi16>, %arg1: memref<?x64xi16>, %arg2: memre
182182 // CHECK-SAME: {xoffsets = "0x73727170", xoffsets_hi = "0x77767574", xsquare = "0x3120",
183183 // CHECK-SAME: xstart = "0", zoffsets = "0", zoffsets_hi = "0", zstart = "10", zstep = "1"}
184184 %56 = affine.apply #map11 (%arg5 )
185- %57 = vector.transfer_read %arg0 [%arg3 , %56 ], %c0_i16 {permutation_map = #map } : memref <?x64 xi16 >, vector <16 xi16 >
185+ %57 = vector.transfer_read %arg0 [%arg3 , %56 ], %c0_i16 {in_bounds = [ true ], permutation_map = #map } : memref <?x64 xi16 >, vector <16 xi16 >
186186 %58 = vector.transfer_read %arg1 [%56 , %arg4 ], %c0_i16 : memref <?x64 xi16 >, vector <16 xi16 >
187187 %59 = arith.muli %57 , %58 : vector <16 xi16 >
188188 %60 = arith.addi %55 , %59 : vector <16 xi16 >
@@ -191,7 +191,7 @@ func.func @matmul(%arg0: memref<?x64xi16>, %arg1: memref<?x64xi16>, %arg2: memre
191191 // CHECK-SAME: {index = 0 : i8, offset = 0 : i32}
192192 // CHECK-SAME: : memref<?x64xi16>, vector<16xi16>
193193 %61 = affine.apply #map12 (%arg5 )
194- %62 = vector.transfer_read %arg0 [%arg3 , %61 ], %c0_i16 {permutation_map = #map } : memref <?x64 xi16 >, vector <16 xi16 >
194+ %62 = vector.transfer_read %arg0 [%arg3 , %61 ], %c0_i16 {in_bounds = [ true ], permutation_map = #map } : memref <?x64 xi16 >, vector <16 xi16 >
195195 %63 = vector.transfer_read %arg1 [%61 , %arg4 ], %c0_i16 : memref <?x64 xi16 >, vector <16 xi16 >
196196 %64 = arith.muli %62 , %63 : vector <16 xi16 >
197197 %65 = arith.addi %60 , %64 : vector <16 xi16 >
@@ -204,7 +204,7 @@ func.func @matmul(%arg0: memref<?x64xi16>, %arg1: memref<?x64xi16>, %arg2: memre
204204 // CHECK-SAME: {xoffsets = "0x73727170", xoffsets_hi = "0x77767574", xsquare = "0x3120",
205205 // CHECK-SAME: xstart = "0", zoffsets = "0", zoffsets_hi = "0", zstart = "12", zstep = "1"}
206206 %66 = affine.apply #map13 (%arg5 )
207- %67 = vector.transfer_read %arg0 [%arg3 , %66 ], %c0_i16 {permutation_map = #map } : memref <?x64 xi16 >, vector <16 xi16 >
207+ %67 = vector.transfer_read %arg0 [%arg3 , %66 ], %c0_i16 {in_bounds = [ true ], permutation_map = #map } : memref <?x64 xi16 >, vector <16 xi16 >
208208 %68 = vector.transfer_read %arg1 [%66 , %arg4 ], %c0_i16 : memref <?x64 xi16 >, vector <16 xi16 >
209209 %69 = arith.muli %67 , %68 : vector <16 xi16 >
210210 %70 = arith.addi %65 , %69 : vector <16 xi16 >
@@ -213,7 +213,7 @@ func.func @matmul(%arg0: memref<?x64xi16>, %arg1: memref<?x64xi16>, %arg2: memre
213213 // CHECK-SAME: {index = 0 : i8, offset = 0 : i32}
214214 // CHECK-SAME: : memref<?x64xi16>, vector<16xi16>
215215 %71 = affine.apply #map14 (%arg5 )
216- %72 = vector.transfer_read %arg0 [%arg3 , %71 ], %c0_i16 {permutation_map = #map } : memref <?x64 xi16 >, vector <16 xi16 >
216+ %72 = vector.transfer_read %arg0 [%arg3 , %71 ], %c0_i16 {in_bounds = [ true ], permutation_map = #map } : memref <?x64 xi16 >, vector <16 xi16 >
217217 %73 = vector.transfer_read %arg1 [%71 , %arg4 ], %c0_i16 : memref <?x64 xi16 >, vector <16 xi16 >
218218 %74 = arith.muli %72 , %73 : vector <16 xi16 >
219219 %75 = arith.addi %70 , %74 : vector <16 xi16 >
@@ -227,14 +227,14 @@ func.func @matmul(%arg0: memref<?x64xi16>, %arg1: memref<?x64xi16>, %arg2: memre
227227 // CHECK-SAME: xstart = "0", zoffsets = "0", zoffsets_hi = "0", zstart = "14", zstep = "1"}
228228 // CHECK: %[[ACC:.*]] = aievec.srs %[[ACCk14]], %[[C0I32]] : vector<16xi48>, i32, vector<16xi16>
229229 %76 = affine.apply #map15 (%arg5 )
230- %77 = vector.transfer_read %arg0 [%arg3 , %76 ], %c0_i16 {permutation_map = #map } : memref <?x64 xi16 >, vector <16 xi16 >
230+ %77 = vector.transfer_read %arg0 [%arg3 , %76 ], %c0_i16 {in_bounds = [ true ], permutation_map = #map } : memref <?x64 xi16 >, vector <16 xi16 >
231231 %78 = vector.transfer_read %arg1 [%76 , %arg4 ], %c0_i16 : memref <?x64 xi16 >, vector <16 xi16 >
232232 %79 = arith.muli %77 , %78 : vector <16 xi16 >
233233 %80 = arith.addi %75 , %79 : vector <16 xi16 >
234234 // CHECK: scf.yield %[[ACC]] : vector<16xi16>
235235 affine.yield %80 : vector <16 xi16 >
236236 }
237- // CHECK: vector.transfer_write %[[ACCn]], %[[MC]][%[[I]], %[[J]]] {in_bounds = [true]} : vector<16xi16>, memref<?x64xi16>
237+ // CHECK: vector.transfer_write %[[ACCn]], %[[MC]][%[[I]], %[[J]]] : vector<16xi16>, memref<?x64xi16>
238238 vector.transfer_write %1 , %arg2 [%arg3 , %arg4 ] : vector <16 xi16 >, memref <?x64 xi16 >
239239 }
240240 }
0 commit comments