@@ -378,3 +378,76 @@ func.func @vector_broadcast_with_tailing_unit_dim(%v: vector<4x1xf32>) -> vector
378378// CHECK: [[b3:%.+]] = vector.broadcast [[s3]] : vector<2x1xf32> to vector<2x2xf32>
379379// CHECK: [[r3:%.+]] = vector.insert_strided_slice [[b3]], [[r2]] {offsets = [2, 2], strides = [1, 1]} : vector<2x2xf32> into vector<4x4xf32>
380380// CHECK: return [[r3]] : vector<4x4xf32>
381+
382+
// Unrolling of a rank-2 vector.load.
// The single vector<4x4xf16> load at [0, 0] is expected to become four rank-1
// vector<4xf16> loads (rows 0 through 3), each written into a zero-initialized
// vector<4x4xf16> with vector.insert_strided_slice; the last insert is the
// returned value.
// The constants are matched order-independently because the order in which
// they are materialized is not what this test is about.
// CHECK-LABEL: func.func @unroll_2D_vector_load(
// CHECK-SAME: %[[ARG:.*]]: memref<4x4xf16>) -> vector<4x4xf16> {
func.func @unroll_2D_vector_load(%arg0: memref<4x4xf16>) -> vector<4x4xf16> {
  // CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
  // CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
  // CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
  // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
  // CHECK-DAG: %[[CST:.*]] = arith.constant dense<0.000000e+00> : vector<4x4xf16>
  // CHECK: %[[V0:.*]] = vector.load %[[ARG]][%[[C0]], %[[C0]]] : memref<4x4xf16>, vector<4xf16>
  // CHECK: %[[V1:.*]] = vector.insert_strided_slice %[[V0]], %[[CST]] {offsets = [0, 0], strides = [1]} : vector<4xf16> into vector<4x4xf16>
  // CHECK: %[[V2:.*]] = vector.load %[[ARG]][%[[C1]], %[[C0]]] : memref<4x4xf16>, vector<4xf16>
  // CHECK: %[[V3:.*]] = vector.insert_strided_slice %[[V2]], %[[V1]] {offsets = [1, 0], strides = [1]} : vector<4xf16> into vector<4x4xf16>
  // CHECK: %[[V4:.*]] = vector.load %[[ARG]][%[[C2]], %[[C0]]] : memref<4x4xf16>, vector<4xf16>
  // CHECK: %[[V5:.*]] = vector.insert_strided_slice %[[V4]], %[[V3]] {offsets = [2, 0], strides = [1]} : vector<4xf16> into vector<4x4xf16>
  // CHECK: %[[V6:.*]] = vector.load %[[ARG]][%[[C3]], %[[C0]]] : memref<4x4xf16>, vector<4xf16>
  // CHECK: %[[V7:.*]] = vector.insert_strided_slice %[[V6]], %[[V5]] {offsets = [3, 0], strides = [1]} : vector<4xf16> into vector<4x4xf16>
  // CHECK: return %[[V7]] : vector<4x4xf16>
  %c0 = arith.constant 0 : index
  %0 = vector.load %arg0[%c0, %c0] : memref<4x4xf16>, vector<4x4xf16>
  return %0 : vector<4x4xf16>
}
404+
// Unrolling of a rank-2 vector.store.
// The single vector<4x4xf16> store at [0, 0] is expected to become four
// vector.extract / vector.store pairs: row i of the source vector is extracted
// as a vector<4xf16> and stored at [i, 0] for i = 0 through 3.
// The index constants are matched order-independently because the order in
// which they are materialized is not what this test is about.
// CHECK-LABEL: func.func @unroll_2D_vector_store(
// CHECK-SAME: %[[ARG0:.*]]: memref<4x4xf16>, %[[ARG1:.*]]: vector<4x4xf16>) {
func.func @unroll_2D_vector_store(%arg0: memref<4x4xf16>, %arg1: vector<4x4xf16>) {
  // CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
  // CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
  // CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
  // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
  // CHECK: %[[V0:.*]] = vector.extract %[[ARG1]][0] : vector<4xf16> from vector<4x4xf16>
  // CHECK: vector.store %[[V0]], %[[ARG0]][%[[C0]], %[[C0]]] : memref<4x4xf16>, vector<4xf16>
  // CHECK: %[[V1:.*]] = vector.extract %[[ARG1]][1] : vector<4xf16> from vector<4x4xf16>
  // CHECK: vector.store %[[V1]], %[[ARG0]][%[[C1]], %[[C0]]] : memref<4x4xf16>, vector<4xf16>
  // CHECK: %[[V2:.*]] = vector.extract %[[ARG1]][2] : vector<4xf16> from vector<4x4xf16>
  // CHECK: vector.store %[[V2]], %[[ARG0]][%[[C2]], %[[C0]]] : memref<4x4xf16>, vector<4xf16>
  // CHECK: %[[V3:.*]] = vector.extract %[[ARG1]][3] : vector<4xf16> from vector<4x4xf16>
  // CHECK: vector.store %[[V3]], %[[ARG0]][%[[C3]], %[[C0]]] : memref<4x4xf16>, vector<4xf16>
  %c0 = arith.constant 0 : index
  vector.store %arg1, %arg0[%c0, %c0] : memref<4x4xf16>, vector<4x4xf16>
  return
}
424+
// Unrolling of a rank-2 vector.load from a rank-4 memref at a non-zero base.
// The vector<2x2xf16> load at [1, 1, 1, 1] is expected to become two rank-1
// vector<2xf16> loads. Only the second-to-last index advances (1, then 2); the
// two leading indices and the innermost index stay at 1. Each loaded row is
// written into a zero-initialized vector<2x2xf16> with
// vector.insert_strided_slice, and the last insert is the returned value.
// The constants are matched order-independently because the order in which
// they are materialized is not what this test is about.
// CHECK-LABEL: func.func @unroll_vector_load(
// CHECK-SAME: %[[ARG:.*]]: memref<4x4x4x4xf16>) -> vector<2x2xf16> {
func.func @unroll_vector_load(%arg0: memref<4x4x4x4xf16>) -> vector<2x2xf16> {
  // CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
  // CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
  // CHECK-DAG: %[[CST:.*]] = arith.constant dense<0.000000e+00> : vector<2x2xf16>
  // CHECK: %[[V0:.*]] = vector.load %[[ARG]][%[[C1]], %[[C1]], %[[C1]], %[[C1]]] : memref<4x4x4x4xf16>, vector<2xf16>
  // CHECK: %[[V1:.*]] = vector.insert_strided_slice %[[V0]], %[[CST]] {offsets = [0, 0], strides = [1]} : vector<2xf16> into vector<2x2xf16>
  // CHECK: %[[V2:.*]] = vector.load %[[ARG]][%[[C1]], %[[C1]], %[[C2]], %[[C1]]] : memref<4x4x4x4xf16>, vector<2xf16>
  // CHECK: %[[V3:.*]] = vector.insert_strided_slice %[[V2]], %[[V1]] {offsets = [1, 0], strides = [1]} : vector<2xf16> into vector<2x2xf16>
  // CHECK: return %[[V3]] : vector<2x2xf16>
  %c1 = arith.constant 1 : index
  %0 = vector.load %arg0[%c1, %c1, %c1, %c1] : memref<4x4x4x4xf16>, vector<2x2xf16>
  return %0 : vector<2x2xf16>
}
440+
// Unrolling of a rank-2 vector.store into a rank-4 memref at a non-zero base.
// The vector<2x2xf16> store at [1, 1, 1, 1] is expected to become two
// vector.extract / vector.store pairs: row i of the source vector is extracted
// as a vector<2xf16> and stored with the second-to-last index set to 1 + i,
// while the two leading indices and the innermost index stay at 1.
// The index constants are matched order-independently because the order in
// which they are materialized is not what this test is about.
// CHECK-LABEL: func.func @unroll_vector_store(
// CHECK-SAME: %[[ARG0:.*]]: memref<4x4x4x4xf16>, %[[ARG1:.*]]: vector<2x2xf16>) {
func.func @unroll_vector_store(%arg0: memref<4x4x4x4xf16>, %arg1: vector<2x2xf16>) {
  // CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
  // CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
  // CHECK: %[[V0:.*]] = vector.extract %[[ARG1]][0] : vector<2xf16> from vector<2x2xf16>
  // CHECK: vector.store %[[V0]], %[[ARG0]][%[[C1]], %[[C1]], %[[C1]], %[[C1]]] : memref<4x4x4x4xf16>, vector<2xf16>
  // CHECK: %[[V1:.*]] = vector.extract %[[ARG1]][1] : vector<2xf16> from vector<2x2xf16>
  // CHECK: vector.store %[[V1]], %[[ARG0]][%[[C1]], %[[C1]], %[[C2]], %[[C1]]] : memref<4x4x4x4xf16>, vector<2xf16>
  %c1 = arith.constant 1 : index
  vector.store %arg1, %arg0[%c1, %c1, %c1, %c1] : memref<4x4x4x4xf16>, vector<2x2xf16>
  return
}
0 commit comments