@@ -497,94 +497,60 @@ func.func @elementwise_4D_to_2D(%v1: vector<2x2x2x2xf32>, %v2: vector<2x2x2x2xf3
497497// CHECK-NOT: arith.addf
498498// CHECK: return
499499
500- //CHECK-LABEL: func @shape_cast_1D_to_2D
501- // CHECK-SAME: (%[[ARG0:.*]]: vector<16xf32>) -> vector<4x4xf32>
502- // CHECK: %[[CST:.*]] = arith.constant dense<0.000000e+00> : vector<4x4xf32>
500+ func.func @shape_cast_1D_to_2D(%v: vector<8xf32>) -> vector<4x2xf32> {  // Test input: reshape a flat 8-element vector into 4 rows x 2 columns.
501+   %0 = vector.shape_cast %v : vector<8xf32> to vector<4x2xf32>  // The FileCheck expectations that follow match this op rewritten as two 2x2 tiles.
502+   return %0 : vector<4x2xf32>
503+ }
504+
505+ // CHECK-LABEL: func @shape_cast_1D_to_2D
506+ // CHECK-SAME: (%[[ARG0:.*]]: vector<8xf32>) -> vector<4x2xf32>
507+ // CHECK: %[[CST:.*]] = arith.constant dense<0.000000e+00> : vector<4x2xf32>
503508// CHECK: %[[CST_0:.*]] = arith.constant dense<0.000000e+00> : vector<2x2xf32>
504- // CHECK: %[[E0:.*]] = vector.extract %[[ARG0]][0] : f32 from vector<16xf32>
509+ // CHECK: %[[E0:.*]] = vector.extract %[[ARG0]][0] : f32 from vector<8xf32>
505510// CHECK: %[[INS0:.*]] = vector.insert %[[E0]], %[[CST_0]] [0, 0] : f32 into vector<2x2xf32>
506- // CHECK: %[[E1:.*]] = vector.extract %[[ARG0]][1] : f32 from vector<16xf32>
511+ // CHECK: %[[E1:.*]] = vector.extract %[[ARG0]][1] : f32 from vector<8xf32>
507512// CHECK: %[[INS1:.*]] = vector.insert %[[E1]], %[[INS0]] [0, 1] : f32 into vector<2x2xf32>
508- // CHECK: %[[E2:.*]] = vector.extract %[[ARG0]][4] : f32 from vector<16xf32>
513+ // CHECK: %[[E2:.*]] = vector.extract %[[ARG0]][2] : f32 from vector<8xf32>
509514// CHECK: %[[INS2:.*]] = vector.insert %[[E2]], %[[INS1]] [1, 0] : f32 into vector<2x2xf32>
510- // CHECK: %[[E3:.*]] = vector.extract %[[ARG0]][5] : f32 from vector<16xf32>
515+ // CHECK: %[[E3:.*]] = vector.extract %[[ARG0]][3] : f32 from vector<8xf32>
511516// CHECK: %[[V0:.*]] = vector.insert %[[E3]], %[[INS2]] [1, 1] : f32 into vector<2x2xf32>
512- // CHECK: %[[I0:.*]] = vector.insert_strided_slice %[[V0]], %[[CST]] {offsets = [0, 0], strides = [1, 1]} : vector<2x2xf32> into vector<4x4xf32>
513- // CHECK: %[[E4:.*]] = vector.extract %[[ARG0]][2] : f32 from vector<16xf32>
517+ // CHECK: %[[I0:.*]] = vector.insert_strided_slice %[[V0]], %[[CST]] {offsets = [0, 0], strides = [1, 1]} : vector<2x2xf32> into vector<4x2xf32>
518+ // CHECK: %[[E4:.*]] = vector.extract %[[ARG0]][4] : f32 from vector<8xf32>
514519// CHECK: %[[INS3:.*]] = vector.insert %[[E4]], %[[CST_0]] [0, 0] : f32 into vector<2x2xf32>
515- // CHECK: %[[E5:.*]] = vector.extract %[[ARG0]][3] : f32 from vector<16xf32>
520+ // CHECK: %[[E5:.*]] = vector.extract %[[ARG0]][5] : f32 from vector<8xf32>
516521// CHECK: %[[INS4:.*]] = vector.insert %[[E5]], %[[INS3]] [0, 1] : f32 into vector<2x2xf32>
517- // CHECK: %[[E6:.*]] = vector.extract %[[ARG0]][6] : f32 from vector<16xf32>
522+ // CHECK: %[[E6:.*]] = vector.extract %[[ARG0]][6] : f32 from vector<8xf32>
518523// CHECK: %[[INS5:.*]] = vector.insert %[[E6]], %[[INS4]] [1, 0] : f32 into vector<2x2xf32>
519- // CHECK: %[[E7:.*]] = vector.extract %[[ARG0]][7] : f32 from vector<16xf32>
524+ // CHECK: %[[E7:.*]] = vector.extract %[[ARG0]][7] : f32 from vector<8xf32>
520525// CHECK: %[[V1:.*]] = vector.insert %[[E7]], %[[INS5]] [1, 1] : f32 into vector<2x2xf32>
521- // CHECK: %[[I1:.*]] = vector.insert_strided_slice %[[V1]], %[[I0]] {offsets = [0, 2], strides = [1, 1]} : vector<2x2xf32> into vector<4x4xf32>
522- // CHECK: %[[E8:.*]] = vector.extract %[[ARG0]][8] : f32 from vector<16xf32>
523- // CHECK: %[[INS6:.*]] = vector.insert %[[E8]], %[[CST_0]] [0, 0] : f32 into vector<2x2xf32>
524- // CHECK: %[[E9:.*]] = vector.extract %[[ARG0]][9] : f32 from vector<16xf32>
525- // CHECK: %[[INS7:.*]] = vector.insert %[[E9]], %[[INS6]] [0, 1] : f32 into vector<2x2xf32>
526- // CHECK: %[[E10:.*]] = vector.extract %[[ARG0]][12] : f32 from vector<16xf32>
527- // CHECK: %[[INS8:.*]] = vector.insert %[[E10]], %[[INS7]] [1, 0] : f32 into vector<2x2xf32>
528- // CHECK: %[[E11:.*]] = vector.extract %[[ARG0]][13] : f32 from vector<16xf32>
529- // CHECK: %[[V2:.*]] = vector.insert %[[E11]], %[[INS8]] [1, 1] : f32 into vector<2x2xf32>
530- // CHECK: %[[I2:.*]] = vector.insert_strided_slice %[[V2]], %[[I1]] {offsets = [2, 0], strides = [1, 1]} : vector<2x2xf32> into vector<4x4xf32>
531- // CHECK: %[[E12:.*]] = vector.extract %[[ARG0]][10] : f32 from vector<16xf32>
532- // CHECK: %[[INS9:.*]] = vector.insert %[[E12]], %[[CST_0]] [0, 0] : f32 into vector<2x2xf32>
533- // CHECK: %[[E13:.*]] = vector.extract %[[ARG0]][11] : f32 from vector<16xf32>
534- // CHECK: %[[INS10:.*]] = vector.insert %[[E13]], %[[INS9]] [0, 1] : f32 into vector<2x2xf32>
535- // CHECK: %[[E14:.*]] = vector.extract %[[ARG0]][14] : f32 from vector<16xf32>
536- // CHECK: %[[INS11:.*]] = vector.insert %[[E14]], %[[INS10]] [1, 0] : f32 into vector<2x2xf32>
537- // CHECK: %[[E15:.*]] = vector.extract %[[ARG0]][15] : f32 from vector<16xf32>
538- // CHECK: %[[V3:.*]] = vector.insert %[[E15]], %[[INS11]] [1, 1] : f32 into vector<2x2xf32>
539- // CHECK: %[[I3:.*]] = vector.insert_strided_slice %[[V3]], %[[I2]] {offsets = [2, 2], strides = [1, 1]} : vector<2x2xf32> into vector<4x4xf32>
540- // CHECK: return %[[I3]] : vector<4x4xf32>
541- func.func @shape_cast_1D_to_2D(%v: vector<16xf32>) -> vector<4x4xf32> {
542- %0 = vector.shape_cast %v : vector<16xf32> to vector<4x4xf32>
543- return %0 : vector<4x4xf32>
526+ // CHECK: %[[I1:.*]] = vector.insert_strided_slice %[[V1]], %[[I0]] {offsets = [2, 0], strides = [1, 1]} : vector<2x2xf32> into vector<4x2xf32>
527+ // CHECK: return %[[I1]] : vector<4x2xf32>
528+
529+ func.func @shape_cast_2D(%v: vector<2x4xf32>) -> vector<4x2xf32> {  // Test input: reshape 2 rows x 4 columns into 4 rows x 2 columns.
530+   %0 = vector.shape_cast %v : vector<2x4xf32> to vector<4x2xf32>  // The FileCheck expectations that follow match this op rewritten as two 2x2 tiles.
531+   return %0 : vector<4x2xf32>
544532}
545533
546- //CHECK-LABEL: func @shape_cast_2D
547- // CHECK-SAME: (%[[ARG0:.*]]: vector<2x8xf32>) -> vector<4x4xf32>
548- // CHECK: %[[CST:.*]] = arith.constant dense<0.000000e+00> : vector<4x4xf32>
534+ // CHECK-LABEL: func @shape_cast_2D
535+ // CHECK-SAME: (%[[ARG0:.*]]: vector<2x4xf32>) -> vector<4x2xf32>
536+ // CHECK: %[[CST:.*]] = arith.constant dense<0.000000e+00> : vector<4x2xf32>
549537// CHECK: %[[CST_0:.*]] = arith.constant dense<0.000000e+00> : vector<2x2xf32>
550- // CHECK: %[[E0:.*]] = vector.extract %[[ARG0]][0, 0] : f32 from vector<2x8xf32>
538+ // CHECK: %[[E0:.*]] = vector.extract %[[ARG0]][0, 0] : f32 from vector<2x4xf32>
551539// CHECK: %[[INS0:.*]] = vector.insert %[[E0]], %[[CST_0]] [0, 0] : f32 into vector<2x2xf32>
552- // CHECK: %[[E1:.*]] = vector.extract %[[ARG0]][0, 1] : f32 from vector<2x8xf32>
540+ // CHECK: %[[E1:.*]] = vector.extract %[[ARG0]][0, 1] : f32 from vector<2x4xf32>
553541// CHECK: %[[INS1:.*]] = vector.insert %[[E1]], %[[INS0]] [0, 1] : f32 into vector<2x2xf32>
554- // CHECK: %[[E2:.*]] = vector.extract %[[ARG0]][0, 4] : f32 from vector<2x8xf32>
542+ // CHECK: %[[E2:.*]] = vector.extract %[[ARG0]][0, 2] : f32 from vector<2x4xf32>
555543// CHECK: %[[INS2:.*]] = vector.insert %[[E2]], %[[INS1]] [1, 0] : f32 into vector<2x2xf32>
556- // CHECK: %[[E3:.*]] = vector.extract %[[ARG0]][0, 5] : f32 from vector<2x8xf32>
544+ // CHECK: %[[E3:.*]] = vector.extract %[[ARG0]][0, 3] : f32 from vector<2x4xf32>
557545// CHECK: %[[V0:.*]] = vector.insert %[[E3]], %[[INS2]] [1, 1] : f32 into vector<2x2xf32>
558- // CHECK: %[[I0:.*]] = vector.insert_strided_slice %[[V0]], %[[CST]] {offsets = [0, 0], strides = [1, 1]} : vector<2x2xf32> into vector<4x4xf32>
559- // CHECK: %[[E4:.*]] = vector.extract %[[ARG0]][0, 2] : f32 from vector<2x8xf32>
546+ // CHECK: %[[I0:.*]] = vector.insert_strided_slice %[[V0]], %[[CST]] {offsets = [0, 0], strides = [1, 1]} : vector<2x2xf32> into vector<4x2xf32>
547+ // CHECK: %[[E4:.*]] = vector.extract %[[ARG0]][1, 0] : f32 from vector<2x4xf32>
560548// CHECK: %[[INS3:.*]] = vector.insert %[[E4]], %[[CST_0]] [0, 0] : f32 into vector<2x2xf32>
561- // CHECK: %[[E5:.*]] = vector.extract %[[ARG0]][0, 3] : f32 from vector<2x8xf32>
549+ // CHECK: %[[E5:.*]] = vector.extract %[[ARG0]][1, 1] : f32 from vector<2x4xf32>
562550// CHECK: %[[INS4:.*]] = vector.insert %[[E5]], %[[INS3]] [0, 1] : f32 into vector<2x2xf32>
563- // CHECK: %[[E6:.*]] = vector.extract %[[ARG0]][0, 6] : f32 from vector<2x8xf32>
551+ // CHECK: %[[E6:.*]] = vector.extract %[[ARG0]][1, 2] : f32 from vector<2x4xf32>
564552// CHECK: %[[INS5:.*]] = vector.insert %[[E6]], %[[INS4]] [1, 0] : f32 into vector<2x2xf32>
565- // CHECK: %[[E7:.*]] = vector.extract %[[ARG0]][0, 7] : f32 from vector<2x8xf32>
553+ // CHECK: %[[E7:.*]] = vector.extract %[[ARG0]][1, 3] : f32 from vector<2x4xf32>
566554// CHECK: %[[V1:.*]] = vector.insert %[[E7]], %[[INS5]] [1, 1] : f32 into vector<2x2xf32>
567- // CHECK: %[[I1:.*]] = vector.insert_strided_slice %[[V1]], %[[I0]] {offsets = [0, 2], strides = [1, 1]} : vector<2x2xf32> into vector<4x4xf32>
568- // CHECK: %[[E8:.*]] = vector.extract %[[ARG0]][1, 0] : f32 from vector<2x8xf32>
569- // CHECK: %[[INS6:.*]] = vector.insert %[[E8]], %[[CST_0]] [0, 0] : f32 into vector<2x2xf32>
570- // CHECK: %[[E9:.*]] = vector.extract %[[ARG0]][1, 1] : f32 from vector<2x8xf32>
571- // CHECK: %[[INS7:.*]] = vector.insert %[[E9]], %[[INS6]] [0, 1] : f32 into vector<2x2xf32>
572- // CHECK: %[[E10:.*]] = vector.extract %[[ARG0]][1, 4] : f32 from vector<2x8xf32>
573- // CHECK: %[[INS8:.*]] = vector.insert %[[E10]], %[[INS7]] [1, 0] : f32 into vector<2x2xf32>
574- // CHECK: %[[E11:.*]] = vector.extract %[[ARG0]][1, 5] : f32 from vector<2x8xf32>
575- // CHECK: %[[V2:.*]] = vector.insert %[[E11]], %[[INS8]] [1, 1] : f32 into vector<2x2xf32>
576- // CHECK: %[[I2:.*]] = vector.insert_strided_slice %[[V2]], %[[I1]] {offsets = [2, 0], strides = [1, 1]} : vector<2x2xf32> into vector<4x4xf32>
577- // CHECK: %[[E12:.*]] = vector.extract %[[ARG0]][1, 2] : f32 from vector<2x8xf32>
578- // CHECK: %[[INS9:.*]] = vector.insert %[[E12]], %[[CST_0]] [0, 0] : f32 into vector<2x2xf32>
579- // CHECK: %[[E13:.*]] = vector.extract %[[ARG0]][1, 3] : f32 from vector<2x8xf32>
580- // CHECK: %[[INS10:.*]] = vector.insert %[[E13]], %[[INS9]] [0, 1] : f32 into vector<2x2xf32>
581- // CHECK: %[[E14:.*]] = vector.extract %[[ARG0]][1, 6] : f32 from vector<2x8xf32>
582- // CHECK: %[[INS11:.*]] = vector.insert %[[E14]], %[[INS10]] [1, 0] : f32 into vector<2x2xf32>
583- // CHECK: %[[E15:.*]] = vector.extract %[[ARG0]][1, 7] : f32 from vector<2x8xf32>
584- // CHECK: %[[V3:.*]] = vector.insert %[[E15]], %[[INS11]] [1, 1] : f32 into vector<2x2xf32>
585- // CHECK: %[[I3:.*]] = vector.insert_strided_slice %[[V3]], %[[I2]] {offsets = [2, 2], strides = [1, 1]} : vector<2x2xf32> into vector<4x4xf32>
586- // CHECK: return %[[I3]] : vector<4x4xf32>
587- func.func @shape_cast_2D(%v: vector<2x8xf32>) -> vector<4x4xf32> {
588- %0 = vector.shape_cast %v : vector<2x8xf32> to vector<4x4xf32>
589- return %0 : vector<4x4xf32>
590- }
555+ // CHECK: %[[I1:.*]] = vector.insert_strided_slice %[[V1]], %[[I0]] {offsets = [2, 0], strides = [1, 1]} : vector<2x2xf32> into vector<4x2xf32>
556+ // CHECK: return %[[I1]] : vector<4x2xf32>
0 commit comments