@@ -98,23 +98,23 @@ define void @dotp_small_epilogue_vf(i64 %idx.neg, i8 %a) #1 {
98
98
; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <4 x i8> poison, i8 [[A]], i64 0
99
99
; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT6]], <4 x i8> poison, <4 x i32> zeroinitializer
100
100
; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[IDX_NEG]], [[N_VEC5]]
101
- ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <1 x i32> zeroinitializer, i32 [[BC_MERGE_RDX]], i32 0
101
+ ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[BC_MERGE_RDX]], i32 0
102
102
; CHECK-NEXT: [[TMP8:%.*]] = sext <4 x i8> [[BROADCAST_SPLAT7]] to <4 x i32>
103
103
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
104
104
; CHECK: vec.epilog.vector.body:
105
105
; CHECK-NEXT: [[INDEX9:%.*]] = phi i64 [ [[IV]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT14:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
106
- ; CHECK-NEXT: [[VEC_PHI9:%.*]] = phi <1 x i32> [ [[TMP10]], [[VEC_EPILOG_PH]] ], [ [[PARTIAL_REDUCE12:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
106
+ ; CHECK-NEXT: [[VEC_PHI9:%.*]] = phi <4 x i32> [ [[TMP10]], [[VEC_EPILOG_PH]] ], [ [[TMP13:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
107
107
; CHECK-NEXT: [[TMP9:%.*]] = load i8, ptr null, align 1
108
108
; CHECK-NEXT: [[BROADCAST_SPLATINSERT10:%.*]] = insertelement <4 x i8> poison, i8 [[TMP9]], i64 0
109
109
; CHECK-NEXT: [[BROADCAST_SPLAT11:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT10]], <4 x i8> poison, <4 x i32> zeroinitializer
110
110
; CHECK-NEXT: [[TMP11:%.*]] = sext <4 x i8> [[BROADCAST_SPLAT11]] to <4 x i32>
111
111
; CHECK-NEXT: [[TMP14:%.*]] = mul <4 x i32> [[TMP11]], [[TMP8]]
112
- ; CHECK-NEXT: [[PARTIAL_REDUCE12]] = call <1 x i32> @llvm.vector.partial.reduce.add.v1i32.v4i32(<1 x i32> [[VEC_PHI9]], <4 x i32> [[TMP14]])
112
+ ; CHECK-NEXT: [[TMP13]] = add <4 x i32> [[TMP14]], [[VEC_PHI9]]
113
113
; CHECK-NEXT: [[INDEX_NEXT14]] = add nuw i64 [[INDEX9]], 4
114
114
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT14]], [[N_VEC5]]
115
115
; CHECK-NEXT: br i1 [[TMP12]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
116
116
; CHECK: vec.epilog.middle.block:
117
- ; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> [[PARTIAL_REDUCE12]])
117
+ ; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP13]])
118
118
; CHECK-NEXT: [[CMP_N15:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC5]]
119
119
; CHECK-NEXT: br i1 [[CMP_N15]], label [[WHILE_END_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]]
120
120
; CHECK: vec.epilog.scalar.ph:
0 commit comments