@@ -2800,6 +2800,88 @@ exit:
28002800 ret i64 %r.0.lcssa
28012801}
28022802
; Add-reduction over 1024 bytes of %a in which each loaded i8 is zero-extended
; to i16, multiplied in i16 by the function argument %c (a value defined
; outside the loop), and the i16 product is zero-extended to i32 before being
; accumulated.
;
; Both sets of expected-output lines below pin the same shape: %c is splatted
; to <4 x i16> ahead of the vector loop body, the multiply is done on
; <4 x i16>, the product is zero-extended to <4 x i32>, and each iteration is
; folded into a scalar i32 phi through llvm.vector.reduce.add. The interleaved
; variant expects two such chains per iteration (index step 8 instead of 4)
; whose partial sums are combined by one add in the middle block.
2803+ define i32 @reduction_expression_ext_mulacc_livein (ptr %a , i16 %c ) {
2804+ ; CHECK-LABEL: define i32 @reduction_expression_ext_mulacc_livein(
2805+ ; CHECK-SAME: ptr [[A:%.*]], i16 [[C:%.*]]) {
2806+ ; CHECK-NEXT: [[ENTRY:.*:]]
2807+ ; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
2808+ ; CHECK: [[VECTOR_PH]]:
2809+ ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[C]], i64 0
2810+ ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
2811+ ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
2812+ ; CHECK: [[VECTOR_BODY]]:
2813+ ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
2814+ ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
2815+ ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
2816+ ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
2817+ ; CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i8> [[WIDE_LOAD]] to <4 x i16>
2818+ ; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i16> [[BROADCAST_SPLAT]], [[TMP1]]
2819+ ; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i32>
2820+ ; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP3]])
2821+ ; CHECK-NEXT: [[TMP5]] = add i32 [[VEC_PHI]], [[TMP4]]
2822+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
2823+ ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
2824+ ; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
2825+ ; CHECK: [[MIDDLE_BLOCK]]:
2826+ ; CHECK-NEXT: br label %[[FOR_EXIT:.*]]
2827+ ; CHECK: [[FOR_EXIT]]:
2828+ ; CHECK-NEXT: ret i32 [[TMP5]]
2829+ ;
2830+ ; CHECK-INTERLEAVED-LABEL: define i32 @reduction_expression_ext_mulacc_livein(
2831+ ; CHECK-INTERLEAVED-SAME: ptr [[A:%.*]], i16 [[C:%.*]]) {
2832+ ; CHECK-INTERLEAVED-NEXT: [[ENTRY:.*:]]
2833+ ; CHECK-INTERLEAVED-NEXT: br label %[[VECTOR_PH:.*]]
2834+ ; CHECK-INTERLEAVED: [[VECTOR_PH]]:
2835+ ; CHECK-INTERLEAVED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[C]], i64 0
2836+ ; CHECK-INTERLEAVED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
2837+ ; CHECK-INTERLEAVED-NEXT: br label %[[VECTOR_BODY:.*]]
2838+ ; CHECK-INTERLEAVED: [[VECTOR_BODY]]:
2839+ ; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
2840+ ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ]
2841+ ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ]
2842+ ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
2843+ ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 4
2844+ ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
2845+ ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1
2846+ ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = zext <4 x i8> [[WIDE_LOAD]] to <4 x i16>
2847+ ; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[WIDE_LOAD2]] to <4 x i16>
2848+ ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = mul <4 x i16> [[BROADCAST_SPLAT]], [[TMP2]]
2849+ ; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = mul <4 x i16> [[BROADCAST_SPLAT]], [[TMP3]]
2850+ ; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = zext <4 x i16> [[TMP4]] to <4 x i32>
2851+ ; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP6]])
2852+ ; CHECK-INTERLEAVED-NEXT: [[TMP8]] = add i32 [[VEC_PHI]], [[TMP7]]
2853+ ; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = zext <4 x i16> [[TMP5]] to <4 x i32>
2854+ ; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP9]])
2855+ ; CHECK-INTERLEAVED-NEXT: [[TMP11]] = add i32 [[VEC_PHI1]], [[TMP10]]
2856+ ; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
2857+ ; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
2858+ ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
2859+ ; CHECK-INTERLEAVED: [[MIDDLE_BLOCK]]:
2860+ ; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP11]], [[TMP8]]
2861+ ; CHECK-INTERLEAVED-NEXT: br label %[[FOR_EXIT:.*]]
2862+ ; CHECK-INTERLEAVED: [[FOR_EXIT]]:
2863+ ; CHECK-INTERLEAVED-NEXT: ret i32 [[BIN_RDX]]
2864+ ;
2865+ entry:
2866+ br label %for.body
2867+
2868+ for.body: ; preds = %for.body, %entry
2869+ %iv = phi i64 [ 0 , %entry ], [ %iv.next , %for.body ] ; induction variable, starts at 0 and steps by 1
2870+ %accum = phi i32 [ 0 , %entry ], [ %add , %for.body ] ; running i32 sum, starts at 0
2871+ %gep.a = getelementptr i8 , ptr %a , i64 %iv ; address of byte %iv of %a
2872+ %load.a = load i8 , ptr %gep.a , align 1
2873+ %ext.a = zext i8 %load.a to i16 ; widen the loaded byte to the multiply type
2874+ %mul = mul i16 %c , %ext.a ; %c is a function argument, so this operand is defined outside the loop
2875+ %mul.ext = zext i16 %mul to i32 ; widen the i16 product to the accumulator type
2876+ %add = add i32 %mul.ext , %accum ; reduction update fed back through %accum
2877+ %iv.next = add i64 %iv , 1
2878+ %exitcond.not = icmp eq i64 %iv.next , 1024 ; true once 1024 iterations have run
2879+ br i1 %exitcond.not , label %for.exit , label %for.body
2880+
2881+ for.exit: ; preds = %for.body
2882+ ret i32 %add
2883+ }
2884+
28032885declare float @llvm.fmuladd.f32 (float , float , float )
28042886
28052887!6 = distinct !{!6 , !7 , !8 }
0 commit comments