@@ -2800,6 +2800,88 @@ exit:
28002800  ret  i64  %r.0.lcssa 
28012801}
28022802
; Add-reduction over 1024 i8 elements of %a. Each element is zero-extended to
; i16 and multiplied by %c; the i16 product is then zero-extended to i32 and
; added to the i32 accumulator. %c is a function argument, so it is defined
; outside the loop (a live-in).
; The autogenerated assertions below expect vectorization with 4 lanes: %c is
; broadcast once in the vector preheader, the zext/mul/zext stay as separate
; vector instructions, and an in-loop llvm.vector.reduce.add feeds a scalar
; accumulator. The interleaved run additionally expects two parts per
; iteration (index step 8) whose accumulators are summed in the middle block.
; NOTE(review): presumably this guards the ext-mul-acc reduction-expression
; handling when one multiplicand is a live-in rather than an extended load;
; confirm against the change that introduced the test.
2803+ define  i32  @reduction_expression_ext_mulacc_livein (ptr  %a , i16  %c ) {
2804+ ; CHECK-LABEL: define i32 @reduction_expression_ext_mulacc_livein( 
2805+ ; CHECK-SAME: ptr [[A:%.*]], i16 [[C:%.*]]) { 
2806+ ; CHECK-NEXT:  [[ENTRY:.*:]] 
2807+ ; CHECK-NEXT:    br label %[[VECTOR_PH:.*]] 
2808+ ; CHECK:       [[VECTOR_PH]]: 
2809+ ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[C]], i64 0 
2810+ ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer 
2811+ ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]] 
2812+ ; CHECK:       [[VECTOR_BODY]]: 
2813+ ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] 
2814+ ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] 
2815+ ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] 
2816+ ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1 
2817+ ; CHECK-NEXT:    [[TMP1:%.*]] = zext <4 x i8> [[WIDE_LOAD]] to <4 x i16> 
2818+ ; CHECK-NEXT:    [[TMP2:%.*]] = mul <4 x i16> [[BROADCAST_SPLAT]], [[TMP1]] 
2819+ ; CHECK-NEXT:    [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i32> 
2820+ ; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP3]]) 
2821+ ; CHECK-NEXT:    [[TMP5]] = add i32 [[VEC_PHI]], [[TMP4]] 
2822+ ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 
2823+ ; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 
2824+ ; CHECK-NEXT:    br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] 
2825+ ; CHECK:       [[MIDDLE_BLOCK]]: 
2826+ ; CHECK-NEXT:    br label %[[FOR_EXIT:.*]] 
2827+ ; CHECK:       [[FOR_EXIT]]: 
2828+ ; CHECK-NEXT:    ret i32 [[TMP5]] 
2829+ ; 
2830+ ; CHECK-INTERLEAVED-LABEL: define i32 @reduction_expression_ext_mulacc_livein( 
2831+ ; CHECK-INTERLEAVED-SAME: ptr [[A:%.*]], i16 [[C:%.*]]) { 
2832+ ; CHECK-INTERLEAVED-NEXT:  [[ENTRY:.*:]] 
2833+ ; CHECK-INTERLEAVED-NEXT:    br label %[[VECTOR_PH:.*]] 
2834+ ; CHECK-INTERLEAVED:       [[VECTOR_PH]]: 
2835+ ; CHECK-INTERLEAVED-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[C]], i64 0 
2836+ ; CHECK-INTERLEAVED-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer 
2837+ ; CHECK-INTERLEAVED-NEXT:    br label %[[VECTOR_BODY:.*]] 
2838+ ; CHECK-INTERLEAVED:       [[VECTOR_BODY]]: 
2839+ ; CHECK-INTERLEAVED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] 
2840+ ; CHECK-INTERLEAVED-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] 
2841+ ; CHECK-INTERLEAVED-NEXT:    [[VEC_PHI1:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ] 
2842+ ; CHECK-INTERLEAVED-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] 
2843+ ; CHECK-INTERLEAVED-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 4 
2844+ ; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1 
2845+ ; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1 
2846+ ; CHECK-INTERLEAVED-NEXT:    [[TMP2:%.*]] = zext <4 x i8> [[WIDE_LOAD]] to <4 x i16> 
2847+ ; CHECK-INTERLEAVED-NEXT:    [[TMP3:%.*]] = zext <4 x i8> [[WIDE_LOAD2]] to <4 x i16> 
2848+ ; CHECK-INTERLEAVED-NEXT:    [[TMP4:%.*]] = mul <4 x i16> [[BROADCAST_SPLAT]], [[TMP2]] 
2849+ ; CHECK-INTERLEAVED-NEXT:    [[TMP5:%.*]] = mul <4 x i16> [[BROADCAST_SPLAT]], [[TMP3]] 
2850+ ; CHECK-INTERLEAVED-NEXT:    [[TMP6:%.*]] = zext <4 x i16> [[TMP4]] to <4 x i32> 
2851+ ; CHECK-INTERLEAVED-NEXT:    [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP6]]) 
2852+ ; CHECK-INTERLEAVED-NEXT:    [[TMP8]] = add i32 [[VEC_PHI]], [[TMP7]] 
2853+ ; CHECK-INTERLEAVED-NEXT:    [[TMP9:%.*]] = zext <4 x i16> [[TMP5]] to <4 x i32> 
2854+ ; CHECK-INTERLEAVED-NEXT:    [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP9]]) 
2855+ ; CHECK-INTERLEAVED-NEXT:    [[TMP11]] = add i32 [[VEC_PHI1]], [[TMP10]] 
2856+ ; CHECK-INTERLEAVED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 
2857+ ; CHECK-INTERLEAVED-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 
2858+ ; CHECK-INTERLEAVED-NEXT:    br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] 
2859+ ; CHECK-INTERLEAVED:       [[MIDDLE_BLOCK]]: 
2860+ ; CHECK-INTERLEAVED-NEXT:    [[BIN_RDX:%.*]] = add i32 [[TMP11]], [[TMP8]] 
2861+ ; CHECK-INTERLEAVED-NEXT:    br label %[[FOR_EXIT:.*]] 
2862+ ; CHECK-INTERLEAVED:       [[FOR_EXIT]]: 
2863+ ; CHECK-INTERLEAVED-NEXT:    ret i32 [[BIN_RDX]] 
2864+ ; 
2865+ entry:
2866+   br  label  %for.body 
2867+ 
2868+ for.body:                                         ; preds = %for.body, %entry 
2869+   %iv  = phi  i64  [ 0 , %entry  ], [ %iv.next , %for.body  ]
2870+   %accum  = phi  i32  [ 0 , %entry  ], [ %add , %for.body  ]
; Load the i8 element at %a + %iv and widen it to i16.
2871+   %gep.a  = getelementptr  i8 , ptr  %a , i64  %iv 
2872+   %load.a  = load  i8 , ptr  %gep.a , align  1 
2873+   %ext.a  = zext  i8  %load.a  to  i16 
; Multiply in i16 by the loop-invariant argument %c, then widen the product
; to the i32 accumulator type.
2874+   %mul  = mul  i16  %c , %ext.a 
2875+   %mul.ext  = zext  i16  %mul  to  i32 
; Fold the widened product into the running sum carried by %accum.
2876+   %add  = add  i32  %mul.ext , %accum 
; Constant trip count: leave the loop once %iv.next reaches 1024.
2877+   %iv.next  = add  i64  %iv , 1 
2878+   %exitcond.not  = icmp  eq  i64  %iv.next , 1024 
2879+   br  i1  %exitcond.not , label  %for.exit , label  %for.body 
2880+ 
2881+ for.exit:                        ; preds = %for.body 
2882+   ret  i32  %add 
2883+ }
2884+ 
28032885declare  float  @llvm.fmuladd.f32 (float , float , float )
28042886
28052887!6  = distinct !{!6 , !7 , !8 }
0 commit comments