@@ -709,25 +709,34 @@ define float @dot_product_fp32_reorder(ptr %a, ptr %b) {
709709
710710
711711define double @dot_product_fp64 (ptr %a , ptr %b ) {
712- ; CHECK-LABEL: @dot_product_fp64(
713- ; CHECK-NEXT: [[GEP_A_0:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i32 0
714- ; CHECK-NEXT: [[L_A_0:%.*]] = load double, ptr [[GEP_A_0]], align 4
715- ; CHECK-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds double, ptr [[A]], i32 1
716- ; CHECK-NEXT: [[L_A_1:%.*]] = load double, ptr [[GEP_A_1]], align 4
717- ; CHECK-NEXT: [[GEP_A_2:%.*]] = getelementptr inbounds double, ptr [[A]], i32 2
718- ; CHECK-NEXT: [[L_A_2:%.*]] = load double, ptr [[GEP_A_2]], align 4
719- ; CHECK-NEXT: [[GEP_B_0:%.*]] = getelementptr inbounds double, ptr [[B:%.*]], i32 0
720- ; CHECK-NEXT: [[L_B_0:%.*]] = load double, ptr [[GEP_B_0]], align 4
721- ; CHECK-NEXT: [[GEP_B_1:%.*]] = getelementptr inbounds double, ptr [[B]], i32 1
722- ; CHECK-NEXT: [[L_B_1:%.*]] = load double, ptr [[GEP_B_1]], align 4
723- ; CHECK-NEXT: [[GEP_B_2:%.*]] = getelementptr inbounds double, ptr [[B]], i32 2
724- ; CHECK-NEXT: [[L_B_2:%.*]] = load double, ptr [[GEP_B_2]], align 4
725- ; CHECK-NEXT: [[MUL_0:%.*]] = fmul fast double [[L_A_0]], [[L_B_0]]
726- ; CHECK-NEXT: [[MUL_1:%.*]] = fmul fast double [[L_A_1]], [[L_B_1]]
727- ; CHECK-NEXT: [[MUL_2:%.*]] = fmul fast double [[L_A_2]], [[L_B_2]]
728- ; CHECK-NEXT: [[ADD_0:%.*]] = fadd fast double [[MUL_0]], [[MUL_1]]
729- ; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast double [[ADD_0]], [[MUL_2]]
730- ; CHECK-NEXT: ret double [[ADD_1]]
712+ ; NON-POW2-LABEL: @dot_product_fp64(
713+ ; NON-POW2-NEXT: [[GEP_A_0:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i32 0
714+ ; NON-POW2-NEXT: [[GEP_B_0:%.*]] = getelementptr inbounds double, ptr [[B:%.*]], i32 0
715+ ; NON-POW2-NEXT: [[TMP1:%.*]] = load <3 x double>, ptr [[GEP_A_0]], align 4
716+ ; NON-POW2-NEXT: [[TMP2:%.*]] = load <3 x double>, ptr [[GEP_B_0]], align 4
717+ ; NON-POW2-NEXT: [[TMP3:%.*]] = fmul fast <3 x double> [[TMP1]], [[TMP2]]
718+ ; NON-POW2-NEXT: [[TMP4:%.*]] = call fast double @llvm.vector.reduce.fadd.v3f64(double 0.000000e+00, <3 x double> [[TMP3]])
719+ ; NON-POW2-NEXT: ret double [[TMP4]]
720+ ;
721+ ; POW2-ONLY-LABEL: @dot_product_fp64(
722+ ; POW2-ONLY-NEXT: [[GEP_A_0:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i32 0
723+ ; POW2-ONLY-NEXT: [[L_A_0:%.*]] = load double, ptr [[GEP_A_0]], align 4
724+ ; POW2-ONLY-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds double, ptr [[A]], i32 1
725+ ; POW2-ONLY-NEXT: [[L_A_1:%.*]] = load double, ptr [[GEP_A_1]], align 4
726+ ; POW2-ONLY-NEXT: [[GEP_A_2:%.*]] = getelementptr inbounds double, ptr [[A]], i32 2
727+ ; POW2-ONLY-NEXT: [[L_A_2:%.*]] = load double, ptr [[GEP_A_2]], align 4
728+ ; POW2-ONLY-NEXT: [[GEP_B_0:%.*]] = getelementptr inbounds double, ptr [[B:%.*]], i32 0
729+ ; POW2-ONLY-NEXT: [[L_B_0:%.*]] = load double, ptr [[GEP_B_0]], align 4
730+ ; POW2-ONLY-NEXT: [[GEP_B_1:%.*]] = getelementptr inbounds double, ptr [[B]], i32 1
731+ ; POW2-ONLY-NEXT: [[L_B_1:%.*]] = load double, ptr [[GEP_B_1]], align 4
732+ ; POW2-ONLY-NEXT: [[GEP_B_2:%.*]] = getelementptr inbounds double, ptr [[B]], i32 2
733+ ; POW2-ONLY-NEXT: [[L_B_2:%.*]] = load double, ptr [[GEP_B_2]], align 4
734+ ; POW2-ONLY-NEXT: [[MUL_0:%.*]] = fmul fast double [[L_A_0]], [[L_B_0]]
735+ ; POW2-ONLY-NEXT: [[MUL_1:%.*]] = fmul fast double [[L_A_1]], [[L_B_1]]
736+ ; POW2-ONLY-NEXT: [[MUL_2:%.*]] = fmul fast double [[L_A_2]], [[L_B_2]]
737+ ; POW2-ONLY-NEXT: [[ADD_0:%.*]] = fadd fast double [[MUL_0]], [[MUL_1]]
738+ ; POW2-ONLY-NEXT: [[ADD_1:%.*]] = fadd fast double [[ADD_0]], [[MUL_2]]
739+ ; POW2-ONLY-NEXT: ret double [[ADD_1]]
731740;
732741 %gep.a.0 = getelementptr inbounds double , ptr %a , i32 0
733742 %l.a.0 = load double , ptr %gep.a.0 , align 4
@@ -784,13 +793,21 @@ entry:
784793}
785794
786795define float @reduce_fadd_after_fmul_of_buildvec (float %a , float %b , float %c ) {
787- ; CHECK-LABEL: @reduce_fadd_after_fmul_of_buildvec(
788- ; CHECK-NEXT: [[MUL_0:%.*]] = fmul fast float [[A:%.*]], 1.000000e+01
789- ; CHECK-NEXT: [[MUL_1:%.*]] = fmul fast float [[B:%.*]], 1.000000e+01
790- ; CHECK-NEXT: [[MUL_2:%.*]] = fmul fast float [[C:%.*]], 1.000000e+01
791- ; CHECK-NEXT: [[ADD_0:%.*]] = fadd fast float [[MUL_0]], [[MUL_1]]
792- ; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float [[ADD_0]], [[MUL_2]]
793- ; CHECK-NEXT: ret float [[ADD_1]]
796+ ; NON-POW2-LABEL: @reduce_fadd_after_fmul_of_buildvec(
797+ ; NON-POW2-NEXT: [[TMP1:%.*]] = insertelement <3 x float> poison, float [[A:%.*]], i32 0
798+ ; NON-POW2-NEXT: [[TMP2:%.*]] = insertelement <3 x float> [[TMP1]], float [[B:%.*]], i32 1
799+ ; NON-POW2-NEXT: [[TMP3:%.*]] = insertelement <3 x float> [[TMP2]], float [[C:%.*]], i32 2
800+ ; NON-POW2-NEXT: [[TMP4:%.*]] = fmul fast <3 x float> [[TMP3]], splat (float 1.000000e+01)
801+ ; NON-POW2-NEXT: [[TMP5:%.*]] = call fast float @llvm.vector.reduce.fadd.v3f32(float 0.000000e+00, <3 x float> [[TMP4]])
802+ ; NON-POW2-NEXT: ret float [[TMP5]]
803+ ;
804+ ; POW2-ONLY-LABEL: @reduce_fadd_after_fmul_of_buildvec(
805+ ; POW2-ONLY-NEXT: [[MUL_0:%.*]] = fmul fast float [[A:%.*]], 1.000000e+01
806+ ; POW2-ONLY-NEXT: [[MUL_1:%.*]] = fmul fast float [[B:%.*]], 1.000000e+01
807+ ; POW2-ONLY-NEXT: [[MUL_2:%.*]] = fmul fast float [[C:%.*]], 1.000000e+01
808+ ; POW2-ONLY-NEXT: [[ADD_0:%.*]] = fadd fast float [[MUL_0]], [[MUL_1]]
809+ ; POW2-ONLY-NEXT: [[ADD_1:%.*]] = fadd fast float [[ADD_0]], [[MUL_2]]
810+ ; POW2-ONLY-NEXT: ret float [[ADD_1]]
794811;
795812 %mul.0 = fmul fast float %a , 10 .0
796813 %mul.1 = fmul fast float %b , 10 .0
0 commit comments