@@ -709,34 +709,25 @@ define float @dot_product_fp32_reorder(ptr %a, ptr %b) {
709
709
710
710
711
711
define double @dot_product_fp64 (ptr %a , ptr %b ) {
712
- ; NON-POW2-LABEL: @dot_product_fp64(
713
- ; NON-POW2-NEXT: [[GEP_A_0:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i32 0
714
- ; NON-POW2-NEXT: [[GEP_B_0:%.*]] = getelementptr inbounds double, ptr [[B:%.*]], i32 0
715
- ; NON-POW2-NEXT: [[TMP1:%.*]] = load <3 x double>, ptr [[GEP_A_0]], align 4
716
- ; NON-POW2-NEXT: [[TMP2:%.*]] = load <3 x double>, ptr [[GEP_B_0]], align 4
717
- ; NON-POW2-NEXT: [[TMP3:%.*]] = fmul fast <3 x double> [[TMP1]], [[TMP2]]
718
- ; NON-POW2-NEXT: [[TMP4:%.*]] = call fast double @llvm.vector.reduce.fadd.v3f64(double 0.000000e+00, <3 x double> [[TMP3]])
719
- ; NON-POW2-NEXT: ret double [[TMP4]]
720
- ;
721
- ; POW2-ONLY-LABEL: @dot_product_fp64(
722
- ; POW2-ONLY-NEXT: [[GEP_A_0:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i32 0
723
- ; POW2-ONLY-NEXT: [[L_A_0:%.*]] = load double, ptr [[GEP_A_0]], align 4
724
- ; POW2-ONLY-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds double, ptr [[A]], i32 1
725
- ; POW2-ONLY-NEXT: [[L_A_1:%.*]] = load double, ptr [[GEP_A_1]], align 4
726
- ; POW2-ONLY-NEXT: [[GEP_A_2:%.*]] = getelementptr inbounds double, ptr [[A]], i32 2
727
- ; POW2-ONLY-NEXT: [[L_A_2:%.*]] = load double, ptr [[GEP_A_2]], align 4
728
- ; POW2-ONLY-NEXT: [[GEP_B_0:%.*]] = getelementptr inbounds double, ptr [[B:%.*]], i32 0
729
- ; POW2-ONLY-NEXT: [[L_B_0:%.*]] = load double, ptr [[GEP_B_0]], align 4
730
- ; POW2-ONLY-NEXT: [[GEP_B_1:%.*]] = getelementptr inbounds double, ptr [[B]], i32 1
731
- ; POW2-ONLY-NEXT: [[L_B_1:%.*]] = load double, ptr [[GEP_B_1]], align 4
732
- ; POW2-ONLY-NEXT: [[GEP_B_2:%.*]] = getelementptr inbounds double, ptr [[B]], i32 2
733
- ; POW2-ONLY-NEXT: [[L_B_2:%.*]] = load double, ptr [[GEP_B_2]], align 4
734
- ; POW2-ONLY-NEXT: [[MUL_0:%.*]] = fmul fast double [[L_A_0]], [[L_B_0]]
735
- ; POW2-ONLY-NEXT: [[MUL_1:%.*]] = fmul fast double [[L_A_1]], [[L_B_1]]
736
- ; POW2-ONLY-NEXT: [[MUL_2:%.*]] = fmul fast double [[L_A_2]], [[L_B_2]]
737
- ; POW2-ONLY-NEXT: [[ADD_0:%.*]] = fadd fast double [[MUL_0]], [[MUL_1]]
738
- ; POW2-ONLY-NEXT: [[ADD_1:%.*]] = fadd fast double [[ADD_0]], [[MUL_2]]
739
- ; POW2-ONLY-NEXT: ret double [[ADD_1]]
712
+ ; CHECK-LABEL: @dot_product_fp64(
713
+ ; CHECK-NEXT: [[GEP_A_0:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i32 0
714
+ ; CHECK-NEXT: [[L_A_0:%.*]] = load double, ptr [[GEP_A_0]], align 4
715
+ ; CHECK-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds double, ptr [[A]], i32 1
716
+ ; CHECK-NEXT: [[L_A_1:%.*]] = load double, ptr [[GEP_A_1]], align 4
717
+ ; CHECK-NEXT: [[GEP_A_2:%.*]] = getelementptr inbounds double, ptr [[A]], i32 2
718
+ ; CHECK-NEXT: [[L_A_2:%.*]] = load double, ptr [[GEP_A_2]], align 4
719
+ ; CHECK-NEXT: [[GEP_B_0:%.*]] = getelementptr inbounds double, ptr [[B:%.*]], i32 0
720
+ ; CHECK-NEXT: [[L_B_0:%.*]] = load double, ptr [[GEP_B_0]], align 4
721
+ ; CHECK-NEXT: [[GEP_B_1:%.*]] = getelementptr inbounds double, ptr [[B]], i32 1
722
+ ; CHECK-NEXT: [[L_B_1:%.*]] = load double, ptr [[GEP_B_1]], align 4
723
+ ; CHECK-NEXT: [[GEP_B_2:%.*]] = getelementptr inbounds double, ptr [[B]], i32 2
724
+ ; CHECK-NEXT: [[L_B_2:%.*]] = load double, ptr [[GEP_B_2]], align 4
725
+ ; CHECK-NEXT: [[MUL_0:%.*]] = fmul fast double [[L_A_0]], [[L_B_0]]
726
+ ; CHECK-NEXT: [[MUL_1:%.*]] = fmul fast double [[L_A_1]], [[L_B_1]]
727
+ ; CHECK-NEXT: [[MUL_2:%.*]] = fmul fast double [[L_A_2]], [[L_B_2]]
728
+ ; CHECK-NEXT: [[ADD_0:%.*]] = fadd fast double [[MUL_0]], [[MUL_1]]
729
+ ; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast double [[ADD_0]], [[MUL_2]]
730
+ ; CHECK-NEXT: ret double [[ADD_1]]
740
731
;
741
732
%gep.a.0 = getelementptr inbounds double , ptr %a , i32 0
742
733
%l.a.0 = load double , ptr %gep.a.0 , align 4
@@ -793,21 +784,13 @@ entry:
793
784
}
794
785
795
786
define float @reduce_fadd_after_fmul_of_buildvec (float %a , float %b , float %c ) {
796
- ; NON-POW2-LABEL: @reduce_fadd_after_fmul_of_buildvec(
797
- ; NON-POW2-NEXT: [[TMP1:%.*]] = insertelement <3 x float> poison, float [[A:%.*]], i32 0
798
- ; NON-POW2-NEXT: [[TMP2:%.*]] = insertelement <3 x float> [[TMP1]], float [[B:%.*]], i32 1
799
- ; NON-POW2-NEXT: [[TMP3:%.*]] = insertelement <3 x float> [[TMP2]], float [[C:%.*]], i32 2
800
- ; NON-POW2-NEXT: [[TMP4:%.*]] = fmul fast <3 x float> [[TMP3]], splat (float 1.000000e+01)
801
- ; NON-POW2-NEXT: [[TMP5:%.*]] = call fast float @llvm.vector.reduce.fadd.v3f32(float 0.000000e+00, <3 x float> [[TMP4]])
802
- ; NON-POW2-NEXT: ret float [[TMP5]]
803
- ;
804
- ; POW2-ONLY-LABEL: @reduce_fadd_after_fmul_of_buildvec(
805
- ; POW2-ONLY-NEXT: [[MUL_0:%.*]] = fmul fast float [[A:%.*]], 1.000000e+01
806
- ; POW2-ONLY-NEXT: [[MUL_1:%.*]] = fmul fast float [[B:%.*]], 1.000000e+01
807
- ; POW2-ONLY-NEXT: [[MUL_2:%.*]] = fmul fast float [[C:%.*]], 1.000000e+01
808
- ; POW2-ONLY-NEXT: [[ADD_0:%.*]] = fadd fast float [[MUL_0]], [[MUL_1]]
809
- ; POW2-ONLY-NEXT: [[ADD_1:%.*]] = fadd fast float [[ADD_0]], [[MUL_2]]
810
- ; POW2-ONLY-NEXT: ret float [[ADD_1]]
787
+ ; CHECK-LABEL: @reduce_fadd_after_fmul_of_buildvec(
788
+ ; CHECK-NEXT: [[MUL_0:%.*]] = fmul fast float [[A:%.*]], 1.000000e+01
789
+ ; CHECK-NEXT: [[MUL_1:%.*]] = fmul fast float [[B:%.*]], 1.000000e+01
790
+ ; CHECK-NEXT: [[MUL_2:%.*]] = fmul fast float [[C:%.*]], 1.000000e+01
791
+ ; CHECK-NEXT: [[ADD_0:%.*]] = fadd fast float [[MUL_0]], [[MUL_1]]
792
+ ; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float [[ADD_0]], [[MUL_2]]
793
+ ; CHECK-NEXT: ret float [[ADD_1]]
811
794
;
812
795
%mul.0 = fmul fast float %a , 10 .0
813
796
%mul.1 = fmul fast float %b , 10 .0
0 commit comments