@@ -554,6 +554,52 @@ define i32 @dot_product_i32(ptr %a, ptr %b) {
   ret i32 %add.1
 }
 
+; Same as above, except the reduction order has been perturbed. This
+; checks our ability to reorder.
+define i32 @dot_product_i32_reorder(ptr %a, ptr %b) {
+; CHECK-LABEL: @dot_product_i32_reorder(
+; CHECK-NEXT: [[GEP_A_0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 0
+; CHECK-NEXT: [[L_A_0:%.*]] = load i32, ptr [[GEP_A_0]], align 4
+; CHECK-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 1
+; CHECK-NEXT: [[L_A_1:%.*]] = load i32, ptr [[GEP_A_1]], align 4
+; CHECK-NEXT: [[GEP_A_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 2
+; CHECK-NEXT: [[L_A_2:%.*]] = load i32, ptr [[GEP_A_2]], align 4
+; CHECK-NEXT: [[GEP_B_0:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i32 0
+; CHECK-NEXT: [[L_B_0:%.*]] = load i32, ptr [[GEP_B_0]], align 4
+; CHECK-NEXT: [[GEP_B_1:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 1
+; CHECK-NEXT: [[L_B_1:%.*]] = load i32, ptr [[GEP_B_1]], align 4
+; CHECK-NEXT: [[GEP_B_2:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 2
+; CHECK-NEXT: [[L_B_2:%.*]] = load i32, ptr [[GEP_B_2]], align 4
+; CHECK-NEXT: [[MUL_0:%.*]] = mul nsw i32 [[L_A_0]], [[L_B_0]]
+; CHECK-NEXT: [[MUL_1:%.*]] = mul nsw i32 [[L_A_1]], [[L_B_1]]
+; CHECK-NEXT: [[MUL_2:%.*]] = mul nsw i32 [[L_A_2]], [[L_B_2]]
+; CHECK-NEXT: [[ADD_0:%.*]] = add i32 [[MUL_1]], [[MUL_0]]
+; CHECK-NEXT: [[ADD_1:%.*]] = add i32 [[ADD_0]], [[MUL_2]]
+; CHECK-NEXT: ret i32 [[ADD_1]]
+;
+  %gep.a.0 = getelementptr inbounds i32, ptr %a, i32 0
+  %l.a.0 = load i32, ptr %gep.a.0, align 4
+  %gep.a.1 = getelementptr inbounds i32, ptr %a, i32 1
+  %l.a.1 = load i32, ptr %gep.a.1, align 4
+  %gep.a.2 = getelementptr inbounds i32, ptr %a, i32 2
+  %l.a.2 = load i32, ptr %gep.a.2, align 4
+
+  %gep.b.0 = getelementptr inbounds i32, ptr %b, i32 0
+  %l.b.0 = load i32, ptr %gep.b.0, align 4
+  %gep.b.1 = getelementptr inbounds i32, ptr %b, i32 1
+  %l.b.1 = load i32, ptr %gep.b.1, align 4
+  %gep.b.2 = getelementptr inbounds i32, ptr %b, i32 2
+  %l.b.2 = load i32, ptr %gep.b.2, align 4
+
+  %mul.0 = mul nsw i32 %l.a.0, %l.b.0
+  %mul.1 = mul nsw i32 %l.a.1, %l.b.1
+  %mul.2 = mul nsw i32 %l.a.2, %l.b.2
+
+  %add.0 = add i32 %mul.1, %mul.0
+  %add.1 = add i32 %add.0, %mul.2
+  ret i32 %add.1
+}
+
 define float @dot_product_fp32(ptr %a, ptr %b) {
 ; NON-POW2-LABEL: @dot_product_fp32(
 ; NON-POW2-NEXT: [[GEP_A_0:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i32 0
@@ -604,6 +650,50 @@ define float @dot_product_fp32(ptr %a, ptr %b) {
   ret float %add.1
 }
 
+; Same as above, except the reduction order has been perturbed. This
+; checks our ability to reorder.
+define float @dot_product_fp32_reorder(ptr %a, ptr %b) {
+; CHECK-LABEL: @dot_product_fp32_reorder(
+; CHECK-NEXT: [[GEP_A_0:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i32 0
+; CHECK-NEXT: [[GEP_A_2:%.*]] = getelementptr inbounds float, ptr [[A]], i32 2
+; CHECK-NEXT: [[L_A_2:%.*]] = load float, ptr [[GEP_A_2]], align 4
+; CHECK-NEXT: [[GEP_B_0:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i32 0
+; CHECK-NEXT: [[GEP_B_2:%.*]] = getelementptr inbounds float, ptr [[B]], i32 2
+; CHECK-NEXT: [[L_B_2:%.*]] = load float, ptr [[GEP_B_2]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[GEP_A_0]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr [[GEP_B_0]], align 4
+; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[MUL_2:%.*]] = fmul fast float [[L_A_2]], [[L_B_2]]
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP3]], i32 0
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP3]], i32 1
+; CHECK-NEXT: [[ADD_0:%.*]] = fadd fast float [[TMP5]], [[TMP4]]
+; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float [[ADD_0]], [[MUL_2]]
+; CHECK-NEXT: ret float [[ADD_1]]
+;
+  %gep.a.0 = getelementptr inbounds float, ptr %a, i32 0
+  %l.a.0 = load float, ptr %gep.a.0, align 4
+  %gep.a.1 = getelementptr inbounds float, ptr %a, i32 1
+  %l.a.1 = load float, ptr %gep.a.1, align 4
+  %gep.a.2 = getelementptr inbounds float, ptr %a, i32 2
+  %l.a.2 = load float, ptr %gep.a.2, align 4
+
+  %gep.b.0 = getelementptr inbounds float, ptr %b, i32 0
+  %l.b.0 = load float, ptr %gep.b.0, align 4
+  %gep.b.1 = getelementptr inbounds float, ptr %b, i32 1
+  %l.b.1 = load float, ptr %gep.b.1, align 4
+  %gep.b.2 = getelementptr inbounds float, ptr %b, i32 2
+  %l.b.2 = load float, ptr %gep.b.2, align 4
+
+  %mul.0 = fmul fast float %l.a.0, %l.b.0
+  %mul.1 = fmul fast float %l.a.1, %l.b.1
+  %mul.2 = fmul fast float %l.a.2, %l.b.2
+
+  %add.0 = fadd fast float %mul.1, %mul.0
+  %add.1 = fadd fast float %add.0, %mul.2
+  ret float %add.1
+}
+
+
 define double @dot_product_fp64(ptr %a, ptr %b) {
 ; NON-POW2-LABEL: @dot_product_fp64(
 ; NON-POW2-NEXT: [[GEP_A_0:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i32 0