@@ -743,219 +743,214 @@ define signext i32 @spill_reduce_succ(ptr %input1, ptr %input2, ptr %output, i64
743743; CHECK-NEXT: std r9, -184(r1) # 8-byte Folded Spill
744744; CHECK-NEXT: std r8, -176(r1) # 8-byte Folded Spill
745745; CHECK-NEXT: std r7, -168(r1) # 8-byte Folded Spill
746- ; CHECK-NEXT: std r4 , -160(r1) # 8-byte Folded Spill
746+ ; CHECK-NEXT: std r3 , -160(r1) # 8-byte Folded Spill
747747; CHECK-NEXT: ble cr0, .LBB7_7
748748; CHECK-NEXT: # %bb.1: # %for.body.preheader
749- ; CHECK-NEXT: sldi r4, r6, 2
750- ; CHECK-NEXT: li r6, 1
751- ; CHECK-NEXT: mr r0, r10
752- ; CHECK-NEXT: std r10, -192(r1) # 8-byte Folded Spill
753- ; CHECK-NEXT: cmpdi r4, 1
754- ; CHECK-NEXT: iselgt r4, r4, r6
755- ; CHECK-NEXT: addi r7, r4, -1
756- ; CHECK-NEXT: clrldi r6, r4, 63
757- ; CHECK-NEXT: cmpldi r7, 3
749+ ; CHECK-NEXT: sldi r6, r6, 2
750+ ; CHECK-NEXT: li r7, 1
751+ ; CHECK-NEXT: mr r30, r10
752+ ; CHECK-NEXT: cmpdi r6, 1
753+ ; CHECK-NEXT: iselgt r7, r6, r7
754+ ; CHECK-NEXT: addi r8, r7, -1
755+ ; CHECK-NEXT: clrldi r6, r7, 63
756+ ; CHECK-NEXT: cmpldi r8, 3
758757; CHECK-NEXT: blt cr0, .LBB7_4
759758; CHECK-NEXT: # %bb.2: # %for.body.preheader.new
760- ; CHECK-NEXT: ld r0, -192(r1) # 8-byte Folded Reload
761- ; CHECK-NEXT: ld r30, -184(r1) # 8-byte Folded Reload
762- ; CHECK-NEXT: ld r8, -176(r1) # 8-byte Folded Reload
763- ; CHECK-NEXT: rldicl r7, r4, 62, 2
764- ; CHECK-NEXT: ld r9, -168(r1) # 8-byte Folded Reload
765- ; CHECK-NEXT: add r11, r0, r30
766- ; CHECK-NEXT: add r4, r0, r0
767- ; CHECK-NEXT: mulli r23, r0, 24
768- ; CHECK-NEXT: add r14, r0, r8
769- ; CHECK-NEXT: sldi r12, r0, 5
770- ; CHECK-NEXT: add r31, r0, r9
771- ; CHECK-NEXT: sldi r9, r9, 3
772- ; CHECK-NEXT: sldi r18, r0, 4
773- ; CHECK-NEXT: sldi r8, r8, 3
774- ; CHECK-NEXT: add r10, r4, r4
775- ; CHECK-NEXT: sldi r4, r30, 3
776- ; CHECK-NEXT: sldi r11, r11, 3
777- ; CHECK-NEXT: add r26, r12, r9
778- ; CHECK-NEXT: add r16, r18, r9
779- ; CHECK-NEXT: add r29, r12, r8
780- ; CHECK-NEXT: add r19, r18, r8
781- ; CHECK-NEXT: add r30, r12, r4
782- ; CHECK-NEXT: mr r20, r4
783- ; CHECK-NEXT: std r4, -200(r1) # 8-byte Folded Spill
784- ; CHECK-NEXT: ld r4, -160(r1) # 8-byte Folded Reload
785- ; CHECK-NEXT: add r15, r5, r11
786- ; CHECK-NEXT: sldi r11, r14, 3
787- ; CHECK-NEXT: add r29, r5, r29
788- ; CHECK-NEXT: add r28, r3, r26
789- ; CHECK-NEXT: add r19, r5, r19
790- ; CHECK-NEXT: add r21, r23, r9
791- ; CHECK-NEXT: add r24, r23, r8
792- ; CHECK-NEXT: add r14, r5, r11
793- ; CHECK-NEXT: sldi r11, r31, 3
794- ; CHECK-NEXT: add r25, r23, r20
795- ; CHECK-NEXT: add r20, r18, r20
796- ; CHECK-NEXT: add r30, r5, r30
797- ; CHECK-NEXT: add r18, r3, r16
798- ; CHECK-NEXT: add r24, r5, r24
799- ; CHECK-NEXT: add r23, r3, r21
800- ; CHECK-NEXT: add r27, r4, r26
801- ; CHECK-NEXT: add r22, r4, r21
802- ; CHECK-NEXT: add r17, r4, r16
803- ; CHECK-NEXT: add r2, r4, r11
804- ; CHECK-NEXT: rldicl r4, r7, 2, 1
805- ; CHECK-NEXT: sub r7, r8, r9
806- ; CHECK-NEXT: ld r8, -200(r1) # 8-byte Folded Reload
759+ ; CHECK-NEXT: ld r14, -168(r1) # 8-byte Folded Reload
760+ ; CHECK-NEXT: mulli r24, r30, 24
761+ ; CHECK-NEXT: ld r16, -184(r1) # 8-byte Folded Reload
762+ ; CHECK-NEXT: ld r15, -176(r1) # 8-byte Folded Reload
763+ ; CHECK-NEXT: ld r3, -160(r1) # 8-byte Folded Reload
764+ ; CHECK-NEXT: rldicl r0, r7, 62, 2
765+ ; CHECK-NEXT: sldi r11, r30, 5
766+ ; CHECK-NEXT: sldi r19, r30, 4
767+ ; CHECK-NEXT: sldi r7, r14, 3
768+ ; CHECK-NEXT: add r14, r30, r14
769+ ; CHECK-NEXT: sldi r10, r16, 3
770+ ; CHECK-NEXT: sldi r12, r15, 3
771+ ; CHECK-NEXT: add r16, r30, r16
772+ ; CHECK-NEXT: add r15, r30, r15
773+ ; CHECK-NEXT: add r27, r11, r7
774+ ; CHECK-NEXT: add r22, r24, r7
775+ ; CHECK-NEXT: add r17, r19, r7
776+ ; CHECK-NEXT: sldi r2, r14, 3
777+ ; CHECK-NEXT: add r26, r24, r10
778+ ; CHECK-NEXT: add r25, r24, r12
779+ ; CHECK-NEXT: add r21, r19, r10
780+ ; CHECK-NEXT: add r20, r19, r12
781+ ; CHECK-NEXT: add r8, r11, r10
782+ ; CHECK-NEXT: sldi r16, r16, 3
783+ ; CHECK-NEXT: add r29, r5, r27
784+ ; CHECK-NEXT: add r28, r4, r27
785+ ; CHECK-NEXT: add r27, r3, r27
786+ ; CHECK-NEXT: add r24, r5, r22
787+ ; CHECK-NEXT: add r23, r4, r22
788+ ; CHECK-NEXT: add r22, r3, r22
789+ ; CHECK-NEXT: add r19, r5, r17
790+ ; CHECK-NEXT: add r18, r4, r17
791+ ; CHECK-NEXT: add r17, r3, r17
792+ ; CHECK-NEXT: add r14, r5, r2
793+ ; CHECK-NEXT: add r31, r4, r2
794+ ; CHECK-NEXT: add r2, r3, r2
795+ ; CHECK-NEXT: add r9, r5, r8
796+ ; CHECK-NEXT: add r8, r11, r12
807797; CHECK-NEXT: add r26, r5, r26
808798; CHECK-NEXT: add r25, r5, r25
809799; CHECK-NEXT: add r21, r5, r21
810800; CHECK-NEXT: add r20, r5, r20
811801; CHECK-NEXT: add r16, r5, r16
812- ; CHECK-NEXT: add r31, r5, r11
813- ; CHECK-NEXT: add r11, r3, r11
814- ; CHECK-NEXT: addi r4, r4, -4
815- ; CHECK-NEXT: rldicl r4, r4, 62, 2
816- ; CHECK-NEXT: sub r8, r8, r9
817- ; CHECK-NEXT: li r9, 0
818- ; CHECK-NEXT: addi r4, r4, 1
819- ; CHECK-NEXT: mtctr r4
802+ ; CHECK-NEXT: add r8, r5, r8
803+ ; CHECK-NEXT: rldicl r3, r0, 2, 1
804+ ; CHECK-NEXT: addi r3, r3, -4
805+ ; CHECK-NEXT: sub r0, r12, r7
806+ ; CHECK-NEXT: sub r12, r10, r7
807+ ; CHECK-NEXT: li r7, 0
808+ ; CHECK-NEXT: mr r10, r30
809+ ; CHECK-NEXT: sldi r15, r15, 3
810+ ; CHECK-NEXT: add r15, r5, r15
811+ ; CHECK-NEXT: rldicl r3, r3, 62, 2
812+ ; CHECK-NEXT: addi r3, r3, 1
813+ ; CHECK-NEXT: mtctr r3
820814; CHECK-NEXT: .p2align 4
821815; CHECK-NEXT: .LBB7_3: # %for.body
822816; CHECK-NEXT: #
823- ; CHECK-NEXT: lfd f0, 0(r11)
824- ; CHECK-NEXT: lfd f1, 0(r2)
825- ; CHECK-NEXT: add r0, r0, r10
826- ; CHECK-NEXT: xsmuldp f0, f0, f1
817+ ; CHECK-NEXT: lfd f0, 0(r2)
827818; CHECK-NEXT: lfd f1, 0(r31)
819+ ; CHECK-NEXT: add r3, r10, r30
820+ ; CHECK-NEXT: add r3, r3, r30
821+ ; CHECK-NEXT: xsmuldp f0, f0, f1
822+ ; CHECK-NEXT: lfd f1, 0(r14)
823+ ; CHECK-NEXT: add r3, r3, r30
824+ ; CHECK-NEXT: add r10, r3, r30
828825; CHECK-NEXT: xsadddp f0, f1, f0
829- ; CHECK-NEXT: stfd f0, 0(r31 )
830- ; CHECK-NEXT: add r31, r31, r12
831- ; CHECK-NEXT: lfdx f0, r11, r7
832- ; CHECK-NEXT: lfdx f1, r2, r7
826+ ; CHECK-NEXT: stfd f0, 0(r14 )
827+ ; CHECK-NEXT: add r14, r14, r11
828+ ; CHECK-NEXT: lfdx f0, r2, r0
829+ ; CHECK-NEXT: lfdx f1, r31, r0
833830; CHECK-NEXT: xsmuldp f0, f0, f1
834- ; CHECK-NEXT: lfdx f1, r14, r9
831+ ; CHECK-NEXT: lfdx f1, r15, r7
835832; CHECK-NEXT: xsadddp f0, f1, f0
836- ; CHECK-NEXT: stfdx f0, r14, r9
837- ; CHECK-NEXT: lfdx f0, r11, r8
838- ; CHECK-NEXT: lfdx f1, r2, r8
839- ; CHECK-NEXT: add r11, r11, r12
840- ; CHECK-NEXT: add r2, r2, r12
833+ ; CHECK-NEXT: stfdx f0, r15, r7
834+ ; CHECK-NEXT: lfdx f0, r2, r12
835+ ; CHECK-NEXT: lfdx f1, r31, r12
836+ ; CHECK-NEXT: add r2, r2, r11
837+ ; CHECK-NEXT: add r31, r31, r11
841838; CHECK-NEXT: xsmuldp f0, f0, f1
842- ; CHECK-NEXT: lfdx f1, r15, r9
839+ ; CHECK-NEXT: lfdx f1, r16, r7
843840; CHECK-NEXT: xsadddp f0, f1, f0
844- ; CHECK-NEXT: stfdx f0, r15, r9
845- ; CHECK-NEXT: lfd f0, 0(r18 )
846- ; CHECK-NEXT: lfd f1, 0(r17 )
841+ ; CHECK-NEXT: stfdx f0, r16, r7
842+ ; CHECK-NEXT: lfd f0, 0(r17 )
843+ ; CHECK-NEXT: lfd f1, 0(r18 )
847844; CHECK-NEXT: xsmuldp f0, f0, f1
848- ; CHECK-NEXT: lfdx f1, r16, r9
845+ ; CHECK-NEXT: lfdx f1, r19, r7
849846; CHECK-NEXT: xsadddp f0, f1, f0
850- ; CHECK-NEXT: stfdx f0, r16, r9
851- ; CHECK-NEXT: lfdx f0, r18, r7
852- ; CHECK-NEXT: lfdx f1, r17, r7
847+ ; CHECK-NEXT: stfdx f0, r19, r7
848+ ; CHECK-NEXT: lfdx f0, r17, r0
849+ ; CHECK-NEXT: lfdx f1, r18, r0
853850; CHECK-NEXT: xsmuldp f0, f0, f1
854- ; CHECK-NEXT: lfdx f1, r19, r9
851+ ; CHECK-NEXT: lfdx f1, r20, r7
855852; CHECK-NEXT: xsadddp f0, f1, f0
856- ; CHECK-NEXT: stfdx f0, r19, r9
857- ; CHECK-NEXT: lfdx f0, r18, r8
858- ; CHECK-NEXT: lfdx f1, r17, r8
859- ; CHECK-NEXT: add r18, r18, r12
860- ; CHECK-NEXT: add r17, r17, r12
853+ ; CHECK-NEXT: stfdx f0, r20, r7
854+ ; CHECK-NEXT: lfdx f0, r17, r12
855+ ; CHECK-NEXT: lfdx f1, r18, r12
856+ ; CHECK-NEXT: add r17, r17, r11
857+ ; CHECK-NEXT: add r18, r18, r11
861858; CHECK-NEXT: xsmuldp f0, f0, f1
862- ; CHECK-NEXT: lfdx f1, r20, r9
859+ ; CHECK-NEXT: lfdx f1, r21, r7
863860; CHECK-NEXT: xsadddp f0, f1, f0
864- ; CHECK-NEXT: stfdx f0, r20, r9
865- ; CHECK-NEXT: lfd f0, 0(r23 )
866- ; CHECK-NEXT: lfd f1, 0(r22 )
861+ ; CHECK-NEXT: stfdx f0, r21, r7
862+ ; CHECK-NEXT: lfd f0, 0(r22 )
863+ ; CHECK-NEXT: lfd f1, 0(r23 )
867864; CHECK-NEXT: xsmuldp f0, f0, f1
868- ; CHECK-NEXT: lfdx f1, r21, r9
865+ ; CHECK-NEXT: lfdx f1, r24, r7
869866; CHECK-NEXT: xsadddp f0, f1, f0
870- ; CHECK-NEXT: stfdx f0, r21, r9
871- ; CHECK-NEXT: lfdx f0, r23, r7
872- ; CHECK-NEXT: lfdx f1, r22, r7
867+ ; CHECK-NEXT: stfdx f0, r24, r7
868+ ; CHECK-NEXT: lfdx f0, r22, r0
869+ ; CHECK-NEXT: lfdx f1, r23, r0
873870; CHECK-NEXT: xsmuldp f0, f0, f1
874- ; CHECK-NEXT: lfdx f1, r24, r9
871+ ; CHECK-NEXT: lfdx f1, r25, r7
875872; CHECK-NEXT: xsadddp f0, f1, f0
876- ; CHECK-NEXT: stfdx f0, r24, r9
877- ; CHECK-NEXT: lfdx f0, r23, r8
878- ; CHECK-NEXT: lfdx f1, r22, r8
879- ; CHECK-NEXT: add r23, r23, r12
880- ; CHECK-NEXT: add r22, r22, r12
873+ ; CHECK-NEXT: stfdx f0, r25, r7
874+ ; CHECK-NEXT: lfdx f0, r22, r12
875+ ; CHECK-NEXT: lfdx f1, r23, r12
876+ ; CHECK-NEXT: add r22, r22, r11
877+ ; CHECK-NEXT: add r23, r23, r11
881878; CHECK-NEXT: xsmuldp f0, f0, f1
882- ; CHECK-NEXT: lfdx f1, r25, r9
879+ ; CHECK-NEXT: lfdx f1, r26, r7
883880; CHECK-NEXT: xsadddp f0, f1, f0
884- ; CHECK-NEXT: stfdx f0, r25, r9
885- ; CHECK-NEXT: lfd f0, 0(r28 )
886- ; CHECK-NEXT: lfd f1, 0(r27 )
881+ ; CHECK-NEXT: stfdx f0, r26, r7
882+ ; CHECK-NEXT: lfd f0, 0(r27 )
883+ ; CHECK-NEXT: lfd f1, 0(r28 )
887884; CHECK-NEXT: xsmuldp f0, f0, f1
888- ; CHECK-NEXT: lfdx f1, r26, r9
885+ ; CHECK-NEXT: lfdx f1, r29, r7
889886; CHECK-NEXT: xsadddp f0, f1, f0
890- ; CHECK-NEXT: stfdx f0, r26, r9
891- ; CHECK-NEXT: lfdx f0, r28, r7
892- ; CHECK-NEXT: lfdx f1, r27, r7
887+ ; CHECK-NEXT: stfdx f0, r29, r7
888+ ; CHECK-NEXT: lfdx f0, r27, r0
889+ ; CHECK-NEXT: lfdx f1, r28, r0
893890; CHECK-NEXT: xsmuldp f0, f0, f1
894- ; CHECK-NEXT: lfdx f1, r29, r9
891+ ; CHECK-NEXT: lfdx f1, r8, r7
895892; CHECK-NEXT: xsadddp f0, f1, f0
896- ; CHECK-NEXT: stfdx f0, r29, r9
897- ; CHECK-NEXT: lfdx f0, r28, r8
898- ; CHECK-NEXT: lfdx f1, r27, r8
899- ; CHECK-NEXT: add r28, r28, r12
900- ; CHECK-NEXT: add r27, r27, r12
893+ ; CHECK-NEXT: stfdx f0, r8, r7
894+ ; CHECK-NEXT: lfdx f0, r27, r12
895+ ; CHECK-NEXT: lfdx f1, r28, r12
896+ ; CHECK-NEXT: add r27, r27, r11
897+ ; CHECK-NEXT: add r28, r28, r11
901898; CHECK-NEXT: xsmuldp f0, f0, f1
902- ; CHECK-NEXT: lfdx f1, r30, r9
899+ ; CHECK-NEXT: lfdx f1, r9, r7
903900; CHECK-NEXT: xsadddp f0, f1, f0
904- ; CHECK-NEXT: stfdx f0, r30, r9
905- ; CHECK-NEXT: add r9, r9, r12
901+ ; CHECK-NEXT: stfdx f0, r9, r7
902+ ; CHECK-NEXT: add r7, r7, r11
906903; CHECK-NEXT: bdnz .LBB7_3
907904; CHECK-NEXT: .LBB7_4: # %for.cond.cleanup.loopexit.unr-lcssa
908- ; CHECK-NEXT: ld r7, -192(r1) # 8-byte Folded Reload
909905; CHECK-NEXT: cmpldi r6, 0
910906; CHECK-NEXT: beq cr0, .LBB7_7
911907; CHECK-NEXT: # %bb.5: # %for.body.epil.preheader
912- ; CHECK-NEXT: ld r4, -184(r1) # 8-byte Folded Reload
913- ; CHECK-NEXT: ld r29, -160(r1) # 8-byte Folded Reload
914- ; CHECK-NEXT: mr r30, r3
915- ; CHECK-NEXT: sldi r7, r7, 3
916- ; CHECK-NEXT: add r4, r0, r4
917- ; CHECK-NEXT: sldi r4, r4, 3
918- ; CHECK-NEXT: add r3, r5, r4
919- ; CHECK-NEXT: add r8, r29, r4
920- ; CHECK-NEXT: add r9, r30, r4
921- ; CHECK-NEXT: ld r4, -176(r1) # 8-byte Folded Reload
922- ; CHECK-NEXT: add r4, r0, r4
923- ; CHECK-NEXT: sldi r4, r4, 3
924- ; CHECK-NEXT: add r10, r5, r4
925- ; CHECK-NEXT: add r11, r29, r4
926- ; CHECK-NEXT: add r12, r30, r4
927- ; CHECK-NEXT: ld r4, -168(r1) # 8-byte Folded Reload
928- ; CHECK-NEXT: add r4, r0, r4
929- ; CHECK-NEXT: sldi r0, r4, 3
930- ; CHECK-NEXT: add r5, r5, r0
931- ; CHECK-NEXT: add r4, r29, r0
932- ; CHECK-NEXT: add r30, r30, r0
933- ; CHECK-NEXT: li r0, 0
908+ ; CHECK-NEXT: ld r3, -184(r1) # 8-byte Folded Reload
909+ ; CHECK-NEXT: ld r0, -160(r1) # 8-byte Folded Reload
910+ ; CHECK-NEXT: sldi r8, r30, 3
911+ ; CHECK-NEXT: add r3, r10, r3
912+ ; CHECK-NEXT: sldi r3, r3, 3
913+ ; CHECK-NEXT: add r7, r5, r3
914+ ; CHECK-NEXT: add r9, r4, r3
915+ ; CHECK-NEXT: add r11, r0, r3
916+ ; CHECK-NEXT: ld r3, -176(r1) # 8-byte Folded Reload
917+ ; CHECK-NEXT: add r3, r10, r3
918+ ; CHECK-NEXT: sldi r3, r3, 3
919+ ; CHECK-NEXT: add r12, r5, r3
920+ ; CHECK-NEXT: add r30, r4, r3
921+ ; CHECK-NEXT: add r29, r0, r3
922+ ; CHECK-NEXT: ld r3, -168(r1) # 8-byte Folded Reload
923+ ; CHECK-NEXT: add r3, r10, r3
924+ ; CHECK-NEXT: li r10, 0
925+ ; CHECK-NEXT: sldi r3, r3, 3
926+ ; CHECK-NEXT: add r5, r5, r3
927+ ; CHECK-NEXT: add r4, r4, r3
928+ ; CHECK-NEXT: add r3, r0, r3
934929; CHECK-NEXT: .p2align 4
935930; CHECK-NEXT: .LBB7_6: # %for.body.epil
936931; CHECK-NEXT: #
937- ; CHECK-NEXT: lfdx f0, r30, r0
938- ; CHECK-NEXT: lfdx f1, r4, r0
932+ ; CHECK-NEXT: lfdx f0, r3, r10
933+ ; CHECK-NEXT: lfdx f1, r4, r10
939934; CHECK-NEXT: addi r6, r6, -1
940935; CHECK-NEXT: cmpldi r6, 0
941936; CHECK-NEXT: xsmuldp f0, f0, f1
942937; CHECK-NEXT: lfd f1, 0(r5)
943938; CHECK-NEXT: xsadddp f0, f1, f0
944939; CHECK-NEXT: stfd f0, 0(r5)
945- ; CHECK-NEXT: add r5, r5, r7
946- ; CHECK-NEXT: lfdx f0, r12, r0
947- ; CHECK-NEXT: lfdx f1, r11, r0
940+ ; CHECK-NEXT: add r5, r5, r8
941+ ; CHECK-NEXT: lfdx f0, r29, r10
942+ ; CHECK-NEXT: lfdx f1, r30, r10
948943; CHECK-NEXT: xsmuldp f0, f0, f1
949- ; CHECK-NEXT: lfdx f1, r10, r0
944+ ; CHECK-NEXT: lfdx f1, r12, r10
950945; CHECK-NEXT: xsadddp f0, f1, f0
951- ; CHECK-NEXT: stfdx f0, r10, r0
952- ; CHECK-NEXT: lfdx f0, r9, r0
953- ; CHECK-NEXT: lfdx f1, r8, r0
946+ ; CHECK-NEXT: stfdx f0, r12, r10
947+ ; CHECK-NEXT: lfdx f0, r11, r10
948+ ; CHECK-NEXT: lfdx f1, r9, r10
954949; CHECK-NEXT: xsmuldp f0, f0, f1
955- ; CHECK-NEXT: lfdx f1, r3, r0
950+ ; CHECK-NEXT: lfdx f1, r7, r10
956951; CHECK-NEXT: xsadddp f0, f1, f0
957- ; CHECK-NEXT: stfdx f0, r3, r0
958- ; CHECK-NEXT: add r0, r0, r7
952+ ; CHECK-NEXT: stfdx f0, r7, r10
953+ ; CHECK-NEXT: add r10, r10, r8
959954; CHECK-NEXT: bne cr0, .LBB7_6
960955; CHECK-NEXT: .LBB7_7: # %for.cond.cleanup
961956; CHECK-NEXT: ld r2, -152(r1) # 8-byte Folded Reload
0 commit comments