Skip to content

Commit 9f7c85f

Browse files
authored
[LegalizeIntegerTypes] Use forceExpandWideMUL in ExpandIntRes_XMULO. (#123432)
This generates basically the same code with the operands commuted, but gets there with fewer legalization steps.
1 parent 2a4c4b5 commit 9f7c85f

File tree

5 files changed

+79
-81
lines changed

5 files changed

+79
-81
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5084,13 +5084,9 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
50845084
if (LC == RTLIB::UNKNOWN_LIBCALL || !TLI.getLibcallName(LC) ||
50855085
TLI.getLibcallName(LC) == DAG.getMachineFunction().getName()) {
50865086
// FIXME: This is not an optimal expansion, but better than crashing.
5087-
EVT WideVT =
5088-
EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
5089-
SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, N->getOperand(0));
5090-
SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, N->getOperand(1));
5091-
SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
50925087
SDValue MulLo, MulHi;
5093-
SplitInteger(Mul, MulLo, MulHi);
5088+
TLI.forceExpandWideMUL(DAG, dl, /*Signed=*/true, N->getOperand(0),
5089+
N->getOperand(1), MulLo, MulHi);
50945090
SDValue SRA =
50955091
DAG.getNode(ISD::SRA, dl, VT, MulLo,
50965092
DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, VT));

llvm/test/CodeGen/LoongArch/smul-with-overflow.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,7 @@ define zeroext i1 @smuloi128(i128 %v1, i128 %v2, ptr %res) {
191191
; LA32-NEXT: sltu $s5, $s5, $s1
192192
; LA32-NEXT: sltu $s1, $s1, $s0
193193
; LA32-NEXT: sltu $s0, $s0, $t6
194-
; LA32-NEXT: mul.w $t2, $a3, $t5
194+
; LA32-NEXT: mul.w $t2, $t5, $a3
195195
; LA32-NEXT: st.w $a3, $sp, 24 # 4-byte Folded Spill
196196
; LA32-NEXT: sltu $t4, $fp, $t4
197197
; LA32-NEXT: mulh.wu $fp, $a5, $t3
@@ -232,10 +232,10 @@ define zeroext i1 @smuloi128(i128 %v1, i128 %v2, ptr %res) {
232232
; LA32-NEXT: add.w $a7, $s4, $t2
233233
; LA32-NEXT: st.w $a7, $sp, 12 # 4-byte Folded Spill
234234
; LA32-NEXT: add.w $s3, $t7, $a7
235-
; LA32-NEXT: mulh.wu $a7, $a3, $t5
236-
; LA32-NEXT: add.w $t4, $a7, $a0
237-
; LA32-NEXT: mul.w $s2, $s6, $t5
238-
; LA32-NEXT: add.w $s1, $t4, $s2
235+
; LA32-NEXT: mulh.wu $a7, $t5, $a3
236+
; LA32-NEXT: add.w $t4, $a0, $a7
237+
; LA32-NEXT: mul.w $s2, $t5, $s6
238+
; LA32-NEXT: add.w $s1, $s2, $t4
239239
; LA32-NEXT: add.w $fp, $s1, $s3
240240
; LA32-NEXT: add.w $a0, $fp, $t6
241241
; LA32-NEXT: add.w $fp, $s8, $a0

llvm/test/CodeGen/SPARC/smulo-128-legalisation-lowering.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) nounwind {
114114
; SPARC-NEXT: addxcc %o0, %o3, %l6
115115
; SPARC-NEXT: addcc %l2, %o1, %l2
116116
; SPARC-NEXT: sra %i4, 31, %i4
117-
; SPARC-NEXT: umul %g4, %i4, %g4
117+
; SPARC-NEXT: umul %i4, %g4, %g4
118118
; SPARC-NEXT: rd %y, %o0
119119
; SPARC-NEXT: addxcc %l6, %l7, %l6
120120
; SPARC-NEXT: umul %i4, %g2, %g2

llvm/test/CodeGen/X86/smul-with-overflow.ll

Lines changed: 69 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -435,8 +435,8 @@ define { i129, i1 } @smul_ovf(i129 %x, i129 %y) nounwind {
435435
; X86-NEXT: movl %edx, %esi
436436
; X86-NEXT: movl %eax, %ebp
437437
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
438-
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
439-
; X86-NEXT: mull %ecx
438+
; X86-NEXT: movl %ecx, %eax
439+
; X86-NEXT: mull {{[0-9]+}}(%esp)
440440
; X86-NEXT: movl %eax, %ebx
441441
; X86-NEXT: movl %eax, %ecx
442442
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -822,167 +822,169 @@ define { i129, i1 } @smul_ovf(i129 %x, i129 %y) nounwind {
822822
; X64-NEXT: pushq %rbx
823823
; X64-NEXT: movq %r9, %r15
824824
; X64-NEXT: movq %rcx, %r9
825-
; X64-NEXT: movq %rdx, %r14
825+
; X64-NEXT: movq %rdx, %r10
826826
; X64-NEXT: movq %rsi, %r12
827827
; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
828828
; X64-NEXT: movq {{[0-9]+}}(%rsp), %r11
829829
; X64-NEXT: andl $1, %r11d
830830
; X64-NEXT: negq %r11
831831
; X64-NEXT: andl $1, %r9d
832832
; X64-NEXT: negq %r9
833-
; X64-NEXT: movq %r9, %rax
834-
; X64-NEXT: mulq %r8
833+
; X64-NEXT: movq %r8, %rax
834+
; X64-NEXT: mulq %r9
835835
; X64-NEXT: movq %rdx, %rcx
836-
; X64-NEXT: movq %rax, %rbp
837836
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
837+
; X64-NEXT: movq %rax, %rdi
838838
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
839-
; X64-NEXT: addq %rdx, %rbp
839+
; X64-NEXT: addq %rdx, %rdi
840840
; X64-NEXT: adcq $0, %rcx
841-
; X64-NEXT: movq %r9, %rax
842-
; X64-NEXT: mulq %r15
841+
; X64-NEXT: movq %r15, %rax
842+
; X64-NEXT: mulq %r9
843843
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
844844
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
845-
; X64-NEXT: addq %rax, %rbp
845+
; X64-NEXT: addq %rax, %rdi
846846
; X64-NEXT: adcq %rdx, %rcx
847847
; X64-NEXT: setb %sil
848-
; X64-NEXT: movzbl %sil, %edi
848+
; X64-NEXT: movzbl %sil, %r14d
849849
; X64-NEXT: addq %rax, %rcx
850-
; X64-NEXT: adcq %rdx, %rdi
850+
; X64-NEXT: adcq %rdx, %r14
851851
; X64-NEXT: movq %r12, %rax
852852
; X64-NEXT: mulq %r8
853-
; X64-NEXT: movq %rdx, %r10
853+
; X64-NEXT: movq %rdx, %rbx
854854
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
855-
; X64-NEXT: movq %r14, %rax
855+
; X64-NEXT: movq %r10, %rax
856856
; X64-NEXT: mulq %r8
857-
; X64-NEXT: movq %rdx, %rbx
858-
; X64-NEXT: movq %rax, %r13
859-
; X64-NEXT: addq %r10, %r13
860-
; X64-NEXT: adcq $0, %rbx
857+
; X64-NEXT: movq %rdx, %r13
858+
; X64-NEXT: movq %rax, %rbp
859+
; X64-NEXT: addq %rbx, %rbp
860+
; X64-NEXT: adcq $0, %r13
861861
; X64-NEXT: movq %r12, %rax
862862
; X64-NEXT: mulq %r15
863863
; X64-NEXT: movq %rdx, %rsi
864-
; X64-NEXT: addq %r13, %rax
864+
; X64-NEXT: addq %rbp, %rax
865865
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
866-
; X64-NEXT: adcq %rbx, %rsi
866+
; X64-NEXT: adcq %r13, %rsi
867867
; X64-NEXT: setb %r8b
868-
; X64-NEXT: movq %r14, %rax
868+
; X64-NEXT: movq %r10, %rax
869869
; X64-NEXT: mulq %r15
870870
; X64-NEXT: movq %rdx, %rbx
871871
; X64-NEXT: addq %rsi, %rax
872872
; X64-NEXT: movzbl %r8b, %edx
873873
; X64-NEXT: adcq %rdx, %rbx
874874
; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
875875
; X64-NEXT: movq %rax, %rsi
876-
; X64-NEXT: adcq %rbp, %rbx
876+
; X64-NEXT: adcq %rdi, %rbx
877877
; X64-NEXT: adcq $0, %rcx
878-
; X64-NEXT: adcq $0, %rdi
878+
; X64-NEXT: adcq $0, %r14
879879
; X64-NEXT: movq %r11, %rax
880880
; X64-NEXT: mulq %r12
881881
; X64-NEXT: movq %rdx, %r13
882-
; X64-NEXT: movq %rax, %r15
883-
; X64-NEXT: movq %r11, %rax
884-
; X64-NEXT: mulq %r14
885-
; X64-NEXT: movq %rax, %r14
886882
; X64-NEXT: movq %rax, %r8
883+
; X64-NEXT: movq %r11, %rax
884+
; X64-NEXT: mulq %r10
885+
; X64-NEXT: movq %rax, %r15
886+
; X64-NEXT: movq %rax, %rdi
887887
; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
888-
; X64-NEXT: addq %r13, %r14
888+
; X64-NEXT: addq %r13, %r15
889889
; X64-NEXT: movq %rdx, %rbp
890890
; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
891891
; X64-NEXT: adcq $0, %rbp
892-
; X64-NEXT: addq %r15, %r14
892+
; X64-NEXT: addq %r8, %r15
893893
; X64-NEXT: adcq %r13, %rbp
894894
; X64-NEXT: setb %al
895-
; X64-NEXT: addq %r8, %rbp
895+
; X64-NEXT: addq %rdi, %rbp
896896
; X64-NEXT: movzbl %al, %r12d
897897
; X64-NEXT: adcq %rdx, %r12
898-
; X64-NEXT: addq %r15, %rsi
898+
; X64-NEXT: addq %r8, %rsi
899+
; X64-NEXT: movq %r8, %r10
900+
; X64-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
899901
; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
900-
; X64-NEXT: adcq %rbx, %r14
902+
; X64-NEXT: adcq %rbx, %r15
901903
; X64-NEXT: adcq $0, %rbp
902904
; X64-NEXT: adcq $0, %r12
903905
; X64-NEXT: addq %rcx, %rbp
904-
; X64-NEXT: adcq %rdi, %r12
906+
; X64-NEXT: adcq %r14, %r12
905907
; X64-NEXT: setb %cl
906908
; X64-NEXT: movq %r9, %rax
907909
; X64-NEXT: mulq %r11
908-
; X64-NEXT: movq %rax, %r10
909-
; X64-NEXT: addq %rdx, %r10
910-
; X64-NEXT: movq %rdx, %rdi
911-
; X64-NEXT: adcq $0, %rdi
912-
; X64-NEXT: addq %rax, %r10
913-
; X64-NEXT: adcq %rdx, %rdi
914-
; X64-NEXT: setb %bl
915-
; X64-NEXT: addq %rax, %rdi
916-
; X64-NEXT: movzbl %bl, %esi
917-
; X64-NEXT: adcq %rdx, %rsi
910+
; X64-NEXT: movq %rax, %r8
911+
; X64-NEXT: addq %rdx, %r8
912+
; X64-NEXT: movq %rdx, %rbx
913+
; X64-NEXT: adcq $0, %rbx
914+
; X64-NEXT: addq %rax, %r8
915+
; X64-NEXT: adcq %rdx, %rbx
916+
; X64-NEXT: setb %r14b
917+
; X64-NEXT: addq %rax, %rbx
918+
; X64-NEXT: movzbl %r14b, %r14d
919+
; X64-NEXT: adcq %rdx, %r14
918920
; X64-NEXT: addq %rax, %rbp
919-
; X64-NEXT: adcq %r12, %r10
921+
; X64-NEXT: adcq %r12, %r8
920922
; X64-NEXT: movzbl %cl, %eax
921-
; X64-NEXT: adcq %rax, %rdi
922-
; X64-NEXT: adcq $0, %rsi
923-
; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
923+
; X64-NEXT: adcq %rax, %rbx
924+
; X64-NEXT: adcq $0, %r14
924925
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
925-
; X64-NEXT: movq %rsi, %r8
926+
; X64-NEXT: movq %rsi, %rdi
926927
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
927-
; X64-NEXT: addq %rax, %r8
928+
; X64-NEXT: addq %rax, %rdi
928929
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
929930
; X64-NEXT: movq %rdx, %rcx
930931
; X64-NEXT: adcq $0, %rcx
931-
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
932-
; X64-NEXT: addq %rbx, %r8
932+
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
933+
; X64-NEXT: addq %r12, %rdi
933934
; X64-NEXT: adcq %rax, %rcx
934935
; X64-NEXT: setb %al
935936
; X64-NEXT: addq %rsi, %rcx
936937
; X64-NEXT: movzbl %al, %esi
937938
; X64-NEXT: adcq %rdx, %rsi
938939
; X64-NEXT: movq %r9, %rax
939940
; X64-NEXT: imulq %r11
940-
; X64-NEXT: movq %rbx, %r11
941+
; X64-NEXT: movq %r12, %r11
941942
; X64-NEXT: addq %rax, %r11
942-
; X64-NEXT: movq %r8, %r12
943+
; X64-NEXT: movq %rdi, %r12
943944
; X64-NEXT: adcq %rdx, %r12
944945
; X64-NEXT: addq %rcx, %r11
945946
; X64-NEXT: adcq %rsi, %r12
946-
; X64-NEXT: movq %r15, %r9
947+
; X64-NEXT: movq %r10, %r9
947948
; X64-NEXT: addq %r13, %r9
948949
; X64-NEXT: adcq $0, %r13
949950
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
950951
; X64-NEXT: addq %rcx, %r9
951952
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
952953
; X64-NEXT: adcq %rsi, %r13
953-
; X64-NEXT: setb %bl
954+
; X64-NEXT: setb %r10b
954955
; X64-NEXT: addq %rcx, %r13
955-
; X64-NEXT: movzbl %bl, %ecx
956+
; X64-NEXT: movzbl %r10b, %ecx
956957
; X64-NEXT: adcq %rsi, %rcx
957-
; X64-NEXT: addq %r15, %rax
958+
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
959+
; X64-NEXT: addq %rsi, %rax
958960
; X64-NEXT: adcq %r9, %rdx
959961
; X64-NEXT: addq %r13, %rax
960962
; X64-NEXT: adcq %rcx, %rdx
961-
; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload
962-
; X64-NEXT: adcq %r8, %r9
963+
; X64-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
964+
; X64-NEXT: adcq %rdi, %r9
963965
; X64-NEXT: adcq %r11, %rax
964966
; X64-NEXT: adcq %r12, %rdx
965-
; X64-NEXT: addq %rbp, %r15
966-
; X64-NEXT: adcq %r10, %r9
967-
; X64-NEXT: adcq %rdi, %rax
968-
; X64-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload
969-
; X64-NEXT: movq %r14, %rcx
967+
; X64-NEXT: addq %rbp, %rsi
968+
; X64-NEXT: adcq %r8, %r9
969+
; X64-NEXT: adcq %rbx, %rax
970+
; X64-NEXT: adcq %r14, %rdx
971+
; X64-NEXT: movq %r15, %rcx
970972
; X64-NEXT: sarq $63, %rcx
971973
; X64-NEXT: xorq %rcx, %rdx
972974
; X64-NEXT: xorq %rcx, %r9
973975
; X64-NEXT: orq %rdx, %r9
974976
; X64-NEXT: xorq %rcx, %rax
975-
; X64-NEXT: xorq %r15, %rcx
977+
; X64-NEXT: xorq %rsi, %rcx
976978
; X64-NEXT: orq %rax, %rcx
977979
; X64-NEXT: orq %r9, %rcx
978980
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
979981
; X64-NEXT: movl %eax, %esi
980982
; X64-NEXT: andl $1, %esi
981983
; X64-NEXT: movq %rsi, %rdx
982984
; X64-NEXT: negq %rdx
983-
; X64-NEXT: xorq %rdx, %r14
985+
; X64-NEXT: xorq %rdx, %r15
984986
; X64-NEXT: xorq %rax, %rdx
985-
; X64-NEXT: orq %r14, %rdx
987+
; X64-NEXT: orq %r15, %rdx
986988
; X64-NEXT: orq %rcx, %rdx
987989
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
988990
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload

llvm/test/CodeGen/X86/smulo-128-legalisation-lowering.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -505,8 +505,8 @@ define zeroext i1 @smuloi256(i256 %v1, i256 %v2, ptr %res) {
505505
; X64-NEXT: addq %rax, %r9
506506
; X64-NEXT: adcq %rdx, %rsi
507507
; X64-NEXT: sarq $63, %r12
508-
; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax ## 8-byte Reload
509-
; X64-NEXT: mulq %r12
508+
; X64-NEXT: movq %r12, %rax
509+
; X64-NEXT: mulq {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Folded Reload
510510
; X64-NEXT: movq %rdx, %rdi
511511
; X64-NEXT: movq %rax, %rcx
512512
; X64-NEXT: movq %rax, %r14

0 commit comments

Comments
 (0)