Skip to content

Commit 2950a43

Browse files
rotateright authored and tru committed
[SDAG] avoid generating libcall to function with same name
This is a potentially better alternative to D131452 that also should avoid the infinite loop bug from issue #56403. This is again a minimal fix to reduce merging pain for the release. But if this makes sense, then we might want to guard all of the RTLIB generation (and other libcalls?) with a similar name check. Differential Revision: https://reviews.llvm.org/D131521 (cherry picked from commit 7f72a0f)
1 parent e2613c5 commit 2950a43

File tree

2 files changed

+52
-25
lines changed

2 files changed

+52
-25
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4428,7 +4428,10 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
44284428
else if (VT == MVT::i128)
44294429
LC = RTLIB::MULO_I128;
44304430

4431-
if (LC == RTLIB::UNKNOWN_LIBCALL || !TLI.getLibcallName(LC)) {
4431+
// If we don't have the libcall or if the function we are compiling is the
4432+
// implementation of the expected libcall (avoid inf-loop), expand inline.
4433+
if (LC == RTLIB::UNKNOWN_LIBCALL || !TLI.getLibcallName(LC) ||
4434+
TLI.getLibcallName(LC) == DAG.getMachineFunction().getName()) {
44324435
// FIXME: This is not an optimal expansion, but better than crashing.
44334436
EVT WideVT =
44344437
EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);

llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll

Lines changed: 48 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -30,37 +30,62 @@ start:
3030
ret { i128, i8 } %5
3131
}
3232

33-
define i128 @__muloti4(i128 %0, i128 %1, i32* nocapture nonnull writeonly align 4 %2) #4 {
33+
; PR56403
34+
; We avoid lowering the intrinsic as a libcall because this function has the same name as
35+
; the libcall we wanted to generate (that would create an infinite loop).
36+
37+
define i128 @__muloti4(i128 %0, i128 %1, i32* nocapture nonnull writeonly align 4 %2) #2 {
3438
; AARCH-LABEL: __muloti4:
3539
; AARCH: // %bb.0: // %Entry
36-
; AARCH-NEXT: str x30, [sp, #-48]! // 8-byte Folded Spill
37-
; AARCH-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
38-
; AARCH-NEXT: mov x19, x4
40+
; AARCH-NEXT: asr x9, x1, #63
41+
; AARCH-NEXT: asr x10, x3, #63
42+
; AARCH-NEXT: umulh x14, x0, x2
43+
; AARCH-NEXT: mov x8, x1
44+
; AARCH-NEXT: mul x11, x2, x9
3945
; AARCH-NEXT: str wzr, [x4]
40-
; AARCH-NEXT: add x4, sp, #8
41-
; AARCH-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
42-
; AARCH-NEXT: mov x21, x3
43-
; AARCH-NEXT: mov x20, x2
44-
; AARCH-NEXT: mov x22, x1
45-
; AARCH-NEXT: str xzr, [sp, #8]
46-
; AARCH-NEXT: bl __muloti4
47-
; AARCH-NEXT: ldr x8, [sp, #8]
48-
; AARCH-NEXT: cmp x8, #0
49-
; AARCH-NEXT: cset w8, ne
50-
; AARCH-NEXT: tbz x22, #63, .LBB1_2
46+
; AARCH-NEXT: umulh x12, x10, x0
47+
; AARCH-NEXT: umulh x13, x2, x9
48+
; AARCH-NEXT: madd x12, x10, x1, x12
49+
; AARCH-NEXT: add x13, x13, x11
50+
; AARCH-NEXT: mul x10, x10, x0
51+
; AARCH-NEXT: madd x9, x3, x9, x13
52+
; AARCH-NEXT: add x12, x12, x10
53+
; AARCH-NEXT: adds x10, x10, x11
54+
; AARCH-NEXT: mul x11, x1, x2
55+
; AARCH-NEXT: adc x9, x12, x9
56+
; AARCH-NEXT: umulh x13, x1, x2
57+
; AARCH-NEXT: mul x12, x0, x3
58+
; AARCH-NEXT: adds x11, x11, x14
59+
; AARCH-NEXT: umulh x14, x0, x3
60+
; AARCH-NEXT: cinc x13, x13, hs
61+
; AARCH-NEXT: adds x1, x12, x11
62+
; AARCH-NEXT: mul x12, x8, x3
63+
; AARCH-NEXT: cinc x11, x14, hs
64+
; AARCH-NEXT: mul x0, x0, x2
65+
; AARCH-NEXT: adds x11, x13, x11
66+
; AARCH-NEXT: umulh x13, x8, x3
67+
; AARCH-NEXT: cset w14, hs
68+
; AARCH-NEXT: adds x11, x12, x11
69+
; AARCH-NEXT: adc x12, x13, x14
70+
; AARCH-NEXT: adds x10, x11, x10
71+
; AARCH-NEXT: adc x9, x12, x9
72+
; AARCH-NEXT: asr x11, x1, #63
73+
; AARCH-NEXT: eor x9, x9, x11
74+
; AARCH-NEXT: eor x10, x10, x11
75+
; AARCH-NEXT: orr x9, x10, x9
76+
; AARCH-NEXT: cmp x9, #0
77+
; AARCH-NEXT: cset w9, ne
78+
; AARCH-NEXT: tbz x8, #63, .LBB1_2
5179
; AARCH-NEXT: // %bb.1: // %Entry
52-
; AARCH-NEXT: eor x9, x21, #0x8000000000000000
53-
; AARCH-NEXT: orr x9, x20, x9
54-
; AARCH-NEXT: cbz x9, .LBB1_3
80+
; AARCH-NEXT: eor x8, x3, #0x8000000000000000
81+
; AARCH-NEXT: orr x8, x2, x8
82+
; AARCH-NEXT: cbz x8, .LBB1_3
5583
; AARCH-NEXT: .LBB1_2: // %Else2
56-
; AARCH-NEXT: cbz w8, .LBB1_4
84+
; AARCH-NEXT: cbz w9, .LBB1_4
5785
; AARCH-NEXT: .LBB1_3: // %Then7
5886
; AARCH-NEXT: mov w8, #1
59-
; AARCH-NEXT: str w8, [x19]
87+
; AARCH-NEXT: str w8, [x4]
6088
; AARCH-NEXT: .LBB1_4: // %Block9
61-
; AARCH-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
62-
; AARCH-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload
63-
; AARCH-NEXT: ldr x30, [sp], #48 // 8-byte Folded Reload
6489
; AARCH-NEXT: ret
6590
Entry:
6691
store i32 0, i32* %2, align 4
@@ -90,4 +115,3 @@ declare { i128, i1 } @llvm.smul.with.overflow.i128(i128, i128) #1
90115
attributes #0 = { nounwind readnone uwtable }
91116
attributes #1 = { nounwind readnone speculatable }
92117
attributes #2 = { nounwind }
93-
attributes #4 = { nounwind mustprogress nobuiltin }

0 commit comments

Comments
 (0)