[SDAG] avoid generating libcall to function with same name

rotateright · tru · commit 2950a4386761 · 2022-08-22T11:35:21.000+02:00
This is a potentially better alternative to D131452 that should also avoid the infinite-loop bug from issue #56403. This is again a minimal fix to reduce merging pain for the release. But if this makes sense, then we might want to guard all of the RTLIB generation (and other libcalls?) with a similar name check. Differential Revision: https://reviews.llvm.org/D131521 (cherry picked from commit 7f72a0f)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -4428,7 +4428,10 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
   else if (VT == MVT::i128)
     LC = RTLIB::MULO_I128;
 
-  if (LC == RTLIB::UNKNOWN_LIBCALL || !TLI.getLibcallName(LC)) {
+  // If we don't have the libcall or if the function we are compiling is the
+  // implementation of the expected libcall (avoid inf-loop), expand inline.
+  if (LC == RTLIB::UNKNOWN_LIBCALL || !TLI.getLibcallName(LC) ||
+      TLI.getLibcallName(LC) == DAG.getMachineFunction().getName()) {
     // FIXME: This is not an optimal expansion, but better than crashing.
     EVT WideVT =
         EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
diff --git a/llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll
@@ -30,37 +30,62 @@ start:
   ret { i128, i8 } %5
 }
 
-define i128 @__muloti4(i128 %0, i128 %1, i32* nocapture nonnull writeonly align 4 %2) #4 {
+; PR56403
+; We avoid lowering the intrinsic as a libcall because this function has the same name as
+; the libcall we wanted to generate (that would create an infinite loop).
+
+define i128 @__muloti4(i128 %0, i128 %1, i32* nocapture nonnull writeonly align 4 %2) #2 {
 ; AARCH-LABEL: __muloti4:
 ; AARCH:       // %bb.0: // %Entry
-; AARCH-NEXT:    str x30, [sp, #-48]! // 8-byte Folded Spill
-; AARCH-NEXT:    stp x20, x19, [sp, #32] // 16-byte Folded Spill
-; AARCH-NEXT:    mov x19, x4
+; AARCH-NEXT:    asr x9, x1, #63
+; AARCH-NEXT:    asr x10, x3, #63
+; AARCH-NEXT:    umulh x14, x0, x2
+; AARCH-NEXT:    mov x8, x1
+; AARCH-NEXT:    mul x11, x2, x9
 ; AARCH-NEXT:    str wzr, [x4]
-; AARCH-NEXT:    add x4, sp, #8
-; AARCH-NEXT:    stp x22, x21, [sp, #16] // 16-byte Folded Spill
-; AARCH-NEXT:    mov x21, x3
-; AARCH-NEXT:    mov x20, x2
-; AARCH-NEXT:    mov x22, x1
-; AARCH-NEXT:    str xzr, [sp, #8]
-; AARCH-NEXT:    bl __muloti4
-; AARCH-NEXT:    ldr x8, [sp, #8]
-; AARCH-NEXT:    cmp x8, #0
-; AARCH-NEXT:    cset w8, ne
-; AARCH-NEXT:    tbz x22, #63, .LBB1_2
+; AARCH-NEXT:    umulh x12, x10, x0
+; AARCH-NEXT:    umulh x13, x2, x9
+; AARCH-NEXT:    madd x12, x10, x1, x12
+; AARCH-NEXT:    add x13, x13, x11
+; AARCH-NEXT:    mul x10, x10, x0
+; AARCH-NEXT:    madd x9, x3, x9, x13
+; AARCH-NEXT:    add x12, x12, x10
+; AARCH-NEXT:    adds x10, x10, x11
+; AARCH-NEXT:    mul x11, x1, x2
+; AARCH-NEXT:    adc x9, x12, x9
+; AARCH-NEXT:    umulh x13, x1, x2
+; AARCH-NEXT:    mul x12, x0, x3
+; AARCH-NEXT:    adds x11, x11, x14
+; AARCH-NEXT:    umulh x14, x0, x3
+; AARCH-NEXT:    cinc x13, x13, hs
+; AARCH-NEXT:    adds x1, x12, x11
+; AARCH-NEXT:    mul x12, x8, x3
+; AARCH-NEXT:    cinc x11, x14, hs
+; AARCH-NEXT:    mul x0, x0, x2
+; AARCH-NEXT:    adds x11, x13, x11
+; AARCH-NEXT:    umulh x13, x8, x3
+; AARCH-NEXT:    cset w14, hs
+; AARCH-NEXT:    adds x11, x12, x11
+; AARCH-NEXT:    adc x12, x13, x14
+; AARCH-NEXT:    adds x10, x11, x10
+; AARCH-NEXT:    adc x9, x12, x9
+; AARCH-NEXT:    asr x11, x1, #63
+; AARCH-NEXT:    eor x9, x9, x11
+; AARCH-NEXT:    eor x10, x10, x11
+; AARCH-NEXT:    orr x9, x10, x9
+; AARCH-NEXT:    cmp x9, #0
+; AARCH-NEXT:    cset w9, ne
+; AARCH-NEXT:    tbz x8, #63, .LBB1_2
 ; AARCH-NEXT:  // %bb.1: // %Entry
-; AARCH-NEXT:    eor x9, x21, #0x8000000000000000
-; AARCH-NEXT:    orr x9, x20, x9
-; AARCH-NEXT:    cbz x9, .LBB1_3
+; AARCH-NEXT:    eor x8, x3, #0x8000000000000000
+; AARCH-NEXT:    orr x8, x2, x8
+; AARCH-NEXT:    cbz x8, .LBB1_3
 ; AARCH-NEXT:  .LBB1_2: // %Else2
-; AARCH-NEXT:    cbz w8, .LBB1_4
+; AARCH-NEXT:    cbz w9, .LBB1_4
 ; AARCH-NEXT:  .LBB1_3: // %Then7
 ; AARCH-NEXT:    mov w8, #1
-; AARCH-NEXT:    str w8, [x19]
+; AARCH-NEXT:    str w8, [x4]
 ; AARCH-NEXT:  .LBB1_4: // %Block9
-; AARCH-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
-; AARCH-NEXT:    ldp x22, x21, [sp, #16] // 16-byte Folded Reload
-; AARCH-NEXT:    ldr x30, [sp], #48 // 8-byte Folded Reload
 ; AARCH-NEXT:    ret
 Entry:
   store i32 0, i32* %2, align 4
@@ -90,4 +115,3 @@ declare { i128, i1 } @llvm.smul.with.overflow.i128(i128, i128) #1
 attributes #0 = { nounwind readnone uwtable }
 attributes #1 = { nounwind readnone speculatable }
 attributes #2 = { nounwind }
-attributes #4 = { nounwind mustprogress nobuiltin }