@@ -30,37 +30,62 @@ start:
30
30
ret { i128 , i8 } %5
31
31
}
32
32
33
- define i128 @__muloti4 (i128 %0 , i128 %1 , i32* nocapture nonnull writeonly align 4 %2 ) #4 {
33
+ ; PR56403
34
+ ; We avoid lowering the intrinsic as a libcall because this function has the same name as
35
+ ; the libcall we would otherwise generate (the function would call itself and recurse forever).
36
+
37
+ define i128 @__muloti4 (i128 %0 , i128 %1 , i32* nocapture nonnull writeonly align 4 %2 ) #2 {
34
38
; AARCH-LABEL: __muloti4:
35
39
; AARCH: // %bb.0: // %Entry
36
- ; AARCH-NEXT: str x30, [sp, #-48]! // 8-byte Folded Spill
37
- ; AARCH-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
38
- ; AARCH-NEXT: mov x19, x4
40
+ ; AARCH-NEXT: asr x9, x1, #63
41
+ ; AARCH-NEXT: asr x10, x3, #63
42
+ ; AARCH-NEXT: umulh x14, x0, x2
43
+ ; AARCH-NEXT: mov x8, x1
44
+ ; AARCH-NEXT: mul x11, x2, x9
39
45
; AARCH-NEXT: str wzr, [x4]
40
- ; AARCH-NEXT: add x4, sp, #8
41
- ; AARCH-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
42
- ; AARCH-NEXT: mov x21, x3
43
- ; AARCH-NEXT: mov x20, x2
44
- ; AARCH-NEXT: mov x22, x1
45
- ; AARCH-NEXT: str xzr, [sp, #8]
46
- ; AARCH-NEXT: bl __muloti4
47
- ; AARCH-NEXT: ldr x8, [sp, #8]
48
- ; AARCH-NEXT: cmp x8, #0
49
- ; AARCH-NEXT: cset w8, ne
50
- ; AARCH-NEXT: tbz x22, #63, .LBB1_2
46
+ ; AARCH-NEXT: umulh x12, x10, x0
47
+ ; AARCH-NEXT: umulh x13, x2, x9
48
+ ; AARCH-NEXT: madd x12, x10, x1, x12
49
+ ; AARCH-NEXT: add x13, x13, x11
50
+ ; AARCH-NEXT: mul x10, x10, x0
51
+ ; AARCH-NEXT: madd x9, x3, x9, x13
52
+ ; AARCH-NEXT: add x12, x12, x10
53
+ ; AARCH-NEXT: adds x10, x10, x11
54
+ ; AARCH-NEXT: mul x11, x1, x2
55
+ ; AARCH-NEXT: adc x9, x12, x9
56
+ ; AARCH-NEXT: umulh x13, x1, x2
57
+ ; AARCH-NEXT: mul x12, x0, x3
58
+ ; AARCH-NEXT: adds x11, x11, x14
59
+ ; AARCH-NEXT: umulh x14, x0, x3
60
+ ; AARCH-NEXT: cinc x13, x13, hs
61
+ ; AARCH-NEXT: adds x1, x12, x11
62
+ ; AARCH-NEXT: mul x12, x8, x3
63
+ ; AARCH-NEXT: cinc x11, x14, hs
64
+ ; AARCH-NEXT: mul x0, x0, x2
65
+ ; AARCH-NEXT: adds x11, x13, x11
66
+ ; AARCH-NEXT: umulh x13, x8, x3
67
+ ; AARCH-NEXT: cset w14, hs
68
+ ; AARCH-NEXT: adds x11, x12, x11
69
+ ; AARCH-NEXT: adc x12, x13, x14
70
+ ; AARCH-NEXT: adds x10, x11, x10
71
+ ; AARCH-NEXT: adc x9, x12, x9
72
+ ; AARCH-NEXT: asr x11, x1, #63
73
+ ; AARCH-NEXT: eor x9, x9, x11
74
+ ; AARCH-NEXT: eor x10, x10, x11
75
+ ; AARCH-NEXT: orr x9, x10, x9
76
+ ; AARCH-NEXT: cmp x9, #0
77
+ ; AARCH-NEXT: cset w9, ne
78
+ ; AARCH-NEXT: tbz x8, #63, .LBB1_2
51
79
; AARCH-NEXT: // %bb.1: // %Entry
52
- ; AARCH-NEXT: eor x9, x21 , #0x8000000000000000
53
- ; AARCH-NEXT: orr x9, x20, x9
54
- ; AARCH-NEXT: cbz x9 , .LBB1_3
80
+ ; AARCH-NEXT: eor x8, x3 , #0x8000000000000000
81
+ ; AARCH-NEXT: orr x8, x2, x8
82
+ ; AARCH-NEXT: cbz x8 , .LBB1_3
55
83
; AARCH-NEXT: .LBB1_2: // %Else2
56
- ; AARCH-NEXT: cbz w8 , .LBB1_4
84
+ ; AARCH-NEXT: cbz w9 , .LBB1_4
57
85
; AARCH-NEXT: .LBB1_3: // %Then7
58
86
; AARCH-NEXT: mov w8, #1
59
- ; AARCH-NEXT: str w8, [x19 ]
87
+ ; AARCH-NEXT: str w8, [x4 ]
60
88
; AARCH-NEXT: .LBB1_4: // %Block9
61
- ; AARCH-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
62
- ; AARCH-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload
63
- ; AARCH-NEXT: ldr x30, [sp], #48 // 8-byte Folded Reload
64
89
; AARCH-NEXT: ret
65
90
Entry:
66
91
store i32 0 , i32* %2 , align 4
@@ -90,4 +115,3 @@ declare { i128, i1 } @llvm.smul.with.overflow.i128(i128, i128) #1
90
115
attributes #0 = { nounwind readnone uwtable }
91
116
attributes #1 = { nounwind readnone speculatable }
92
117
attributes #2 = { nounwind }
93
- attributes #4 = { nounwind mustprogress nobuiltin }
0 commit comments