Skip to content

Commit 22d7bee

Browse files
aaronpuchert authored and tstellar committed
[PPCISelLowering] Avoid emitting calls to __multi3, __muloti4
After D108936, @llvm.smul.with.overflow.i64 was lowered to __multi3 instead of __mulodi4, which also doesn't exist on PowerPC 32-bit, not even with compiler-rt. Block it as well so that we get inline code.

Because libgcc doesn't have __muloti4, we block that as well.

Fixes #54460.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D122090
1 parent 23d0827 commit 22d7bee

File tree

4 files changed

+127
-117
lines changed

4 files changed

+127
-117
lines changed

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1305,11 +1305,13 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
13051305
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
13061306
}
13071307

1308+
setLibcallName(RTLIB::MULO_I128, nullptr);
13081309
if (!isPPC64) {
13091310
// These libcalls are not available in 32-bit.
13101311
setLibcallName(RTLIB::SHL_I128, nullptr);
13111312
setLibcallName(RTLIB::SRL_I128, nullptr);
13121313
setLibcallName(RTLIB::SRA_I128, nullptr);
1314+
setLibcallName(RTLIB::MUL_I128, nullptr);
13131315
setLibcallName(RTLIB::MULO_I64, nullptr);
13141316
}
13151317

llvm/test/CodeGen/PowerPC/overflow-intrinsic-optimizations.ll

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,10 @@
11
; RUN: llc %s -mtriple=powerpc -o - | FileCheck %s
2+
; RUN: llc %s -mtriple=powerpc64 -o - | FileCheck %s
23

34
define i1 @no__mulodi4(i32 %a, i64 %b, i32* %c) {
45
; CHECK-LABEL: no__mulodi4
56
; CHECK-NOT: bl __mulodi4
7+
; CHECK-NOT: bl __multi3
68
entry:
79
%0 = sext i32 %a to i64
810
%1 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 %0, i64 %b)
@@ -16,4 +18,14 @@ entry:
1618
ret i1 %7
1719
}
1820

21+
define i1 @no__muloti4(i128 %a, i128 %b) {
22+
; CHECK-LABEL: no__muloti4
23+
; CHECK-NOT: bl __muloti4
24+
entry:
25+
%0 = call { i128, i1 } @llvm.smul.with.overflow.i128(i128 %a, i128 %b)
26+
%1 = extractvalue { i128, i1 } %0, 1
27+
ret i1 %1
28+
}
29+
1930
declare { i64, i1 } @llvm.smul.with.overflow.i64(i64, i64)
31+
declare { i128, i1 } @llvm.smul.with.overflow.i128(i128, i128)

llvm/test/CodeGen/PowerPC/umulo-128-legalisation-lowering.ll

Lines changed: 81 additions & 88 deletions
Original file line number | Diff line number | Diff line change
@@ -32,110 +32,103 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
3232
;
3333
; PPC32-LABEL: muloti_test:
3434
; PPC32: # %bb.0: # %start
35-
; PPC32-NEXT: mflr 0
36-
; PPC32-NEXT: stw 0, 4(1)
3735
; PPC32-NEXT: stwu 1, -64(1)
38-
; PPC32-NEXT: stw 24, 32(1) # 4-byte Folded Spill
39-
; PPC32-NEXT: mfcr 12
4036
; PPC32-NEXT: stw 26, 40(1) # 4-byte Folded Spill
41-
; PPC32-NEXT: mr 26, 7
42-
; PPC32-NEXT: stw 28, 48(1) # 4-byte Folded Spill
43-
; PPC32-NEXT: mr 28, 4
44-
; PPC32-NEXT: stw 29, 52(1) # 4-byte Folded Spill
45-
; PPC32-NEXT: mr 29, 8
46-
; PPC32-NEXT: mr 24, 3
47-
; PPC32-NEXT: li 3, 0
48-
; PPC32-NEXT: li 4, 0
49-
; PPC32-NEXT: li 7, 0
50-
; PPC32-NEXT: li 8, 0
37+
; PPC32-NEXT: mulhwu. 26, 7, 6
38+
; PPC32-NEXT: mcrf 1, 0
39+
; PPC32-NEXT: mfcr 12
40+
; PPC32-NEXT: cmpwi 7, 5, 0
41+
; PPC32-NEXT: cmpwi 2, 7, 0
42+
; PPC32-NEXT: stw 22, 24(1) # 4-byte Folded Spill
43+
; PPC32-NEXT: mulhwu. 26, 5, 8
44+
; PPC32-NEXT: mcrf 5, 0
5145
; PPC32-NEXT: stw 23, 28(1) # 4-byte Folded Spill
52-
; PPC32-NEXT: mr 23, 5
46+
; PPC32-NEXT: crnor 28, 30, 10
47+
; PPC32-NEXT: stw 29, 52(1) # 4-byte Folded Spill
48+
; PPC32-NEXT: cmpwi 2, 9, 0
49+
; PPC32-NEXT: mulhwu. 26, 3, 10
50+
; PPC32-NEXT: mcrf 6, 0
51+
; PPC32-NEXT: cmpwi 3, 3, 0
52+
; PPC32-NEXT: stw 24, 32(1) # 4-byte Folded Spill
53+
; PPC32-NEXT: crnor 29, 10, 14
5354
; PPC32-NEXT: stw 25, 36(1) # 4-byte Folded Spill
54-
; PPC32-NEXT: mr 25, 9
55+
; PPC32-NEXT: mulhwu. 26, 9, 4
5556
; PPC32-NEXT: stw 27, 44(1) # 4-byte Folded Spill
56-
; PPC32-NEXT: mr 27, 6
57+
; PPC32-NEXT: crorc 28, 28, 6
58+
; PPC32-NEXT: stw 28, 48(1) # 4-byte Folded Spill
59+
; PPC32-NEXT: crorc 20, 28, 22
5760
; PPC32-NEXT: stw 30, 56(1) # 4-byte Folded Spill
58-
; PPC32-NEXT: mr 30, 10
59-
; PPC32-NEXT: stw 12, 24(1)
60-
; PPC32-NEXT: bl __multi3
61-
; PPC32-NEXT: mulhwu. 9, 26, 27
62-
; PPC32-NEXT: mfcr 9 # cr0
63-
; PPC32-NEXT: cmpwi 2, 26, 0
64-
; PPC32-NEXT: stw 9, 20(1)
65-
; PPC32-NEXT: cmpwi 3, 23, 0
66-
; PPC32-NEXT: crnor 12, 14, 10
67-
; PPC32-NEXT: cmpwi 4, 24, 0
68-
; PPC32-NEXT: mulhwu. 9, 23, 29
69-
; PPC32-NEXT: mcrf 5, 0
70-
; PPC32-NEXT: cmpwi 1, 25, 0
71-
; PPC32-NEXT: crnor 4, 6, 18
72-
; PPC32-NEXT: mulhwu. 9, 24, 30
73-
; PPC32-NEXT: mcrf 6, 0
74-
; PPC32-NEXT: mulhwu. 0, 25, 28
75-
; PPC32-NEXT: mcrf 7, 0
76-
; PPC32-NEXT: or. 0, 28, 24
77-
; PPC32-NEXT: mcrf 2, 0
78-
; PPC32-NEXT: or. 0, 29, 26
79-
; PPC32-NEXT: crnor 5, 2, 10
80-
; PPC32-NEXT: mullw 10, 26, 27
81-
; PPC32-NEXT: lwz 26, 20(1)
82-
; PPC32-NEXT: mullw 9, 23, 29
83-
; PPC32-NEXT: add 9, 10, 9
84-
; PPC32-NEXT: mtcrf 128, 26 # cr0
85-
; PPC32-NEXT: crorc 6, 12, 2
86-
; PPC32-NEXT: crorc 20, 6, 22
87-
; PPC32-NEXT: mulhwu 7, 29, 27
88-
; PPC32-NEXT: add 9, 7, 9
89-
; PPC32-NEXT: cmplw 9, 7
90-
; PPC32-NEXT: crorc 21, 4, 26
91-
; PPC32-NEXT: cror 20, 20, 0
92-
; PPC32-NEXT: crorc 21, 21, 30
93-
; PPC32-NEXT: mullw 11, 25, 28
94-
; PPC32-NEXT: mullw 12, 24, 30
95-
; PPC32-NEXT: add 10, 12, 11
96-
; PPC32-NEXT: lwz 12, 24(1)
97-
; PPC32-NEXT: lwz 26, 40(1) # 4-byte Folded Reload
98-
; PPC32-NEXT: mulhwu 8, 28, 30
99-
; PPC32-NEXT: add 10, 8, 10
100-
; PPC32-NEXT: cmplw 10, 8
101-
; PPC32-NEXT: cror 21, 21, 0
102-
; PPC32-NEXT: cror 21, 5, 21
61+
; PPC32-NEXT: mulhwu 0, 6, 10
62+
; PPC32-NEXT: stw 12, 20(1)
63+
; PPC32-NEXT: crorc 21, 29, 26
64+
; PPC32-NEXT: crorc 21, 21, 2
65+
; PPC32-NEXT: li 11, 0
66+
; PPC32-NEXT: mullw 26, 5, 10
67+
; PPC32-NEXT: addc 0, 26, 0
68+
; PPC32-NEXT: mulhwu 29, 5, 10
69+
; PPC32-NEXT: addze 29, 29
70+
; PPC32-NEXT: mullw 23, 5, 8
71+
; PPC32-NEXT: mullw 22, 7, 6
72+
; PPC32-NEXT: mulhwu 30, 6, 9
73+
; PPC32-NEXT: mulhwu 12, 5, 9
74+
; PPC32-NEXT: mulhwu 28, 8, 6
75+
; PPC32-NEXT: mullw 25, 6, 9
76+
; PPC32-NEXT: mullw 24, 5, 9
77+
; PPC32-NEXT: mullw 5, 9, 4
78+
; PPC32-NEXT: add 9, 22, 23
79+
; PPC32-NEXT: add 9, 28, 9
80+
; PPC32-NEXT: cmplw 1, 9, 28
81+
; PPC32-NEXT: cror 20, 20, 4
82+
; PPC32-NEXT: mullw 23, 3, 10
83+
; PPC32-NEXT: add 26, 23, 5
84+
; PPC32-NEXT: addc 5, 25, 0
85+
; PPC32-NEXT: addze 30, 30
86+
; PPC32-NEXT: or. 3, 4, 3
87+
; PPC32-NEXT: mulhwu 27, 4, 10
88+
; PPC32-NEXT: mcrf 1, 0
89+
; PPC32-NEXT: addc 3, 29, 30
90+
; PPC32-NEXT: add 26, 27, 26
91+
; PPC32-NEXT: cmplw 6, 26, 27
92+
; PPC32-NEXT: cror 21, 21, 24
93+
; PPC32-NEXT: mullw 0, 4, 10
94+
; PPC32-NEXT: or. 4, 8, 7
95+
; PPC32-NEXT: addze 4, 11
96+
; PPC32-NEXT: addc 7, 24, 3
97+
; PPC32-NEXT: crnor 22, 2, 6
98+
; PPC32-NEXT: mullw 28, 8, 6
99+
; PPC32-NEXT: adde 8, 12, 4
100+
; PPC32-NEXT: addc 3, 0, 28
101+
; PPC32-NEXT: adde 9, 26, 9
102+
; PPC32-NEXT: addc 4, 7, 3
103+
; PPC32-NEXT: adde 3, 8, 9
104+
; PPC32-NEXT: cror 21, 22, 21
105+
; PPC32-NEXT: cmplw 4, 7
106+
; PPC32-NEXT: cmplw 1, 3, 8
107+
; PPC32-NEXT: lwz 12, 20(1)
103108
; PPC32-NEXT: cror 20, 21, 20
104-
; PPC32-NEXT: mullw 0, 29, 27
109+
; PPC32-NEXT: crandc 21, 4, 6
110+
; PPC32-NEXT: crand 22, 6, 0
111+
; PPC32-NEXT: cror 21, 22, 21
112+
; PPC32-NEXT: crnor 20, 20, 21
113+
; PPC32-NEXT: li 7, 1
114+
; PPC32-NEXT: mullw 6, 6, 10
115+
; PPC32-NEXT: bc 12, 20, .LBB0_1
116+
; PPC32-NEXT: b .LBB0_2
117+
; PPC32-NEXT: .LBB0_1: # %start
118+
; PPC32-NEXT: li 7, 0
119+
; PPC32-NEXT: .LBB0_2: # %start
105120
; PPC32-NEXT: mtcrf 32, 12 # cr2
106121
; PPC32-NEXT: mtcrf 16, 12 # cr3
107-
; PPC32-NEXT: mtcrf 8, 12 # cr4
108-
; PPC32-NEXT: lwz 29, 52(1) # 4-byte Folded Reload
109-
; PPC32-NEXT: mullw 7, 28, 30
110-
; PPC32-NEXT: addc 7, 7, 0
111-
; PPC32-NEXT: adde 11, 10, 9
112-
; PPC32-NEXT: addc 9, 4, 7
113-
; PPC32-NEXT: adde 8, 3, 11
114-
; PPC32-NEXT: cmplw 6, 9, 4
115-
; PPC32-NEXT: cmplw 8, 3
116-
; PPC32-NEXT: crand 22, 2, 24
117-
; PPC32-NEXT: crandc 23, 0, 2
118-
; PPC32-NEXT: cror 22, 22, 23
119-
; PPC32-NEXT: crnor 20, 20, 22
120-
; PPC32-NEXT: li 3, 1
121-
; PPC32-NEXT: bc 12, 20, .LBB0_2
122-
; PPC32-NEXT: # %bb.1: # %start
123-
; PPC32-NEXT: ori 7, 3, 0
124-
; PPC32-NEXT: b .LBB0_3
125-
; PPC32-NEXT: .LBB0_2: # %start
126-
; PPC32-NEXT: li 7, 0
127-
; PPC32-NEXT: .LBB0_3: # %start
128-
; PPC32-NEXT: mr 3, 8
129-
; PPC32-NEXT: mr 4, 9
130122
; PPC32-NEXT: lwz 30, 56(1) # 4-byte Folded Reload
123+
; PPC32-NEXT: lwz 29, 52(1) # 4-byte Folded Reload
131124
; PPC32-NEXT: lwz 28, 48(1) # 4-byte Folded Reload
132125
; PPC32-NEXT: lwz 27, 44(1) # 4-byte Folded Reload
126+
; PPC32-NEXT: lwz 26, 40(1) # 4-byte Folded Reload
133127
; PPC32-NEXT: lwz 25, 36(1) # 4-byte Folded Reload
134128
; PPC32-NEXT: lwz 24, 32(1) # 4-byte Folded Reload
135129
; PPC32-NEXT: lwz 23, 28(1) # 4-byte Folded Reload
136-
; PPC32-NEXT: lwz 0, 68(1)
130+
; PPC32-NEXT: lwz 22, 24(1) # 4-byte Folded Reload
137131
; PPC32-NEXT: addi 1, 1, 64
138-
; PPC32-NEXT: mtlr 0
139132
; PPC32-NEXT: blr
140133
start:
141134
%0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %l, i128 %r) #2

llvm/test/CodeGen/PowerPC/urem-seteq-illegal-types.ll

Lines changed: 32 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -208,43 +208,46 @@ define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind {
208208
define i1 @test_urem_oversized(i66 %X) nounwind {
209209
; PPC-LABEL: test_urem_oversized:
210210
; PPC: # %bb.0:
211-
; PPC-NEXT: mflr 0
212-
; PPC-NEXT: stw 0, 4(1)
213-
; PPC-NEXT: stwu 1, -16(1)
214-
; PPC-NEXT: mr 6, 5
215-
; PPC-NEXT: mr 5, 4
216-
; PPC-NEXT: mr 4, 3
217-
; PPC-NEXT: lis 3, 12057
218-
; PPC-NEXT: lis 7, -12795
219-
; PPC-NEXT: ori 9, 3, 37186
220-
; PPC-NEXT: ori 10, 7, 40665
221-
; PPC-NEXT: li 3, 0
222-
; PPC-NEXT: li 7, 0
223-
; PPC-NEXT: li 8, 2
224-
; PPC-NEXT: bl __multi3
225-
; PPC-NEXT: rotlwi 7, 6, 31
226-
; PPC-NEXT: lis 3, -5526
227-
; PPC-NEXT: rlwimi 7, 5, 31, 0, 0
228-
; PPC-NEXT: rotlwi 5, 5, 31
229-
; PPC-NEXT: rlwimi 5, 4, 31, 0, 0
230-
; PPC-NEXT: ori 3, 3, 61135
231-
; PPC-NEXT: cmplwi 1, 5, 13
232-
; PPC-NEXT: cmplw 7, 3
233-
; PPC-NEXT: rlwinm 4, 4, 31, 31, 31
211+
; PPC-NEXT: lis 6, -12795
212+
; PPC-NEXT: ori 6, 6, 40665
213+
; PPC-NEXT: mulhwu 7, 5, 6
214+
; PPC-NEXT: lis 9, 12057
215+
; PPC-NEXT: ori 9, 9, 37186
216+
; PPC-NEXT: mullw 11, 4, 6
217+
; PPC-NEXT: addc 7, 11, 7
218+
; PPC-NEXT: lis 11, -5526
219+
; PPC-NEXT: ori 11, 11, 61135
220+
; PPC-NEXT: mulhwu 8, 4, 6
221+
; PPC-NEXT: addze 8, 8
222+
; PPC-NEXT: mulhwu 10, 5, 9
223+
; PPC-NEXT: mullw 4, 4, 9
224+
; PPC-NEXT: mullw 9, 5, 9
225+
; PPC-NEXT: addc 7, 9, 7
226+
; PPC-NEXT: addze 9, 10
227+
; PPC-NEXT: rotlwi 10, 7, 31
228+
; PPC-NEXT: mullw 3, 3, 6
229+
; PPC-NEXT: mullw 6, 5, 6
230+
; PPC-NEXT: slwi 5, 5, 1
231+
; PPC-NEXT: add 3, 5, 3
232+
; PPC-NEXT: rotlwi 5, 6, 31
233+
; PPC-NEXT: rlwimi 5, 7, 31, 0, 0
234+
; PPC-NEXT: add 7, 8, 9
235+
; PPC-NEXT: add 4, 4, 7
236+
; PPC-NEXT: add 3, 4, 3
237+
; PPC-NEXT: rlwimi 10, 3, 31, 0, 0
238+
; PPC-NEXT: cmplw 5, 11
239+
; PPC-NEXT: cmplwi 1, 10, 13
240+
; PPC-NEXT: rlwinm 3, 3, 31, 31, 31
234241
; PPC-NEXT: crand 20, 6, 0
235242
; PPC-NEXT: crandc 21, 4, 6
236-
; PPC-NEXT: rlwimi. 4, 6, 1, 30, 30
243+
; PPC-NEXT: rlwimi. 3, 6, 1, 30, 30
237244
; PPC-NEXT: cror 20, 20, 21
238245
; PPC-NEXT: crnand 20, 2, 20
239246
; PPC-NEXT: li 3, 1
240247
; PPC-NEXT: bc 12, 20, .LBB5_1
241-
; PPC-NEXT: b .LBB5_2
248+
; PPC-NEXT: blr
242249
; PPC-NEXT: .LBB5_1:
243250
; PPC-NEXT: li 3, 0
244-
; PPC-NEXT: .LBB5_2:
245-
; PPC-NEXT: lwz 0, 20(1)
246-
; PPC-NEXT: addi 1, 1, 16
247-
; PPC-NEXT: mtlr 0
248251
; PPC-NEXT: blr
249252
;
250253
; PPC64LE-LABEL: test_urem_oversized:

0 commit comments

Comments
 (0)