Skip to content

Commit fd1e269

Browse files
committed
Alternate fake_use missing pop fix
1 parent b522657 commit fd1e269

File tree

10 files changed

+100
-51
lines changed

10 files changed

+100
-51
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp

Lines changed: 2 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -435,21 +435,8 @@ bool DAGTypeLegalizer::run() {
435435
#endif
436436
PerformExpensiveChecks();
437437

438-
// Get the value of the original root after type legalization.
439-
SDValue Root = Dummy.getValue();
440-
441-
// Get the current root value, if it's not null combine it with the original
442-
// root to prevent it being removed as a dead node.
443-
if (SDValue LegalRoot = DAG.getRoot()) {
444-
Root = DAG.getNode(ISD::TokenFactor, SDLoc(LegalRoot), MVT::Other, Root,
445-
LegalRoot);
446-
// The token_factor should not need any legalization (as both inputs have
447-
// already been legalized).
448-
Root->setNodeId(Processed);
449-
}
450-
451-
// Restore the root.
452-
DAG.setRoot(Root);
438+
// If the root changed (e.g. it was a dead load), update the root.
439+
DAG.setRoot(Dummy.getValue());
453440

454441
// Remove dead nodes. This is important to do for cleanliness but also before
455442
// the checking loop below. Implicit folding by the DAG.getNode operators and

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 14 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2616,6 +2616,20 @@ bool SelectionDAG::expandMultipleResultFPLibCall(
26162616

26172617
auto [Call, CallChain] = TLI->LowerCallTo(CLI);
26182618

2619+
if (CallRetResNo && !Node->hasAnyUseOfValue(*CallRetResNo)) {
2620+
// FIXME: This is needed for x87, which uses a floating-point stack. Suppose (for
2621+
// example) the node to be expanded has two results: one floating-point, which
2622+
// is returned by the call, and one integer result, returned via an output
2623+
// pointer. If only the integer result is used then the `CopyFromReg` for
2624+
// the FP result may be optimized out. This prevents an FP stack pop from
2625+
// being emitted for it. The `FAKE_USE` node prevents optimizations from
2626+
// removing the `CopyFromReg` from the chain, and ensures the FP pop will be
2627+
// emitted. Note: We use an undef pointer as the argument to prevent keeping
2628+
// any real values live longer than we need to.
2629+
CallChain = getNode(ISD::FAKE_USE, DL, MVT::Other, CallChain,
2630+
getUNDEF(TLI->getPointerTy(getDataLayout())));
2631+
}
2632+
26192633
for (auto [ResNo, ResultPtr] : llvm::enumerate(ResultPtrs)) {
26202634
if (ResNo == CallRetResNo) {
26212635
Results.push_back(Call);
@@ -2635,24 +2649,6 @@ bool SelectionDAG::expandMultipleResultFPLibCall(
26352649
Results.push_back(LoadResult);
26362650
}
26372651

2638-
if (CallRetResNo && !Node->hasAnyUseOfValue(*CallRetResNo)) {
2639-
// FIXME: Find a way to avoid updating the root. This is needed for x86,
2640-
// which uses a floating-point stack. If (for example) the node to be
2641-
// expanded has two results one floating-point which is returned by the
2642-
// call, and one integer result, returned via an output pointer. If only the
2643-
// integer result is used then the `CopyFromReg` for the FP result may be
2644-
// optimized out. This prevents an FP stack pop from being emitted for it.
2645-
// Setting the root like this ensures there will be a use of the
2646-
// `CopyFromReg` chain, and ensures the FP pop will be emitted.
2647-
SDValue OldRoot = getRoot();
2648-
SDValue NewRoot =
2649-
OldRoot ? getNode(ISD::TokenFactor, DL, MVT::Other, OldRoot, CallChain)
2650-
: CallChain;
2651-
setRoot(NewRoot);
2652-
// Ensure the new root is reachable from the results.
2653-
Results[0] = getMergeValues({Results[0], NewRoot}, DL);
2654-
}
2655-
26562652
return true;
26572653
}
26582654

llvm/test/CodeGen/AArch64/llvm.modf.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ define half @test_modf_f16_only_use_integral_part(half %a) {
4545
; CHECK-NEXT: fcvt s0, h0
4646
; CHECK-NEXT: add x0, sp, #12
4747
; CHECK-NEXT: bl modff
48+
; CHECK-NEXT: // fake_use: $x0
4849
; CHECK-NEXT: ldr s0, [sp, #12]
4950
; CHECK-NEXT: fcvt h0, s0
5051
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload

llvm/test/CodeGen/ARM/llvm.frexp.ll

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ define i32 @test_frexp_f16_i32_only_use_exp(half %a) {
4141
; CHECK-NEXT: bl __gnu_h2f_ieee
4242
; CHECK-NEXT: add r1, sp, #4
4343
; CHECK-NEXT: bl frexpf
44+
; CHECK-NEXT: @ fake_use: $r0
4445
; CHECK-NEXT: ldr r0, [sp, #4]
4546
; CHECK-NEXT: add sp, #8
4647
; CHECK-NEXT: pop {r7, pc}
@@ -132,6 +133,8 @@ define <2 x i32> @test_frexp_v2f16_v2i32_only_use_exp(<2 x half> %a) {
132133
; CHECK-NEXT: mov r1, r4
133134
; CHECK-NEXT: bl frexpf
134135
; CHECK-NEXT: vld1.32 {d16[0]}, [r5:32]
136+
; CHECK-NEXT: @ fake_use: $r0
137+
; CHECK-NEXT: @ fake_use: $r0
135138
; CHECK-NEXT: vld1.32 {d16[1]}, [r4:32]
136139
; CHECK-NEXT: vmov r0, r1, d16
137140
; CHECK-NEXT: add sp, #8
@@ -190,6 +193,7 @@ define i32 @test_frexp_f32_i32_only_use_exp(float %a) {
190193
; CHECK-NEXT: sub sp, #8
191194
; CHECK-NEXT: add r1, sp, #4
192195
; CHECK-NEXT: bl frexpf
196+
; CHECK-NEXT: @ fake_use: $r0
193197
; CHECK-NEXT: ldr r0, [sp, #4]
194198
; CHECK-NEXT: add sp, #8
195199
; CHECK-NEXT: pop {r7, pc}
@@ -265,6 +269,8 @@ define <2 x i32> @test_frexp_v2f32_v2i32_only_use_exp(<2 x float> %a) {
265269
; CHECK-NEXT: mov r1, r5
266270
; CHECK-NEXT: bl frexpf
267271
; CHECK-NEXT: vld1.32 {d16[0]}, [r4:32]
272+
; CHECK-NEXT: @ fake_use: $r0
273+
; CHECK-NEXT: @ fake_use: $r0
268274
; CHECK-NEXT: vld1.32 {d16[1]}, [r5:32]
269275
; CHECK-NEXT: vmov r0, r1, d16
270276
; CHECK-NEXT: add sp, #8
@@ -376,8 +382,13 @@ define <4 x i32> @test_frexp_v4f32_v4i32_only_use_exp(<4 x float> %a) {
376382
; CHECK-NEXT: mov r1, sp
377383
; CHECK-NEXT: mov r0, r4
378384
; CHECK-NEXT: bl frexpf
379-
; CHECK-NEXT: ldrd r1, r0, [sp, #8]
380-
; CHECK-NEXT: ldrd r3, r2, [sp], #16
385+
; CHECK-NEXT: @ fake_use: $r0
386+
; CHECK-NEXT: @ fake_use: $r0
387+
; CHECK-NEXT: ldr r0, [sp, #12]
388+
; CHECK-NEXT: @ fake_use: $r0
389+
; CHECK-NEXT: ldrd r2, r1, [sp, #4]
390+
; CHECK-NEXT: @ fake_use: $r0
391+
; CHECK-NEXT: ldr r3, [sp], #16
381392
; CHECK-NEXT: pop {r4, r5, r6, pc}
382393
%result = call { <4 x float>, <4 x i32> } @llvm.frexp.v4f32.v4i32(<4 x float> %a)
383394
%result.1 = extractvalue { <4 x float>, <4 x i32> } %result, 1
@@ -419,6 +430,7 @@ define i32 @test_frexp_f64_i32_only_use_exp(double %a) {
419430
; CHECK-NEXT: sub sp, #8
420431
; CHECK-NEXT: add r2, sp, #4
421432
; CHECK-NEXT: bl frexp
433+
; CHECK-NEXT: @ fake_use: $r0
422434
; CHECK-NEXT: ldr r0, [sp, #4]
423435
; CHECK-NEXT: add sp, #8
424436
; CHECK-NEXT: pop {r7, pc}
@@ -498,6 +510,8 @@ define <2 x i32> @test_frexp_v2f64_v2i32_only_use_exp(<2 x double> %a) {
498510
; CHECK-NEXT: mov r2, r7
499511
; CHECK-NEXT: bl frexp
500512
; CHECK-NEXT: vld1.32 {d16[0]}, [r6:32]
513+
; CHECK-NEXT: @ fake_use: $r0
514+
; CHECK-NEXT: @ fake_use: $r0
501515
; CHECK-NEXT: vld1.32 {d16[1]}, [r7:32]
502516
; CHECK-NEXT: vmov r0, r1, d16
503517
; CHECK-NEXT: add sp, #12

llvm/test/CodeGen/ARM/llvm.modf.ll

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ define half @test_modf_f16_only_use_integral_part(half %a) {
7575
; CHECK-NEXT: bl __gnu_h2f_ieee
7676
; CHECK-NEXT: add r1, sp, #4
7777
; CHECK-NEXT: bl modff
78+
; CHECK-NEXT: @ fake_use: $r0
7879
; CHECK-NEXT: ldr r0, [sp, #4]
7980
; CHECK-NEXT: bl __gnu_f2h_ieee
8081
; CHECK-NEXT: add sp, #8
@@ -390,6 +391,7 @@ define double @test_modf_f64_only_use_intergral(double %a) {
390391
; CHECK-NEXT: sub sp, #8
391392
; CHECK-NEXT: mov r2, sp
392393
; CHECK-NEXT: bl modf
394+
; CHECK-NEXT: @ fake_use: $r0
393395
; CHECK-NEXT: ldrd r0, r1, [sp], #8
394396
; CHECK-NEXT: pop {r7, pc}
395397
;
@@ -462,6 +464,7 @@ define double @test_modf_f64_only_use_intergral_tail_call(double %a) {
462464
; CHECK-NEXT: sub sp, #8
463465
; CHECK-NEXT: mov r2, sp
464466
; CHECK-NEXT: bl modf
467+
; CHECK-NEXT: @ fake_use: $r0
465468
; CHECK-NEXT: ldrd r0, r1, [sp], #8
466469
; CHECK-NEXT: pop {r7, pc}
467470
;

llvm/test/CodeGen/PowerPC/llvm.frexp.ll

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ define i32 @test_frexp_f16_i32_only_use_exp(half %a) {
6868
; CHECK-NEXT: xscvhpdp f1, f0
6969
; CHECK-NEXT: bl frexpf
7070
; CHECK-NEXT: nop
71+
; CHECK-NEXT: # fake_use: $x3
7172
; CHECK-NEXT: lwz r3, 44(r1)
7273
; CHECK-NEXT: addi r1, r1, 48
7374
; CHECK-NEXT: ld r0, 16(r1)
@@ -207,6 +208,8 @@ define <2 x i32> @test_frexp_v2f16_v2i32_only_use_exp(<2 x half> %a) {
207208
; CHECK-NEXT: mr r4, r29
208209
; CHECK-NEXT: bl frexpf
209210
; CHECK-NEXT: nop
211+
; CHECK-NEXT: # fake_use: $x3
212+
; CHECK-NEXT: # fake_use: $x3
210213
; CHECK-NEXT: lfiwzx f0, 0, r30
211214
; CHECK-NEXT: lfiwzx f1, 0, r29
212215
; CHECK-NEXT: xxmrghw v2, vs1, vs0
@@ -273,6 +276,7 @@ define i32 @test_frexp_f32_i32_only_use_exp(float %a) {
273276
; CHECK-NEXT: addi r4, r1, 44
274277
; CHECK-NEXT: bl frexpf
275278
; CHECK-NEXT: nop
279+
; CHECK-NEXT: # fake_use: $x3
276280
; CHECK-NEXT: lwz r3, 44(r1)
277281
; CHECK-NEXT: addi r1, r1, 48
278282
; CHECK-NEXT: ld r0, 16(r1)
@@ -352,6 +356,7 @@ define i32 @test_frexp_f64_i32_only_use_exp(double %a) {
352356
; CHECK-NEXT: addi r4, r1, 44
353357
; CHECK-NEXT: bl frexp
354358
; CHECK-NEXT: nop
359+
; CHECK-NEXT: # fake_use: $x3
355360
; CHECK-NEXT: lwz r3, 44(r1)
356361
; CHECK-NEXT: addi r1, r1, 48
357362
; CHECK-NEXT: ld r0, 16(r1)

llvm/test/CodeGen/PowerPC/llvm.modf.ll

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ define half @test_modf_f16_only_use_integral_part(half %a) {
6868
; CHECK-NEXT: xscvhpdp f1, f0
6969
; CHECK-NEXT: bl modff
7070
; CHECK-NEXT: nop
71+
; CHECK-NEXT: # fake_use: $x3
7172
; CHECK-NEXT: lfs f1, 44(r1)
7273
; CHECK-NEXT: addi r1, r1, 48
7374
; CHECK-NEXT: ld r0, 16(r1)
@@ -340,6 +341,7 @@ define ppc_fp128 @test_modf_ppcf128_only_use_intergral(ppc_fp128 %a) {
340341
; CHECK-NEXT: addi r5, r1, 32
341342
; CHECK-NEXT: bl modfl
342343
; CHECK-NEXT: nop
344+
; CHECK-NEXT: # fake_use: $x3
343345
; CHECK-NEXT: lfd f1, 32(r1)
344346
; CHECK-NEXT: lfd f2, 40(r1)
345347
; CHECK-NEXT: addi r1, r1, 48
@@ -403,6 +405,7 @@ define ppc_fp128 @test_modf_ppcf128_only_use_intergral_tail_call(ppc_fp128 %a) {
403405
; CHECK-NEXT: addi r5, r1, 32
404406
; CHECK-NEXT: bl modfl
405407
; CHECK-NEXT: nop
408+
; CHECK-NEXT: # fake_use: $x3
406409
; CHECK-NEXT: lfd f1, 32(r1)
407410
; CHECK-NEXT: lfd f2, 40(r1)
408411
; CHECK-NEXT: addi r1, r1, 48

0 commit comments

Comments
 (0)