Skip to content

Commit 1fe1299

Browse files
committed
GlobalISel: Legalize strict_fsub
In the future, we should probably have a more convenient way to switch between building strict and non-strict ops.
1 parent ea1826e commit 1fe1299

File tree

7 files changed

+553
-245
lines changed

7 files changed

+553
-245
lines changed

llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1677,6 +1677,13 @@ class MachineIRBuilder {
16771677
return buildInstr(TargetOpcode::G_FADD, {Dst}, {Src0, Src1}, Flags);
16781678
}
16791679

1680+
/// Build and insert \p Dst = G_STRICT_FADD \p Src0, \p Src1
///
/// \p Flags, when provided, is forwarded unchanged to buildInstr.
///
/// \return a MachineInstrBuilder for the newly created instruction.
1681+
MachineInstrBuilder buildStrictFAdd(const DstOp &Dst, const SrcOp &Src0,
1682+
const SrcOp &Src1,
1683+
Optional<unsigned> Flags = None) {
1684+
return buildInstr(TargetOpcode::G_STRICT_FADD, {Dst}, {Src0, Src1}, Flags);
1685+
}
1686+
16801687
/// Build and insert \p Res = G_FSUB \p Op0, \p Op1
16811688
MachineInstrBuilder buildFSub(const DstOp &Dst, const SrcOp &Src0,
16821689
const SrcOp &Src1,

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3319,7 +3319,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
33193319
MI.eraseFromParent();
33203320
return Legalized;
33213321
}
3322-
case TargetOpcode::G_FSUB: {
3322+
case TargetOpcode::G_FSUB:
3323+
case TargetOpcode::G_STRICT_FSUB: {
33233324
Register Res = MI.getOperand(0).getReg();
33243325
LLT Ty = MRI.getType(Res);
33253326

@@ -3330,9 +3331,13 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
33303331
return UnableToLegalize;
33313332
Register LHS = MI.getOperand(1).getReg();
33323333
Register RHS = MI.getOperand(2).getReg();
3333-
Register Neg = MRI.createGenericVirtualRegister(Ty);
3334-
MIRBuilder.buildFNeg(Neg, RHS);
3335-
MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
3334+
auto Neg = MIRBuilder.buildFNeg(Ty, RHS);
3335+
3336+
if (MI.getOpcode() == TargetOpcode::G_STRICT_FSUB)
3337+
MIRBuilder.buildStrictFAdd(Res, LHS, Neg, MI.getFlags());
3338+
else
3339+
MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
3340+
33363341
MI.eraseFromParent();
33373342
return Legalized;
33383343
}
@@ -4219,6 +4224,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
42194224
case G_SADDE:
42204225
case G_SSUBE:
42214226
case G_STRICT_FADD:
4227+
case G_STRICT_FSUB:
42224228
case G_STRICT_FMUL:
42234229
case G_STRICT_FMA:
42244230
return fewerElementsVectorMultiEltType(GMI, NumElts);

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -795,7 +795,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
795795
.narrowScalarFor({{S64, S16}}, changeTo(0, S32))
796796
.scalarize(0);
797797

798-
auto &FSubActions = getActionDefinitionsBuilder(G_FSUB);
798+
auto &FSubActions = getActionDefinitionsBuilder({G_FSUB, G_STRICT_FSUB});
799799
if (ST.has16BitInsts()) {
800800
FSubActions
801801
// Use actual fsub instruction
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GCN %s
3+
4+
---
5+
name: test_strict_fsub_s64
6+
body: |
7+
bb.0:
8+
liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
9+
10+
; GCN-LABEL: name: test_strict_fsub_s64
11+
; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
12+
; GCN-NEXT: {{ $}}
13+
; GCN-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
14+
; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
15+
; GCN-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY1]]
16+
; GCN-NEXT: [[STRICT_FADD:%[0-9]+]]:_(s64) = G_STRICT_FADD [[COPY]], [[FNEG]]
17+
; GCN-NEXT: $vgpr0_vgpr1 = COPY [[STRICT_FADD]](s64)
18+
%0:_(s64) = COPY $vgpr0_vgpr1
19+
%1:_(s64) = COPY $vgpr2_vgpr3
20+
%2:_(s64) = G_STRICT_FSUB %0, %1
21+
$vgpr0_vgpr1 = COPY %2
22+
...
23+
24+
---
25+
name: test_strict_fsub_v2s16
26+
body: |
27+
bb.0.entry:
28+
liveins: $vgpr0, $vgpr1
29+
30+
; GCN-LABEL: name: test_strict_fsub_v2s16
31+
; GCN: liveins: $vgpr0, $vgpr1
32+
; GCN-NEXT: {{ $}}
33+
; GCN-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
34+
; GCN-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
35+
; GCN-NEXT: [[FNEG:%[0-9]+]]:_(<2 x s16>) = G_FNEG [[COPY1]]
36+
; GCN-NEXT: [[STRICT_FADD:%[0-9]+]]:_(<2 x s16>) = G_STRICT_FADD [[COPY]], [[FNEG]]
37+
; GCN-NEXT: $vgpr0 = COPY [[STRICT_FADD]](<2 x s16>)
38+
%0:_(<2 x s16>) = COPY $vgpr0
39+
%1:_(<2 x s16>) = COPY $vgpr1
40+
%2:_(<2 x s16>) = G_STRICT_FSUB %0, %1
41+
$vgpr0 = COPY %2
42+
...

0 commit comments

Comments
 (0)