Skip to content

Commit 22c952f

Browse files
committed
Remove UnsafeFPMath in AMDGPULegalizerInfo.cpp
1 parent 1ca40ff commit 22c952f

File tree

3 files changed

+979
-297
lines changed

3 files changed

+979
-297
lines changed

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 4 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -3344,7 +3344,7 @@ static bool allowApproxFunc(const MachineFunction &MF, unsigned Flags) {
33443344
if (Flags & MachineInstr::FmAfn)
33453345
return true;
33463346
const auto &Options = MF.getTarget().Options;
3347-
return Options.UnsafeFPMath || Options.ApproxFuncFPMath;
3347+
return Options.ApproxFuncFPMath;
33483348
}
33493349

33503350
static bool needsDenormHandlingF32(const MachineFunction &MF, Register Src,
@@ -3450,7 +3450,7 @@ bool AMDGPULegalizerInfo::legalizeFlogCommon(MachineInstr &MI,
34503450
static_cast<const AMDGPUTargetMachine &>(MF.getTarget());
34513451

34523452
if (Ty == F16 || MI.getFlag(MachineInstr::FmAfn) ||
3453-
TM.Options.ApproxFuncFPMath || TM.Options.UnsafeFPMath) {
3453+
TM.Options.ApproxFuncFPMath) {
34543454
if (Ty == F16 && !ST.has16BitInsts()) {
34553455
Register LogVal = MRI.createGenericVirtualRegister(F32);
34563456
auto PromoteSrc = B.buildFPExt(F32, X);
@@ -4877,9 +4877,7 @@ bool AMDGPULegalizerInfo::legalizeFastUnsafeFDIV(MachineInstr &MI,
48774877
uint16_t Flags = MI.getFlags();
48784878
LLT ResTy = MRI.getType(Res);
48794879

4880-
const MachineFunction &MF = B.getMF();
4881-
bool AllowInaccurateRcp = MI.getFlag(MachineInstr::FmAfn) ||
4882-
MF.getTarget().Options.UnsafeFPMath;
4880+
bool AllowInaccurateRcp = MI.getFlag(MachineInstr::FmAfn);
48834881

48844882
if (const auto *CLHS = getConstantFPVRegVal(LHS, MRI)) {
48854883
if (!AllowInaccurateRcp && ResTy != LLT::scalar(16))
@@ -4939,9 +4937,7 @@ bool AMDGPULegalizerInfo::legalizeFastUnsafeFDIV64(MachineInstr &MI,
49394937
uint16_t Flags = MI.getFlags();
49404938
LLT ResTy = MRI.getType(Res);
49414939

4942-
const MachineFunction &MF = B.getMF();
4943-
bool AllowInaccurateRcp = MF.getTarget().Options.UnsafeFPMath ||
4944-
MI.getFlag(MachineInstr::FmAfn);
4940+
bool AllowInaccurateRcp = MI.getFlag(MachineInstr::FmAfn);
49454941

49464942
if (!AllowInaccurateRcp)
49474943
return false;

llvm/test/CodeGen/AMDGPU/GlobalISel/frem.ll

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -157,7 +157,7 @@ define amdgpu_kernel void @unsafe_frem_f16(ptr addrspace(1) %out, ptr addrspace(
157157
%gep2 = getelementptr half, ptr addrspace(1) %in2, i32 4
158158
%r0 = load half, ptr addrspace(1) %in1, align 4
159159
%r1 = load half, ptr addrspace(1) %gep2, align 4
160-
%r2 = frem half %r0, %r1
160+
%r2 = frem afn half %r0, %r1
161161
store half %r2, ptr addrspace(1) %out, align 4
162162
ret void
163163
}
@@ -311,7 +311,7 @@ define amdgpu_kernel void @unsafe_frem_f32(ptr addrspace(1) %out, ptr addrspace(
311311
%gep2 = getelementptr float, ptr addrspace(1) %in2, i32 4
312312
%r0 = load float, ptr addrspace(1) %in1, align 4
313313
%r1 = load float, ptr addrspace(1) %gep2, align 4
314-
%r2 = frem float %r0, %r1
314+
%r2 = frem afn float %r0, %r1
315315
store float %r2, ptr addrspace(1) %out, align 4
316316
ret void
317317
}
@@ -489,7 +489,7 @@ define amdgpu_kernel void @unsafe_frem_f64(ptr addrspace(1) %out, ptr addrspace(
489489
ptr addrspace(1) %in2) #1 {
490490
%r0 = load double, ptr addrspace(1) %in1, align 8
491491
%r1 = load double, ptr addrspace(1) %in2, align 8
492-
%r2 = frem double %r0, %r1
492+
%r2 = frem afn double %r0, %r1
493493
store double %r2, ptr addrspace(1) %out, align 8
494494
ret void
495495
}
@@ -1140,5 +1140,5 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
11401140
ret void
11411141
}
11421142

1143-
attributes #0 = { nounwind "unsafe-fp-math"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
1144-
attributes #1 = { nounwind "unsafe-fp-math"="true" "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
1143+
attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
1144+
attributes #1 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }

0 commit comments

Comments
 (0)