AMDGPU: Fix fast math log2 f32

arsenm · tru · commit 1ba6abce1f55 · 2023-08-21T09:54:24.000+02:00
Apparently, according to OpenCL conformance, the afn flag does not permit dropping the denormal handling. This was previously hidden because the flags were lost during the library linking process. Fast log is still broken and needs more work. Review: https://reviews.llvm.org/D157936 (cherry picked from commit e09b359)
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
@@ -980,8 +980,7 @@ The AMDGPU backend implements the following LLVM IR intrinsics.
                                              half). Not implemented for double. Hardware provides
                                              1ULP accuracy for float, and 0.51ULP for half. Float
                                              instruction does not natively support denormal
-                                             inputs. Backend will optimize out denormal scaling if
-                                             marked with the :ref:`afn <fastmath_afn>` flag.
+                                             inputs.
 
   :ref:`llvm.sqrt <int_sqrt>`                Implemented for double, float and half (and vectors).
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -2528,7 +2528,7 @@ SDValue AMDGPUTargetLowering::getIsFinite(SelectionDAG &DAG, SDValue Src,
 std::pair<SDValue, SDValue>
 AMDGPUTargetLowering::getScaledLogInput(SelectionDAG &DAG, const SDLoc SL,
                                         SDValue Src, SDNodeFlags Flags) const {
-  if (allowApproxFunc(DAG, Flags) || !needsDenormHandlingF32(DAG, Src, Flags))
+  if (!needsDenormHandlingF32(DAG, Src, Flags))
     return {};
 
   MVT VT = MVT::f32;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -3037,8 +3037,7 @@ static bool needsDenormHandlingF32(const MachineFunction &MF, Register Src,
 std::pair<Register, Register>
 AMDGPULegalizerInfo::getScaledLogInput(MachineIRBuilder &B, Register Src,
                                        unsigned Flags) const {
-  if (allowApproxFunc(B.getMF(), Flags) ||
-      !needsDenormHandlingF32(B.getMF(), Src, Flags))
+  if (!needsDenormHandlingF32(B.getMF(), Src, Flags))
     return {};
 
   const LLT F32 = LLT::scalar(32);
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log2.ll b/llvm/test/CodeGen/AMDGPU/llvm.log2.ll