Add patch to fix fp-contract handling in SPIR-V Translator

AlexeySachkov · AlexeySotkin · commit af1524662dc1 · 2018-12-15T14:18:40.000+03:00
diff --git a/patches/spirv/0002-Fix-disabling-of-fp-contract.patch b/patches/spirv/0002-Fix-disabling-of-fp-contract.patch
@@ -0,0 +1,172 @@
+From 1a4b58b91db2713e5bacf8a7f51c6e825cc2e828 Mon Sep 17 00:00:00 2001
+From: Alexey Sachkov <alexey.sachkov@intel.com>
+Date: Fri, 7 Dec 2018 18:20:49 +0300
+Subject: [PATCH] Fix disabling of fp-contract
+
+There were a test cases where contraction was enabled, but SPIR-V
+Translator set it to off.
+
+The root casue is in poor algorithm of 'disabled fp-contract' detection.
+This patch doesn't bring the good and reliable algorithm and only
+provides a workaround.
+---
+ lib/SPIRV/OCLUtil.cpp              |  7 ++-----
+ lib/SPIRV/SPIRVWriter.cpp          |  9 ++++++++-
+ lib/SPIRV/libSPIRV/SPIRVFunction.h | 17 +++++++++++++++++
+ test/ContractionOff.ll             | 30 ++++++++++++++++++++++++------
+ 4 files changed, 51 insertions(+), 12 deletions(-)
+
+diff --git a/lib/SPIRV/OCLUtil.cpp b/lib/SPIRV/OCLUtil.cpp
+index e7d0312..eeba201 100644
+--- a/lib/SPIRV/OCLUtil.cpp
++++ b/lib/SPIRV/OCLUtil.cpp
+@@ -698,11 +698,8 @@ void checkFpContract(BinaryOperator *B, SPIRVBasicBlock *BB) {
+     if (auto *I = dyn_cast<Instruction>(Op)) {
+       if (I->getOpcode() == Instruction::FMul) {
+         SPIRVFunction *BF = BB->getParent();
+-        if (BB->getModule()->isEntryPoint(ExecutionModelKernel, BF->getId())) {
+-          BF->addExecutionMode(BB->getModule()->add(
+-              new SPIRVExecutionMode(BF, spv::ExecutionModeContractionOff)));
+-          break;
+-        }
++        BF->setUncontractedFMulAddFound();
++        break;
+       }
+     }
+   }
+diff --git a/lib/SPIRV/SPIRVWriter.cpp b/lib/SPIRV/SPIRVWriter.cpp
+index 158ef4d..0677085 100644
+--- a/lib/SPIRV/SPIRVWriter.cpp
++++ b/lib/SPIRV/SPIRVWriter.cpp
+@@ -1115,6 +1115,7 @@ SPIRVValue *LLVMToSPIRV::transIntrinsicInst(IntrinsicInst *II,
+     // For llvm.fmuladd.* fusion is not guaranteed. If a fused multiply-add
+     // is required the corresponding llvm.fma.* intrinsic function should be
+     // used instead.
++    BB->getParent()->setContractedFMulAddFound();
+     SPIRVType *Ty = transType(II->getType());
+     SPIRVValue *Mul =
+         BM->addBinaryInst(OpFMul, Ty, transValue(II->getArgOperand(0), BB),
+@@ -1320,7 +1321,7 @@ void LLVMToSPIRV::mutateFuncArgType(
+ }
+ 
+ void LLVMToSPIRV::transFunction(Function *I) {
+-  transFunctionDecl(I);
++  SPIRVFunction *BF = transFunctionDecl(I);
+   // Creating all basic blocks before creating any instruction.
+   for (auto &FI : *I) {
+     transValue(&FI, nullptr);
+@@ -1332,6 +1333,12 @@ void LLVMToSPIRV::transFunction(Function *I) {
+       transValue(&BI, BB, false);
+     }
+   }
++
++  if (BF->getModule()->isEntryPoint(spv::ExecutionModelKernel, BF->getId()) &&
++      BF->shouldFPContractBeDisabled()) {
++    BF->addExecutionMode(BF->getModule()->add(
++        new SPIRVExecutionMode(BF, spv::ExecutionModeContractionOff)));
++  }
+ }
+ 
+ bool LLVMToSPIRV::translate() {
+diff --git a/lib/SPIRV/libSPIRV/SPIRVFunction.h b/lib/SPIRV/libSPIRV/SPIRVFunction.h
+index d4af072..74429ff 100644
+--- a/lib/SPIRV/libSPIRV/SPIRVFunction.h
++++ b/lib/SPIRV/libSPIRV/SPIRVFunction.h
+@@ -135,6 +135,20 @@ public:
+     assert(FuncType && "Invalid func type");
+   }
+ 
++  bool shouldFPContractBeDisabled() const {
++    // We could find some instructions in LLVM IR which look exactly like an
++    // unfused fmuladd. So, we assume that FP_CONTRACT was disabled only if
++    // there are no calls to @llvm.fmuladd.*
++    return FoundUncontractedFMulAdd && !FoundContractedFMulAdd;
++  }
++
++  void setUncontractedFMulAddFound(bool Value = true) {
++    FoundUncontractedFMulAdd = Value;
++  }
++
++  void setContractedFMulAddFound(bool Value = true) {
++    FoundContractedFMulAdd = Value;
++  }
+ private:
+   SPIRVFunctionParameter *addArgument(unsigned TheArgNo, SPIRVId TheId) {
+     SPIRVFunctionParameter *Arg = new SPIRVFunctionParameter(
+@@ -156,6 +170,9 @@ private:
+   std::vector<SPIRVFunctionParameter *> Parameters;
+   typedef std::vector<SPIRVBasicBlock *> SPIRVLBasicBlockVector;
+   SPIRVLBasicBlockVector BBVec;
++
++  bool FoundUncontractedFMulAdd = false;
++  bool FoundContractedFMulAdd = false;
+ };
+ 
+ typedef SPIRVEntryOpCodeOnly<OpFunctionEnd> SPIRVFunctionEnd;
+diff --git a/test/ContractionOff.ll b/test/ContractionOff.ll
+index 716b750..692e88d 100644
+--- a/test/ContractionOff.ll
++++ b/test/ContractionOff.ll
+@@ -7,19 +7,28 @@
+ ; void kernel k2 (float a, float b, float c) {
+ ;   float d = a * b + c;
+ ; }
++;
++; void kernel k3 (float a, float b, float c) {
++;   float d = a * b; // a * b together with -d in the next statement look
++;   float e = a * c - d; // exactly like an unfused fmuladd in LLVM IR
++; }
++;
++; IR generated using the following commands:
++; clang -cc1 -x cl -emit-llvm -O2 -disable-llvm-passes -triple spir64 1.cl -o 1.ll
++; opt -mem2reg 1.ll -S -o 1.o.ll
+ 
+ ; RUN: llvm-as < %s | llvm-spirv -spirv-text -o %t
+ ; RUN: FileCheck < %t %s
+ 
+ ; CHECK: EntryPoint 6 [[K1:[0-9]+]] "k1"
+ ; CHECK: EntryPoint 6 [[K2:[0-9]+]] "k2"
++; CHECK: EntryPoint 6 [[K3:[0-9]+]] "k3"
+ ; CHECK: ExecutionMode [[K1]] 31
+ ; CHECK-NOT: ExecutionMode [[K2]] 31
++; CHECK-NOT: ExecutionMode [[K3]] 31
+ 
+-;ModuleID = '<stdin>'
+-source_filename = "/tmp/tmp.cl"
+-target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
+-target triple = "spir"
++target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
++target triple = "spir64"
+ 
+ ; Function Attrs: convergent nounwind
+ define spir_kernel void @k1(float %a, float %b, float %c) #0 !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 {
+@@ -39,7 +48,16 @@ entry:
+ ; Function Attrs: nounwind readnone speculatable
+ declare float @llvm.fmuladd.f32(float, float, float) #2
+ 
+-attributes #0 = { convergent nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" }
++; Function Attrs: convergent nounwind
++define spir_kernel void @k3(float %a, float %b, float %c) #0 !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 {
++entry:
++  %mul = fmul float %a, %b
++  %neg = fsub float -0.000000e+00, %mul
++  %0 = call float @llvm.fmuladd.f32(float %a, float %c, float %neg)
++  ret void
++}
++
++attributes #0 = { convergent nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" }
+ attributes #1 = { argmemonly nounwind }
+ attributes #2 = { nounwind readnone speculatable }
+ 
+@@ -51,7 +69,7 @@ attributes #2 = { nounwind readnone speculatable }
+ !0 = !{i32 1, !"wchar_size", i32 4}
+ !1 = !{i32 1, i32 0}
+ !2 = !{i32 1, i32 2}
+-!3 = !{!"clang version 8.0.0"}
++!3 = !{!"clang version 7.0.1"}
+ !4 = !{i32 0, i32 0, i32 0}
+ !5 = !{!"none", !"none", !"none"}
+ !6 = !{!"float", !"float", !"float"}
+-- 
+2.7.4
+