|
| 1 | +From 1a4b58b91db2713e5bacf8a7f51c6e825cc2e828 Mon Sep 17 00:00:00 2001 |
| 2 | +From: Alexey Sachkov < [email protected]> |
| 3 | +Date: Fri, 7 Dec 2018 18:20:49 +0300 |
| 4 | +Subject: [PATCH] Fix disabling of fp-contract |
| 5 | + |
| 6 | +There were test cases where contraction was enabled, but the SPIR-V |
| 7 | +Translator set it to off. |
| 8 | + |
| 9 | +The root cause is the poor algorithm for detecting 'disabled fp-contract'. |
| 10 | +This patch doesn't introduce a good and reliable algorithm; it only |
| 11 | +provides a workaround. |
| 12 | +--- |
| 13 | + lib/SPIRV/OCLUtil.cpp | 7 ++----- |
| 14 | + lib/SPIRV/SPIRVWriter.cpp | 9 ++++++++- |
| 15 | + lib/SPIRV/libSPIRV/SPIRVFunction.h | 17 +++++++++++++++++ |
| 16 | + test/ContractionOff.ll | 30 ++++++++++++++++++++++++------ |
| 17 | + 4 files changed, 51 insertions(+), 12 deletions(-) |
| 18 | + |
| 19 | +diff --git a/lib/SPIRV/OCLUtil.cpp b/lib/SPIRV/OCLUtil.cpp |
| 20 | +index e7d0312..eeba201 100644 |
| 21 | +--- a/lib/SPIRV/OCLUtil.cpp |
| 22 | ++++ b/lib/SPIRV/OCLUtil.cpp |
| 23 | +@@ -698,11 +698,8 @@ void checkFpContract(BinaryOperator *B, SPIRVBasicBlock *BB) { |
| 24 | + if (auto *I = dyn_cast<Instruction>(Op)) { |
| 25 | + if (I->getOpcode() == Instruction::FMul) { |
| 26 | + SPIRVFunction *BF = BB->getParent(); |
| 27 | +- if (BB->getModule()->isEntryPoint(ExecutionModelKernel, BF->getId())) { |
| 28 | +- BF->addExecutionMode(BB->getModule()->add( |
| 29 | +- new SPIRVExecutionMode(BF, spv::ExecutionModeContractionOff))); |
| 30 | +- break; |
| 31 | +- } |
| 32 | ++ BF->setUncontractedFMulAddFound(); |
| 33 | ++ break; |
| 34 | + } |
| 35 | + } |
| 36 | + } |
| 37 | +diff --git a/lib/SPIRV/SPIRVWriter.cpp b/lib/SPIRV/SPIRVWriter.cpp |
| 38 | +index 158ef4d..0677085 100644 |
| 39 | +--- a/lib/SPIRV/SPIRVWriter.cpp |
| 40 | ++++ b/lib/SPIRV/SPIRVWriter.cpp |
| 41 | +@@ -1115,6 +1115,7 @@ SPIRVValue *LLVMToSPIRV::transIntrinsicInst(IntrinsicInst *II, |
| 42 | + // For llvm.fmuladd.* fusion is not guaranteed. If a fused multiply-add |
| 43 | + // is required the corresponding llvm.fma.* intrinsic function should be |
| 44 | + // used instead. |
| 45 | ++ BB->getParent()->setContractedFMulAddFound(); |
| 46 | + SPIRVType *Ty = transType(II->getType()); |
| 47 | + SPIRVValue *Mul = |
| 48 | + BM->addBinaryInst(OpFMul, Ty, transValue(II->getArgOperand(0), BB), |
| 49 | +@@ -1320,7 +1321,7 @@ void LLVMToSPIRV::mutateFuncArgType( |
| 50 | + } |
| 51 | + |
| 52 | + void LLVMToSPIRV::transFunction(Function *I) { |
| 53 | +- transFunctionDecl(I); |
| 54 | ++ SPIRVFunction *BF = transFunctionDecl(I); |
| 55 | + // Creating all basic blocks before creating any instruction. |
| 56 | + for (auto &FI : *I) { |
| 57 | + transValue(&FI, nullptr); |
| 58 | +@@ -1332,6 +1333,12 @@ void LLVMToSPIRV::transFunction(Function *I) { |
| 59 | + transValue(&BI, BB, false); |
| 60 | + } |
| 61 | + } |
| 62 | ++ |
| 63 | ++ if (BF->getModule()->isEntryPoint(spv::ExecutionModelKernel, BF->getId()) && |
| 64 | ++ BF->shouldFPContractBeDisabled()) { |
| 65 | ++ BF->addExecutionMode(BF->getModule()->add( |
| 66 | ++ new SPIRVExecutionMode(BF, spv::ExecutionModeContractionOff))); |
| 67 | ++ } |
| 68 | + } |
| 69 | + |
| 70 | + bool LLVMToSPIRV::translate() { |
| 71 | +diff --git a/lib/SPIRV/libSPIRV/SPIRVFunction.h b/lib/SPIRV/libSPIRV/SPIRVFunction.h |
| 72 | +index d4af072..74429ff 100644 |
| 73 | +--- a/lib/SPIRV/libSPIRV/SPIRVFunction.h |
| 74 | ++++ b/lib/SPIRV/libSPIRV/SPIRVFunction.h |
| 75 | +@@ -135,6 +135,20 @@ public: |
| 76 | + assert(FuncType && "Invalid func type"); |
| 77 | + } |
| 78 | + |
| 79 | ++ bool shouldFPContractBeDisabled() const { |
| 80 | ++ // We could find some instructions in LLVM IR which look exactly like an |
| 81 | ++ // unfused fmuladd. So, we assume that FP_CONTRACT was disabled only if |
| 82 | ++ // there are no calls to @llvm.fmuladd.* |
| 83 | ++ return FoundUncontractedFMulAdd && !FoundContractedFMulAdd; |
| 84 | ++ } |
| 85 | ++ |
| 86 | ++ void setUncontractedFMulAddFound(bool Value = true) { |
| 87 | ++ FoundUncontractedFMulAdd = Value; |
| 88 | ++ } |
| 89 | ++ |
| 90 | ++ void setContractedFMulAddFound(bool Value = true) { |
| 91 | ++ FoundContractedFMulAdd = Value; |
| 92 | ++ } |
| 93 | + private: |
| 94 | + SPIRVFunctionParameter *addArgument(unsigned TheArgNo, SPIRVId TheId) { |
| 95 | + SPIRVFunctionParameter *Arg = new SPIRVFunctionParameter( |
| 96 | +@@ -156,6 +170,9 @@ private: |
| 97 | + std::vector<SPIRVFunctionParameter *> Parameters; |
| 98 | + typedef std::vector<SPIRVBasicBlock *> SPIRVLBasicBlockVector; |
| 99 | + SPIRVLBasicBlockVector BBVec; |
| 100 | ++ |
| 101 | ++ bool FoundUncontractedFMulAdd = false; |
| 102 | ++ bool FoundContractedFMulAdd = false; |
| 103 | + }; |
| 104 | + |
| 105 | + typedef SPIRVEntryOpCodeOnly<OpFunctionEnd> SPIRVFunctionEnd; |
| 106 | +diff --git a/test/ContractionOff.ll b/test/ContractionOff.ll |
| 107 | +index 716b750..692e88d 100644 |
| 108 | +--- a/test/ContractionOff.ll |
| 109 | ++++ b/test/ContractionOff.ll |
| 110 | +@@ -7,19 +7,28 @@ |
| 111 | + ; void kernel k2 (float a, float b, float c) { |
| 112 | + ; float d = a * b + c; |
| 113 | + ; } |
| 114 | ++; |
| 115 | ++; void kernel k3 (float a, float b, float c) { |
| 116 | ++; float d = a * b; // a * b together with -d in the next statement look |
| 117 | ++; float e = a * c - d; // exactly like an unfused fmuladd in LLVM IR |
| 118 | ++; } |
| 119 | ++; |
| 120 | ++; IR generated using the following commands: |
| 121 | ++; clang -cc1 -x cl -emit-llvm -O2 -disable-llvm-passes -triple spir64 1.cl -o 1.ll |
| 122 | ++; opt -mem2reg 1.ll -S -o 1.o.ll |
| 123 | + |
| 124 | + ; RUN: llvm-as < %s | llvm-spirv -spirv-text -o %t |
| 125 | + ; RUN: FileCheck < %t %s |
| 126 | + |
| 127 | + ; CHECK: EntryPoint 6 [[K1:[0-9]+]] "k1" |
| 128 | + ; CHECK: EntryPoint 6 [[K2:[0-9]+]] "k2" |
| 129 | ++; CHECK: EntryPoint 6 [[K3:[0-9]+]] "k3" |
| 130 | + ; CHECK: ExecutionMode [[K1]] 31 |
| 131 | + ; CHECK-NOT: ExecutionMode [[K2]] 31 |
| 132 | ++; CHECK-NOT: ExecutionMode [[K3]] 31 |
| 133 | + |
| 134 | +-;ModuleID = '<stdin>' |
| 135 | +-source_filename = "/tmp/tmp.cl" |
| 136 | +-target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" |
| 137 | +-target triple = "spir" |
| 138 | ++target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" |
| 139 | ++target triple = "spir64" |
| 140 | + |
| 141 | + ; Function Attrs: convergent nounwind |
| 142 | + define spir_kernel void @k1(float %a, float %b, float %c) #0 !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 { |
| 143 | +@@ -39,7 +48,16 @@ entry: |
| 144 | + ; Function Attrs: nounwind readnone speculatable |
| 145 | + declare float @llvm.fmuladd.f32(float, float, float) #2 |
| 146 | + |
| 147 | +-attributes #0 = { convergent nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } |
| 148 | ++; Function Attrs: convergent nounwind |
| 149 | ++define spir_kernel void @k3(float %a, float %b, float %c) #0 !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 { |
| 150 | ++entry: |
| 151 | ++ %mul = fmul float %a, %b |
| 152 | ++ %neg = fsub float -0.000000e+00, %mul |
| 153 | ++ %0 = call float @llvm.fmuladd.f32(float %a, float %c, float %neg) |
| 154 | ++ ret void |
| 155 | ++} |
| 156 | ++ |
| 157 | ++attributes #0 = { convergent nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } |
| 158 | + attributes #1 = { argmemonly nounwind } |
| 159 | + attributes #2 = { nounwind readnone speculatable } |
| 160 | + |
| 161 | +@@ -51,7 +69,7 @@ attributes #2 = { nounwind readnone speculatable } |
| 162 | + !0 = !{i32 1, !"wchar_size", i32 4} |
| 163 | + !1 = !{i32 1, i32 0} |
| 164 | + !2 = !{i32 1, i32 2} |
| 165 | +-!3 = !{!"clang version 8.0.0"} |
| 166 | ++!3 = !{!"clang version 7.0.1"} |
| 167 | + !4 = !{i32 0, i32 0, i32 0} |
| 168 | + !5 = !{!"none", !"none", !"none"} |
| 169 | + !6 = !{!"float", !"float", !"float"} |
| 170 | +-- |
| 171 | +2.7.4 |
| 172 | + |
0 commit comments