1- From 794c7d32e521060190bbfca7c52f0bb4fa49f2f2 Mon Sep 17 00:00:00 2001
1+ From 89e3be862e9a389ba858faa1270f0f8856b37bcf Mon Sep 17 00:00:00 2001
22From: haonanya <
[email protected] >
33Date: Mon, 19 Jul 2021 10:14:20 +0800
44Subject: [PATCH] Add support for cl_ext_float_atomics in SPIRVWriter
55
66Signed-off-by: haonanya <
[email protected] >
7- Signed-off-by: Haonan Yang <
[email protected] >
87---
98 lib/SPIRV/OCL20ToSPIRV.cpp | 26 +++++-
10- lib/SPIRV/OCLUtil.cpp | 19 + ++--
9+ lib/SPIRV/OCLUtil.cpp | 15 ++--
1110 lib/SPIRV/SPIRVToOCL.h | 3 +
1211 lib/SPIRV/SPIRVToOCL12.cpp | 21 +++++
1312 lib/SPIRV/SPIRVToOCL20.cpp | 28 ++++++-
1413 lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h | 1 -
1514 lib/SPIRV/libSPIRV/SPIRVOpCode.h | 8 +-
16- test/AtomicBuiltinsFloat.ll | 79 ++++++++++++++++++
15+ test/AtomicBuiltinsFloat.ll | 94 +++ ++++++++++++++++++
1716 test/AtomicFAddEXTForOCL.ll | 88 ++++++++++++++++++++
1817 test/AtomicFAddExt.ll | 111 ++++++++-----------------
1918 test/AtomicFMaxEXT.ll | 113 +++++++-------------------
2019 test/AtomicFMaxEXTForOCL.ll | 88 ++++++++++++++++++++
2120 test/AtomicFMinEXT.ll | 113 +++++++-------------------
2221 test/AtomicFMinEXTForOCL.ll | 85 +++++++++++++++++++
2322 test/InvalidAtomicBuiltins.cl | 16 ----
24- 15 files changed, 526 insertions(+), 273 deletions(-)
23+ 15 files changed, 539 insertions(+), 271 deletions(-)
2524 create mode 100644 test/AtomicBuiltinsFloat.ll
2625 create mode 100644 test/AtomicFAddEXTForOCL.ll
2726 create mode 100644 test/AtomicFMaxEXTForOCL.ll
2827 create mode 100644 test/AtomicFMinEXTForOCL.ll
2928
3029diff --git a/lib/SPIRV/OCL20ToSPIRV.cpp b/lib/SPIRV/OCL20ToSPIRV.cpp
31- index e30aa5be..98d4289e 100644
30+ index e30aa5be..79323de2 100644
3231--- a/lib/SPIRV/OCL20ToSPIRV.cpp
3332+++ b/lib/SPIRV/OCL20ToSPIRV.cpp
3433@@ -407,7 +407,6 @@ void OCL20ToSPIRV::visitCallInst(CallInst &CI) {
@@ -53,7 +52,7 @@ index e30aa5be..98d4289e 100644
5352 Args.end() - Offset);
5453 }
5554- return getSPIRVFuncName(OCLSPIRVBuiltinMap::map(Info.UniqName));
56- + llvm::Type* AtomicBuiltinsReturnType =
55+ + llvm::Type* AtomicBuiltinsReturnType =
5756+ CI->getCalledFunction()->getReturnType();
5857+ auto IsFPType = [](llvm::Type *ReturnType) {
5958+ return ReturnType->isHalfTy() || ReturnType->isFloatTy() ||
@@ -79,30 +78,28 @@ index e30aa5be..98d4289e 100644
7978 &Attrs);
8079 }
8180diff --git a/lib/SPIRV/OCLUtil.cpp b/lib/SPIRV/OCLUtil.cpp
82- index 992f173f..539c196c 100644
81+ index 992f173f..8ae882c6 100644
8382--- a/lib/SPIRV/OCLUtil.cpp
8483+++ b/lib/SPIRV/OCLUtil.cpp
8584@@ -120,29 +120,32 @@ size_t getSPIRVAtomicBuiltinNumMemoryOrderArgs(Op OC) {
8685 return 1;
8786 }
8887
89- + // atomic_fetch_[add, sub, min, max] and atomic_fetch_[add, sub, min,
90- + // max]_explicit functions are defined on OpenCL headers, they are not
91- + // translated to function call
88+ + // atomic_fetch_[add, min, max] and atomic_fetch_[add, min, max]_explicit
89+ + // functions declared in clang headers should be translated to corresponding
90+ + // FP-typed Atomic Instructions
9291 bool isComputeAtomicOCLBuiltin(StringRef DemangledName) {
9392 if (!DemangledName.startswith(kOCLBuiltinName::AtomicPrefix) &&
9493 !DemangledName.startswith(kOCLBuiltinName::AtomPrefix))
9594 return false;
9695
9796 return llvm::StringSwitch<bool>(DemangledName)
9897- .EndsWith("add", true)
99- - .EndsWith("sub", true)
98+ .EndsWith("sub", true)
10099+ .EndsWith("atomic_add", true)
101- + .EndsWith("atomic_sub", true)
102100+ .EndsWith("atomic_min", true)
103101+ .EndsWith("atomic_max", true)
104102+ .EndsWith("atom_add", true)
105- + .EndsWith("atom_sub", true)
106103+ .EndsWith("atom_min", true)
107104+ .EndsWith("atom_max", true)
108105 .EndsWith("inc", true)
@@ -114,7 +111,7 @@ index 992f173f..539c196c 100644
114111 .EndsWith("or", true)
115112 .EndsWith("xor", true)
116113- .EndsWith("add_explicit", true)
117- - .EndsWith("sub_explicit", true)
114+ .EndsWith("sub_explicit", true)
118115 .EndsWith("or_explicit", true)
119116 .EndsWith("xor_explicit", true)
120117 .EndsWith("and_explicit", true)
@@ -184,7 +181,7 @@ index 1a62c6b8..dc0ba9cc 100644
184181 }
185182
186183diff --git a/lib/SPIRV/SPIRVToOCL20.cpp b/lib/SPIRV/SPIRVToOCL20.cpp
187- index 8c437858..0033b853 100644
184+ index 8c437858..07612ce6 100644
188185--- a/lib/SPIRV/SPIRVToOCL20.cpp
189186+++ b/lib/SPIRV/SPIRVToOCL20.cpp
190187@@ -82,6 +82,9 @@ public:
@@ -232,8 +229,8 @@ index 8c437858..0033b853 100644
232229 }
233230 auto Ptr = findFirstPtr(Args);
234231- auto Name = OCLSPIRVBuiltinMap::rmap(OC);
235- + std::string Name;
236- + // Map fp atomic instructions to regular OpenCL built-ins.
232+ + std::string Name;
233+ + // Map fp atomic instructions to regular OpenCL built-ins.
237234+ if (isFPAtomicOpCode(OC))
238235+ Name = mapFPAtomicName(OC);
239236+ else
@@ -278,11 +275,13 @@ index feec70f6..8e595e83 100644
278275 return ((unsigned)OpCode >= OpIAdd && (unsigned)OpCode <= OpFMod) ||
279276diff --git a/test/AtomicBuiltinsFloat.ll b/test/AtomicBuiltinsFloat.ll
280277new file mode 100644
281- index 00000000..18ee1c86
278+ index 00000000..b688cb2a
282279--- /dev/null
283280+++ b/test/AtomicBuiltinsFloat.ll
284- @@ -0,0 +1,79 @@
281+ @@ -0,0 +1,94 @@
285282+ ; Check that translator generate atomic instructions for atomic builtins
283+ + ; FP-typed atomic_fetch_sub and atomic_fetch_sub_explicit should be translated
284+ + ; to FunctionCall
286285+ ; RUN: llvm-as %s -o %t.bc
287286+ ; RUN: llvm-spirv %t.bc -spirv-text -o - | FileCheck %s
288287+ ; RUN: llvm-spirv %t.bc -o %t.spv
@@ -293,6 +292,7 @@ index 00000000..18ee1c86
293292+ ; CHECK-COUNT-3: AtomicStore
294293+ ; CHECK-COUNT-3: AtomicLoad
295294+ ; CHECK-COUNT-3: AtomicExchange
295+ + ; CHECK-COUNT-3: FunctionCall
296296+
297297+ target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
298298+ target triple = "spir-unknown-unknown"
@@ -311,6 +311,9 @@ index 00000000..18ee1c86
311311+ %call3 = tail call spir_func float @_Z15atomic_exchangePU3AS4VU7_Atomicff(float addrspace(4)* %0, float 1.000000e+00) #2
312312+ %call4 = tail call spir_func float @_Z24atomic_exchange_explicitPU3AS4VU7_Atomicff12memory_order(float addrspace(4)* %0, float 1.000000e+00, i32 0) #2
313313+ %call5 = tail call spir_func float @_Z24atomic_exchange_explicitPU3AS4VU7_Atomicff12memory_order12memory_scope(float addrspace(4)* %0, float 1.000000e+00, i32 0, i32 1) #2
314+ + %call6 = tail call spir_func float @_Z16atomic_fetch_subPU3AS3VU7_Atomicff(float addrspace(3)* %ff, float 1.000000e+00) #2
315+ + %call7 = tail call spir_func float @_Z25atomic_fetch_sub_explicitPU3AS3VU7_Atomicff12memory_order(float addrspace(3)* %ff, float 1.000000e+00, i32 0) #2
316+ + %call8 = tail call spir_func float @_Z25atomic_fetch_sub_explicitPU3AS3VU7_Atomicff12memory_order12memory_scope(float addrspace(3)* %ff, float 1.000000e+00, i32 0, i32 1) #2
314317+ ret void
315318+ }
316319+
@@ -344,6 +347,15 @@ index 00000000..18ee1c86
344347+ ; Function Attrs: convergent
345348+ declare spir_func float @_Z24atomic_exchange_explicitPU3AS4VU7_Atomicff12memory_order12memory_scope(float addrspace(4)*, float, i32, i32) local_unnamed_addr #1
346349+
350+ + ; Function Attrs: convergent
351+ + declare spir_func float @_Z16atomic_fetch_subPU3AS3VU7_Atomicff(float addrspace(3)*, float) local_unnamed_addr #1
352+ +
353+ + ; Function Attrs: convergent
354+ + declare spir_func float @_Z25atomic_fetch_sub_explicitPU3AS3VU7_Atomicff12memory_order(float addrspace(3)*, float, i32) local_unnamed_addr #1
355+ +
356+ + ; Function Attrs: convergent
357+ + declare spir_func float @_Z25atomic_fetch_sub_explicitPU3AS3VU7_Atomicff12memory_order12memory_scope(float addrspace(3)*, float, i32, i32) local_unnamed_addr #1
358+ +
347359+ attributes #0 = { convergent nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
348360+ attributes #1 = { convergent "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
349361+ attributes #2 = { convergent nounwind }
@@ -355,7 +367,7 @@ index 00000000..18ee1c86
355367+
356368+ !0 = !{i32 1, !"wchar_size", i32 4}
357369+ !1 = !{i32 2, i32 0}
358- + !2 = !{!"clang version 10.0.1 (8560093eba963fba2edd47ca85404cdaff22f174 )"}
370+ + !2 = !{!"clang version 10.0.1 (0d3ce3267dd78b77ab8f302347cc547afaf43ede )"}
359371+ !3 = !{i32 3}
360372+ !4 = !{!"none"}
361373+ !5 = !{!"atomic_float*"}
0 commit comments