Skip to content

Commit d79ea0a

Browse files
committed
[OpenCL] Fix bugs in the cl_ext_float_atomics patch
1. Update the patch to follow changes in OpenCL-Docs. 2. Fix the incorrect translation of atomic functions to SPIR-V. Signed-off-by: haonanya <[email protected]> Signed-off-by: Haonan Yang <[email protected]>
1 parent 4fd9176 commit d79ea0a

File tree

2 files changed

+63
-100
lines changed

2 files changed

+63
-100
lines changed

patches/clang/0005-OpenCL-support-cl_ext_float_atomics.patch

Lines changed: 17 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,25 @@
1-
From baa0fc843cd55f9da25afbc576c5ae56c0b20536 Mon Sep 17 00:00:00 2001
1+
From 9b48f70bae77fdc752ee5e98949a7ed2c9373037 Mon Sep 17 00:00:00 2001
22
From: haonanya <[email protected]>
33
Date: Fri, 13 Aug 2021 10:00:02 +0800
44
Subject: [PATCH] [OpenCL] support cl_ext_float_atomics
55

66
Signed-off-by: haonanya <[email protected]>
7+
Signed-off-by: Haonan Yang <[email protected]>
78
---
8-
clang/lib/Headers/opencl-c-base.h | 25 ++++
9+
clang/lib/Headers/opencl-c-base.h | 22 +++
910
clang/lib/Headers/opencl-c.h | 208 ++++++++++++++++++++++++++
1011
clang/test/Headers/opencl-c-header.cl | 96 ++++++++++++
11-
3 files changed, 329 insertions(+)
12+
3 files changed, 326 insertions(+)
1213

1314
diff --git a/clang/lib/Headers/opencl-c-base.h b/clang/lib/Headers/opencl-c-base.h
14-
index 2cc688ccc3da..86bbee12fdf8 100644
15+
index 2cc688ccc3da..18d367de68ec 100644
1516
--- a/clang/lib/Headers/opencl-c-base.h
1617
+++ b/clang/lib/Headers/opencl-c-base.h
17-
@@ -14,6 +14,31 @@
18+
@@ -14,6 +14,28 @@
1819
#define CL_VERSION_3_0 300
1920
#endif
2021

2122
+#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
22-
+// For SPIR all extensions are supported.
23-
+#if defined(__SPIR__)
2423
+#define cl_ext_float_atomics 1
2524
+#ifdef cl_khr_fp16
2625
+#define __opencl_c_ext_fp16_global_atomic_load_store 1
@@ -30,7 +29,7 @@ index 2cc688ccc3da..86bbee12fdf8 100644
3029
+#define __opencl_c_ext_fp16_global_atomic_min_max 1
3130
+#define __opencl_c_ext_fp16_local_atomic_min_max 1
3231
+#endif
33-
+#ifdef __opencl_c_fp64
32+
+#ifdef cl_khr_fp64
3433
+#define __opencl_c_ext_fp64_global_atomic_add 1
3534
+#define __opencl_c_ext_fp64_local_atomic_add 1
3635
+#define __opencl_c_ext_fp64_global_atomic_min_max 1
@@ -40,14 +39,13 @@ index 2cc688ccc3da..86bbee12fdf8 100644
4039
+#define __opencl_c_ext_fp32_local_atomic_add 1
4140
+#define __opencl_c_ext_fp32_global_atomic_min_max 1
4241
+#define __opencl_c_ext_fp32_local_atomic_min_max 1
43-
+#endif // defined(__SPIR__)
4442
+#endif // (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
4543
+
4644
// Define features for 2.0 for header backward compatibility
4745
#ifndef __opencl_c_int64
4846
#define __opencl_c_int64 1
4947
diff --git a/clang/lib/Headers/opencl-c.h b/clang/lib/Headers/opencl-c.h
50-
index d8173f0aa843..90944fe2d7e6 100644
48+
index d8173f0aa843..50515ac17a0c 100644
5149
--- a/clang/lib/Headers/opencl-c.h
5250
+++ b/clang/lib/Headers/opencl-c.h
5351
@@ -14354,6 +14354,214 @@ intptr_t __ovld atomic_fetch_max_explicit(
@@ -90,7 +88,7 @@ index d8173f0aa843..90944fe2d7e6 100644
9088
+ memory_scope scope);
9189
+#endif // defined(__opencl_c_ext_fp32_local_atomic_min_max)
9290
+
93-
+#if defined(__opencl_c_ext_fp32_global_atomic_min_max) || \
91+
+#if defined(__opencl_c_ext_fp32_global_atomic_min_max) && \
9492
+ defined(__opencl_c_ext_fp32_local_atomic_min_max)
9593
+float __ovld atomic_fetch_min(volatile atomic_float *object, float operand);
9694
+float __ovld atomic_fetch_max(volatile atomic_float *object, float operand);
@@ -104,7 +102,7 @@ index d8173f0aa843..90944fe2d7e6 100644
104102
+float __ovld atomic_fetch_max_explicit(volatile atomic_float *object,
105103
+ float operand, memory_order order,
106104
+ memory_scope scope);
107-
+#endif // defined(__opencl_c_ext_fp32_global_atomic_min_max) || \
105+
+#endif // defined(__opencl_c_ext_fp32_global_atomic_min_max) && \
108106
+ defined(__opencl_c_ext_fp32_local_atomic_min_max)
109107
+
110108
+#if defined(__opencl_c_ext_fp64_global_atomic_min_max)
@@ -141,7 +139,7 @@ index d8173f0aa843..90944fe2d7e6 100644
141139
+ memory_scope scope);
142140
+#endif // defined(__opencl_c_ext_fp64_local_atomic_min_max)
143141
+
144-
+#if defined(__opencl_c_ext_fp64_global_atomic_min_max) || \
142+
+#if defined(__opencl_c_ext_fp64_global_atomic_min_max) && \
145143
+ defined(__opencl_c_ext_fp64_local_atomic_min_max)
146144
+double __ovld atomic_fetch_min(volatile atomic_double *object, double operand);
147145
+double __ovld atomic_fetch_max(volatile atomic_double *object, double operand);
@@ -155,7 +153,7 @@ index d8173f0aa843..90944fe2d7e6 100644
155153
+double __ovld atomic_fetch_max_explicit(volatile atomic_double *object,
156154
+ double operand, memory_order order,
157155
+ memory_scope scope);
158-
+#endif // defined(__opencl_c_ext_fp64_global_atomic_min_max) || \
156+
+#endif // defined(__opencl_c_ext_fp64_global_atomic_min_max) && \
159157
+ defined(__opencl_c_ext_fp64_local_atomic_min_max)
160158
+
161159
+#if defined(__opencl_c_ext_fp32_global_atomic_add)
@@ -192,7 +190,7 @@ index d8173f0aa843..90944fe2d7e6 100644
192190
+ memory_scope scope);
193191
+#endif // defined(__opencl_c_ext_fp32_local_atomic_add)
194192
+
195-
+#if defined(__opencl_c_ext_fp32_global_atomic_add) || \
193+
+#if defined(__opencl_c_ext_fp32_global_atomic_add) && \
196194
+ defined(__opencl_c_ext_fp32_local_atomic_add)
197195
+float __ovld atomic_fetch_add(volatile atomic_float *object, float operand);
198196
+float __ovld atomic_fetch_sub(volatile atomic_float *object, float operand);
@@ -206,7 +204,7 @@ index d8173f0aa843..90944fe2d7e6 100644
206204
+float __ovld atomic_fetch_sub_explicit(volatile atomic_float *object,
207205
+ float operand, memory_order order,
208206
+ memory_scope scope);
209-
+#endif // defined(__opencl_c_ext_fp32_global_atomic_add) || \
207+
+#endif // defined(__opencl_c_ext_fp32_global_atomic_add) && \
210208
+ defined(__opencl_c_ext_fp32_local_atomic_add)
211209
+
212210
+#if defined(__opencl_c_ext_fp64_global_atomic_add)
@@ -243,7 +241,7 @@ index d8173f0aa843..90944fe2d7e6 100644
243241
+ memory_scope scope);
244242
+#endif // defined(__opencl_c_ext_fp64_local_atomic_add)
245243
+
246-
+#if defined(__opencl_c_ext_fp64_global_atomic_add) || \
244+
+#if defined(__opencl_c_ext_fp64_global_atomic_add) && \
247245
+ defined(__opencl_c_ext_fp64_local_atomic_add)
248246
+double __ovld atomic_fetch_add(volatile atomic_double *object, double operand);
249247
+double __ovld atomic_fetch_sub(volatile atomic_double *object, double operand);
@@ -257,7 +255,7 @@ index d8173f0aa843..90944fe2d7e6 100644
257255
+double __ovld atomic_fetch_sub_explicit(volatile atomic_double *object,
258256
+ double operand, memory_order order,
259257
+ memory_scope scope);
260-
+#endif // defined(__opencl_c_ext_fp64_global_atomic_add) || \
258+
+#endif // defined(__opencl_c_ext_fp64_global_atomic_add) && \
261259
+ defined(__opencl_c_ext_fp64_local_atomic_add)
262260
+
263261
+#endif // cl_ext_float_atomics
@@ -370,5 +368,5 @@ index 2716076acdcf..7f720cf28142 100644
370368
+
371369
+#endif // defined(__SPIR__)
372370
--
373-
2.17.1
371+
2.18.1
374372

patches/spirv/0001-Add-support-for-cl_ext_float_atomics-in-SPIRVWriter.patch

Lines changed: 46 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
1-
From 3f41fcc74ab5f8a153bd04850b7001aadc10be62 Mon Sep 17 00:00:00 2001
1+
From 5b21454c542aea71a447afb5a652a713cf53b111 Mon Sep 17 00:00:00 2001
22
From: haonanya <[email protected]>
33
Date: Mon, 19 Jul 2021 10:14:20 +0800
44
Subject: [PATCH] Add support for cl_ext_float_atomics in SPIRVWriter
55

66
Signed-off-by: haonanya <[email protected]>
77
---
8-
lib/SPIRV/OCL20ToSPIRV.cpp | 79 ++++++++++++++++--
8+
lib/SPIRV/OCL20ToSPIRV.cpp | 25 +++++-
9+
lib/SPIRV/OCLUtil.cpp | 4 -
910
lib/SPIRV/SPIRVToOCL.h | 3 +
1011
lib/SPIRV/SPIRVToOCL12.cpp | 21 +++++
1112
lib/SPIRV/SPIRVToOCL20.cpp | 28 ++++++-
@@ -18,84 +19,24 @@ Signed-off-by: haonanya <[email protected]>
1819
test/AtomicFMinEXT.ll | 113 +++++++-------------------
1920
test/AtomicFMinEXTForOCL.ll | 64 +++++++++++++++
2021
test/InvalidAtomicBuiltins.cl | 8 --
21-
13 files changed, 417 insertions(+), 260 deletions(-)
22+
14 files changed, 366 insertions(+), 261 deletions(-)
2223
create mode 100644 test/AtomicFAddEXTForOCL.ll
2324
create mode 100644 test/AtomicFMaxEXTForOCL.ll
2425
create mode 100644 test/AtomicFMinEXTForOCL.ll
2526

2627
diff --git a/lib/SPIRV/OCL20ToSPIRV.cpp b/lib/SPIRV/OCL20ToSPIRV.cpp
27-
index e30aa5be..b676a009 100644
28+
index e30aa5be..faa5be2b 100644
2829
--- a/lib/SPIRV/OCL20ToSPIRV.cpp
2930
+++ b/lib/SPIRV/OCL20ToSPIRV.cpp
30-
@@ -408,10 +408,63 @@ void OCL20ToSPIRV::visitCallInst(CallInst &CI) {
31+
@@ -407,7 +407,6 @@ void OCL20ToSPIRV::visitCallInst(CallInst &CI) {
32+
}
3133
if (DemangledName.find(kOCLBuiltinName::AtomicPrefix) == 0 ||
3234
DemangledName.find(kOCLBuiltinName::AtomPrefix) == 0) {
33-
34-
- // Compute atomic builtins do not support floating types.
35-
- if (CI.getType()->isFloatingPointTy() &&
36-
- isComputeAtomicOCLBuiltin(DemangledName))
37-
- return;
38-
+ // Compute "atom" prefixed builtins do not support floating types.
39-
+ if (CI.getType()->isFloatingPointTy()) {
40-
+ if (DemangledName.find(kOCLBuiltinName::AtomPrefix) == 0)
41-
+ return;
42-
+ // handle functions which are "atomic_" prefixed.
43-
+ StringRef Stem = DemangledName;
44-
+ Stem = Stem.drop_front(strlen("atomic_"));
45-
+ // FP-typed atomic_{add, sub, inc, dec, exchange, min, max, or, and, xor,
46-
+ // fetch_or, fetch_xor, fetch_and, fetch_or_explicit, fetch_xor_explicit,
47-
+ // fetch_and_explicit} should be identified as function call
48-
+ bool IsFunctionCall = llvm::StringSwitch<bool>(Stem)
49-
+ .Case("add", true)
50-
+ .Case("sub", true)
51-
+ .Case("inc", true)
52-
+ .Case("dec", true)
53-
+ .Case("cmpxchg", true)
54-
+ .Case("min", true)
55-
+ .Case("max", true)
56-
+ .Case("or", true)
57-
+ .Case("xor", true)
58-
+ .Case("and", true)
59-
+ .Case("fetch_or", true)
60-
+ .Case("fetch_and", true)
61-
+ .Case("fetch_xor", true)
62-
+ .Case("fetch_or_explicit", true)
63-
+ .Case("fetch_xor_explicit", true)
64-
+ .Case("fetch_and_explicit", true)
65-
+ .Default(false);
66-
+ if (IsFunctionCall)
67-
+ return;
68-
+ if (F->arg_size() != 2) {
69-
+ IsFunctionCall = llvm::StringSwitch<bool>(Stem)
70-
+ .Case("exchange", true)
71-
+ .Case("fetch_add", true)
72-
+ .Case("fetch_sub", true)
73-
+ .Case("fetch_min", true)
74-
+ .Case("fetch_max", true)
75-
+ .Case("load", true)
76-
+ .Case("store", true)
77-
+ .Default(false);
78-
+ if (IsFunctionCall)
79-
+ return;
80-
+ }
81-
+ if (F->arg_size() != 3 && F->arg_size() != 4) {
82-
+ IsFunctionCall = llvm::StringSwitch<bool>(Stem)
83-
+ .Case("exchange_explicit", true)
84-
+ .Case("fetch_add_explicit", true)
85-
+ .Case("fetch_sub_explicit", true)
86-
+ .Case("fetch_min_explicit", true)
87-
+ .Case("fetch_max_explicit", true)
88-
+ .Case("load_explicit", true)
89-
+ .Case("store_explicit", true)
90-
+ .Default(false);
91-
+ if (IsFunctionCall)
92-
+ return;
93-
+ }
94-
+ }
95-
96-
auto PCI = &CI;
97-
if (DemangledName == kOCLBuiltinName::AtomicInit) {
98-
@@ -819,7 +872,7 @@ void OCL20ToSPIRV::transAtomicBuiltin(CallInst *CI, OCLBuiltinTransInfo &Info) {
35+
-
36+
// Compute atomic builtins do not support floating types.
37+
if (CI.getType()->isFloatingPointTy() &&
38+
isComputeAtomicOCLBuiltin(DemangledName))
39+
@@ -819,7 +818,7 @@ void OCL20ToSPIRV::transAtomicBuiltin(CallInst *CI, OCLBuiltinTransInfo &Info) {
9940
AttributeList Attrs = CI->getCalledFunction()->getAttributes();
10041
mutateCallInstSPIRV(
10142
M, CI,
@@ -104,7 +45,7 @@ index e30aa5be..b676a009 100644
10445
Info.PostProc(Args);
10546
// Order of args in OCL20:
10647
// object, 0-2 other args, 1-2 order, scope
107-
@@ -864,7 +917,21 @@ void OCL20ToSPIRV::transAtomicBuiltin(CallInst *CI, OCLBuiltinTransInfo &Info) {
48+
@@ -864,7 +863,27 @@ void OCL20ToSPIRV::transAtomicBuiltin(CallInst *CI, OCLBuiltinTransInfo &Info) {
10849
std::rotate(Args.begin() + 2, Args.begin() + OrderIdx,
10950
Args.end() - Offset);
11051
}
@@ -119,19 +60,43 @@ index e30aa5be..b676a009 100644
11960
+ getSPIRVFuncName(OCLSPIRVBuiltinMap::map(Info.UniqName));
12061
+ if (!IsFPType(AtomicBuiltinsReturnType))
12162
+ return SPIRVFunctionName;
122-
+ // Translate FP-typed atomic builtins.
123-
+ return llvm::StringSwitch<std::string>(SPIRVFunctionName)
124-
+ .Case("__spirv_AtomicIAdd", "__spirv_AtomicFAddEXT")
125-
+ .Case("__spirv_AtomicSMax", "__spirv_AtomicFMaxEXT")
126-
+ .Case("__spirv_AtomicSMin", "__spirv_AtomicFMinEXT");
63+
+ // Translate FP-typed atomic builtins. Currently we only need to
64+
+ // translate atomic_fetch_[add, max, min]* to related float instructions
65+
+ auto SPIRFunctionNameForFloatAtomics =
66+
+ llvm::StringSwitch<std::string>(SPIRVFunctionName)
67+
+ .Case("__spirv_AtomicIAdd", "__spirv_AtomicFAddEXT")
68+
+ .Case("__spirv_AtomicSMax", "__spirv_AtomicFMaxEXT")
69+
+ .Case("__spirv_AtomicSMin", "__spirv_AtomicFMinEXT")
70+
+ .Default("others");
71+
+ return SPIRFunctionNameForFloatAtomics == "others"
72+
+ ? SPIRVFunctionName
73+
+ : SPIRFunctionNameForFloatAtomics;
12774
},
12875
&Attrs);
12976
}
77+
diff --git a/lib/SPIRV/OCLUtil.cpp b/lib/SPIRV/OCLUtil.cpp
78+
index c7232623..9a4c8ab9 100644
79+
--- a/lib/SPIRV/OCLUtil.cpp
80+
+++ b/lib/SPIRV/OCLUtil.cpp
81+
@@ -136,13 +136,9 @@ bool isComputeAtomicOCLBuiltin(StringRef DemangledName) {
82+
.EndsWith("and", true)
83+
.EndsWith("or", true)
84+
.EndsWith("xor", true)
85+
- .EndsWith("add_explicit", true)
86+
- .EndsWith("sub_explicit", true)
87+
.EndsWith("or_explicit", true)
88+
.EndsWith("xor_explicit", true)
89+
.EndsWith("and_explicit", true)
90+
- .EndsWith("min_explicit", true)
91+
- .EndsWith("max_explicit", true)
92+
.Default(false);
93+
}
94+
13095
diff --git a/lib/SPIRV/SPIRVToOCL.h b/lib/SPIRV/SPIRVToOCL.h
131-
index f75195d4..64bf0f84 100644
96+
index ddeec0b6..006fb0b1 100644
13297
--- a/lib/SPIRV/SPIRVToOCL.h
13398
+++ b/lib/SPIRV/SPIRVToOCL.h
134-
@@ -171,6 +171,9 @@ public:
99+
@@ -178,6 +178,9 @@ public:
135100
/// using separate maps for OpenCL 1.2 and OpenCL 2.0
136101
virtual Instruction *mutateAtomicName(CallInst *CI, Op OC) = 0;
137102

@@ -246,7 +211,7 @@ index d829ff42..01d088e9 100644
246211
auto ScopeIdx = Ptr + 1;
247212
auto OrderIdx = Ptr + 2;
248213
diff --git a/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h b/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h
249-
index cc1dd1ab..63180888 100644
214+
index 13f93fbe..7b707993 100644
250215
--- a/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h
251216
+++ b/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h
252217
@@ -521,7 +521,6 @@ template <> inline void SPIRVMap<Capability, std::string>::init() {
@@ -258,7 +223,7 @@ index cc1dd1ab..63180888 100644
258223
add(CapabilitySubgroupBufferBlockIOINTEL, "SubgroupBufferBlockIOINTEL");
259224
add(CapabilitySubgroupImageBlockIOINTEL, "SubgroupImageBlockIOINTEL");
260225
diff --git a/lib/SPIRV/libSPIRV/SPIRVOpCode.h b/lib/SPIRV/libSPIRV/SPIRVOpCode.h
261-
index 9e520512..cc2ad200 100644
226+
index feec70f6..8e595e83 100644
262227
--- a/lib/SPIRV/libSPIRV/SPIRVOpCode.h
263228
+++ b/lib/SPIRV/libSPIRV/SPIRVOpCode.h
264229
@@ -54,11 +54,17 @@ template <> inline void SPIRVMap<Op, std::string>::init() {

0 commit comments

Comments
 (0)