Skip to content

Commit 5f80189

Browse files
committed
[Clang] Support floating point vectors with atomic builtins
1 parent 98542a3 commit 5f80189

File tree

5 files changed

+130
-27
lines changed

5 files changed

+130
-27
lines changed

clang/lib/CodeGen/CGAtomic.cpp

Lines changed: 22 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -531,6 +531,12 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
531531
bool PostOpMinMax = false;
532532
unsigned PostOp = 0;
533533

534+
auto IsFloat = E->getValueType()->isVectorType()
535+
? E->getValueType()
536+
->castAs<VectorType>()
537+
->getElementType()
538+
->isFloatingType()
539+
: E->getValueType()->isFloatingType();
534540
switch (E->getOp()) {
535541
case AtomicExpr::AO__c11_atomic_init:
536542
case AtomicExpr::AO__opencl_atomic_init:
@@ -620,30 +626,26 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
620626

621627
case AtomicExpr::AO__atomic_add_fetch:
622628
case AtomicExpr::AO__scoped_atomic_add_fetch:
623-
PostOp = E->getValueType()->isFloatingType() ? llvm::Instruction::FAdd
624-
: llvm::Instruction::Add;
629+
PostOp = IsFloat ? llvm::Instruction::FAdd : llvm::Instruction::Add;
625630
[[fallthrough]];
626631
case AtomicExpr::AO__c11_atomic_fetch_add:
627632
case AtomicExpr::AO__hip_atomic_fetch_add:
628633
case AtomicExpr::AO__opencl_atomic_fetch_add:
629634
case AtomicExpr::AO__atomic_fetch_add:
630635
case AtomicExpr::AO__scoped_atomic_fetch_add:
631-
Op = E->getValueType()->isFloatingType() ? llvm::AtomicRMWInst::FAdd
632-
: llvm::AtomicRMWInst::Add;
636+
Op = IsFloat ? llvm::AtomicRMWInst::FAdd : llvm::AtomicRMWInst::Add;
633637
break;
634638

635639
case AtomicExpr::AO__atomic_sub_fetch:
636640
case AtomicExpr::AO__scoped_atomic_sub_fetch:
637-
PostOp = E->getValueType()->isFloatingType() ? llvm::Instruction::FSub
638-
: llvm::Instruction::Sub;
641+
PostOp = IsFloat ? llvm::Instruction::FSub : llvm::Instruction::Sub;
639642
[[fallthrough]];
640643
case AtomicExpr::AO__c11_atomic_fetch_sub:
641644
case AtomicExpr::AO__hip_atomic_fetch_sub:
642645
case AtomicExpr::AO__opencl_atomic_fetch_sub:
643646
case AtomicExpr::AO__atomic_fetch_sub:
644647
case AtomicExpr::AO__scoped_atomic_fetch_sub:
645-
Op = E->getValueType()->isFloatingType() ? llvm::AtomicRMWInst::FSub
646-
: llvm::AtomicRMWInst::Sub;
648+
Op = IsFloat ? llvm::AtomicRMWInst::FSub : llvm::AtomicRMWInst::Sub;
647649
break;
648650

649651
case AtomicExpr::AO__atomic_min_fetch:
@@ -655,11 +657,10 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
655657
case AtomicExpr::AO__opencl_atomic_fetch_min:
656658
case AtomicExpr::AO__atomic_fetch_min:
657659
case AtomicExpr::AO__scoped_atomic_fetch_min:
658-
Op = E->getValueType()->isFloatingType()
659-
? llvm::AtomicRMWInst::FMin
660-
: (E->getValueType()->isSignedIntegerType()
661-
? llvm::AtomicRMWInst::Min
662-
: llvm::AtomicRMWInst::UMin);
660+
Op = IsFloat ? llvm::AtomicRMWInst::FMin
661+
: (E->getValueType()->isSignedIntegerType()
662+
? llvm::AtomicRMWInst::Min
663+
: llvm::AtomicRMWInst::UMin);
663664
break;
664665

665666
case AtomicExpr::AO__atomic_max_fetch:
@@ -671,11 +672,10 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
671672
case AtomicExpr::AO__opencl_atomic_fetch_max:
672673
case AtomicExpr::AO__atomic_fetch_max:
673674
case AtomicExpr::AO__scoped_atomic_fetch_max:
674-
Op = E->getValueType()->isFloatingType()
675-
? llvm::AtomicRMWInst::FMax
676-
: (E->getValueType()->isSignedIntegerType()
677-
? llvm::AtomicRMWInst::Max
678-
: llvm::AtomicRMWInst::UMax);
675+
Op = IsFloat ? llvm::AtomicRMWInst::FMax
676+
: (E->getValueType()->isSignedIntegerType()
677+
? llvm::AtomicRMWInst::Max
678+
: llvm::AtomicRMWInst::UMax);
679679
break;
680680

681681
case AtomicExpr::AO__atomic_and_fetch:
@@ -984,9 +984,11 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
984984
case AtomicExpr::AO__scoped_atomic_max_fetch:
985985
case AtomicExpr::AO__scoped_atomic_min_fetch:
986986
case AtomicExpr::AO__scoped_atomic_sub_fetch:
987-
ShouldCastToIntPtrTy = !MemTy->isFloatingType();
987+
ShouldCastToIntPtrTy =
988+
MemTy->isVectorType()
989+
? !MemTy->castAs<VectorType>()->getElementType()->isFloatingType()
990+
: !MemTy->isFloatingType();
988991
[[fallthrough]];
989-
990992
case AtomicExpr::AO__atomic_fetch_and:
991993
case AtomicExpr::AO__atomic_fetch_nand:
992994
case AtomicExpr::AO__atomic_fetch_or:

clang/lib/Sema/SemaChecking.cpp

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3758,7 +3758,7 @@ ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange,
37583758
enum ArithOpExtraValueType {
37593759
AOEVT_None = 0,
37603760
AOEVT_Pointer = 1,
3761-
AOEVT_FP = 2,
3761+
AOEVT_FPorFPVec = 2,
37623762
};
37633763
unsigned ArithAllows = AOEVT_None;
37643764

@@ -3804,7 +3804,7 @@ ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange,
38043804
case AtomicExpr::AO__opencl_atomic_fetch_sub:
38053805
case AtomicExpr::AO__hip_atomic_fetch_add:
38063806
case AtomicExpr::AO__hip_atomic_fetch_sub:
3807-
ArithAllows = AOEVT_Pointer | AOEVT_FP;
3807+
ArithAllows = AOEVT_Pointer | AOEVT_FPorFPVec;
38083808
Form = Arithmetic;
38093809
break;
38103810
case AtomicExpr::AO__atomic_fetch_max:
@@ -3821,7 +3821,7 @@ ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange,
38213821
case AtomicExpr::AO__opencl_atomic_fetch_min:
38223822
case AtomicExpr::AO__hip_atomic_fetch_max:
38233823
case AtomicExpr::AO__hip_atomic_fetch_min:
3824-
ArithAllows = AOEVT_FP;
3824+
ArithAllows = AOEVT_FPorFPVec;
38253825
Form = Arithmetic;
38263826
break;
38273827
case AtomicExpr::AO__c11_atomic_fetch_and:
@@ -3982,7 +3982,17 @@ ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange,
39823982
return true;
39833983
if (ValType->isPointerType())
39843984
return AllowedType & AOEVT_Pointer;
3985-
if (!(ValType->isFloatingType() && (AllowedType & AOEVT_FP)))
3985+
if (ValType->isVectorType()) {
3986+
if (ValType->isSizelessVectorType() ||
3987+
!ValType->castAs<VectorType>()
3988+
->getElementType()
3989+
->isFloatingType() ||
3990+
!(AllowedType & AOEVT_FPorFPVec))
3991+
return false;
3992+
// Only floating point fixed vectors are supported in IR
3993+
return true;
3994+
}
3995+
if (!(ValType->isFloatingType() && (AllowedType & AOEVT_FPorFPVec)))
39863996
return false;
39873997
// LLVM Parser does not allow atomicrmw with x86_fp80 type.
39883998
if (ValType->isSpecificBuiltinType(BuiltinType::LongDouble) &&
@@ -3992,7 +4002,7 @@ ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange,
39924002
return true;
39934003
};
39944004
if (!IsAllowedValueType(ValType, ArithAllows)) {
3995-
auto DID = ArithAllows & AOEVT_FP
4005+
auto DID = ArithAllows & AOEVT_FPorFPVec
39964006
? (ArithAllows & AOEVT_Pointer
39974007
? diag::err_atomic_op_needs_atomic_int_ptr_or_fp
39984008
: diag::err_atomic_op_needs_atomic_int_or_fp)

clang/test/CodeGen/fp-atomic-ops.c

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,9 @@ typedef enum memory_order {
2727
memory_order_seq_cst = __ATOMIC_SEQ_CST
2828
} memory_order;
2929

30+
typedef float float2 __attribute__((ext_vector_type(2)));
31+
typedef double double2 __attribute__((ext_vector_type(2)));
32+
3033
void test(float *f, float ff, double *d, double dd) {
3134
// FLOAT: atomicrmw fadd ptr {{.*}} monotonic
3235
__atomic_fetch_add(f, ff, memory_order_relaxed);
@@ -42,3 +45,22 @@ void test(float *f, float ff, double *d, double dd) {
4245
__atomic_fetch_sub(d, dd, memory_order_relaxed);
4346
#endif
4447
}
48+
49+
typedef float float2 __attribute__((ext_vector_type(2)));
50+
typedef double double2 __attribute__((ext_vector_type(2)));
51+
52+
void test_vector(float2 *f, float2 ff, double2 *d, double2 dd) {
53+
// FLOAT: atomicrmw fadd ptr {{.*}} <2 x float> {{.*}} monotonic
54+
__atomic_fetch_add(f, ff, memory_order_relaxed);
55+
56+
// FLOAT: atomicrmw fsub ptr {{.*}} <2 x float> {{.*}} monotonic
57+
__atomic_fetch_sub(f, ff, memory_order_relaxed);
58+
59+
#ifdef DOUBLE
60+
// DOUBLE: atomicrmw fadd ptr {{.*}} <2 x double> {{.*}} monotonic
61+
__atomic_fetch_add(d, dd, memory_order_relaxed);
62+
63+
// DOUBLE: atomicrmw fsub ptr {{.*}} <2 x double> {{.*}} monotonic
64+
__atomic_fetch_sub(d, dd, memory_order_relaxed);
65+
#endif
66+
}

clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020
#include "Inputs/cuda.h"
2121
#include <stdatomic.h>
2222

23+
typedef float __attribute__((ext_vector_type(2))) vector_float;
24+
2325
__global__ void ffp1(float *p) {
2426
// CHECK-LABEL: @_Z4ffp1Pf
2527
// SAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 4{{$}}
@@ -225,6 +227,55 @@ __global__ void ffp6(_Float16 *p) {
225227
__hip_atomic_fetch_min(p, 1.0f, memory_order_relaxed, __HIP_MEMORY_SCOPE_WORKGROUP);
226228
}
227229

230+
__global__ void ffp7(vector_float *p) {
231+
// CHECK-LABEL: @_Z4ffp7PDv2_f
232+
// SAFEIR: atomicrmw fadd ptr {{.*}}<2 x float>{{.*}} monotonic, align 8{{$}}
233+
// SAFEIR: atomicrmw fsub ptr {{.*}}<2 x float>{{.*}} monotonic, align 8{{$}}
234+
// SAFEIR: atomicrmw fmax ptr {{.*}}<2 x float>{{.*}} monotonic, align 8{{$}}
235+
// SAFEIR: atomicrmw fmin ptr {{.*}}<2 x float>{{.*}} monotonic, align 8{{$}}
236+
// SAFEIR: atomicrmw fadd ptr {{.*}}<2 x float>{{.*}} syncscope("agent") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]]{{$}}
237+
// SAFEIR: atomicrmw fsub ptr {{.*}}<2 x float>{{.*}} syncscope("workgroup") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]]{{$}}
238+
// SAFEIR: atomicrmw fmax ptr {{.*}}<2 x float>{{.*}} syncscope("agent") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]]{{$}}
239+
// SAFEIR: atomicrmw fmin ptr {{.*}}<2 x float>{{.*}} syncscope("workgroup") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]]{{$}}
240+
241+
// UNSAFEIR: atomicrmw fadd ptr {{.*}}<2 x float>{{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
242+
// UNSAFEIR: atomicrmw fsub ptr {{.*}}<2 x float>{{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
243+
// UNSAFEIR: atomicrmw fmax ptr {{.*}}<2 x float>{{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
244+
// UNSAFEIR: atomicrmw fmin ptr {{.*}}<2 x float>{{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
245+
// UNSAFEIR: atomicrmw fadd ptr {{.*}}<2 x float>{{.*}} monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
246+
// UNSAFEIR: atomicrmw fsub ptr {{.*}}<2 x float>{{.*}} monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
247+
// UNSAFEIR: atomicrmw fmax ptr {{.*}}<2 x float>{{.*}} syncscope("agent") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
248+
// UNSAFEIR: atomicrmw fmin ptr {{.*}}<2 x float>{{.*}} syncscope("workgroup") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
249+
250+
// SAFE: _Z4ffp7PDv2_f
251+
// SAFE: global_atomic_cmpswap
252+
// SAFE: global_atomic_cmpswap
253+
// SAFE: global_atomic_cmpswap
254+
// SAFE: global_atomic_cmpswap
255+
// SAFE: global_atomic_cmpswap
256+
// SAFE: global_atomic_cmpswap
257+
// SAFE: global_atomic_cmpswap
258+
// SAFE: global_atomic_cmpswap
259+
260+
// UNSAFE: _Z4ffp7PDv2_f
261+
// UNSAFE: global_atomic_cmpswap
262+
// UNSAFE: global_atomic_cmpswap
263+
// UNSAFE: global_atomic_cmpswap
264+
// UNSAFE: global_atomic_cmpswap
265+
// UNSAFE: global_atomic_cmpswap
266+
// UNSAFE: global_atomic_cmpswap
267+
// UNSAFE: global_atomic_cmpswap
268+
// UNSAFE: global_atomic_cmpswap
269+
__atomic_fetch_add(p, {1.0f, 1.0f}, memory_order_relaxed);
270+
__atomic_fetch_sub(p, {1.0f, 1.0f}, memory_order_relaxed);
271+
__atomic_fetch_max(p, {1.0f, 1.0f}, memory_order_relaxed);
272+
__atomic_fetch_min(p, {1.0f, 1.0f}, memory_order_relaxed);
273+
__hip_atomic_fetch_add(p, {1.0f, 1.0f}, memory_order_relaxed, __HIP_MEMORY_SCOPE_AGENT);
274+
__hip_atomic_fetch_sub(p, {1.0f, 1.0f}, memory_order_relaxed, __HIP_MEMORY_SCOPE_WORKGROUP);
275+
__hip_atomic_fetch_max(p, {1.0f, 1.0f}, memory_order_relaxed, __HIP_MEMORY_SCOPE_AGENT);
276+
__hip_atomic_fetch_min(p, {1.0f, 1.0f}, memory_order_relaxed, __HIP_MEMORY_SCOPE_WORKGROUP);
277+
}
278+
228279
// CHECK-LABEL: @_Z12test_cmpxchgPiii
229280
// CHECK: cmpxchg ptr %{{.+}}, i32 %{{.+}}, i32 %{{.+}} acquire acquire, align 4{{$}}
230281
// CHECK: cmpxchg weak ptr %{{.+}}, i32 %{{.+}}, i32 %{{.+}} acquire acquire, align 4{{$}}

clang/test/Sema/atomic-ops.c

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,12 @@ _Static_assert(__atomic_always_lock_free(2, (int[2]){}), "");
147147
void dummyfn();
148148
_Static_assert(__atomic_always_lock_free(2, dummyfn) || 1, "");
149149

150-
150+
typedef _Atomic(float __attribute__((vector_size(16)))) atomic_vector_float;
151+
typedef _Atomic(double __attribute__((vector_size(16)))) atomic_vector_double;
152+
typedef _Atomic(int __attribute__((vector_size(16)))) atomic_vector_int;
153+
typedef float __attribute__((ext_vector_type(4))) vector_float;
154+
typedef double __attribute__((ext_vector_type(2))) vector_double;
155+
typedef int __attribute__((ext_vector_type(4))) vector_int;
151156

152157
#define _AS1 __attribute__((address_space(1)))
153158
#define _AS2 __attribute__((address_space(2)))
@@ -156,7 +161,10 @@ void f(_Atomic(int) *i, const _Atomic(int) *ci,
156161
_Atomic(int*) *p, _Atomic(float) *f, _Atomic(double) *d,
157162
_Atomic(long double) *ld,
158163
int *I, const int *CI,
159-
int **P, float *F, double *D, struct S *s1, struct S *s2) {
164+
int **P, float *F, double *D, struct S *s1, struct S *s2,
165+
atomic_vector_float* vf, atomic_vector_double* vd,
166+
atomic_vector_int* vi, vector_float* evf,
167+
vector_double* evd, vector_int* evi) {
160168
__c11_atomic_init(I, 5); // expected-error {{pointer to _Atomic}}
161169
__c11_atomic_init(ci, 5); // expected-error {{address argument to atomic operation must be a pointer to non-const _Atomic type ('const _Atomic(int) *' invalid)}}
162170

@@ -224,6 +232,13 @@ void f(_Atomic(int) *i, const _Atomic(int) *ci,
224232
__c11_atomic_fetch_add(f, 1.0f, memory_order_seq_cst);
225233
__c11_atomic_fetch_add(d, 1.0, memory_order_seq_cst);
226234
__c11_atomic_fetch_add(ld, 1.0, memory_order_seq_cst); // fp80-error {{must be a pointer to atomic integer, pointer or supported floating point type}}
235+
236+
vector_float fvec = {1.0f, 1.0f, 1.0f, 1.0f};
237+
vector_double dvec = {1.0, 1.0};
238+
vector_int ivec = {1, 1, 1, 1};
239+
__c11_atomic_fetch_add(vf, fvec, memory_order_seq_cst);
240+
__c11_atomic_fetch_add(vd, dvec, memory_order_seq_cst);
241+
__c11_atomic_fetch_add(vi, ivec, memory_order_seq_cst); // expected-error {{must be a pointer to atomic integer, pointer or supported floating point type}}
227242
__c11_atomic_fetch_min(i, 1, memory_order_seq_cst);
228243
__c11_atomic_fetch_min(p, 1, memory_order_seq_cst); // expected-error {{must be a pointer to atomic integer or supported floating point type}}
229244
__c11_atomic_fetch_min(f, 1.0f, memory_order_seq_cst);
@@ -240,6 +255,9 @@ void f(_Atomic(int) *i, const _Atomic(int) *ci,
240255
__atomic_fetch_sub(P, 3, memory_order_seq_cst);
241256
__atomic_fetch_sub(F, 3, memory_order_seq_cst);
242257
__atomic_fetch_sub(s1, 3, memory_order_seq_cst); // expected-error {{must be a pointer to integer, pointer or supported floating point type}}
258+
__atomic_fetch_sub(evf, fvec, memory_order_seq_cst);
259+
__atomic_fetch_sub(evd, dvec, memory_order_seq_cst);
260+
__atomic_fetch_sub(evi, ivec, memory_order_seq_cst); // expected-error {{must be a pointer to integer, pointer or supported floating point type}}
243261
__atomic_fetch_min(F, 3, memory_order_seq_cst);
244262
__atomic_fetch_min(D, 3, memory_order_seq_cst);
245263
__atomic_fetch_max(F, 3, memory_order_seq_cst);

0 commit comments

Comments
 (0)