diff --git a/clang/lib/Sema/SPIRVBuiltins.td b/clang/lib/Sema/SPIRVBuiltins.td
index 4f7bd7d182d19..d6d63e36abc19 100644
--- a/clang/lib/Sema/SPIRVBuiltins.td
+++ b/clang/lib/Sema/SPIRVBuiltins.td
@@ -398,11 +398,14 @@ def AIGenTypeNNoScalar : GenericType<"AIGenTypeNNoScalar", TLAllInts, VecNoS
 // All integer to unsigned
 def AI2UGenTypeN : GenericType<"AI2UGenTypeN", TLAllUIntsTwice, VecAndScalar>;
 // Signed integer
+def SGenType1 : GenericType<"SGenType1", TLSignedInts, Vec1>;
 def SGenTypeN : GenericType<"SGenTypeN", TLSignedInts, VecAndScalar>;
 // Unsigned integer
+def UGenType1 : GenericType<"UGenType1", TLUnsignedInts, Vec1>;
 def UGenTypeN : GenericType<"UGenTypeN", TLUnsignedInts, VecAndScalar>;
 def UInt4 : GenericType<"UInt4", TypeList<[UInt]>, Vec4>;
 // Float
+def FGenType1 : GenericType<"FGenType1", TLFloat, Vec1>;
 def FGenTypeN : GenericType<"FGenTypeN", TLFloat, VecAndScalar>;
 // (u)int, (u)long, and all floats
 def IntLongFloatGenType1 : GenericType<"IntLongFloatGenType1", TLIntLongFloats, Vec1>;
@@ -1020,6 +1023,70 @@ foreach name = ["SubgroupBlockWriteINTEL"] in {
   }
 }
 
+// 3.56.18. Atomic Instructions
+
+foreach AS = [GlobalAS, LocalAS, PrivateAS, GenericAS] in {
+  def : SPVBuiltin<
+      "AtomicLoad", [AGenType1, PointerType<AGenType1, AS>, Int, Int],
+      Attr.Convergent>;
+
+  def : SPVBuiltin<"AtomicStore",
+                   [Void, PointerType<AGenType1, AS>, Int, Int, AGenType1],
+                   Attr.Convergent>;
+
+  def : SPVBuiltin<"AtomicExchange",
+                   [AGenType1, PointerType<AGenType1, AS>, Int, Int, AGenType1],
+                   Attr.Convergent>;
+
+  foreach name = ["AtomicCompareExchange", "AtomicCompareExchangeWeak"] in {
+    def : SPVBuiltin<name, [AIGenType1, PointerType<AIGenType1, AS>, Int, Int, Int,
+                            AIGenType1, AIGenType1],
+                     Attr.Convergent>;
+  }
+
+  foreach name = ["AtomicIIncrement", "AtomicIDecrement"] in {
+    def : SPVBuiltin<name, [AIGenType1, PointerType<AIGenType1, AS>, Int, Int],
+                     Attr.Convergent>;
+  }
+
+  foreach name = ["AtomicSMin", "AtomicSMax"] in {
+    def : SPVBuiltin<name, [SGenType1, PointerType<SGenType1, AS>, Int, Int,
+                            SGenType1],
+                     Attr.Convergent>;
+  }
+
+  foreach name = ["AtomicUMin", "AtomicUMax"] in {
+    def : SPVBuiltin<name, [UGenType1, PointerType<UGenType1, AS>, Int, Int,
+                            UGenType1],
+                     Attr.Convergent>;
+  }
+
+  foreach name = ["AtomicIAdd", "AtomicISub", "AtomicAnd", "AtomicOr",
+                  "AtomicXor"] in {
+    def : SPVBuiltin<name, [AIGenType1, PointerType<AIGenType1, AS>, Int, Int,
+                            AIGenType1],
+                     Attr.Convergent>;
+  }
+
+  def : SPVBuiltin<
+      "AtomicFlagTestAndSet", [Bool, PointerType<Int, AS>, Int, Int],
+      Attr.Convergent>;
+
+  def : SPVBuiltin<"AtomicFlagClear", [Void, PointerType<Int, AS>, Int, Int],
+                   Attr.Convergent>;
+
+  foreach name = ["AtomicFMaxEXT", "AtomicFMinEXT", "AtomicFAddEXT"] in {
+    def : SPVBuiltin<name, [FGenType1, PointerType<FGenType1, AS>, Int, Int,
+                            FGenType1],
+                     Attr.Convergent>;
+  }
+}
+
 // 3.56.24. Non-Uniform Instructions
 
 foreach name = ["GroupNonUniformElect"] in {
diff --git a/clang/test/CodeGenSPIRV/spirv-builtin-lookup-atomic.cpp b/clang/test/CodeGenSPIRV/spirv-builtin-lookup-atomic.cpp
new file mode 100644
index 0000000000000..8b596f2f70ed5
--- /dev/null
+++ b/clang/test/CodeGenSPIRV/spirv-builtin-lookup-atomic.cpp
@@ -0,0 +1,237 @@
+// RUN: %clang_cc1 -triple spir64 -fdeclare-spirv-builtins -emit-llvm %s -o - | FileCheck %s
+
+#define AS_GLOBAL __attribute__((opencl_global))
+#define AS_LOCAL __attribute__((opencl_local))
+#define AS_PRIVATE __attribute__((opencl_private))
+#define AS_GENERIC __attribute__((opencl_generic))
+
+void test_flag(int AS_GLOBAL *a, int AS_LOCAL *b, int AS_PRIVATE *c,
+               int AS_GENERIC *d) {
+  __spirv_AtomicFlagTestAndSet(a, 1, 16);
+  __spirv_AtomicFlagTestAndSet(b, 2, 8);
+  __spirv_AtomicFlagTestAndSet(c, 4, 4);
+  __spirv_AtomicFlagTestAndSet(d, 2, 0);
+
+  __spirv_AtomicFlagClear(a, 1, 16);
+  __spirv_AtomicFlagClear(b, 2, 4);
+  __spirv_AtomicFlagClear(c, 4, 0);
+  __spirv_AtomicFlagClear(d, 2, 0);
+}
+
+template <typename T>
+void test_signed(T AS_GLOBAL *a, T AS_LOCAL *b, T AS_PRIVATE *c,
+                 T AS_GENERIC *d) {
+  __spirv_AtomicLoad(a, 1, 16);
+  __spirv_AtomicLoad(b, 2, 8);
+  __spirv_AtomicLoad(c, 4, 4);
+  __spirv_AtomicLoad(d, 2, 0);
+
+  __spirv_AtomicStore(a, 1, 16, (T)0);
+  __spirv_AtomicStore(b, 2, 8, (T)0);
+  __spirv_AtomicStore(c, 4, 4, (T)0);
+  __spirv_AtomicStore(d, 2, 0, (T)0);
+
+  __spirv_AtomicExchange(a, 1, 16, (T)0);
+  __spirv_AtomicExchange(b, 2, 8, (T)0);
+  __spirv_AtomicExchange(c, 4, 4, (T)0);
+  __spirv_AtomicExchange(d, 2, 0, (T)0);
+
+  __spirv_AtomicCompareExchange(a, 1, 16, 0, (T)1, (T)0);
+  __spirv_AtomicCompareExchange(b, 2, 8, 0, (T)1, (T)0);
+  __spirv_AtomicCompareExchange(c, 4, 4, 0, (T)1, (T)0);
+  __spirv_AtomicCompareExchange(d, 2, 0, 0, (T)1, (T)0);
+
+  __spirv_AtomicCompareExchangeWeak(a, 1, 16, 0, (T)1, (T)0);
+  __spirv_AtomicCompareExchangeWeak(b, 2, 8, 0, (T)1, (T)0);
+  __spirv_AtomicCompareExchangeWeak(c, 4, 4, 0, (T)1, (T)0);
+  __spirv_AtomicCompareExchangeWeak(d, 2, 0, 0, (T)1, (T)0);
+
+  __spirv_AtomicIIncrement(a, 1, 16);
+  __spirv_AtomicIIncrement(b, 2, 8);
+  __spirv_AtomicIIncrement(c, 4, 4);
+  __spirv_AtomicIIncrement(d, 2, 0);
+
+  __spirv_AtomicIDecrement(a, 1, 16);
+  __spirv_AtomicIDecrement(b, 2, 8);
+  __spirv_AtomicIDecrement(c, 4, 4);
+  __spirv_AtomicIDecrement(d, 2, 0);
+
+  __spirv_AtomicSMin(a, 1, 16, (T)0);
+  __spirv_AtomicSMin(b, 2, 8, (T)0);
+  __spirv_AtomicSMin(c, 4, 4, (T)0);
+  __spirv_AtomicSMin(d, 2, 0, (T)0);
+
+  __spirv_AtomicSMax(a, 1, 16, (T)0);
+  __spirv_AtomicSMax(b, 2, 8, (T)0);
+  __spirv_AtomicSMax(c, 4, 4, (T)0);
+  __spirv_AtomicSMax(d, 2, 0, (T)0);
+
+  __spirv_AtomicIAdd(a, 1, 16, (T)0);
+  __spirv_AtomicIAdd(b, 2, 8, (T)0);
+  __spirv_AtomicIAdd(c, 4, 4, (T)0);
+  __spirv_AtomicIAdd(d, 2, 0, (T)0);
+
+  __spirv_AtomicISub(a, 1, 16, (T)0);
+  __spirv_AtomicISub(b, 2, 8, (T)0);
+  __spirv_AtomicISub(c, 4, 4, (T)0);
+  __spirv_AtomicISub(d, 2, 0, (T)0);
+
+  __spirv_AtomicAnd(a, 1, 16, (T)0);
+  __spirv_AtomicAnd(b, 2, 8, (T)0);
+  __spirv_AtomicAnd(c, 4, 4, (T)0);
+  __spirv_AtomicAnd(d, 2, 0, (T)0);
+
+  __spirv_AtomicOr(a, 1, 16, (T)0);
+  __spirv_AtomicOr(b, 2, 8, (T)0);
+  __spirv_AtomicOr(c, 4, 4, (T)0);
+  __spirv_AtomicOr(d, 2, 0, (T)0);
+
+  __spirv_AtomicXor(a, 1, 16, (T)0);
+  __spirv_AtomicXor(b, 2, 8, (T)0);
+  __spirv_AtomicXor(c, 4, 4, (T)0);
+  __spirv_AtomicXor(d, 2, 0, (T)0);
+}
+
+template <typename T>
+void test_unsigned(T AS_GLOBAL *a, T AS_LOCAL *b, T AS_PRIVATE *c,
+                   T AS_GENERIC *d) {
+
+  __spirv_AtomicUMin(a, 1, 16, (T)0);
+  __spirv_AtomicUMin(b, 2, 8, (T)0);
+  __spirv_AtomicUMin(c, 4, 4, (T)0);
+  __spirv_AtomicUMin(d, 2, 0, (T)0);
+
+  __spirv_AtomicUMax(a, 1, 16, (T)0);
+  __spirv_AtomicUMax(b, 2, 8, (T)0);
+  __spirv_AtomicUMax(c, 4, 4, (T)0);
+  __spirv_AtomicUMax(d, 2, 0, (T)0);
+}
+
+template <typename T>
+void test_float(T AS_GLOBAL *a, T AS_LOCAL *b, T AS_PRIVATE *c,
+                T AS_GENERIC *d) {
+  __spirv_AtomicFMaxEXT(a, 1, 16, (T)0);
+  __spirv_AtomicFMaxEXT(b, 2, 8, (T)0);
+  __spirv_AtomicFMaxEXT(c, 4, 4, (T)0);
+  __spirv_AtomicFMaxEXT(d, 2, 0, (T)0);
+
+  __spirv_AtomicFMinEXT(a, 1, 16, (T)0);
+  __spirv_AtomicFMinEXT(b, 2, 8, (T)0);
+  __spirv_AtomicFMinEXT(c, 4, 4, (T)0);
+  __spirv_AtomicFMinEXT(d, 2, 0, (T)0);
+
+  __spirv_AtomicFAddEXT(a, 1, 16, (T)0);
+  __spirv_AtomicFAddEXT(b, 2, 8, (T)0);
+  __spirv_AtomicFAddEXT(c, 4, 4, (T)0);
+  __spirv_AtomicFAddEXT(d, 2, 0, (T)0);
+}
+
+void foo() {
+  int AS_GLOBAL *a;
+  int AS_LOCAL *b;
+  int AS_PRIVATE *c;
+  int AS_GENERIC *d;
+  test_flag(a, b, c, d);
+
+  test_signed(a, b, c, d);
+
+  unsigned int AS_GLOBAL *ua;
+  unsigned int AS_LOCAL *ub;
+  unsigned int AS_PRIVATE *uc;
+  unsigned int AS_GENERIC *ud;
+  test_unsigned(ua, ub, uc, ud);
+
+  float AS_GLOBAL *fa;
+  float AS_LOCAL *fb;
+  float AS_PRIVATE *fc;
+  float AS_GENERIC *fd;
+  test_float(fa, fb, fc, fd);
+}
+
+// CHECK: call spir_func noundef zeroext i1 @_Z28__spirv_AtomicFlagTestAndSetPU3AS1iii(
+// CHECK: call spir_func noundef zeroext i1 @_Z28__spirv_AtomicFlagTestAndSetPU3AS3iii(
+// CHECK: call spir_func noundef zeroext i1 @_Z28__spirv_AtomicFlagTestAndSetPiii(
+// CHECK: call spir_func noundef zeroext i1 @_Z28__spirv_AtomicFlagTestAndSetPU3AS4iii(
+// CHECK: call spir_func void @_Z23__spirv_AtomicFlagClearPU3AS1iii(
+// CHECK: call spir_func void @_Z23__spirv_AtomicFlagClearPU3AS3iii(
+// CHECK: call spir_func void @_Z23__spirv_AtomicFlagClearPiii(
+// CHECK: call spir_func void @_Z23__spirv_AtomicFlagClearPU3AS4iii(
+
+// CHECK: call spir_func noundef i32 @_Z18__spirv_AtomicLoadPU3AS1iii(
+// CHECK: call spir_func noundef i32 @_Z18__spirv_AtomicLoadPU3AS3iii(
+// CHECK: call spir_func noundef i32 @_Z18__spirv_AtomicLoadPiii(
+// CHECK: call spir_func noundef i32 @_Z18__spirv_AtomicLoadPU3AS4iii(
+// CHECK: call spir_func void @_Z19__spirv_AtomicStorePU3AS1iiii(
+// CHECK: call spir_func void @_Z19__spirv_AtomicStorePU3AS3iiii(
+// CHECK: call spir_func void @_Z19__spirv_AtomicStorePiiii(
+// CHECK: call spir_func void @_Z19__spirv_AtomicStorePU3AS4iiii(
+// CHECK: call spir_func noundef i32 @_Z22__spirv_AtomicExchangePU3AS1iiii(
+// CHECK: call spir_func noundef i32 @_Z22__spirv_AtomicExchangePU3AS3iiii(
+// CHECK: call spir_func noundef i32 @_Z22__spirv_AtomicExchangePiiii(
+// CHECK: call spir_func noundef i32 @_Z22__spirv_AtomicExchangePU3AS4iiii(
+// CHECK: call spir_func noundef i32 @_Z29__spirv_AtomicCompareExchangePU3AS1iiiiii(
+// CHECK: call spir_func noundef i32 @_Z29__spirv_AtomicCompareExchangePU3AS3iiiiii(
+// CHECK: call spir_func noundef i32 @_Z29__spirv_AtomicCompareExchangePiiiiii(
+// CHECK: call spir_func noundef i32 @_Z29__spirv_AtomicCompareExchangePU3AS4iiiiii(
+// CHECK: call spir_func noundef i32 @_Z33__spirv_AtomicCompareExchangeWeakPU3AS1iiiiii(
+// CHECK: call spir_func noundef i32 @_Z33__spirv_AtomicCompareExchangeWeakPU3AS3iiiiii(
+// CHECK: call spir_func noundef i32 @_Z33__spirv_AtomicCompareExchangeWeakPiiiiii(
+// CHECK: call spir_func noundef i32 @_Z33__spirv_AtomicCompareExchangeWeakPU3AS4iiiiii(
+// CHECK: call spir_func noundef i32 @_Z24__spirv_AtomicIIncrementPU3AS1iii(
+// CHECK: call spir_func noundef i32 @_Z24__spirv_AtomicIIncrementPU3AS3iii(
+// CHECK: call spir_func noundef i32 @_Z24__spirv_AtomicIIncrementPiii(
+// CHECK: call spir_func noundef i32 @_Z24__spirv_AtomicIIncrementPU3AS4iii(
+// CHECK: call spir_func noundef i32 @_Z24__spirv_AtomicIDecrementPU3AS1iii(
+// CHECK: call spir_func noundef i32 @_Z24__spirv_AtomicIDecrementPU3AS3iii(
+// CHECK: call spir_func noundef i32 @_Z24__spirv_AtomicIDecrementPiii(
+// CHECK: call spir_func noundef i32 @_Z24__spirv_AtomicIDecrementPU3AS4iii(
+// CHECK: call spir_func noundef i32 @_Z18__spirv_AtomicSMinPU3AS1iiii(
+// CHECK: call spir_func noundef i32 @_Z18__spirv_AtomicSMinPU3AS3iiii(
+// CHECK: call spir_func noundef i32 @_Z18__spirv_AtomicSMinPiiii(
+// CHECK: call spir_func noundef i32 @_Z18__spirv_AtomicSMinPU3AS4iiii(
+// CHECK: call spir_func noundef i32 @_Z18__spirv_AtomicSMaxPU3AS1iiii(
+// CHECK: call spir_func noundef i32 @_Z18__spirv_AtomicSMaxPU3AS3iiii(
+// CHECK: call spir_func noundef i32 @_Z18__spirv_AtomicSMaxPiiii(
+// CHECK: call spir_func noundef i32 @_Z18__spirv_AtomicSMaxPU3AS4iiii(
+// CHECK: call spir_func noundef i32 @_Z18__spirv_AtomicIAddPU3AS1iiii(
+// CHECK: call spir_func noundef i32 @_Z18__spirv_AtomicIAddPU3AS3iiii(
+// CHECK: call spir_func noundef i32 @_Z18__spirv_AtomicIAddPiiii(
+// CHECK: call spir_func noundef i32 @_Z18__spirv_AtomicIAddPU3AS4iiii(
+// CHECK: call spir_func noundef i32 @_Z18__spirv_AtomicISubPU3AS1iiii(
+// CHECK: call spir_func noundef i32 @_Z18__spirv_AtomicISubPU3AS3iiii(
+// CHECK: call spir_func noundef i32 @_Z18__spirv_AtomicISubPiiii(
+// CHECK: call spir_func noundef i32 @_Z18__spirv_AtomicISubPU3AS4iiii(
+// CHECK: call spir_func noundef i32 @_Z17__spirv_AtomicAndPU3AS1iiii(
+// CHECK: call spir_func noundef i32 @_Z17__spirv_AtomicAndPU3AS3iiii(
+// CHECK: call spir_func noundef i32 @_Z17__spirv_AtomicAndPiiii(
+// CHECK: call spir_func noundef i32 @_Z17__spirv_AtomicAndPU3AS4iiii(
+// CHECK: call spir_func noundef i32 @_Z16__spirv_AtomicOrPU3AS1iiii(
+// CHECK: call spir_func noundef i32 @_Z16__spirv_AtomicOrPU3AS3iiii(
+// CHECK: call spir_func noundef i32 @_Z16__spirv_AtomicOrPiiii(
+// CHECK: call spir_func noundef i32 @_Z16__spirv_AtomicOrPU3AS4iiii(
+// CHECK: call spir_func noundef i32 @_Z17__spirv_AtomicXorPU3AS1iiii(
+// CHECK: call spir_func noundef i32 @_Z17__spirv_AtomicXorPU3AS3iiii(
+// CHECK: call spir_func noundef i32 @_Z17__spirv_AtomicXorPiiii(
+// CHECK: call spir_func noundef i32 @_Z17__spirv_AtomicXorPU3AS4iiii(
+
+// CHECK: call spir_func noundef i32 @_Z18__spirv_AtomicUMinPU3AS1jiij(
+// CHECK: call spir_func noundef i32 @_Z18__spirv_AtomicUMinPU3AS3jiij(
+// CHECK: call spir_func noundef i32 @_Z18__spirv_AtomicUMinPjiij(
+// CHECK: call spir_func noundef i32 @_Z18__spirv_AtomicUMinPU3AS4jiij(
+// CHECK: call spir_func noundef i32 @_Z18__spirv_AtomicUMaxPU3AS1jiij(
+// CHECK: call spir_func noundef i32 @_Z18__spirv_AtomicUMaxPU3AS3jiij(
+// CHECK: call spir_func noundef i32 @_Z18__spirv_AtomicUMaxPjiij(
+// CHECK: call spir_func noundef i32 @_Z18__spirv_AtomicUMaxPU3AS4jiij(
+
+// CHECK: call spir_func noundef float @_Z21__spirv_AtomicFMaxEXTPU3AS1fiif(
+// CHECK: call spir_func noundef float @_Z21__spirv_AtomicFMaxEXTPU3AS3fiif(
+// CHECK: call spir_func noundef float @_Z21__spirv_AtomicFMaxEXTPfiif(
+// CHECK: call spir_func noundef float @_Z21__spirv_AtomicFMaxEXTPU3AS4fiif(
+// CHECK: call spir_func noundef float @_Z21__spirv_AtomicFMinEXTPU3AS1fiif(
+// CHECK: call spir_func noundef float
@_Z21__spirv_AtomicFMinEXTPU3AS3fiif( +// CHECK: call spir_func noundef float @_Z21__spirv_AtomicFMinEXTPfiif( +// CHECK: call spir_func noundef float @_Z21__spirv_AtomicFMinEXTPU3AS4fiif( +// CHECK: call spir_func noundef float @_Z21__spirv_AtomicFAddEXTPU3AS1fiif( +// CHECK: call spir_func noundef float @_Z21__spirv_AtomicFAddEXTPU3AS3fiif( +// CHECK: call spir_func noundef float @_Z21__spirv_AtomicFAddEXTPfiif( +// CHECK: call spir_func noundef float @_Z21__spirv_AtomicFAddEXTPU3AS4fiif( diff --git a/libclc/generic/lib/atomic/atomic_add.cl b/libclc/generic/lib/atomic/atomic_add.cl index 00e37399d602c..340e4445241f1 100644 --- a/libclc/generic/lib/atomic/atomic_add.cl +++ b/libclc/generic/lib/atomic/atomic_add.cl @@ -1,16 +1,14 @@ #include #include -#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED) \ - _CLC_OVERLOAD _CLC_DEF TYPE atomic_add(volatile AS TYPE *p, TYPE val) { \ - /* TODO: Stop manually mangling this name. Need C++ namespaces to get the \ - * exact mangling. */ \ - return _Z18__spirv_AtomicIAddPU3##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - p, Device, SequentiallyConsistent, val); \ +#define IMPL(TYPE, AS) \ + _CLC_OVERLOAD _CLC_DEF TYPE atomic_add(volatile AS TYPE *p, TYPE val) { \ + return __spirv_AtomicIAdd((AS TYPE *)p, Device, SequentiallyConsistent, \ + val); \ } -IMPL(int, i, global, AS1) -IMPL(unsigned int, j, global, AS1) -IMPL(int, i, local, AS3) -IMPL(unsigned int, j, local, AS3) +IMPL(int, global) +IMPL(unsigned int, global) +IMPL(int, local) +IMPL(unsigned int, local) #undef IMPL diff --git a/libclc/generic/lib/atomic/atomic_and.cl b/libclc/generic/lib/atomic/atomic_and.cl index 79369868bb356..5e162e7e66901 100644 --- a/libclc/generic/lib/atomic/atomic_and.cl +++ b/libclc/generic/lib/atomic/atomic_and.cl @@ -1,16 +1,14 @@ #include #include -#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED) \ - _CLC_OVERLOAD _CLC_DEF TYPE atomic_and(volatile AS TYPE *p, TYPE val) { \ - /* TODO: Stop manually mangling this name. Need C++ namespaces to get the \ - * exact mangling. */ \ - return _Z17__spirv_AtomicAndPU3##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - p, Device, SequentiallyConsistent, val); \ +#define IMPL(TYPE, AS) \ + _CLC_OVERLOAD _CLC_DEF TYPE atomic_and(volatile AS TYPE *p, TYPE val) { \ + return __spirv_AtomicAnd((AS TYPE *)p, Device, SequentiallyConsistent, \ + val); \ } -IMPL(int, i, global, AS1) -IMPL(unsigned int, j, global, AS1) -IMPL(int, i, local, AS3) -IMPL(unsigned int, j, local, AS3) +IMPL(int, global) +IMPL(unsigned int, global) +IMPL(int, local) +IMPL(unsigned int, local) #undef IMPL diff --git a/libclc/generic/lib/atomic/atomic_cmpxchg.cl b/libclc/generic/lib/atomic/atomic_cmpxchg.cl index 9e0375978bb48..25ceaf6cd8d4f 100644 --- a/libclc/generic/lib/atomic/atomic_cmpxchg.cl +++ b/libclc/generic/lib/atomic/atomic_cmpxchg.cl @@ -1,17 +1,16 @@ #include #include -#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED) \ - _CLC_OVERLOAD _CLC_DEF TYPE atomic_cmpxchg(volatile AS TYPE *p, TYPE cmp, \ - TYPE val) { \ - /* TODO: Stop manually mangling this name. Need C++ namespaces to get the \ - * exact mangling. 
*/ \ - return _Z29__spirv_AtomicCompareExchangePU3##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_##TYPE_MANGLED##TYPE_MANGLED( \ - p, Device, SequentiallyConsistent, SequentiallyConsistent, val, cmp); \ +#define IMPL(TYPE, AS) \ + _CLC_OVERLOAD _CLC_DEF TYPE atomic_cmpxchg(volatile AS TYPE *p, TYPE cmp, \ + TYPE val) { \ + return __spirv_AtomicCompareExchange((AS TYPE *)p, Device, \ + SequentiallyConsistent, \ + SequentiallyConsistent, val, cmp); \ } -IMPL(int, i, global, AS1) -IMPL(unsigned int, j, global, AS1) -IMPL(int, i, local, AS3) -IMPL(unsigned int, j, local, AS3) +IMPL(int, global) +IMPL(unsigned int, global) +IMPL(int, local) +IMPL(unsigned int, local) #undef IMPL diff --git a/libclc/generic/lib/atomic/atomic_dec.cl b/libclc/generic/lib/atomic/atomic_dec.cl index f1ff777e5dd44..68fc37fad022a 100644 --- a/libclc/generic/lib/atomic/atomic_dec.cl +++ b/libclc/generic/lib/atomic/atomic_dec.cl @@ -1,16 +1,14 @@ #include #include -#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED) \ - _CLC_OVERLOAD _CLC_DEF TYPE atomic_dec(volatile AS TYPE *p) { \ - /* TODO: Stop manually mangling this name. Need C++ namespaces to get the \ - * exact mangling. */ \ - return _Z24__spirv_AtomicIDecrementPU3##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \ - p, Device, SequentiallyConsistent); \ +#define IMPL(TYPE, AS) \ + _CLC_OVERLOAD _CLC_DEF TYPE atomic_dec(volatile AS TYPE *p) { \ + return __spirv_AtomicIDecrement((AS TYPE *)p, Device, \ + SequentiallyConsistent); \ } -IMPL(int, i, global, AS1) -IMPL(unsigned int, j, global, AS1) -IMPL(int, i, local, AS3) -IMPL(unsigned int, j, local, AS3) +IMPL(int, global) +IMPL(unsigned int, global) +IMPL(int, local) +IMPL(unsigned int, local) #undef IMPL diff --git a/libclc/generic/lib/atomic/atomic_inc.cl b/libclc/generic/lib/atomic/atomic_inc.cl index d8120426a7575..5c92c4d800cf2 100644 --- a/libclc/generic/lib/atomic/atomic_inc.cl +++ b/libclc/generic/lib/atomic/atomic_inc.cl @@ -1,16 +1,14 @@ #include #include -#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED) \ - _CLC_OVERLOAD _CLC_DEF TYPE atomic_inc(volatile AS TYPE *p) { \ - /* TODO: Stop manually mangling this name. Need C++ namespaces to get the \ - * exact mangling. */ \ - return _Z24__spirv_AtomicIIncrementPU3##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \ - p, Device, SequentiallyConsistent); \ +#define IMPL(TYPE, AS) \ + _CLC_OVERLOAD _CLC_DEF TYPE atomic_inc(volatile AS TYPE *p) { \ + return __spirv_AtomicIIncrement((AS TYPE *)p, Device, \ + SequentiallyConsistent); \ } -IMPL(int, i, global, AS1) -IMPL(unsigned int, j, global, AS1) -IMPL(int, i, local, AS3) -IMPL(unsigned int, j, local, AS3) +IMPL(int, global) +IMPL(unsigned int, global) +IMPL(int, local) +IMPL(unsigned int, local) #undef IMPL diff --git a/libclc/generic/lib/atomic/atomic_max.cl b/libclc/generic/lib/atomic/atomic_max.cl index 8ec8acb0d3681..b2ec938c43638 100644 --- a/libclc/generic/lib/atomic/atomic_max.cl +++ b/libclc/generic/lib/atomic/atomic_max.cl @@ -1,16 +1,13 @@ #include #include -#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED, OP) \ - _CLC_OVERLOAD _CLC_DEF TYPE atomic_max(volatile AS TYPE *p, TYPE val) { \ - /* TODO: Stop manually mangling this name. Need C++ namespaces to get the \ - * exact mangling. 
*/ \ - return _Z18##OP##PU3##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - p, Device, SequentiallyConsistent, val); \ +#define IMPL(TYPE, AS, OP) \ + _CLC_OVERLOAD _CLC_DEF TYPE atomic_max(volatile AS TYPE *p, TYPE val) { \ + return OP((AS TYPE *)p, Device, SequentiallyConsistent, val); \ } -IMPL(int, i, global, AS1, __spirv_AtomicSMax) -IMPL(unsigned int, j, global, AS1, __spirv_AtomicUMax) -IMPL(int, i, local, AS3, __spirv_AtomicSMax) -IMPL(unsigned int, j, local, AS3, __spirv_AtomicUMax) +IMPL(int, global, __spirv_AtomicSMax) +IMPL(unsigned int, global, __spirv_AtomicUMax) +IMPL(int, local, __spirv_AtomicSMax) +IMPL(unsigned int, local, __spirv_AtomicUMax) #undef IMPL diff --git a/libclc/generic/lib/atomic/atomic_min.cl b/libclc/generic/lib/atomic/atomic_min.cl index af1208035446f..86208e21ab110 100644 --- a/libclc/generic/lib/atomic/atomic_min.cl +++ b/libclc/generic/lib/atomic/atomic_min.cl @@ -1,16 +1,13 @@ #include #include -#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED, OP) \ - _CLC_OVERLOAD _CLC_DEF TYPE atomic_min(volatile AS TYPE *p, TYPE val) { \ - /* TODO: Stop manually mangling this name. Need C++ namespaces to get the \ - * exact mangling. */ \ - return _Z18##OP##PU3##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - p, Device, SequentiallyConsistent, val); \ +#define IMPL(TYPE, AS, OP) \ + _CLC_OVERLOAD _CLC_DEF TYPE atomic_min(volatile AS TYPE *p, TYPE val) { \ + return OP((AS TYPE *)p, Device, SequentiallyConsistent, val); \ } -IMPL(int, i, global, AS1, __spirv_AtomicSMin) -IMPL(unsigned int, j, global, AS1, __spirv_AtomicUMin) -IMPL(int, i, local, AS3, __spirv_AtomicSMin) -IMPL(unsigned int, j, local, AS3, __spirv_AtomicUMin) +IMPL(int, global, __spirv_AtomicSMin) +IMPL(unsigned int, global, __spirv_AtomicUMin) +IMPL(int, local, __spirv_AtomicSMin) +IMPL(unsigned int, local, __spirv_AtomicUMin) #undef IMPL diff --git a/libclc/generic/lib/atomic/atomic_or.cl b/libclc/generic/lib/atomic/atomic_or.cl index 2eee23494462b..3454d5ea7c4cd 100644 --- a/libclc/generic/lib/atomic/atomic_or.cl +++ b/libclc/generic/lib/atomic/atomic_or.cl @@ -1,16 +1,14 @@ #include #include -#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED) \ - _CLC_OVERLOAD _CLC_DEF TYPE atomic_or(volatile AS TYPE *p, TYPE val) { \ - /* TODO: Stop manually mangling this name. Need C++ namespaces to get the \ - * exact mangling. */ \ - return _Z16__spirv_AtomicOrPU3##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - p, Device, SequentiallyConsistent, val); \ +#define IMPL(TYPE, AS) \ + _CLC_OVERLOAD _CLC_DEF TYPE atomic_or(volatile AS TYPE *p, TYPE val) { \ + return __spirv_AtomicOr((AS TYPE *)p, Device, SequentiallyConsistent, \ + val); \ } -IMPL(int, i, global, AS1) -IMPL(unsigned int, j, global, AS1) -IMPL(int, i, local, AS3) -IMPL(unsigned int, j, local, AS3) +IMPL(int, global) +IMPL(unsigned int, global) +IMPL(int, local) +IMPL(unsigned int, local) #undef IMPL diff --git a/libclc/generic/lib/atomic/atomic_sub.cl b/libclc/generic/lib/atomic/atomic_sub.cl index 04d93010f606a..a2a7d06ebda43 100644 --- a/libclc/generic/lib/atomic/atomic_sub.cl +++ b/libclc/generic/lib/atomic/atomic_sub.cl @@ -1,16 +1,14 @@ #include #include -#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED) \ - _CLC_OVERLOAD _CLC_DEF TYPE atomic_sub(volatile AS TYPE *p, TYPE val) { \ - /* TODO: Stop manually mangling this name. Need C++ namespaces to get the \ - * exact mangling. 
*/ \ - return _Z18__spirv_AtomicISubPU3##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - p, Device, SequentiallyConsistent, val); \ +#define IMPL(TYPE, AS) \ + _CLC_OVERLOAD _CLC_DEF TYPE atomic_sub(volatile AS TYPE *p, TYPE val) { \ + return __spirv_AtomicISub((AS TYPE *)p, Device, SequentiallyConsistent, \ + val); \ } -IMPL(int, i, global, AS1) -IMPL(unsigned int, j, global, AS1) -IMPL(int, i, local, AS3) -IMPL(unsigned int, j, local, AS3) +IMPL(int, global) +IMPL(unsigned int, global) +IMPL(int, local) +IMPL(unsigned int, local) #undef IMPL diff --git a/libclc/generic/lib/atomic/atomic_xchg.cl b/libclc/generic/lib/atomic/atomic_xchg.cl index b658c834b9ecf..706e8c76aacff 100644 --- a/libclc/generic/lib/atomic/atomic_xchg.cl +++ b/libclc/generic/lib/atomic/atomic_xchg.cl @@ -2,27 +2,23 @@ #include _CLC_OVERLOAD _CLC_DEF float atomic_xchg(volatile global float *p, float val) { - /* TODO: Stop manually mangling this name. Need C++ namespaces to get the exact mangling. */ - return _Z22__spirv_AtomicExchangePU3AS1fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf( - p, Device, SequentiallyConsistent, val); + return __spirv_AtomicExchange((global float *)p, Device, + SequentiallyConsistent, val); } _CLC_OVERLOAD _CLC_DEF float atomic_xchg(volatile local float *p, float val) { - /* TODO: Stop manually mangling this name. Need C++ namespaces to get the exact mangling. */ - return _Z22__spirv_AtomicExchangePU3AS3fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf( - p, Device, SequentiallyConsistent, val); + return __spirv_AtomicExchange((local float *)p, Device, + SequentiallyConsistent, val); } -#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED) \ - _CLC_OVERLOAD _CLC_DEF TYPE atomic_xchg(volatile AS TYPE *p, TYPE val) { \ - /* TODO: Stop manually mangling this name. Need C++ namespaces to get the \ - * exact mangling. */ \ - return _Z22__spirv_AtomicExchangePU3##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - p, Device, SequentiallyConsistent, val); \ +#define IMPL(TYPE, AS) \ + _CLC_OVERLOAD _CLC_DEF TYPE atomic_xchg(volatile AS TYPE *p, TYPE val) { \ + return __spirv_AtomicExchange((AS TYPE *)p, Device, \ + SequentiallyConsistent, val); \ } -IMPL(int, i, global, AS1) -IMPL(unsigned int, j, global, AS1) -IMPL(int, i, local, AS3) -IMPL(unsigned int, j, local, AS3) +IMPL(int, global) +IMPL(unsigned int, global) +IMPL(int, local) +IMPL(unsigned int, local) #undef IMPL diff --git a/libclc/generic/lib/atomic/atomic_xor.cl b/libclc/generic/lib/atomic/atomic_xor.cl index 27b28ac82ed24..571a0d0282484 100644 --- a/libclc/generic/lib/atomic/atomic_xor.cl +++ b/libclc/generic/lib/atomic/atomic_xor.cl @@ -1,16 +1,14 @@ #include #include -#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED) \ - _CLC_OVERLOAD _CLC_DEF TYPE atomic_xor(volatile AS TYPE *p, TYPE val) { \ - /* TODO: Stop manually mangling this name. Need C++ namespaces to get the \ - * exact mangling. 
*/ \ - return _Z17__spirv_AtomicXorPU3##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - p, Device, SequentiallyConsistent, val); \ +#define IMPL(TYPE, AS) \ + _CLC_OVERLOAD _CLC_DEF TYPE atomic_xor(volatile AS TYPE *p, TYPE val) { \ + return __spirv_AtomicXor((AS TYPE *)p, Device, SequentiallyConsistent, \ + val); \ } -IMPL(int, i, global, AS1) -IMPL(unsigned int, j, global, AS1) -IMPL(int, i, local, AS3) -IMPL(unsigned int, j, local, AS3) +IMPL(int, global) +IMPL(unsigned int, global) +IMPL(int, local) +IMPL(unsigned int, local) #undef IMPL diff --git a/libclc/generic/lib/cl_khr_int64_base_atomics/atom_add.cl b/libclc/generic/lib/cl_khr_int64_base_atomics/atom_add.cl index 78d8d5b8b6f15..c48a11e5aa23c 100644 --- a/libclc/generic/lib/cl_khr_int64_base_atomics/atom_add.cl +++ b/libclc/generic/lib/cl_khr_int64_base_atomics/atom_add.cl @@ -1,20 +1,18 @@ #include #include -// TODO: Stop manually mangling this name. Need C++ namespaces to get the exact mangling. - #ifdef cl_khr_int64_base_atomics -#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED) \ - _CLC_OVERLOAD _CLC_DEF TYPE atom_add(volatile AS TYPE *p, TYPE val) { \ - return _Z18__spirv_AtomicIAddPU3##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - p, Device, SequentiallyConsistent, val); \ +#define IMPL(TYPE, AS) \ + _CLC_OVERLOAD _CLC_DEF TYPE atom_add(volatile AS TYPE *p, TYPE val) { \ + return __spirv_AtomicIAdd((AS TYPE *)p, Device, SequentiallyConsistent, \ + val); \ } -IMPL(long, l, global, AS1) -IMPL(unsigned long, m, global, AS1) -IMPL(long, l, local, AS3) -IMPL(unsigned long, m, local, AS3) +IMPL(long, global) +IMPL(unsigned long, global) +IMPL(long, local) +IMPL(unsigned long, local) #undef IMPL #endif diff --git a/libclc/generic/lib/cl_khr_int64_base_atomics/atom_cmpxchg.cl b/libclc/generic/lib/cl_khr_int64_base_atomics/atom_cmpxchg.cl index e6225d6acfd78..377f64d06b582 100644 --- a/libclc/generic/lib/cl_khr_int64_base_atomics/atom_cmpxchg.cl +++ b/libclc/generic/lib/cl_khr_int64_base_atomics/atom_cmpxchg.cl @@ -3,17 +3,18 @@ #ifdef cl_khr_int64_base_atomics -#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED) \ - _CLC_OVERLOAD _CLC_DEF TYPE atom_cmpxchg(volatile AS TYPE *p, TYPE cmp, \ - TYPE val) { \ - return _Z29__spirv_AtomicCompareExchangePU3##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_##TYPE_MANGLED##TYPE_MANGLED( \ - p, Device, SequentiallyConsistent, SequentiallyConsistent, cmp, val); \ +#define IMPL(TYPE, AS) \ + _CLC_OVERLOAD _CLC_DEF TYPE atom_cmpxchg(volatile AS TYPE *p, TYPE cmp, \ + TYPE val) { \ + return __spirv_AtomicCompareExchange((AS TYPE *)p, Device, \ + SequentiallyConsistent, \ + SequentiallyConsistent, cmp, val); \ } -IMPL(long, l, global, AS1) -IMPL(unsigned long, m, global, AS1) -IMPL(long, l, local, AS3) -IMPL(unsigned long, m, local, AS3) +IMPL(long, global) +IMPL(unsigned long, global) +IMPL(long, local) +IMPL(unsigned long, local) #undef IMPL #endif diff --git a/libclc/generic/lib/cl_khr_int64_base_atomics/atom_sub.cl b/libclc/generic/lib/cl_khr_int64_base_atomics/atom_sub.cl index c3a54b161118f..3ccd958cc2de9 100644 --- a/libclc/generic/lib/cl_khr_int64_base_atomics/atom_sub.cl +++ b/libclc/generic/lib/cl_khr_int64_base_atomics/atom_sub.cl @@ -1,20 +1,18 @@ #include #include -// TODO: Stop manually mangling this name. Need C++ namespaces to get the exact mangling. 
- #ifdef cl_khr_int64_base_atomics -#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED) \ - _CLC_OVERLOAD _CLC_DEF TYPE atom_sub(volatile AS TYPE *p, TYPE val) { \ - return _Z18__spirv_AtomicISubPU3##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - p, Device, SequentiallyConsistent, val); \ +#define IMPL(TYPE, AS) \ + _CLC_OVERLOAD _CLC_DEF TYPE atom_sub(volatile AS TYPE *p, TYPE val) { \ + return __spirv_AtomicISub((AS TYPE *)p, Device, SequentiallyConsistent, \ + val); \ } -IMPL(long, l, global, AS1) -IMPL(unsigned long, m, global, AS1) -IMPL(long, l, local, AS3) -IMPL(unsigned long, m, local, AS3) +IMPL(long, global) +IMPL(unsigned long, global) +IMPL(long, local) +IMPL(unsigned long, local) #undef IMPL #endif diff --git a/libclc/generic/lib/cl_khr_int64_base_atomics/atom_xchg.cl b/libclc/generic/lib/cl_khr_int64_base_atomics/atom_xchg.cl index 75ac67c44522a..00fd573376965 100644 --- a/libclc/generic/lib/cl_khr_int64_base_atomics/atom_xchg.cl +++ b/libclc/generic/lib/cl_khr_int64_base_atomics/atom_xchg.cl @@ -3,16 +3,16 @@ #ifdef cl_khr_int64_base_atomics -#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED) \ - _CLC_OVERLOAD _CLC_DEF TYPE atom_xchg(volatile AS TYPE *p, TYPE val) { \ - return _Z22__spirv_AtomicExchangePU3##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - p, Device, SequentiallyConsistent, val); \ +#define IMPL(TYPE, AS) \ + _CLC_OVERLOAD _CLC_DEF TYPE atom_xchg(volatile AS TYPE *p, TYPE val) { \ + return __spirv_AtomicExchange((AS TYPE *)p, Device, \ + SequentiallyConsistent, val); \ } -IMPL(long, l, global, AS1) -IMPL(unsigned long, m, global, AS1) -IMPL(long, l, local, AS3) -IMPL(unsigned long, m, local, AS3) +IMPL(long, global) +IMPL(unsigned long, global) +IMPL(long, local) +IMPL(unsigned long, local) #undef IMPL #endif diff --git a/libclc/generic/lib/cl_khr_int64_extended_atomics/atom_and.cl b/libclc/generic/lib/cl_khr_int64_extended_atomics/atom_and.cl index cce06ee739b6e..0f5c3057bcb19 100644 --- a/libclc/generic/lib/cl_khr_int64_extended_atomics/atom_and.cl +++ b/libclc/generic/lib/cl_khr_int64_extended_atomics/atom_and.cl @@ -3,16 +3,16 @@ #ifdef cl_khr_int64_extended_atomics -#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED) \ - _CLC_OVERLOAD _CLC_DEF TYPE atom_and(volatile AS TYPE *p, TYPE val) { \ - return _Z17__spirv_AtomicAndPU3##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - p, Device, SequentiallyConsistent, val); \ +#define IMPL(TYPE, AS) \ + _CLC_OVERLOAD _CLC_DEF TYPE atom_and(volatile AS TYPE *p, TYPE val) { \ + return __spirv_AtomicAnd((AS TYPE *)p, Device, SequentiallyConsistent, \ + val); \ } -IMPL(long, l, global, AS1) -IMPL(unsigned long, m, global, AS1) -IMPL(long, l, local, AS3) -IMPL(unsigned long, m, local, AS3) +IMPL(long, global) +IMPL(unsigned long, global) +IMPL(long, local) +IMPL(unsigned long, local) #undef IMPL #endif diff --git a/libclc/generic/lib/cl_khr_int64_extended_atomics/atom_max.cl b/libclc/generic/lib/cl_khr_int64_extended_atomics/atom_max.cl index e183488fb3a9d..0462bed336dcf 100644 --- a/libclc/generic/lib/cl_khr_int64_extended_atomics/atom_max.cl +++ b/libclc/generic/lib/cl_khr_int64_extended_atomics/atom_max.cl @@ -3,16 +3,15 @@ #ifdef cl_khr_int64_extended_atomics -#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED, NAME) \ - _CLC_OVERLOAD _CLC_DEF TYPE atom_max(volatile AS TYPE *p, TYPE val) { \ - return 
_Z18##NAME##PU3##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - p, Device, SequentiallyConsistent, val); \ +#define IMPL(TYPE, AS, NAME) \ + _CLC_OVERLOAD _CLC_DEF TYPE atom_max(volatile AS TYPE *p, TYPE val) { \ + return NAME((AS TYPE *)p, Device, SequentiallyConsistent, val); \ } -IMPL(long, l, global, AS1, __spirv_AtomicSMax) -IMPL(unsigned long, m, global, AS1, __spirv_AtomicUMax) -IMPL(long, l, local, AS3, __spirv_AtomicSMax) -IMPL(unsigned long, m, local, AS3, __spirv_AtomicUMax) +IMPL(long, global, __spirv_AtomicSMax) +IMPL(unsigned long, global, __spirv_AtomicUMax) +IMPL(long, local, __spirv_AtomicSMax) +IMPL(unsigned long, local, __spirv_AtomicUMax) #undef IMPL #endif diff --git a/libclc/generic/lib/cl_khr_int64_extended_atomics/atom_min.cl b/libclc/generic/lib/cl_khr_int64_extended_atomics/atom_min.cl index 061a94de710de..f9ea77fad0b77 100644 --- a/libclc/generic/lib/cl_khr_int64_extended_atomics/atom_min.cl +++ b/libclc/generic/lib/cl_khr_int64_extended_atomics/atom_min.cl @@ -3,16 +3,15 @@ #ifdef cl_khr_int64_extended_atomics -#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED, NAME) \ - _CLC_OVERLOAD _CLC_DEF TYPE atom_min(volatile AS TYPE *p, TYPE val) { \ - return _Z18##NAME##PU3##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - p, Device, SequentiallyConsistent, val); \ +#define IMPL(TYPE, AS, NAME) \ + _CLC_OVERLOAD _CLC_DEF TYPE atom_min(volatile AS TYPE *p, TYPE val) { \ + return NAME((AS TYPE *)p, Device, SequentiallyConsistent, val); \ } -IMPL(long, l, global, AS1, __spirv_AtomicSMin) -IMPL(unsigned long, m, global, AS1, __spirv_AtomicUMin) -IMPL(long, l, local, AS3, __spirv_AtomicSMin) -IMPL(unsigned long, m, local, AS3, __spirv_AtomicUMin) +IMPL(long, global, __spirv_AtomicSMin) +IMPL(unsigned long, global, __spirv_AtomicUMin) +IMPL(long, local, __spirv_AtomicSMin) +IMPL(unsigned long, local, __spirv_AtomicUMin) #undef IMPL #endif diff --git a/libclc/generic/lib/cl_khr_int64_extended_atomics/atom_or.cl b/libclc/generic/lib/cl_khr_int64_extended_atomics/atom_or.cl index c4ada923fb6f1..c130a9468635a 100644 --- a/libclc/generic/lib/cl_khr_int64_extended_atomics/atom_or.cl +++ b/libclc/generic/lib/cl_khr_int64_extended_atomics/atom_or.cl @@ -3,16 +3,16 @@ #ifdef cl_khr_int64_extended_atomics -#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED) \ - _CLC_OVERLOAD _CLC_DEF TYPE atom_or(volatile AS TYPE *p, TYPE val) { \ - return _Z16__spirv_AtomicOrPU3##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - p, Device, SequentiallyConsistent, val); \ +#define IMPL(TYPE, AS) \ + _CLC_OVERLOAD _CLC_DEF TYPE atom_or(volatile AS TYPE *p, TYPE val) { \ + return __spirv_AtomicOr((AS TYPE *)p, Device, SequentiallyConsistent, \ + val); \ } -IMPL(long, l, global, AS1) -IMPL(unsigned long, m, global, AS1) -IMPL(long, l, local, AS3) -IMPL(unsigned long, m, local, AS3) +IMPL(long, global) +IMPL(unsigned long, global) +IMPL(long, local) +IMPL(unsigned long, local) #undef IMPL #endif diff --git a/libclc/generic/lib/cl_khr_int64_extended_atomics/atom_xor.cl b/libclc/generic/lib/cl_khr_int64_extended_atomics/atom_xor.cl index bae73ea266820..af3d6452470ca 100644 --- a/libclc/generic/lib/cl_khr_int64_extended_atomics/atom_xor.cl +++ b/libclc/generic/lib/cl_khr_int64_extended_atomics/atom_xor.cl @@ -3,16 +3,16 @@ #ifdef cl_khr_int64_extended_atomics -#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED) \ - _CLC_OVERLOAD _CLC_DEF TYPE atom_xor(volatile AS TYPE *p, TYPE 
val) { \ - return _Z17__spirv_AtomicXorPU3##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - p, Device, SequentiallyConsistent, val); \ +#define IMPL(TYPE, AS) \ + _CLC_OVERLOAD _CLC_DEF TYPE atom_xor(volatile AS TYPE *p, TYPE val) { \ + return __spirv_AtomicXor((AS TYPE *)p, Device, SequentiallyConsistent, \ + val); \ } -IMPL(long, l, global, AS1) -IMPL(unsigned long, m, global, AS1) -IMPL(long, l, local, AS3) -IMPL(unsigned long, m, local, AS3) +IMPL(long, global) +IMPL(unsigned long, global) +IMPL(long, local) +IMPL(unsigned long, local) #undef IMPL #endif diff --git a/libclc/libspirv/include/libspirv/atomic/atomic_add.h b/libclc/libspirv/include/libspirv/atomic/atomic_add.h index ff05f7176be3e..af05f0fa80007 100644 --- a/libclc/libspirv/include/libspirv/atomic/atomic_add.h +++ b/libclc/libspirv/include/libspirv/atomic/atomic_add.h @@ -7,8 +7,6 @@ //===----------------------------------------------------------------------===// #define __SPIRV_FUNCTION_S __spirv_AtomicIAdd -#define __SPIRV_FUNCTION_S_LEN 18 #define __SPIRV_FUNCTION_U __spirv_AtomicIAdd -#define __SPIRV_FUNCTION_U_LEN 18 #define __SPIRV_INT64_BASE #include diff --git a/libclc/libspirv/include/libspirv/atomic/atomic_and.h b/libclc/libspirv/include/libspirv/atomic/atomic_and.h index 9df1e5050a9e5..948433bc44f34 100644 --- a/libclc/libspirv/include/libspirv/atomic/atomic_and.h +++ b/libclc/libspirv/include/libspirv/atomic/atomic_and.h @@ -7,8 +7,6 @@ //===----------------------------------------------------------------------===// #define __SPIRV_FUNCTION_S __spirv_AtomicAnd -#define __SPIRV_FUNCTION_S_LEN 17 #define __SPIRV_FUNCTION_U __spirv_AtomicAnd -#define __SPIRV_FUNCTION_U_LEN 17 #define __SPIRV_INT64_EXTENDED #include diff --git a/libclc/libspirv/include/libspirv/atomic/atomic_cmpxchg.h b/libclc/libspirv/include/libspirv/atomic/atomic_cmpxchg.h index 9a643d4295857..9c4dac2864a8e 100644 --- a/libclc/libspirv/include/libspirv/atomic/atomic_cmpxchg.h +++ b/libclc/libspirv/include/libspirv/atomic/atomic_cmpxchg.h @@ -6,40 +6,22 @@ // //===----------------------------------------------------------------------===// -// TODO: Stop manually mangling this name. Need C++ namespaces to get the exact -// mangling. 
-_CLC_DECL int -_Z29__spirv_AtomicCompareExchangePU3AS3iN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_ii( - volatile local int *, enum Scope, enum MemorySemanticsMask, - enum MemorySemanticsMask, int, int); -_CLC_DECL int -_Z29__spirv_AtomicCompareExchangePU3AS1iN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_ii( - volatile global int *, enum Scope, enum MemorySemanticsMask, - enum MemorySemanticsMask, int, int); -_CLC_DECL uint -_Z29__spirv_AtomicCompareExchangePU3AS3jN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_jj( - volatile local uint *, enum Scope, enum MemorySemanticsMask, - enum MemorySemanticsMask, uint, uint); -_CLC_DECL uint -_Z29__spirv_AtomicCompareExchangePU3AS1jN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_jj( - volatile global uint *, enum Scope, enum MemorySemanticsMask, - enum MemorySemanticsMask, uint, uint); +#define DECL(TYPE, AS) \ + _CLC_OVERLOAD _CLC_DECL TYPE __spirv_AtomicCompareExchange( \ + AS TYPE *, int, int, int, TYPE, TYPE); + +#define DECL_AS(TYPE) \ + DECL(TYPE, global) \ + DECL(TYPE, local) \ + DECL(TYPE, ) + +DECL_AS(int) +DECL_AS(unsigned int) #ifdef cl_khr_int64_base_atomics -_CLC_DECL long -_Z29__spirv_AtomicCompareExchangePU3AS3lN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_ll( - volatile local long *, enum Scope, enum MemorySemanticsMask, - enum MemorySemanticsMask, long, long); -_CLC_DECL long -_Z29__spirv_AtomicCompareExchangePU3AS1lN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_ll( - volatile global long *, enum Scope, enum MemorySemanticsMask, - enum MemorySemanticsMask, long, long); -_CLC_DECL unsigned long -_Z29__spirv_AtomicCompareExchangePU3AS3mN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_mm( - volatile local unsigned long *, enum Scope, enum MemorySemanticsMask, - enum MemorySemanticsMask, unsigned long, unsigned long); -_CLC_DECL unsigned long -_Z29__spirv_AtomicCompareExchangePU3AS1mN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_mm( - volatile global unsigned long *, enum Scope, enum MemorySemanticsMask, - enum MemorySemanticsMask, unsigned long, unsigned long); +DECL_AS(long) +DECL_AS(unsigned long) #endif + +#undef DECL_AS +#undef DECL diff --git a/libclc/libspirv/include/libspirv/atomic/atomic_dec.h b/libclc/libspirv/include/libspirv/atomic/atomic_dec.h index d1908c1958b12..2335c3d6dfa24 100644 --- a/libclc/libspirv/include/libspirv/atomic/atomic_dec.h +++ b/libclc/libspirv/include/libspirv/atomic/atomic_dec.h @@ -6,30 +6,24 @@ // //===----------------------------------------------------------------------===// -_CLC_DECL int -_Z24__spirv_AtomicIDecrementPU3AS3iN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( - volatile local int *, enum Scope, enum MemorySemanticsMask); -_CLC_DECL int -_Z24__spirv_AtomicIDecrementPU3AS1iN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( - volatile global int *, enum Scope, enum MemorySemanticsMask); -_CLC_DECL uint -_Z24__spirv_AtomicIDecrementPU3AS3jN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( - volatile local uint *, enum Scope, enum MemorySemanticsMask); -_CLC_DECL uint -_Z24__spirv_AtomicIDecrementPU3AS1jN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( - volatile global uint *, enum Scope, enum MemorySemanticsMask); +_CLC_OVERLOAD _CLC_DECL int __spirv_AtomicIDecrement(local int *, int Scope, + enum MemorySemanticsMask); +_CLC_OVERLOAD _CLC_DECL int __spirv_AtomicIDecrement(global int *, int Scope, + enum MemorySemanticsMask); +_CLC_OVERLOAD _CLC_DECL uint __spirv_AtomicIDecrement(local uint *, int Scope, + enum 
MemorySemanticsMask); +_CLC_OVERLOAD _CLC_DECL uint __spirv_AtomicIDecrement(global uint *, int Scope, + enum MemorySemanticsMask); #ifdef cl_khr_int64_base_atomics -_CLC_DECL long -_Z24__spirv_AtomicIDecrementPU3AS3lN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( - volatile local long *, enum Scope, enum MemorySemanticsMask); -_CLC_DECL long -_Z24__spirv_AtomicIDecrementPU3AS1lN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( - volatile global long *, enum Scope, enum MemorySemanticsMask); -_CLC_DECL unsigned long -_Z24__spirv_AtomicIDecrementPU3AS3mN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( - volatile local unsigned long *, enum Scope, enum MemorySemanticsMask); -_CLC_DECL unsigned long -_Z24__spirv_AtomicIDecrementPU3AS1mN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( - volatile global unsigned long *, enum Scope, enum MemorySemanticsMask); +_CLC_OVERLOAD _CLC_DECL long __spirv_AtomicIDecrement(local long *, int Scope, + enum MemorySemanticsMask); +_CLC_OVERLOAD _CLC_DECL long __spirv_AtomicIDecrement(global long *, int Scope, + enum MemorySemanticsMask); +_CLC_OVERLOAD _CLC_DECL unsigned long +__spirv_AtomicIDecrement(local unsigned long *, int Scope, + enum MemorySemanticsMask); +_CLC_OVERLOAD _CLC_DECL unsigned long +__spirv_AtomicIDecrement(global unsigned long *, int Scope, + enum MemorySemanticsMask); #endif diff --git a/libclc/libspirv/include/libspirv/atomic/atomic_decl.inc b/libclc/libspirv/include/libspirv/atomic/atomic_decl.inc index 61f386af817c2..21122f9dd5585 100644 --- a/libclc/libspirv/include/libspirv/atomic/atomic_decl.inc +++ b/libclc/libspirv/include/libspirv/atomic/atomic_decl.inc @@ -6,39 +6,29 @@ // //===----------------------------------------------------------------------===// -// TODO: Stop manually mangling this name. Need C++ namespaces to get the exact -// mangling. 
-#define __CLC_DECLARE_ATOMIC(ADDRSPACE, ADDRSPACE_MANGLED, TYPE, TYPE_MANGLED, \ - NAME, NAME_LEN) \ - _CLC_DECL TYPE \ - _Z##NAME_LEN##NAME##PU3##ADDRSPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - volatile ADDRSPACE TYPE *, enum Scope, enum MemorySemanticsMask, \ - TYPE); +#define __CLC_DECLARE_ATOMIC(ADDRSPACE, TYPE, NAME) \ + _CLC_OVERLOAD _CLC_DECL TYPE NAME(ADDRSPACE TYPE *, int Scope, \ + int MemorySemanticsMask, TYPE); -#define __CLC_DECLARE_ATOMIC_ADDRSPACE(TYPE, TYPE_MANGLED, NAME, NAME_LEN) \ - __CLC_DECLARE_ATOMIC(global, AS1, TYPE, TYPE_MANGLED, NAME, NAME_LEN) \ - __CLC_DECLARE_ATOMIC(local, AS3, TYPE, TYPE_MANGLED, NAME, NAME_LEN) +#define __CLC_DECLARE_ATOMIC_ADDRSPACE(TYPE, NAME) \ + __CLC_DECLARE_ATOMIC(global, TYPE, NAME) \ + __CLC_DECLARE_ATOMIC(local, TYPE, NAME) \ + __CLC_DECLARE_ATOMIC(, TYPE, NAME) -__CLC_DECLARE_ATOMIC_ADDRSPACE(int, i, __SPIRV_FUNCTION_S, - __SPIRV_FUNCTION_S_LEN) -__CLC_DECLARE_ATOMIC_ADDRSPACE(uint, j, __SPIRV_FUNCTION_U, - __SPIRV_FUNCTION_U_LEN) +__CLC_DECLARE_ATOMIC_ADDRSPACE(int, __SPIRV_FUNCTION_S) +__CLC_DECLARE_ATOMIC_ADDRSPACE(uint, __SPIRV_FUNCTION_U) #ifdef __SPIRV_INT64_EXTENDED #ifdef cl_khr_int64_extended_atomics -__CLC_DECLARE_ATOMIC_ADDRSPACE(long, l, __SPIRV_FUNCTION_S, - __SPIRV_FUNCTION_S_LEN) -__CLC_DECLARE_ATOMIC_ADDRSPACE(ulong, m, __SPIRV_FUNCTION_U, - __SPIRV_FUNCTION_U_LEN) +__CLC_DECLARE_ATOMIC_ADDRSPACE(long, __SPIRV_FUNCTION_S) +__CLC_DECLARE_ATOMIC_ADDRSPACE(ulong, __SPIRV_FUNCTION_U) #endif #endif #ifdef __SPIRV_INT64_BASE #ifdef cl_khr_int64_base_atomics -__CLC_DECLARE_ATOMIC_ADDRSPACE(long, l, __SPIRV_FUNCTION_S, - __SPIRV_FUNCTION_S_LEN) -__CLC_DECLARE_ATOMIC_ADDRSPACE(ulong, m, __SPIRV_FUNCTION_U, - __SPIRV_FUNCTION_U_LEN) +__CLC_DECLARE_ATOMIC_ADDRSPACE(long, __SPIRV_FUNCTION_S) +__CLC_DECLARE_ATOMIC_ADDRSPACE(ulong, __SPIRV_FUNCTION_U) #endif #endif diff --git a/libclc/libspirv/include/libspirv/atomic/atomic_inc.h b/libclc/libspirv/include/libspirv/atomic/atomic_inc.h index 12bf4e2123dc3..d7c6e42bbed66 100644 --- a/libclc/libspirv/include/libspirv/atomic/atomic_inc.h +++ b/libclc/libspirv/include/libspirv/atomic/atomic_inc.h @@ -6,30 +6,24 @@ // //===----------------------------------------------------------------------===// -_CLC_DECL int -_Z24__spirv_AtomicIIncrementPU3AS3iN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( - volatile local int *, enum Scope, enum MemorySemanticsMask); -_CLC_DECL int -_Z24__spirv_AtomicIIncrementPU3AS1iN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( - volatile global int *, enum Scope, enum MemorySemanticsMask); -_CLC_DECL uint -_Z24__spirv_AtomicIIncrementPU3AS3jN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( - volatile local uint *, enum Scope, enum MemorySemanticsMask); -_CLC_DECL uint -_Z24__spirv_AtomicIIncrementPU3AS1jN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( - volatile global uint *, enum Scope, enum MemorySemanticsMask); +_CLC_OVERLOAD _CLC_DECL int __spirv_AtomicIIncrement(local int *, int Scope, + enum MemorySemanticsMask); +_CLC_OVERLOAD _CLC_DECL int __spirv_AtomicIIncrement(global int *, int Scope, + enum MemorySemanticsMask); +_CLC_OVERLOAD _CLC_DECL uint __spirv_AtomicIIncrement(local uint *, int Scope, + enum MemorySemanticsMask); +_CLC_OVERLOAD _CLC_DECL uint __spirv_AtomicIIncrement(global uint *, int Scope, + enum MemorySemanticsMask); #ifdef cl_khr_int64_base_atomics -_CLC_DECL long -_Z24__spirv_AtomicIIncrementPU3AS3lN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( - volatile local 
long *, enum Scope, enum MemorySemanticsMask); -_CLC_DECL long -_Z24__spirv_AtomicIIncrementPU3AS1lN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( - volatile global long *, enum Scope, enum MemorySemanticsMask); -_CLC_DECL unsigned long -_Z24__spirv_AtomicIIncrementPU3AS3mN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( - volatile local unsigned long *, enum Scope, enum MemorySemanticsMask); -_CLC_DECL unsigned long -_Z24__spirv_AtomicIIncrementPU3AS1mN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( - volatile global unsigned long *, enum Scope, enum MemorySemanticsMask); +_CLC_OVERLOAD _CLC_DECL long __spirv_AtomicIIncrement(local long *, int Scope, + enum MemorySemanticsMask); +_CLC_OVERLOAD _CLC_DECL long __spirv_AtomicIIncrement(global long *, int Scope, + enum MemorySemanticsMask); +_CLC_OVERLOAD _CLC_DECL unsigned long +__spirv_AtomicIIncrement(local unsigned long *, int Scope, + enum MemorySemanticsMask); +_CLC_OVERLOAD _CLC_DECL unsigned long +__spirv_AtomicIIncrement(global unsigned long *, int Scope, + enum MemorySemanticsMask); #endif diff --git a/libclc/libspirv/include/libspirv/atomic/atomic_load.h b/libclc/libspirv/include/libspirv/atomic/atomic_load.h index 75a8ab305e2c6..bd51ca74aaf23 100644 --- a/libclc/libspirv/include/libspirv/atomic/atomic_load.h +++ b/libclc/libspirv/include/libspirv/atomic/atomic_load.h @@ -6,23 +6,20 @@ // //===----------------------------------------------------------------------===// -// TODO: Stop manually mangling this name. Need C++ namespaces to get the exact -// mangling. -#define DECL(TYPE, TYPE_MANGLED, AS, AS_MANGLED) \ - _CLC_DECL TYPE \ - _Z18__spirv_AtomicLoadPU3##AS_MANGLED##K##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \ - volatile AS const TYPE *, enum Scope, enum MemorySemanticsMask); +#define DECL(TYPE, AS) \ + _CLC_OVERLOAD _CLC_DECL TYPE __spirv_AtomicLoad(AS TYPE *, int, int); -#define DECL_AS(TYPE, TYPE_MANGLED) \ - DECL(TYPE, TYPE_MANGLED, global, AS1) \ - DECL(TYPE, TYPE_MANGLED, local, AS3) +#define DECL_AS(TYPE) \ + DECL(TYPE, global) \ + DECL(TYPE, local) \ + DECL(TYPE, ) -DECL_AS(int, i) -DECL_AS(unsigned int, j) +DECL_AS(int) +DECL_AS(unsigned int) #ifdef cl_khr_int64_base_atomics -DECL_AS(long, l) -DECL_AS(unsigned long, m) +DECL_AS(long) +DECL_AS(unsigned long) #endif #undef DECL_AS diff --git a/libclc/libspirv/include/libspirv/atomic/atomic_max.h b/libclc/libspirv/include/libspirv/atomic/atomic_max.h index 8f14d230e42a9..95ebeac7ec19e 100644 --- a/libclc/libspirv/include/libspirv/atomic/atomic_max.h +++ b/libclc/libspirv/include/libspirv/atomic/atomic_max.h @@ -7,8 +7,6 @@ //===----------------------------------------------------------------------===// #define __SPIRV_FUNCTION_S __spirv_AtomicSMax -#define __SPIRV_FUNCTION_S_LEN 18 #define __SPIRV_FUNCTION_U __spirv_AtomicUMax -#define __SPIRV_FUNCTION_U_LEN 18 #define __SPIRV_INT64_EXTENDED #include diff --git a/libclc/libspirv/include/libspirv/atomic/atomic_min.h b/libclc/libspirv/include/libspirv/atomic/atomic_min.h index c7b5e73f33bf6..6228d01176e96 100644 --- a/libclc/libspirv/include/libspirv/atomic/atomic_min.h +++ b/libclc/libspirv/include/libspirv/atomic/atomic_min.h @@ -7,8 +7,6 @@ //===----------------------------------------------------------------------===// #define __SPIRV_FUNCTION_S __spirv_AtomicSMin -#define __SPIRV_FUNCTION_S_LEN 18 #define __SPIRV_FUNCTION_U __spirv_AtomicUMin -#define __SPIRV_FUNCTION_U_LEN 18 #define __SPIRV_INT64_EXTENDED #include diff --git a/libclc/libspirv/include/libspirv/atomic/atomic_or.h 
b/libclc/libspirv/include/libspirv/atomic/atomic_or.h index 73cda335dd24b..c69ab3df64a61 100644 --- a/libclc/libspirv/include/libspirv/atomic/atomic_or.h +++ b/libclc/libspirv/include/libspirv/atomic/atomic_or.h @@ -7,8 +7,6 @@ //===----------------------------------------------------------------------===// #define __SPIRV_FUNCTION_S __spirv_AtomicOr -#define __SPIRV_FUNCTION_S_LEN 16 #define __SPIRV_FUNCTION_U __spirv_AtomicOr -#define __SPIRV_FUNCTION_U_LEN 16 #define __SPIRV_INT64_EXTENDED #include diff --git a/libclc/libspirv/include/libspirv/atomic/atomic_store.h b/libclc/libspirv/include/libspirv/atomic/atomic_store.h index c56cee7283b85..b513e758e4755 100644 --- a/libclc/libspirv/include/libspirv/atomic/atomic_store.h +++ b/libclc/libspirv/include/libspirv/atomic/atomic_store.h @@ -6,23 +6,20 @@ // //===----------------------------------------------------------------------===// -// TODO: Stop manually mangling this name. Need C++ namespaces to get the exact -// mangling. -#define DECL(TYPE, TYPE_MANGLED, AS, AS_MANGLED) \ - _CLC_DECL void \ - _Z19__spirv_AtomicStorePU3##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - volatile AS TYPE *, enum Scope, enum MemorySemanticsMask, TYPE); +#define DECL(TYPE, AS) \ + _CLC_OVERLOAD _CLC_DECL void __spirv_AtomicStore( \ + AS TYPE *, int Scope, int MemorySemanticsMask, TYPE); -#define DECL_AS(TYPE, TYPE_MANGLED) \ - DECL(TYPE, TYPE_MANGLED, global, AS1) \ - DECL(TYPE, TYPE_MANGLED, local, AS3) +#define DECL_AS(TYPE) \ + DECL(TYPE, global) \ + DECL(TYPE, local) -DECL_AS(int, i) -DECL_AS(unsigned int, j) +DECL_AS(int) +DECL_AS(unsigned int) #ifdef cl_khr_int64_base_atomics -DECL_AS(long, l) -DECL_AS(unsigned long, m) +DECL_AS(long) +DECL_AS(unsigned long) #endif #undef DECL_AS diff --git a/libclc/libspirv/include/libspirv/atomic/atomic_sub.h b/libclc/libspirv/include/libspirv/atomic/atomic_sub.h index 62bec07cb8fbe..7ecd3ccd2b806 100644 --- a/libclc/libspirv/include/libspirv/atomic/atomic_sub.h +++ b/libclc/libspirv/include/libspirv/atomic/atomic_sub.h @@ -7,8 +7,6 @@ //===----------------------------------------------------------------------===// #define __SPIRV_FUNCTION_S __spirv_AtomicISub -#define __SPIRV_FUNCTION_S_LEN 18 #define __SPIRV_FUNCTION_U __spirv_AtomicISub -#define __SPIRV_FUNCTION_U_LEN 18 #define __SPIRV_INT64_BASE #include diff --git a/libclc/libspirv/include/libspirv/atomic/atomic_xchg.h b/libclc/libspirv/include/libspirv/atomic/atomic_xchg.h index a274589a40f3d..31dcf8948f3da 100644 --- a/libclc/libspirv/include/libspirv/atomic/atomic_xchg.h +++ b/libclc/libspirv/include/libspirv/atomic/atomic_xchg.h @@ -7,17 +7,13 @@ //===----------------------------------------------------------------------===// #define __SPIRV_FUNCTION_S __spirv_AtomicExchange -#define __SPIRV_FUNCTION_S_LEN 22 #define __SPIRV_FUNCTION_U __spirv_AtomicExchange -#define __SPIRV_FUNCTION_U_LEN 22 #define __SPIRV_INT64_BASE -// TODO: Stop manually mangling this name. Need C++ namespaces to get the exact -// mangling. 
-_CLC_DECL float -_Z22__spirv_AtomicExchangePU3AS3fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf( - volatile local float *, enum Scope, enum MemorySemanticsMask, float); -_CLC_DECL float -_Z22__spirv_AtomicExchangePU3AS1fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf( - volatile global float *, enum Scope, enum MemorySemanticsMask, float); +_CLC_OVERLOAD _CLC_DECL float __spirv_AtomicExchange(local float *, int Scope, + int MemorySemanticsMask, + float); +_CLC_OVERLOAD _CLC_DECL float __spirv_AtomicExchange(global float *, int Scope, + int MemorySemanticsMask, + float); #include diff --git a/libclc/libspirv/include/libspirv/atomic/atomic_xor.h b/libclc/libspirv/include/libspirv/atomic/atomic_xor.h index 0e2007bbdb29c..0ab8d1faec680 100644 --- a/libclc/libspirv/include/libspirv/atomic/atomic_xor.h +++ b/libclc/libspirv/include/libspirv/atomic/atomic_xor.h @@ -7,8 +7,6 @@ //===----------------------------------------------------------------------===// #define __SPIRV_FUNCTION_S __spirv_AtomicXor -#define __SPIRV_FUNCTION_S_LEN 17 #define __SPIRV_FUNCTION_U __spirv_AtomicXor -#define __SPIRV_FUNCTION_U_LEN 17 #define __SPIRV_INT64_EXTENDED #include diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_add.cl b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_add.cl index 77c9396210798..be59bec58fa77 100644 --- a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_add.cl +++ b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_add.cl @@ -12,79 +12,63 @@ extern constant int __oclc_ISA_version; -AMDGPU_ATOMIC(_Z18__spirv_AtomicIAdd, int, i, __hip_atomic_fetch_add) -AMDGPU_ATOMIC(_Z18__spirv_AtomicIAdd, unsigned int, j, __hip_atomic_fetch_add) -AMDGPU_ATOMIC(_Z18__spirv_AtomicIAdd, long, l, __hip_atomic_fetch_add) -AMDGPU_ATOMIC(_Z18__spirv_AtomicIAdd, unsigned long, m, __hip_atomic_fetch_add) +AMDGPU_ATOMIC(__spirv_AtomicIAdd, int, __hip_atomic_fetch_add) +AMDGPU_ATOMIC(__spirv_AtomicIAdd, unsigned int, __hip_atomic_fetch_add) +AMDGPU_ATOMIC(__spirv_AtomicIAdd, long, __hip_atomic_fetch_add) +AMDGPU_ATOMIC(__spirv_AtomicIAdd, unsigned long, __hip_atomic_fetch_add) -#define AMDGPU_ATOMIC_FP32_ADD_IMPL(AS, AS_MANGLED, SUB1, CHECK, NEW_BUILTIN) \ - _CLC_DEF float \ - _Z21__spirv_AtomicFAddEXTP##AS_MANGLED##fN5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagEf( \ - volatile AS float *p, enum Scope scope, \ - enum MemorySemanticsMask semantics, float val) { \ - if (CHECK) \ - return NEW_BUILTIN(p, val); \ - int atomic_scope = 0, memory_order = 0; \ - GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, semantics, memory_order) \ - return __hip_atomic_fetch_add(p, val, memory_order, atomic_scope); \ +#define AMDGPU_ATOMIC_FP32_ADD_IMPL(AS, CHECK, NEW_BUILTIN) \ + _CLC_OVERLOAD _CLC_DECL float __spirv_AtomicFAddEXT( \ + AS float *p, int scope, int semantics, float val) { \ + if (CHECK) \ + return NEW_BUILTIN(p, val); \ + int atomic_scope = 0, memory_order = 0; \ + GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, semantics, memory_order) \ + return __hip_atomic_fetch_add(p, val, memory_order, atomic_scope); \ } // Global AS atomics can be unsafe for malloc shared atomics, so should be opt // in AMDGPU_ATOMIC_FP32_ADD_IMPL( - global, U3AS1, 1, + global, AMDGPU_ARCH_BETWEEN(9010, 10000) && __oclc_amdgpu_reflect("AMDGPU_OCLC_UNSAFE_FP_ATOMICS"), __builtin_amdgcn_global_atomic_fadd_f32) -AMDGPU_ATOMIC_FP32_ADD_IMPL(local, U3AS3, 1, AMDGPU_ARCH_GEQ(8000), +AMDGPU_ATOMIC_FP32_ADD_IMPL(local, AMDGPU_ARCH_GEQ(8000), __builtin_amdgcn_ds_atomic_fadd_f32) -AMDGPU_ATOMIC_FP32_ADD_IMPL(, , 0, 
AMDGPU_ARCH_BETWEEN(9400, 10000), +AMDGPU_ATOMIC_FP32_ADD_IMPL(, AMDGPU_ARCH_BETWEEN(9400, 10000), __builtin_amdgcn_flat_atomic_fadd_f32) -#define AMDGPU_ATOMIC_FP64_ADD_IMPL(AS, AS_MANGLED, SUB1, SUB2, CHECK, \ - NEW_BUILTIN) \ - _CLC_DEF long \ - _Z29__spirv_AtomicCompareExchangeP##AS_MANGLED##lN5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagES##SUB2##_ll( \ - volatile AS long *, enum Scope, enum MemorySemanticsMask, \ - enum MemorySemanticsMask, long desired, long expected); \ - _CLC_DEF long \ - _Z18__spirv_AtomicLoadP##AS_MANGLED##KlN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \ - const volatile AS long *, enum Scope, enum MemorySemanticsMask); \ - _CLC_DEF double \ - _Z21__spirv_AtomicFAddEXTP##AS_MANGLED##dN5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagEd( \ - volatile AS double *p, enum Scope scope, \ - enum MemorySemanticsMask semantics, double val) { \ - if (CHECK) \ - return NEW_BUILTIN(p, val); \ - int atomic_scope = 0, memory_order = 0; \ - volatile AS long *int_pointer = (volatile AS long *)p; \ - long old_int_val = 0, new_int_val = 0; \ - do { \ - old_int_val = \ - _Z18__spirv_AtomicLoadP##AS_MANGLED##KlN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \ - int_pointer, scope, semantics); \ - double new_double_val = *(double *)&old_int_val + val; \ - new_int_val = *(long *)&new_double_val; \ - } while ( \ - _Z29__spirv_AtomicCompareExchangeP##AS_MANGLED##lN5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagES##SUB2##_ll( \ - int_pointer, scope, semantics, semantics, new_int_val, \ - old_int_val) != old_int_val); \ - \ - return *(double *)&old_int_val; \ +#define AMDGPU_ATOMIC_FP64_ADD_IMPL(AS, CHECK, NEW_BUILTIN) \ + _CLC_OVERLOAD _CLC_DECL double __spirv_AtomicFAddEXT( \ + AS double *p, int scope, int semantics, double val) { \ + if (CHECK) \ + return NEW_BUILTIN(p, val); \ + int atomic_scope = 0, memory_order = 0; \ + AS long *int_pointer = (AS long *)p; \ + long old_int_val = 0, new_int_val = 0; \ + do { \ + old_int_val = __spirv_AtomicLoad(int_pointer, scope, semantics); \ + double new_double_val = *(double *)&old_int_val + val; \ + new_int_val = *(long *)&new_double_val; \ + } while (__spirv_AtomicCompareExchange(int_pointer, scope, semantics, \ + semantics, new_int_val, \ + old_int_val) != old_int_val); \ + \ + return *(double *)&old_int_val; \ } #ifdef cl_khr_int64_base_atomics // Global AS atomics can be unsafe for malloc shared atomics, so should be opt // in AMDGPU_ATOMIC_FP64_ADD_IMPL( - global, U3AS1, 1, 5, + global, AMDGPU_ARCH_BETWEEN(9010, 10000) && __oclc_amdgpu_reflect("AMDGPU_OCLC_UNSAFE_FP_ATOMICS"), __builtin_amdgcn_global_atomic_fadd_f64) -AMDGPU_ATOMIC_FP64_ADD_IMPL(local, U3AS3, 1, 5, - AMDGPU_ARCH_BETWEEN(9010, 10000), +AMDGPU_ATOMIC_FP64_ADD_IMPL(local, AMDGPU_ARCH_BETWEEN(9010, 10000), __builtin_amdgcn_ds_atomic_fadd_f64) -AMDGPU_ATOMIC_FP64_ADD_IMPL(, , 0, 4, AMDGPU_ARCH_BETWEEN(9400, 10000), +AMDGPU_ATOMIC_FP64_ADD_IMPL(, AMDGPU_ARCH_BETWEEN(9400, 10000), __builtin_amdgcn_flat_atomic_fadd_f64) #endif diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_and.cl b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_and.cl index 5f8c587de5046..f5e9991221f93 100644 --- a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_and.cl +++ b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_and.cl @@ -11,7 +11,7 @@ #include #define __CLC_OP & -#define __SPIRV_BUILTIN _Z17__spirv_AtomicAnd +#define __SPIRV_BUILTIN __spirv_AtomicAnd #define __HIP_BUILTIN __hip_atomic_fetch_and #include "atomic_safe.def" diff --git 
a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_cmpxchg.cl b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_cmpxchg.cl index 480a13e405511..6a6b39f9c70ca 100644 --- a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_cmpxchg.cl +++ b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_cmpxchg.cl @@ -10,42 +10,38 @@ #include #include -#define AMDGPU_ATOMIC_CMPXCHG_IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED, SUB1, \ - SUB2) \ - _CLC_DEF TYPE \ - _Z29__spirv_AtomicCompareExchangeP##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagES##SUB2##_##TYPE_MANGLED##TYPE_MANGLED( \ - volatile AS TYPE *p, enum Scope scope, \ - enum MemorySemanticsMask success_semantics, \ - enum MemorySemanticsMask failure_semantics, TYPE desired, \ - TYPE expected) { \ - int atomic_scope = 0, memory_order_success = 0, memory_order_failure = 0; \ - GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, success_semantics, \ - memory_order_success) \ - GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, failure_semantics, \ - memory_order_failure) \ - __hip_atomic_compare_exchange_strong(p, &expected, desired, \ - memory_order_success, \ - memory_order_failure, atomic_scope); \ - /* If cmpxchg \ - * succeeds: \ - - `expected` is unchanged, holding the old val that was at `p` \ - - `p` is changed to hold `desired` \ - * fails: \ - - `expected` is changed to hold the current val at `p` \ - - `p` is unchanged*/ \ - return expected; \ +#define AMDGPU_ATOMIC_CMPXCHG_IMPL(TYPE, AS) \ + _CLC_OVERLOAD _CLC_DEF TYPE __spirv_AtomicCompareExchange( \ + AS TYPE *p, int scope, int success_semantics, int failure_semantics, \ + TYPE desired, TYPE expected) { \ + int atomic_scope = 0, memory_order_success = 0, memory_order_failure = 0; \ + GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, success_semantics, \ + memory_order_success) \ + GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, failure_semantics, \ + memory_order_failure) \ + __hip_atomic_compare_exchange_strong(p, &expected, desired, \ + memory_order_success, \ + memory_order_failure, atomic_scope); \ + /* If cmpxchg \ + * succeeds: \ + - `expected` is unchanged, holding the old val that was at `p` \ + - `p` is changed to hold `desired` \ + * fails: \ + - `expected` is changed to hold the current val at `p` \ + - `p` is unchanged*/ \ + return expected; \ } -#define AMDGPU_ATOMIC_CMPXCHG(TYPE, TYPE_MANGLED) \ - AMDGPU_ATOMIC_CMPXCHG_IMPL(TYPE, TYPE_MANGLED, global, U3AS1, 1, 5) \ - AMDGPU_ATOMIC_CMPXCHG_IMPL(TYPE, TYPE_MANGLED, local, U3AS3, 1, 5) \ - AMDGPU_ATOMIC_CMPXCHG_IMPL(TYPE, TYPE_MANGLED, , , 0, 4) +#define AMDGPU_ATOMIC_CMPXCHG(TYPE) \ + AMDGPU_ATOMIC_CMPXCHG_IMPL(TYPE, global) \ + AMDGPU_ATOMIC_CMPXCHG_IMPL(TYPE, local) \ + AMDGPU_ATOMIC_CMPXCHG_IMPL(TYPE, ) -AMDGPU_ATOMIC_CMPXCHG(int, i) -AMDGPU_ATOMIC_CMPXCHG(unsigned, j) -AMDGPU_ATOMIC_CMPXCHG(long, l) -AMDGPU_ATOMIC_CMPXCHG(unsigned long, m) -AMDGPU_ATOMIC_CMPXCHG(float, f) +AMDGPU_ATOMIC_CMPXCHG(int) +AMDGPU_ATOMIC_CMPXCHG(unsigned) +AMDGPU_ATOMIC_CMPXCHG(long) +AMDGPU_ATOMIC_CMPXCHG(unsigned long) +AMDGPU_ATOMIC_CMPXCHG(float) // TODO implement for fp64 diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_helpers.h b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_helpers.h index bc40d1def1e76..5ca852ce5523c 100644 --- a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_helpers.h +++ b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_helpers.h @@ -59,54 +59,49 @@ extern int __oclc_amdgpu_reflect(__constant char *); } \ } -#define AMDGPU_ATOMIC_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, AS, AS_MANGLED, \ 
- SUB1, BUILTIN) \ - _CLC_DEF TYPE \ - FUNC_NAME##P##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - volatile AS TYPE *p, enum Scope scope, \ - enum MemorySemanticsMask semantics, TYPE val) { \ - int atomic_scope = 0, memory_order = 0; \ - GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, semantics, memory_order) \ - return BUILTIN(p, val, memory_order, atomic_scope); \ +#define AMDGPU_ATOMIC_IMPL(FUNC_NAME, TYPE, AS, BUILTIN) \ + _CLC_OVERLOAD _CLC_DECL TYPE FUNC_NAME(AS TYPE *p, int scope, int semantics, \ + TYPE val) { \ + int atomic_scope = 0, memory_order = 0; \ + GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, semantics, memory_order) \ + return BUILTIN(p, val, memory_order, atomic_scope); \ } -#define AMDGPU_ATOMIC(FUNC_NAME, TYPE, TYPE_MANGLED, BUILTIN) \ - AMDGPU_ATOMIC_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, global, U3AS1, 1, BUILTIN) \ - AMDGPU_ATOMIC_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, local, U3AS3, 1, BUILTIN) \ - AMDGPU_ATOMIC_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, , , 0, BUILTIN) +#define AMDGPU_ATOMIC(FUNC_NAME, TYPE, BUILTIN) \ + AMDGPU_ATOMIC_IMPL(FUNC_NAME, TYPE, global, BUILTIN) \ + AMDGPU_ATOMIC_IMPL(FUNC_NAME, TYPE, local, BUILTIN) \ + AMDGPU_ATOMIC_IMPL(FUNC_NAME, TYPE, , BUILTIN) // Safe atomics will either choose a slow CAS atomic impl (default) or a fast // native atomic if --amdgpu-unsafe-int-atomics is passed to LLVM. // // Safe atomics using CAS may be necessary if PCIe does not support atomic // operations such as and, or, xor -#define AMDGPU_SAFE_ATOMIC_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, AS, AS_MANGLED, \ - SUB1, OP, USE_BUILTIN_COND, BUILTIN) \ - _CLC_DEF TYPE \ - FUNC_NAME##P##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - volatile AS TYPE *p, enum Scope scope, \ - enum MemorySemanticsMask semantics, TYPE val) { \ - int atomic_scope = 0, memory_order = 0; \ - GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, semantics, memory_order) \ - if (USE_BUILTIN_COND) \ - return BUILTIN(p, val, memory_order, atomic_scope); \ - /* CAS atomics*/ \ - TYPE oldval = __hip_atomic_load(p, memory_order, atomic_scope); \ - TYPE newval = 0; \ - do { \ - newval = oldval OP val; \ - } while (!__hip_atomic_compare_exchange_strong( \ - p, &oldval, newval, atomic_scope, atomic_scope, memory_order)); \ - return oldval; \ +#define AMDGPU_SAFE_ATOMIC_IMPL(FUNC_NAME, TYPE, AS, OP, USE_BUILTIN_COND, \ + BUILTIN) \ + _CLC_OVERLOAD _CLC_DEF TYPE FUNC_NAME(AS TYPE *p, int scope, int semantics, \ + TYPE val) { \ + int atomic_scope = 0, memory_order = 0; \ + GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, semantics, memory_order) \ + if (USE_BUILTIN_COND) \ + return BUILTIN(p, val, memory_order, atomic_scope); \ + /* CAS atomics*/ \ + TYPE oldval = __hip_atomic_load(p, memory_order, atomic_scope); \ + TYPE newval = 0; \ + do { \ + newval = oldval OP val; \ + } while (!__hip_atomic_compare_exchange_strong( \ + p, &oldval, newval, atomic_scope, atomic_scope, memory_order)); \ + return oldval; \ } -#define AMDGPU_SAFE_ATOMIC(FUNC_NAME, TYPE, TYPE_MANGLED, OP, BUILTIN) \ +#define AMDGPU_SAFE_ATOMIC(FUNC_NAME, TYPE, OP, BUILTIN) \ AMDGPU_SAFE_ATOMIC_IMPL( \ - FUNC_NAME, TYPE, TYPE_MANGLED, global, U3AS1, 1, OP, \ + FUNC_NAME, TYPE, global, OP, \ __oclc_amdgpu_reflect("AMDGPU_OCLC_UNSAFE_INT_ATOMICS"), BUILTIN) \ - AMDGPU_SAFE_ATOMIC_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, local, U3AS3, 1, OP, \ + AMDGPU_SAFE_ATOMIC_IMPL(FUNC_NAME, TYPE, local, OP, \ true /* local AS should always use builtin*/, \ 
BUILTIN) \ AMDGPU_SAFE_ATOMIC_IMPL( \ - FUNC_NAME, TYPE, TYPE_MANGLED, , , 0, OP, \ + FUNC_NAME, TYPE, , OP, \ __oclc_amdgpu_reflect("AMDGPU_OCLC_UNSAFE_INT_ATOMICS"), BUILTIN) diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_load.cl b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_load.cl index 9430c21d44031..6e03d2fd9e0e9 100644 --- a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_load.cl +++ b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_load.cl @@ -10,26 +10,24 @@ #include #include -#define AMDGPU_ATOMIC_LOAD_IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED) \ - _CLC_DEF TYPE \ - _Z18__spirv_AtomicLoadP##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \ - const volatile AS TYPE *p, enum Scope scope, \ - enum MemorySemanticsMask semantics) { \ - int atomic_scope = 0, memory_order = 0; \ - GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, semantics, memory_order) \ - return __hip_atomic_load(p, memory_order, atomic_scope); \ +#define AMDGPU_ATOMIC_LOAD_IMPL(TYPE, AS) \ + _CLC_OVERLOAD _CLC_DEF TYPE __spirv_AtomicLoad(AS TYPE *p, int scope, \ + int semantics) { \ + int atomic_scope = 0, memory_order = 0; \ + GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, semantics, memory_order) \ + return __hip_atomic_load(p, memory_order, atomic_scope); \ } -#define AMDGPU_ATOMIC_LOAD(TYPE, TYPE_MANGLED) \ - AMDGPU_ATOMIC_LOAD_IMPL(TYPE, TYPE_MANGLED, global, U3AS1) \ - AMDGPU_ATOMIC_LOAD_IMPL(TYPE, TYPE_MANGLED, local, U3AS3) \ - AMDGPU_ATOMIC_LOAD_IMPL(TYPE, TYPE_MANGLED, , ) +#define AMDGPU_ATOMIC_LOAD(TYPE) \ + AMDGPU_ATOMIC_LOAD_IMPL(TYPE, global) \ + AMDGPU_ATOMIC_LOAD_IMPL(TYPE, local) \ + AMDGPU_ATOMIC_LOAD_IMPL(TYPE, ) -AMDGPU_ATOMIC_LOAD(int, Ki) -AMDGPU_ATOMIC_LOAD(unsigned int, Kj) -AMDGPU_ATOMIC_LOAD(long, Kl) -AMDGPU_ATOMIC_LOAD(unsigned long, Km) -AMDGPU_ATOMIC_LOAD(float, Kf) +AMDGPU_ATOMIC_LOAD(int) +AMDGPU_ATOMIC_LOAD(unsigned int) +AMDGPU_ATOMIC_LOAD(long) +AMDGPU_ATOMIC_LOAD(unsigned long) +AMDGPU_ATOMIC_LOAD(float) // TODO implement for fp64 diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_max.cl b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_max.cl index f838ae950a991..7b3b47ab1af0b 100644 --- a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_max.cl +++ b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_max.cl @@ -13,26 +13,23 @@ extern constant int __oclc_ISA_version; -AMDGPU_ATOMIC(_Z18__spirv_AtomicSMax, int, i, __hip_atomic_fetch_max) -AMDGPU_ATOMIC(_Z18__spirv_AtomicUMax, unsigned int, j, __hip_atomic_fetch_max) -AMDGPU_ATOMIC(_Z18__spirv_AtomicSMax, long, l, __hip_atomic_fetch_max) -AMDGPU_ATOMIC(_Z18__spirv_AtomicUMax, unsigned long, m, __hip_atomic_fetch_max) +AMDGPU_ATOMIC(__spirv_AtomicSMax, int, __hip_atomic_fetch_max) +AMDGPU_ATOMIC(__spirv_AtomicUMax, unsigned int, __hip_atomic_fetch_max) +AMDGPU_ATOMIC(__spirv_AtomicSMax, long, __hip_atomic_fetch_max) +AMDGPU_ATOMIC(__spirv_AtomicUMax, unsigned long, __hip_atomic_fetch_max) -AMDGPU_ATOMIC_FP_MINMAX_IMPL(Max, >, float, f, int, i, global, U3AS1, 1, 5_ii, - false, ) -AMDGPU_ATOMIC_FP_MINMAX_IMPL(Max, >, float, f, int, i, local, U3AS3, 1, 5_ii, - false, ) -AMDGPU_ATOMIC_FP_MINMAX_IMPL(Max, >, float, f, int, i, , , 0, 4_ii, false, ) +AMDGPU_ATOMIC_FP_MINMAX_IMPL(Max, >, float, int, global) +AMDGPU_ATOMIC_FP_MINMAX_IMPL(Max, >, float, int, local) +AMDGPU_ATOMIC_FP_MINMAX_IMPL(Max, >, float, int, ) #ifdef cl_khr_int64_base_atomics -AMDGPU_ATOMIC_FP_MINMAX_IMPL(Max, >, double, d, long, l, global, U3AS1, 1, 5_ll, - AMDGPU_ARCH_BETWEEN(9010, 10000), - 
__builtin_amdgcn_global_atomic_fmax_f64) -AMDGPU_ATOMIC_FP_MINMAX_IMPL(Max, >, double, d, long, l, local, U3AS3, 1, 5_ll, - false, ) -AMDGPU_ATOMIC_FP_MINMAX_IMPL(Max, >, double, d, long, l, , , 0, 4_ll, - AMDGPU_ARCH_BETWEEN(9010, 10000), - __builtin_amdgcn_flat_atomic_fmax_f64) +AMDGPU_ATOMIC_FP_MINMAX_IMPL_CHECK(Max, >, double, long, global, + AMDGPU_ARCH_BETWEEN(9010, 10000), + __builtin_amdgcn_global_atomic_fmax_f64) +AMDGPU_ATOMIC_FP_MINMAX_IMPL(Max, >, double, long, local) +AMDGPU_ATOMIC_FP_MINMAX_IMPL_CHECK(Max, >, double, long, , + AMDGPU_ARCH_BETWEEN(9010, 10000), + __builtin_amdgcn_flat_atomic_fmax_f64) #endif #undef AMDGPU_ATOMIC diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_min.cl b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_min.cl index 7592c473ea735..71b7382fec8d4 100644 --- a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_min.cl +++ b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_min.cl @@ -13,26 +13,23 @@ extern constant int __oclc_ISA_version; -AMDGPU_ATOMIC(_Z18__spirv_AtomicSMin, int, i, __hip_atomic_fetch_min) -AMDGPU_ATOMIC(_Z18__spirv_AtomicUMin, unsigned int, j, __hip_atomic_fetch_min) -AMDGPU_ATOMIC(_Z18__spirv_AtomicSMin, long, l, __hip_atomic_fetch_min) -AMDGPU_ATOMIC(_Z18__spirv_AtomicUMin, unsigned long, m, __hip_atomic_fetch_min) +AMDGPU_ATOMIC(__spirv_AtomicSMin, int, __hip_atomic_fetch_min) +AMDGPU_ATOMIC(__spirv_AtomicUMin, unsigned int, __hip_atomic_fetch_min) +AMDGPU_ATOMIC(__spirv_AtomicSMin, long, __hip_atomic_fetch_min) +AMDGPU_ATOMIC(__spirv_AtomicUMin, unsigned long, __hip_atomic_fetch_min) -AMDGPU_ATOMIC_FP_MINMAX_IMPL(Min, <, float, f, int, i, global, U3AS1, 1, 5_ii, - false, ) -AMDGPU_ATOMIC_FP_MINMAX_IMPL(Min, <, float, f, int, i, local, U3AS3, 1, 5_ii, - false, ) -AMDGPU_ATOMIC_FP_MINMAX_IMPL(Min, <, float, f, int, i, , , 0, 4_ii, false, ) +AMDGPU_ATOMIC_FP_MINMAX_IMPL(Min, <, float, int, global) +AMDGPU_ATOMIC_FP_MINMAX_IMPL(Min, <, float, int, local) +AMDGPU_ATOMIC_FP_MINMAX_IMPL(Min, <, float, int, ) #ifdef cl_khr_int64_base_atomics -AMDGPU_ATOMIC_FP_MINMAX_IMPL(Min, <, double, d, long, l, global, U3AS1, 1, 5_ll, - AMDGPU_ARCH_BETWEEN(9010, 10000), - __builtin_amdgcn_global_atomic_fmin_f64) -AMDGPU_ATOMIC_FP_MINMAX_IMPL(Min, <, double, d, long, l, local, U3AS3, 1, 5_ll, - false, ) -AMDGPU_ATOMIC_FP_MINMAX_IMPL(Min, <, double, d, long, l, , , 0, 4_ll, - AMDGPU_ARCH_BETWEEN(9010, 10000), - __builtin_amdgcn_flat_atomic_fmin_f64) +AMDGPU_ATOMIC_FP_MINMAX_IMPL_CHECK(Min, <, double, long, global, + AMDGPU_ARCH_BETWEEN(9010, 10000), + __builtin_amdgcn_global_atomic_fmin_f64) +AMDGPU_ATOMIC_FP_MINMAX_IMPL(Min, <, double, long, local) +AMDGPU_ATOMIC_FP_MINMAX_IMPL_CHECK(Min, <, double, long, , + AMDGPU_ARCH_BETWEEN(9010, 10000), + __builtin_amdgcn_flat_atomic_fmin_f64) #endif #undef AMDGPU_ATOMIC diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_minmax.h b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_minmax.h index 1b65cdffb2f65..81c18d1799c78 100644 --- a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_minmax.h +++ b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_minmax.h @@ -10,40 +10,36 @@ #include #include -#define AMDGPU_ATOMIC_FP_MINMAX_IMPL( \ - OPNAME, OP, TYPE, TYPE_MANGLED, STORAGE_TYPE, STORAGE_TYPE_MANGLED, AS, \ - AS_MANGLED, SUB1, SUB2, CHECK, NEW_BUILTIN) \ - _CLC_DEF STORAGE_TYPE \ - _Z29__spirv_AtomicCompareExchangeP##AS_MANGLED##STORAGE_TYPE_MANGLED##N5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagES##SUB2( \ - volatile AS STORAGE_TYPE *, enum Scope, enum MemorySemanticsMask, \ - enum 
MemorySemanticsMask, STORAGE_TYPE desired, \ - STORAGE_TYPE expected); \ - _CLC_DEF STORAGE_TYPE \ - _Z18__spirv_AtomicLoadP##AS_MANGLED##K##STORAGE_TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \ - const volatile AS STORAGE_TYPE *, enum Scope, \ - enum MemorySemanticsMask); \ - _CLC_DEF TYPE \ - _Z21__spirv_AtomicF##OPNAME##EXTP##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - volatile AS TYPE *p, enum Scope scope, \ - enum MemorySemanticsMask semantics, TYPE val) { \ - if (CHECK) \ - return NEW_BUILTIN(p, val); \ - int atomic_scope = 0, memory_order = 0; \ - volatile AS STORAGE_TYPE *int_pointer = (volatile AS STORAGE_TYPE *)p; \ - STORAGE_TYPE old_int_val = 0, new_int_val = 0; \ - TYPE old_val = 0; \ - do { \ - old_int_val = \ - _Z18__spirv_AtomicLoadP##AS_MANGLED##K##STORAGE_TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \ - int_pointer, scope, semantics); \ - old_val = *(TYPE *)&old_int_val; \ - if (old_val OP val) \ - return old_val; \ - new_int_val = *(STORAGE_TYPE *)&val; \ - } while ( \ - _Z29__spirv_AtomicCompareExchangeP##AS_MANGLED##STORAGE_TYPE_MANGLED##N5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagES##SUB2( \ - int_pointer, scope, semantics, semantics, new_int_val, \ - old_int_val) != old_int_val); \ - \ - return old_val; \ +#define FUNC_BODY(OP, TYPE, STORAGE_TYPE, AS) \ + { \ + int atomic_scope = 0, memory_order = 0; \ + AS STORAGE_TYPE *int_pointer = (AS STORAGE_TYPE *)p; \ + STORAGE_TYPE old_int_val = 0, new_int_val = 0; \ + TYPE old_val = 0; \ + do { \ + old_int_val = __spirv_AtomicLoad(int_pointer, scope, semantics); \ + old_val = *(TYPE *)&old_int_val; \ + if (old_val OP val) \ + return old_val; \ + new_int_val = *(STORAGE_TYPE *)&val; \ + } while (__spirv_AtomicCompareExchange(int_pointer, scope, semantics, \ + semantics, new_int_val, \ + old_int_val) != old_int_val); \ + \ + return old_val; \ + } + +#define AMDGPU_ATOMIC_FP_MINMAX_IMPL(OPNAME, OP, TYPE, STORAGE_TYPE, AS) \ + _CLC_OVERLOAD _CLC_DEF TYPE __spirv_AtomicF##OPNAME##EXT( \ + AS TYPE *p, int scope, int semantics, TYPE val) { \ + FUNC_BODY(OP, TYPE, STORAGE_TYPE, AS) \ + } + +#define AMDGPU_ATOMIC_FP_MINMAX_IMPL_CHECK(OPNAME, OP, TYPE, STORAGE_TYPE, AS, \ + CHECK, NEW_BUILTIN) \ + _CLC_OVERLOAD _CLC_DEF TYPE __spirv_AtomicF##OPNAME##EXT( \ + AS TYPE *p, int scope, int semantics, TYPE val) { \ + if (CHECK) \ + return NEW_BUILTIN(p, val); \ + FUNC_BODY(OP, TYPE, STORAGE_TYPE, AS) \ } diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_or.cl b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_or.cl index 4f33d6acbff01..8bfd25483b13b 100644 --- a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_or.cl +++ b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_or.cl @@ -11,7 +11,7 @@ #include #define __CLC_OP | -#define __SPIRV_BUILTIN _Z16__spirv_AtomicOr +#define __SPIRV_BUILTIN __spirv_AtomicOr #define __HIP_BUILTIN __hip_atomic_fetch_or #include "atomic_safe.def" diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_safe.def b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_safe.def index fb2024869615b..848ff0c30df59 100644 --- a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_safe.def +++ b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_safe.def @@ -1,9 +1,9 @@ // Before including, define: __SPIRV_BUILTIN, __CLC_OP, __HIP_BUILTIN // and include atomic_helpers.h to get AMDGPU_SAFE_ATOMIC -AMDGPU_SAFE_ATOMIC(__SPIRV_BUILTIN, int, i, __CLC_OP, __HIP_BUILTIN) -AMDGPU_SAFE_ATOMIC(__SPIRV_BUILTIN, 
unsigned int, j, __CLC_OP, +AMDGPU_SAFE_ATOMIC(__SPIRV_BUILTIN, int, __CLC_OP, __HIP_BUILTIN) +AMDGPU_SAFE_ATOMIC(__SPIRV_BUILTIN, unsigned int, __CLC_OP, __HIP_BUILTIN) -AMDGPU_SAFE_ATOMIC(__SPIRV_BUILTIN, long, l, __CLC_OP, __HIP_BUILTIN) -AMDGPU_SAFE_ATOMIC(__SPIRV_BUILTIN, unsigned long, m, __CLC_OP, +AMDGPU_SAFE_ATOMIC(__SPIRV_BUILTIN, long, __CLC_OP, __HIP_BUILTIN) +AMDGPU_SAFE_ATOMIC(__SPIRV_BUILTIN, unsigned long, __CLC_OP, __HIP_BUILTIN) diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_store.cl b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_store.cl index a8b35e0522ba5..80adc8f7058e0 100644 --- a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_store.cl +++ b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_store.cl @@ -10,27 +10,25 @@ #include #include -#define AMDGPU_ATOMIC_STORE_IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED, SUB1) \ - _CLC_DEF void \ - _Z19__spirv_AtomicStore##P##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUB1##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - volatile AS TYPE *p, enum Scope scope, \ - enum MemorySemanticsMask semantics, TYPE val) { \ - int atomic_scope = 0, memory_order = 0; \ - GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, semantics, memory_order) \ - __hip_atomic_store(p, val, memory_order, atomic_scope); \ - return; \ +#define AMDGPU_ATOMIC_STORE_IMPL(TYPE, AS) \ + _CLC_OVERLOAD _CLC_DEF void __spirv_AtomicStore(AS TYPE *p, int scope, \ + int semantics, TYPE val) { \ + int atomic_scope = 0, memory_order = 0; \ + GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, semantics, memory_order) \ + __hip_atomic_store(p, val, memory_order, atomic_scope); \ + return; \ } -#define AMDGPU_ATOMIC_STORE(TYPE, TYPE_MANGLED) \ - AMDGPU_ATOMIC_STORE_IMPL(TYPE, TYPE_MANGLED, global, U3AS1, 1) \ - AMDGPU_ATOMIC_STORE_IMPL(TYPE, TYPE_MANGLED, local, U3AS3, 1) \ - AMDGPU_ATOMIC_STORE_IMPL(TYPE, TYPE_MANGLED, , , 0) +#define AMDGPU_ATOMIC_STORE(TYPE) \ + AMDGPU_ATOMIC_STORE_IMPL(TYPE, global) \ + AMDGPU_ATOMIC_STORE_IMPL(TYPE, local) \ + AMDGPU_ATOMIC_STORE_IMPL(TYPE, ) -AMDGPU_ATOMIC_STORE(int, i) -AMDGPU_ATOMIC_STORE(unsigned int, j) -AMDGPU_ATOMIC_STORE(long, l) -AMDGPU_ATOMIC_STORE(unsigned long, m) -AMDGPU_ATOMIC_STORE(float, f) +AMDGPU_ATOMIC_STORE(int) +AMDGPU_ATOMIC_STORE(unsigned int) +AMDGPU_ATOMIC_STORE(long) +AMDGPU_ATOMIC_STORE(unsigned long) +AMDGPU_ATOMIC_STORE(float) // TODO implement for fp64 diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_sub.cl b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_sub.cl index 72539460e89ea..2df7315844d1b 100644 --- a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_sub.cl +++ b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_sub.cl @@ -10,28 +10,23 @@ #include #include -#define AMDGPU_ATOMIC_SUB_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, AS, AS_MANGLED, \ - NOT_GENERIC, BUILTIN) \ - _CLC_DEF TYPE \ - FUNC_NAME##P##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##NOT_GENERIC##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - volatile AS TYPE *p, enum Scope scope, \ - enum MemorySemanticsMask semantics, TYPE val) { \ - int atomic_scope = 0, memory_order = 0; \ - GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, semantics, memory_order) \ - return BUILTIN(p, -val, memory_order); \ +#define AMDGPU_ATOMIC_SUB_IMPL(FUNC_NAME, TYPE, AS, BUILTIN) \ + _CLC_OVERLOAD _CLC_DEF TYPE FUNC_NAME(AS TYPE *p, int scope, int semantics, \ + TYPE val) { \ + int atomic_scope = 0, memory_order = 0; \ + GET_ATOMIC_SCOPE_AND_ORDER(scope, atomic_scope, semantics, memory_order) \ + return BUILTIN(p, -val, memory_order); \ } 
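For reference, AMDGPU_ATOMIC_FP64_ADD_IMPL in atomic_add.cl above emulates the double atomic add, when no native builtin applies, with a compare-exchange loop over the value's 64-bit pattern (__spirv_AtomicLoad plus __spirv_AtomicCompareExchange). A minimal host-side C11 sketch of the same pattern, with illustrative names and memcpy standing in for the bit reinterpretation:

#include <stdatomic.h>
#include <stdint.h>
#include <string.h>

/* Atomic double add built from a 64-bit compare-exchange on the value's bit
   pattern.  Sketch only; this is not the libclc code itself. */
static double emulated_atomic_fadd(_Atomic uint64_t *p, double val) {
  uint64_t old_bits = atomic_load_explicit(p, memory_order_relaxed);
  for (;;) {
    double old_val;
    memcpy(&old_val, &old_bits, sizeof old_val);      /* reinterpret bits */
    double new_val = old_val + val;
    uint64_t new_bits;
    memcpy(&new_bits, &new_val, sizeof new_bits);
    /* On failure, old_bits is refreshed with the current contents of *p. */
    if (atomic_compare_exchange_weak_explicit(p, &old_bits, new_bits,
                                              memory_order_relaxed,
                                              memory_order_relaxed))
      return old_val;                                 /* previous value */
  }
}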
-#define AMDGPU_ATOMIC_SUB(FUNC_NAME, TYPE, TYPE_MANGLED, BUILTIN) \ - AMDGPU_ATOMIC_SUB_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, global, U3AS1, 1, \ - BUILTIN) \ - AMDGPU_ATOMIC_SUB_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, local, U3AS3, 1, \ - BUILTIN) \ - AMDGPU_ATOMIC_SUB_IMPL(FUNC_NAME, TYPE, TYPE_MANGLED, , , 0, BUILTIN) +#define AMDGPU_ATOMIC_SUB(FUNC_NAME, TYPE, BUILTIN) \ + AMDGPU_ATOMIC_SUB_IMPL(FUNC_NAME, TYPE, global, BUILTIN) \ + AMDGPU_ATOMIC_SUB_IMPL(FUNC_NAME, TYPE, local, BUILTIN) \ + AMDGPU_ATOMIC_SUB_IMPL(FUNC_NAME, TYPE, , BUILTIN) -AMDGPU_ATOMIC_SUB(_Z18__spirv_AtomicISub, int, i, __atomic_fetch_add) -AMDGPU_ATOMIC_SUB(_Z18__spirv_AtomicISub, unsigned int, j, __atomic_fetch_add) -AMDGPU_ATOMIC_SUB(_Z18__spirv_AtomicISub, long, l, __atomic_fetch_add) -AMDGPU_ATOMIC_SUB(_Z18__spirv_AtomicISub, unsigned long, m, __atomic_fetch_add) +AMDGPU_ATOMIC_SUB(__spirv_AtomicISub, int, __atomic_fetch_add) +AMDGPU_ATOMIC_SUB(__spirv_AtomicISub, unsigned int, __atomic_fetch_add) +AMDGPU_ATOMIC_SUB(__spirv_AtomicISub, long, __atomic_fetch_add) +AMDGPU_ATOMIC_SUB(__spirv_AtomicISub, unsigned long, __atomic_fetch_add) #undef AMDGPU_ATOMIC #undef AMDGPU_ATOMIC_IMPL diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_xchg.cl b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_xchg.cl index 3a1f8ae5f1324..13922673e092a 100644 --- a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_xchg.cl +++ b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_xchg.cl @@ -10,13 +10,11 @@ #include #include -AMDGPU_ATOMIC(_Z22__spirv_AtomicExchange, int, i, __hip_atomic_exchange) -AMDGPU_ATOMIC(_Z22__spirv_AtomicExchange, unsigned int, j, - __hip_atomic_exchange) -AMDGPU_ATOMIC(_Z22__spirv_AtomicExchange, long, l, __hip_atomic_exchange) -AMDGPU_ATOMIC(_Z22__spirv_AtomicExchange, unsigned long, m, - __hip_atomic_exchange) -AMDGPU_ATOMIC(_Z22__spirv_AtomicExchange, float, f, __hip_atomic_exchange) +AMDGPU_ATOMIC(__spirv_AtomicExchange, int, __hip_atomic_exchange) +AMDGPU_ATOMIC(__spirv_AtomicExchange, unsigned int, __hip_atomic_exchange) +AMDGPU_ATOMIC(__spirv_AtomicExchange, long, __hip_atomic_exchange) +AMDGPU_ATOMIC(__spirv_AtomicExchange, unsigned long, __hip_atomic_exchange) +AMDGPU_ATOMIC(__spirv_AtomicExchange, float, __hip_atomic_exchange) // TODO implement for fp64 diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_xor.cl b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_xor.cl index ef17188acaf93..ac34aba3e098a 100644 --- a/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_xor.cl +++ b/libclc/libspirv/lib/amdgcn-amdhsa/atomic/atomic_xor.cl @@ -11,7 +11,7 @@ #include #define __CLC_OP ^ -#define __SPIRV_BUILTIN _Z17__spirv_AtomicXor +#define __SPIRV_BUILTIN __spirv_AtomicXor #define __HIP_BUILTIN __hip_atomic_fetch_xor #include "atomic_safe.def" diff --git a/libclc/libspirv/lib/generic/atomic/atomic_add.cl b/libclc/libspirv/lib/generic/atomic/atomic_add.cl index 5a5fd9018d4db..96b170d263f70 100644 --- a/libclc/libspirv/lib/generic/atomic/atomic_add.cl +++ b/libclc/libspirv/lib/generic/atomic/atomic_add.cl @@ -8,39 +8,34 @@ #include -// TODO: Stop manually mangling this name. Need C++ namespaces to get the exact mangling. 
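Similarly, FUNC_BODY in atomic_minmax.h above builds the AtomicFMinEXT/AtomicFMaxEXT fallback from a compare-exchange loop on the bit pattern, returning early once the stored value already wins the comparison. A host-side C11 sketch of the max case, names illustrative:

#include <stdatomic.h>
#include <stdint.h>
#include <string.h>

/* Float atomic max via 32-bit compare-exchange on the bit pattern. */
static float emulated_atomic_fmax(_Atomic uint32_t *p, float val) {
  for (;;) {
    uint32_t old_bits = atomic_load_explicit(p, memory_order_relaxed);
    float old_val;
    memcpy(&old_val, &old_bits, sizeof old_val);
    if (old_val > val)           /* stored value already larger: no store */
      return old_val;
    uint32_t new_bits;
    memcpy(&new_bits, &val, sizeof new_bits);
    if (atomic_compare_exchange_strong_explicit(p, &old_bits, new_bits,
                                                memory_order_relaxed,
                                                memory_order_relaxed))
      return old_val;            /* previous value, as the builtin returns */
  }
}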
- -#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED, SUB, FN_NAME) \ - _CLC_DEF TYPE \ - _Z18__spirv_AtomicIAddP##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUB##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - volatile AS TYPE *p, enum Scope scope, \ - enum MemorySemanticsMask semantics, TYPE val) { \ - return FN_NAME(p, val); \ +#define IMPL(TYPE, AS, FN_NAME) \ + _CLC_OVERLOAD _CLC_DEF TYPE __spirv_AtomicIAdd(AS TYPE *p, int scope, \ + int semantics, TYPE val) { \ + return FN_NAME(p, val); \ } -IMPL(int, i, global, U3AS1, 1, __sync_fetch_and_add) -IMPL(unsigned int, j, global, U3AS1, 1, __sync_fetch_and_add) -IMPL(int, i, local, U3AS3, 1, __sync_fetch_and_add) -IMPL(unsigned int, j, local, U3AS3, 1, __sync_fetch_and_add) +IMPL(int, global, __sync_fetch_and_add) +IMPL(unsigned int, global, __sync_fetch_and_add) +IMPL(int, local, __sync_fetch_and_add) +IMPL(unsigned int, local, __sync_fetch_and_add) #ifdef cl_khr_int64_base_atomics -IMPL(long, l, global, U3AS1, 1, __sync_fetch_and_add_8) -IMPL(unsigned long, m, global, U3AS1, 1, __sync_fetch_and_add_8) -IMPL(long, l, local, U3AS3, 1, __sync_fetch_and_add_8) -IMPL(unsigned long, m, local, U3AS3, 1, __sync_fetch_and_add_8) +IMPL(long, global, __sync_fetch_and_add_8) +IMPL(unsigned long, global, __sync_fetch_and_add_8) +IMPL(long, local, __sync_fetch_and_add_8) +IMPL(unsigned long, local, __sync_fetch_and_add_8) #endif #if _CLC_GENERIC_AS_SUPPORTED -#define IMPL_GENERIC(TYPE, TYPE_MANGLED, FN_NAME) \ - IMPL(TYPE, TYPE_MANGLED, , , 0, FN_NAME) +#define IMPL_GENERIC(TYPE, FN_NAME) IMPL(TYPE, , FN_NAME) -IMPL_GENERIC(int, i, __sync_fetch_and_add) -IMPL_GENERIC(unsigned int, j, __sync_fetch_and_add) +IMPL_GENERIC(int, __sync_fetch_and_add) +IMPL_GENERIC(unsigned int, __sync_fetch_and_add) #ifdef cl_khr_int64_base_atomics -IMPL_GENERIC(long, l, __sync_fetch_and_add_8) -IMPL_GENERIC(unsigned long, m, __sync_fetch_and_add_8) +IMPL_GENERIC(long, __sync_fetch_and_add_8) +IMPL_GENERIC(unsigned long, __sync_fetch_and_add_8) #endif #endif //_CLC_GENERIC_AS_SUPPORTED diff --git a/libclc/libspirv/lib/generic/atomic/atomic_and.cl b/libclc/libspirv/lib/generic/atomic/atomic_and.cl index 7965ac77e4ed2..a332fe17d793e 100644 --- a/libclc/libspirv/lib/generic/atomic/atomic_and.cl +++ b/libclc/libspirv/lib/generic/atomic/atomic_and.cl @@ -8,39 +8,34 @@ #include -// TODO: Stop manually mangling this name. Need C++ namespaces to get the exact mangling. 
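The generic IMPL above forwards __spirv_AtomicIAdd directly to __sync_fetch_and_add (or its 8-byte variant). For reference, the builtin atomically adds the value and returns the previous contents:

#include <stdio.h>

/* Minimal standalone example of the builtin the overloads above lower to. */
int main(void) {
  int counter = 5;
  int previous = __sync_fetch_and_add(&counter, 3);
  printf("previous=%d now=%d\n", previous, counter);  /* previous=5 now=8 */
  return 0;
}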
- -#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED, SUB, FN_NAME) \ - _CLC_DEF TYPE \ - _Z17__spirv_AtomicAndP##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUB##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - volatile AS TYPE *p, enum Scope scope, \ - enum MemorySemanticsMask semantics, TYPE val) { \ - return FN_NAME(p, val); \ +#define IMPL(TYPE, AS, FN_NAME) \ + _CLC_OVERLOAD _CLC_DEF TYPE __spirv_AtomicAnd(AS TYPE *p, int scope, \ + int semantics, TYPE val) { \ + return FN_NAME(p, val); \ } -IMPL(int, i, global, U3AS1, 1, __sync_fetch_and_and) -IMPL(unsigned int, j, global, U3AS1, 1, __sync_fetch_and_and) -IMPL(int, i, local, U3AS3, 1, __sync_fetch_and_and) -IMPL(unsigned int, j, local, U3AS3, 1, __sync_fetch_and_and) +IMPL(int, global, __sync_fetch_and_and) +IMPL(unsigned int, global, __sync_fetch_and_and) +IMPL(int, local, __sync_fetch_and_and) +IMPL(unsigned int, local, __sync_fetch_and_and) #ifdef cl_khr_int64_extended_atomics -IMPL(long, l, global, U3AS1, 1, __sync_fetch_and_and_8) -IMPL(unsigned long, m, global, U3AS1, 1, __sync_fetch_and_and_8) -IMPL(long, l, local, U3AS3, 1, __sync_fetch_and_and_8) -IMPL(unsigned long, m, local, U3AS3, 1, __sync_fetch_and_and_8) +IMPL(long, global, __sync_fetch_and_and_8) +IMPL(unsigned long, global, __sync_fetch_and_and_8) +IMPL(long, local, __sync_fetch_and_and_8) +IMPL(unsigned long, local, __sync_fetch_and_and_8) #endif #if _CLC_GENERIC_AS_SUPPORTED -#define IMPL_GENERIC(TYPE, TYPE_MANGLED, FN_NAME) \ - IMPL(TYPE, TYPE_MANGLED, , , 0, FN_NAME) +#define IMPL_GENERIC(TYPE, FN_NAME) IMPL(TYPE, , FN_NAME) -IMPL_GENERIC(int, i, __sync_fetch_and_and) -IMPL_GENERIC(unsigned int, j, __sync_fetch_and_and) +IMPL_GENERIC(int, __sync_fetch_and_and) +IMPL_GENERIC(unsigned int, __sync_fetch_and_and) #ifdef cl_khr_int64_base_atomics -IMPL_GENERIC(long, l, __sync_fetch_and_and_8) -IMPL_GENERIC(unsigned long, m, __sync_fetch_and_and_8) +IMPL_GENERIC(long, __sync_fetch_and_and_8) +IMPL_GENERIC(unsigned long, __sync_fetch_and_and_8) #endif #endif //_CLC_GENERIC_AS_SUPPORTED diff --git a/libclc/libspirv/lib/generic/atomic/atomic_cmpxchg.cl b/libclc/libspirv/lib/generic/atomic/atomic_cmpxchg.cl index 98da0d36160cc..cce74d0e7b91e 100644 --- a/libclc/libspirv/lib/generic/atomic/atomic_cmpxchg.cl +++ b/libclc/libspirv/lib/generic/atomic/atomic_cmpxchg.cl @@ -8,62 +8,60 @@ #include -// TODO: Stop manually mangling this name. Need C++ namespaces to get the exact mangling. 
- -_CLC_DEF int -_Z29__spirv_AtomicCompareExchangePU3AS3iN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_ii( - volatile local int *p, enum Scope scope, enum MemorySemanticsMask eq, - enum MemorySemanticsMask neq, int val, int cmp) { +_CLC_OVERLOAD _CLC_DEF int __spirv_AtomicCompareExchange(local int *p, + int scope, int eq, + int neq, int val, + int cmp) { return __sync_val_compare_and_swap(p, cmp, val); } -_CLC_DEF int -_Z29__spirv_AtomicCompareExchangePU3AS1iN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_ii( - volatile global int *p, enum Scope scope, enum MemorySemanticsMask eq, - enum MemorySemanticsMask neq, int val, int cmp) { +_CLC_OVERLOAD _CLC_DEF int __spirv_AtomicCompareExchange(global int *p, + int scope, int eq, + int neq, int val, + int cmp) { return __sync_val_compare_and_swap(p, cmp, val); } -_CLC_DEF uint -_Z29__spirv_AtomicCompareExchangePU3AS3jN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_jj( - volatile local uint *p, enum Scope scope, enum MemorySemanticsMask eq, - enum MemorySemanticsMask neq, uint val, uint cmp) { +_CLC_OVERLOAD _CLC_DEF uint __spirv_AtomicCompareExchange(local uint *p, + int scope, int eq, + int neq, uint val, + uint cmp) { return __sync_val_compare_and_swap(p, cmp, val); } -_CLC_DEF uint -_Z29__spirv_AtomicCompareExchangePU3AS1jN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_jj( - volatile global uint *p, enum Scope scope, enum MemorySemanticsMask eq, - enum MemorySemanticsMask neq, uint val, uint cmp) { +_CLC_OVERLOAD _CLC_DEF uint __spirv_AtomicCompareExchange(global uint *p, + int scope, int eq, + int neq, uint val, + uint cmp) { return __sync_val_compare_and_swap(p, cmp, val); } #ifdef cl_khr_int64_base_atomics -_CLC_DEF long -_Z29__spirv_AtomicCompareExchangePU3AS3lN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_ll( - volatile local long *p, enum Scope scope, enum MemorySemanticsMask eq, - enum MemorySemanticsMask neq, long val, long cmp) { +_CLC_OVERLOAD _CLC_DEF long __spirv_AtomicCompareExchange(local long *p, + int scope, int eq, + int neq, long val, + long cmp) { return __sync_val_compare_and_swap_8(p, cmp, val); } -_CLC_DEF long -_Z29__spirv_AtomicCompareExchangePU3AS1lN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_ll( - volatile global long *p, enum Scope scope, enum MemorySemanticsMask eq, - enum MemorySemanticsMask neq, long val, long cmp) { +_CLC_OVERLOAD _CLC_DEF long __spirv_AtomicCompareExchange(global long *p, + int scope, int eq, + int neq, long val, + long cmp) { return __sync_val_compare_and_swap_8(p, cmp, val); } -_CLC_DEF ulong -_Z29__spirv_AtomicCompareExchangePU3AS3mN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_mm( - volatile local ulong *p, enum Scope scope, enum MemorySemanticsMask eq, - enum MemorySemanticsMask neq, ulong val, ulong cmp) { +_CLC_OVERLOAD _CLC_DEF ulong __spirv_AtomicCompareExchange(local ulong *p, + int scope, int eq, + int neq, ulong val, + ulong cmp) { return __sync_val_compare_and_swap_8(p, cmp, val); } -_CLC_DEF ulong -_Z29__spirv_AtomicCompareExchangePU3AS1mN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagES5_mm( - volatile global ulong *p, enum Scope scope, enum MemorySemanticsMask eq, - enum MemorySemanticsMask neq, ulong val, ulong cmp) { +_CLC_OVERLOAD _CLC_DEF ulong __spirv_AtomicCompareExchange(global ulong *p, + int scope, int eq, + int neq, ulong val, + ulong cmp) { return __sync_val_compare_and_swap_8(p, cmp, val); } @@ -71,33 +69,30 @@ _Z29__spirv_AtomicCompareExchangePU3AS1mN5__spv5Scope4FlagENS1_19MemorySemantics #if _CLC_GENERIC_AS_SUPPORTED 
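The AtomicCompareExchange overloads above lower to __sync_val_compare_and_swap, which always returns the value it observed at the pointer; callers compare that against cmp to tell whether the swap happened. A small standalone example:

#include <stdio.h>

/* __sync_val_compare_and_swap(p, cmp, val) stores val only if *p == cmp,
   and always returns the value that was in *p beforehand. */
int main(void) {
  int x = 10;
  int seen = __sync_val_compare_and_swap(&x, 10, 42);  /* succeeds */
  printf("seen=%d x=%d\n", seen, x);                   /* seen=10 x=42 */
  seen = __sync_val_compare_and_swap(&x, 10, 7);       /* fails: x != 10 */
  printf("seen=%d x=%d\n", seen, x);                   /* seen=42 x=42 */
  return 0;
}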
-_CLC_DEF int -_Z29__spirv_AtomicCompareExchangePiN5__spv5Scope4FlagENS0_19MemorySemanticsMask4FlagES4_ii( - volatile int *p, enum Scope scope, enum MemorySemanticsMask eq, - enum MemorySemanticsMask neq, int val, int cmp) { +_CLC_OVERLOAD _CLC_DEF int __spirv_AtomicCompareExchange(int *p, int scope, + int eq, int neq, + int val, int cmp) { return __sync_val_compare_and_swap(p, cmp, val); } -_CLC_DEF uint -_Z29__spirv_AtomicCompareExchangePjN5__spv5Scope4FlagENS0_19MemorySemanticsMask4FlagES4_jj( - volatile uint *p, enum Scope scope, enum MemorySemanticsMask eq, - enum MemorySemanticsMask neq, uint val, uint cmp) { +_CLC_OVERLOAD _CLC_DEF uint __spirv_AtomicCompareExchange(uint *p, int scope, + int eq, int neq, + uint val, uint cmp) { return __sync_val_compare_and_swap(p, cmp, val); } #ifdef cl_khr_int64_base_atomics -_CLC_DEF long -_Z29__spirv_AtomicCompareExchangePlN5__spv5Scope4FlagENS0_19MemorySemanticsMask4FlagES4_ll( - volatile long *p, enum Scope scope, enum MemorySemanticsMask eq, - enum MemorySemanticsMask neq, long val, long cmp) { +_CLC_OVERLOAD _CLC_DEF long __spirv_AtomicCompareExchange(long *p, int scope, + int eq, int neq, + long val, long cmp) { return __sync_val_compare_and_swap_8(p, cmp, val); } -_CLC_DEF ulong -_Z29__spirv_AtomicCompareExchangePmN5__spv5Scope4FlagENS0_19MemorySemanticsMask4FlagES4_mm( - volatile ulong *p, enum Scope scope, enum MemorySemanticsMask eq, - enum MemorySemanticsMask neq, ulong val, ulong cmp) { +_CLC_OVERLOAD _CLC_DEF ulong __spirv_AtomicCompareExchange(ulong *p, int scope, + int eq, int neq, + ulong val, + ulong cmp) { return __sync_val_compare_and_swap_8(p, cmp, val); } diff --git a/libclc/libspirv/lib/generic/atomic/atomic_dec.cl b/libclc/libspirv/lib/generic/atomic/atomic_dec.cl index 4726e52272ef6..6363432bc4958 100644 --- a/libclc/libspirv/lib/generic/atomic/atomic_dec.cl +++ b/libclc/libspirv/lib/generic/atomic/atomic_dec.cl @@ -8,62 +8,46 @@ #include -// TODO: Stop manually mangling this name. Need C++ namespaces to get the exact mangling. 
- -_CLC_DEF int -_Z24__spirv_AtomicIDecrementPU3AS3iN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( - volatile local int *p, enum Scope scope, - enum MemorySemanticsMask semantics) { +_CLC_OVERLOAD _CLC_DECL int __spirv_AtomicIDecrement(local int *p, int scope, + int semantics) { return __sync_fetch_and_sub(p, (int)1); } -_CLC_DEF int -_Z24__spirv_AtomicIDecrementPU3AS1iN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( - volatile global int *p, enum Scope scope, - enum MemorySemanticsMask semantics) { +_CLC_OVERLOAD _CLC_DECL int __spirv_AtomicIDecrement(global int *p, int scope, + int semantics) { return __sync_fetch_and_sub(p, (int)1); } -_CLC_DEF uint -_Z24__spirv_AtomicIDecrementPU3AS3jN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( - volatile local uint *p, enum Scope scope, - enum MemorySemanticsMask semantics) { +_CLC_OVERLOAD _CLC_DECL uint __spirv_AtomicIDecrement(local uint *p, int scope, + int semantics) { return __sync_fetch_and_sub(p, (uint)1); } -_CLC_DEF uint -_Z24__spirv_AtomicIDecrementPU3AS1jN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( - volatile global uint *p, enum Scope scope, - enum MemorySemanticsMask semantics) { +_CLC_OVERLOAD _CLC_DECL uint __spirv_AtomicIDecrement(global uint *p, int scope, + int semantics) { return __sync_fetch_and_sub(p, (uint)1); } #ifdef cl_khr_int64_base_atomics -_CLC_DEF long -_Z24__spirv_AtomicIDecrementPU3AS3lN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( - volatile local long *p, enum Scope scope, - enum MemorySemanticsMask semantics) { +_CLC_OVERLOAD _CLC_DECL long __spirv_AtomicIDecrement(local long *p, int scope, + int semantics) { return __sync_fetch_and_sub(p, (long)1); } -_CLC_DEF long -_Z24__spirv_AtomicIDecrementPU3AS1lN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( - volatile global long *p, enum Scope scope, - enum MemorySemanticsMask semantics) { +_CLC_OVERLOAD _CLC_DECL long __spirv_AtomicIDecrement(global long *p, int scope, + int semantics) { return __sync_fetch_and_sub(p, (long)1); } -_CLC_DEF ulong -_Z24__spirv_AtomicIDecrementPU3AS3mN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( - volatile local ulong *p, enum Scope scope, - enum MemorySemanticsMask semantics) { +_CLC_OVERLOAD _CLC_DECL ulong __spirv_AtomicIDecrement(local ulong *p, + int scope, + int semantics) { return __sync_fetch_and_sub(p, (ulong)1); } -_CLC_DEF ulong -_Z24__spirv_AtomicIDecrementPU3AS1mN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( - volatile global ulong *p, enum Scope scope, - enum MemorySemanticsMask semantics) { +_CLC_OVERLOAD _CLC_DECL ulong __spirv_AtomicIDecrement(global ulong *p, + int scope, + int semantics) { return __sync_fetch_and_sub(p, (ulong)1); } #endif diff --git a/libclc/libspirv/lib/generic/atomic/atomic_inc.cl b/libclc/libspirv/lib/generic/atomic/atomic_inc.cl index e4ac238ddce90..5f6e6afd35f06 100644 --- a/libclc/libspirv/lib/generic/atomic/atomic_inc.cl +++ b/libclc/libspirv/lib/generic/atomic/atomic_inc.cl @@ -8,62 +8,46 @@ #include -// TODO: Stop manually mangling this name. Need C++ namespaces to get the exact mangling. 
- -_CLC_DEF int -_Z24__spirv_AtomicIIncrementPU3AS3iN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( - volatile local int *p, enum Scope scope, - enum MemorySemanticsMask semantics) { +_CLC_OVERLOAD _CLC_DECL int __spirv_AtomicIIncrement(local int *p, int scope, + int semantics) { return __sync_fetch_and_add(p, (int)1); } -_CLC_DEF int -_Z24__spirv_AtomicIIncrementPU3AS1iN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( - volatile global int *p, enum Scope scope, - enum MemorySemanticsMask semantics) { +_CLC_OVERLOAD _CLC_DECL int __spirv_AtomicIIncrement(global int *p, int scope, + int semantics) { return __sync_fetch_and_add(p, (int)1); } -_CLC_DEF uint -_Z24__spirv_AtomicIIncrementPU3AS3jN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( - volatile local uint *p, enum Scope scope, - enum MemorySemanticsMask semantics) { +_CLC_OVERLOAD _CLC_DECL uint __spirv_AtomicIIncrement(local uint *p, int scope, + int semantics) { return __sync_fetch_and_add(p, (uint)1); } -_CLC_DEF uint -_Z24__spirv_AtomicIIncrementPU3AS1jN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( - volatile global uint *p, enum Scope scope, - enum MemorySemanticsMask semantics) { +_CLC_OVERLOAD _CLC_DECL uint __spirv_AtomicIIncrement(global uint *p, int scope, + int semantics) { return __sync_fetch_and_add(p, (uint)1); } #ifdef cl_khr_int64_base_atomics -_CLC_DEF long -_Z24__spirv_AtomicIIncrementPU3AS3lN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( - volatile local long *p, enum Scope scope, - enum MemorySemanticsMask semantics) { +_CLC_OVERLOAD _CLC_DECL long __spirv_AtomicIIncrement(local long *p, int scope, + int semantics) { return __sync_fetch_and_add(p, (long)1); } -_CLC_DEF long -_Z24__spirv_AtomicIIncrementPU3AS1lN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( - volatile global long *p, enum Scope scope, - enum MemorySemanticsMask semantics) { +_CLC_OVERLOAD _CLC_DECL long __spirv_AtomicIIncrement(global long *p, int scope, + int semantics) { return __sync_fetch_and_add(p, (long)1); } -_CLC_DEF ulong -_Z24__spirv_AtomicIIncrementPU3AS3mN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( - volatile local ulong *p, enum Scope scope, - enum MemorySemanticsMask semantics) { +_CLC_OVERLOAD _CLC_DECL ulong __spirv_AtomicIIncrement(local ulong *p, + int scope, + int semantics) { return __sync_fetch_and_add(p, (ulong)1); } -_CLC_DEF ulong -_Z24__spirv_AtomicIIncrementPU3AS1mN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( - volatile global ulong *p, enum Scope scope, - enum MemorySemanticsMask semantics) { +_CLC_OVERLOAD _CLC_DECL ulong __spirv_AtomicIIncrement(global ulong *p, + int scope, + int semantics) { return __sync_fetch_and_add(p, (ulong)1); } #endif diff --git a/libclc/libspirv/lib/generic/atomic/atomic_load.cl b/libclc/libspirv/lib/generic/atomic/atomic_load.cl index 915b8eceab5da..7e6b341a9c483 100644 --- a/libclc/libspirv/lib/generic/atomic/atomic_load.cl +++ b/libclc/libspirv/lib/generic/atomic/atomic_load.cl @@ -8,51 +8,47 @@ #include -// TODO: Stop manually mangling this name. Need C++ namespaces to get the exact mangling. 
- -#define FDECL(TYPE, PREFIX, AS, BYTE_SIZE, MEM_ORDER) \ -TYPE __clc__atomic_##PREFIX##load_##AS##_##BYTE_SIZE##_##MEM_ORDER(volatile AS const TYPE *); - -#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED, PREFIX, BYTE_SIZE) \ - FDECL(TYPE, PREFIX, AS, BYTE_SIZE, unordered) \ - FDECL(TYPE, PREFIX, AS, BYTE_SIZE, acquire) \ - FDECL(TYPE, PREFIX, AS, BYTE_SIZE, seq_cst) \ - _CLC_DEF TYPE \ - _Z18__spirv_AtomicLoadP##AS_MANGLED##K##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \ - volatile AS const TYPE *p, enum Scope scope, \ - enum MemorySemanticsMask semantics) { \ - if (semantics & Acquire) { \ - return __clc__atomic_##PREFIX##load_##AS##_##BYTE_SIZE##_acquire(p); \ - } \ - if (semantics & SequentiallyConsistent) { \ - return __clc__atomic_##PREFIX##load_##AS##_##BYTE_SIZE##_seq_cst(p); \ - } \ - return __clc__atomic_##PREFIX##load_##AS##_##BYTE_SIZE##_unordered(p); \ +#define FDECL(TYPE, PREFIX, AS, BYTE_SIZE, MEM_ORDER) \ + TYPE __clc__atomic_##PREFIX##load_##AS##_##BYTE_SIZE##_##MEM_ORDER( \ + AS const TYPE *); + +#define IMPL(TYPE, AS, PREFIX, BYTE_SIZE) \ + FDECL(TYPE, PREFIX, AS, BYTE_SIZE, unordered) \ + FDECL(TYPE, PREFIX, AS, BYTE_SIZE, acquire) \ + FDECL(TYPE, PREFIX, AS, BYTE_SIZE, seq_cst) \ + _CLC_OVERLOAD _CLC_DECL TYPE __spirv_AtomicLoad(AS TYPE *p, int scope, \ + int semantics) { \ + if (semantics & Acquire) { \ + return __clc__atomic_##PREFIX##load_##AS##_##BYTE_SIZE##_acquire(p); \ + } \ + if (semantics & SequentiallyConsistent) { \ + return __clc__atomic_##PREFIX##load_##AS##_##BYTE_SIZE##_seq_cst(p); \ + } \ + return __clc__atomic_##PREFIX##load_##AS##_##BYTE_SIZE##_unordered(p); \ } -#define IMPL_AS(TYPE, TYPE_MANGLED, PREFIX, BYTE_SIZE) \ - IMPL(TYPE, TYPE_MANGLED, global, U3AS1, PREFIX, BYTE_SIZE) \ - IMPL(TYPE, TYPE_MANGLED, local, U3AS3, PREFIX, BYTE_SIZE) +#define IMPL_AS(TYPE, PREFIX, BYTE_SIZE) \ + IMPL(TYPE, global, PREFIX, BYTE_SIZE) \ + IMPL(TYPE, local, PREFIX, BYTE_SIZE) -IMPL_AS(int, i, , 4) -IMPL_AS(unsigned int, j, u, 4) +IMPL_AS(int, , 4) +IMPL_AS(unsigned int, u, 4) #ifdef cl_khr_int64_base_atomics -IMPL_AS(long, l, , 8) -IMPL_AS(unsigned long, m, u, 8) +IMPL_AS(long, , 8) +IMPL_AS(unsigned long, u, 8) #endif #if _CLC_GENERIC_AS_SUPPORTED -#define IMPL_GENERIC(TYPE, TYPE_MANGLED, PREFIX, BYTE_SIZE) \ - IMPL(TYPE, TYPE_MANGLED, , , PREFIX, BYTE_SIZE) +#define IMPL_GENERIC(TYPE, PREFIX, BYTE_SIZE) IMPL(TYPE, , PREFIX, BYTE_SIZE) -IMPL_GENERIC(int, i, , 4) -IMPL_GENERIC(unsigned int, j, u, 4) +IMPL_GENERIC(int, , 4) +IMPL_GENERIC(unsigned int, u, 4) #ifdef cl_khr_int64_base_atomics -IMPL_GENERIC(long, l, , 8) -IMPL_GENERIC(unsigned long, m, u, 8) +IMPL_GENERIC(long, , 8) +IMPL_GENERIC(unsigned long, u, 8) #endif #endif //_CLC_GENERIC_AS_SUPPORTED diff --git a/libclc/libspirv/lib/generic/atomic/atomic_max.cl b/libclc/libspirv/lib/generic/atomic/atomic_max.cl index 27d694aa12026..90f0f340bb3d2 100644 --- a/libclc/libspirv/lib/generic/atomic/atomic_max.cl +++ b/libclc/libspirv/lib/generic/atomic/atomic_max.cl @@ -8,20 +8,16 @@ #include -// TODO: Stop manually mangling this name. Need C++ namespaces to get the exact mangling. 
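The IMPL macro in the generic atomic_load.cl above picks a helper by testing the Acquire and SequentiallyConsistent bits of the memory-semantics argument, falling back to the unordered variant otherwise. A C11 sketch of that dispatch, assuming the SPIR-V MemorySemantics bit values (Acquire = 0x2, SequentiallyConsistent = 0x10) and an illustrative function name:

#include <stdatomic.h>

enum { SemAcquire = 0x2, SemSeqCst = 0x10 };  /* SPIR-V MemorySemantics bits */

static int load_with_semantics(const _Atomic int *p, int semantics) {
  if (semantics & SemAcquire)
    return atomic_load_explicit(p, memory_order_acquire);
  if (semantics & SemSeqCst)
    return atomic_load_explicit(p, memory_order_seq_cst);
  return atomic_load_explicit(p, memory_order_relaxed);  /* "unordered" path */
}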
- -#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED, SUB, NAME, PREFIX, SUFFIX) \ - _CLC_DEF TYPE \ - _Z18##NAME##P##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUB##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - volatile AS TYPE *p, enum Scope scope, \ - enum MemorySemanticsMask semantics, TYPE val) { \ - return PREFIX##__sync_fetch_and_##SUFFIX(p, val); \ +#define IMPL(TYPE, AS, NAME, PREFIX, SUFFIX) \ + _CLC_OVERLOAD _CLC_DECL TYPE NAME(AS TYPE *p, int scope, int semantics, \ + TYPE val) { \ + return PREFIX##__sync_fetch_and_##SUFFIX(p, val); \ } -IMPL(int, i, global, U3AS1, 1, __spirv_AtomicSMax, , max) -IMPL(unsigned int, j, global, U3AS1, 1, __spirv_AtomicUMax, , umax) -IMPL(int, i, local, U3AS3, 1, __spirv_AtomicSMax, , max) -IMPL(unsigned int, j, local, U3AS3, 1, __spirv_AtomicUMax, , umax) +IMPL(int, global, __spirv_AtomicSMax, , max) +IMPL(unsigned int, global, __spirv_AtomicUMax, , umax) +IMPL(int, local, __spirv_AtomicSMax, , max) +IMPL(unsigned int, local, __spirv_AtomicUMax, , umax) #ifdef cl_khr_int64_extended_atomics unsigned long __clc__sync_fetch_and_max_local_8(volatile local long *, long); @@ -29,28 +25,27 @@ unsigned long __clc__sync_fetch_and_max_global_8(volatile global long *, long); unsigned long __clc__sync_fetch_and_umax_local_8(volatile local unsigned long *, unsigned long); unsigned long __clc__sync_fetch_and_umax_global_8(volatile global unsigned long *, unsigned long); -IMPL(long, l, global, U3AS1, 1, __spirv_AtomicSMax, __clc, max_global_8) -IMPL(unsigned long, m, global, U3AS1, 1, __spirv_AtomicUMax, __clc, umax_global_8) -IMPL(long, l, local, U3AS3, 1, __spirv_AtomicSMax, __clc, max_local_8) -IMPL(unsigned long, m, local, U3AS3, 1, __spirv_AtomicUMax, __clc, umax_local_8) +IMPL(long, global, __spirv_AtomicSMax, __clc, max_global_8) +IMPL(unsigned long, global, __spirv_AtomicUMax, __clc, umax_global_8) +IMPL(long, local, __spirv_AtomicSMax, __clc, max_local_8) +IMPL(unsigned long, local, __spirv_AtomicUMax, __clc, umax_local_8) #endif #if _CLC_GENERIC_AS_SUPPORTED +#define IMPL_GENERIC(TYPE, NAME, PREFIX, SUFFIX) \ + IMPL(TYPE, , NAME, PREFIX, SUFFIX) -#define IMPL_GENERIC(TYPE, TYPE_MANGLED, NAME, PREFIX, SUFFIX) \ - IMPL(TYPE, TYPE_MANGLED, , , 0, NAME, PREFIX, SUFFIX) - -IMPL_GENERIC(int, i, __spirv_AtomicSMax, , max) -IMPL_GENERIC(unsigned int, j, __spirv_AtomicUMax, , umax) +IMPL_GENERIC(int, __spirv_AtomicSMax, , max) +IMPL_GENERIC(unsigned int, __spirv_AtomicUMax, , umax) #ifdef cl_khr_int64_extended_atomics unsigned long __clc__sync_fetch_and_max_generic_8(volatile generic long *, long); unsigned long __clc__sync_fetch_and_umax_generic_8(volatile __generic unsigned long *, unsigned long); -IMPL_GENERIC(long, l, __spirv_AtomicSMax, __clc, max_generic_8) -IMPL_GENERIC(unsigned long, m, __spirv_AtomicUMax, __clc, umax_generic_8) +IMPL_GENERIC(long, __spirv_AtomicSMax, __clc, max_generic_8) +IMPL_GENERIC(unsigned long, __spirv_AtomicUMax, __clc, umax_generic_8) #endif diff --git a/libclc/libspirv/lib/generic/atomic/atomic_min.cl b/libclc/libspirv/lib/generic/atomic/atomic_min.cl index 66a518a75a256..d3df63524c916 100644 --- a/libclc/libspirv/lib/generic/atomic/atomic_min.cl +++ b/libclc/libspirv/lib/generic/atomic/atomic_min.cl @@ -8,20 +8,16 @@ #include -// TODO: Stop manually mangling this name. Need C++ namespaces to get the exact mangling. 
- -#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED, SUB, NAME, PREFIX, SUFFIX) \ - _CLC_DEF TYPE \ - _Z18##NAME##P##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUB##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - volatile AS TYPE *p, enum Scope scope, \ - enum MemorySemanticsMask semantics, TYPE val) { \ - return PREFIX##__sync_fetch_and_##SUFFIX(p, val); \ +#define IMPL(TYPE, AS, NAME, PREFIX, SUFFIX) \ + _CLC_OVERLOAD _CLC_DECL TYPE NAME(AS TYPE *p, int scope, int semantics, \ + TYPE val) { \ + return PREFIX##__sync_fetch_and_##SUFFIX(p, val); \ } -IMPL(int, i, global, U3AS1, 1, __spirv_AtomicSMin, , min) -IMPL(unsigned int, j, global, U3AS1, 1, __spirv_AtomicUMin, , umin) -IMPL(int, i, local, U3AS3, 1, __spirv_AtomicSMin, , min) -IMPL(unsigned int, j, local, U3AS3, 1, __spirv_AtomicUMin, , umin) +IMPL(int, global, __spirv_AtomicSMin, , min) +IMPL(unsigned int, global, __spirv_AtomicUMin, , umin) +IMPL(int, local, __spirv_AtomicSMin, , min) +IMPL(unsigned int, local, __spirv_AtomicUMin, , umin) #ifdef cl_khr_int64_extended_atomics unsigned long __clc__sync_fetch_and_min_local_8(volatile local long *, long); @@ -29,28 +25,27 @@ unsigned long __clc__sync_fetch_and_min_global_8(volatile global long *, long); unsigned long __clc__sync_fetch_and_umin_local_8(volatile local unsigned long *, unsigned long); unsigned long __clc__sync_fetch_and_umin_global_8(volatile global unsigned long *, unsigned long); -IMPL(long, l, global, U3AS1, 1, __spirv_AtomicSMin, __clc, min_global_8) -IMPL(unsigned long, m, global, U3AS1, 1, __spirv_AtomicUMin, __clc, umin_global_8) -IMPL(long, l, local, U3AS3, 1, __spirv_AtomicSMin, __clc, min_local_8) -IMPL(unsigned long, m, local, U3AS3, 1, __spirv_AtomicUMin, __clc, umin_local_8) +IMPL(long, global, __spirv_AtomicSMin, __clc, min_global_8) +IMPL(unsigned long, global, __spirv_AtomicUMin, __clc, umin_global_8) +IMPL(long, local, __spirv_AtomicSMin, __clc, min_local_8) +IMPL(unsigned long, local, __spirv_AtomicUMin, __clc, umin_local_8) #endif #if _CLC_GENERIC_AS_SUPPORTED +#define IMPL_GENERIC(TYPE, NAME, PREFIX, SUFFIX) \ + IMPL(TYPE, , NAME, PREFIX, SUFFIX) -#define IMPL_GENERIC(TYPE, TYPE_MANGLED, NAME, PREFIX, SUFFIX) \ - IMPL(TYPE, TYPE_MANGLED, , , 0, NAME, PREFIX, SUFFIX) - -IMPL_GENERIC(int, i, __spirv_AtomicSMin, , min) -IMPL_GENERIC(unsigned int, j, __spirv_AtomicUMin, , umin) +IMPL_GENERIC(int, __spirv_AtomicSMin, , min) +IMPL_GENERIC(unsigned int, __spirv_AtomicUMin, , umin) #ifdef cl_khr_int64_extended_atomics unsigned long __clc__sync_fetch_and_min_generic_8(volatile generic long *, long); unsigned long __clc__sync_fetch_and_umin_generic_8(volatile __generic unsigned long *, unsigned long); -IMPL_GENERIC(long, l, __spirv_AtomicSMin, __clc, min_generic_8) -IMPL_GENERIC(unsigned long, m, __spirv_AtomicUMin, __clc, umin_generic_8) +IMPL_GENERIC(long, __spirv_AtomicSMin, __clc, min_generic_8) +IMPL_GENERIC(unsigned long, __spirv_AtomicUMin, __clc, umin_generic_8) #endif diff --git a/libclc/libspirv/lib/generic/atomic/atomic_or.cl b/libclc/libspirv/lib/generic/atomic/atomic_or.cl index 683be8de0a36f..d2e78a062d78b 100644 --- a/libclc/libspirv/lib/generic/atomic/atomic_or.cl +++ b/libclc/libspirv/lib/generic/atomic/atomic_or.cl @@ -8,39 +8,34 @@ #include -// TODO: Stop manually mangling this name. Need C++ namespaces to get the exact mangling. 
- -#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED, SUB, FN_NAME) \ - _CLC_DEF TYPE \ - _Z16__spirv_AtomicOrP##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUB##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - volatile AS TYPE *p, enum Scope scope, \ - enum MemorySemanticsMask semantics, TYPE val) { \ - return FN_NAME(p, val); \ +#define IMPL(TYPE, AS, FN_NAME) \ + _CLC_OVERLOAD _CLC_DECL TYPE __spirv_AtomicOr(AS TYPE *p, int scope, \ + int semantics, TYPE val) { \ + return FN_NAME(p, val); \ } -IMPL(int, i, global, U3AS1, 1, __sync_fetch_and_or) -IMPL(unsigned int, j, global, U3AS1, 1, __sync_fetch_and_or) -IMPL(int, i, local, U3AS3, 1, __sync_fetch_and_or) -IMPL(unsigned int, j, local, U3AS3, 1, __sync_fetch_and_or) +IMPL(int, global, __sync_fetch_and_or) +IMPL(unsigned int, global, __sync_fetch_and_or) +IMPL(int, local, __sync_fetch_and_or) +IMPL(unsigned int, local, __sync_fetch_and_or) #ifdef cl_khr_int64_extended_atomics -IMPL(long, l, global, U3AS1, 1, __sync_fetch_and_or_8) -IMPL(unsigned long, m, global, U3AS1, 1, __sync_fetch_and_or_8) -IMPL(long, l, local, U3AS3, 1, __sync_fetch_and_or_8) -IMPL(unsigned long, m, local, U3AS3, 1, __sync_fetch_and_or_8) +IMPL(long, global, __sync_fetch_and_or_8) +IMPL(unsigned long, global, __sync_fetch_and_or_8) +IMPL(long, local, __sync_fetch_and_or_8) +IMPL(unsigned long, local, __sync_fetch_and_or_8) #endif #if _CLC_GENERIC_AS_SUPPORTED -#define IMPL_GENERIC(TYPE, TYPE_MANGLED, FN_NAME) \ - IMPL(TYPE, TYPE_MANGLED, , , 0, FN_NAME) +#define IMPL_GENERIC(TYPE, FN_NAME) IMPL(TYPE, , FN_NAME) -IMPL_GENERIC(int, i, __sync_fetch_and_or) -IMPL_GENERIC(unsigned int, j, __sync_fetch_and_or) +IMPL_GENERIC(int, __sync_fetch_and_or) +IMPL_GENERIC(unsigned int, __sync_fetch_and_or) #ifdef cl_khr_int64_base_atomics -IMPL_GENERIC(long, l, __sync_fetch_and_or_8) -IMPL_GENERIC(unsigned long, m, __sync_fetch_and_or_8) +IMPL_GENERIC(long, __sync_fetch_and_or_8) +IMPL_GENERIC(unsigned long, __sync_fetch_and_or_8) #endif #endif //_CLC_GENERIC_AS_SUPPORTED diff --git a/libclc/libspirv/lib/generic/atomic/atomic_store.cl b/libclc/libspirv/lib/generic/atomic/atomic_store.cl index 1a929cbc693e0..00c732da06b3e 100644 --- a/libclc/libspirv/lib/generic/atomic/atomic_store.cl +++ b/libclc/libspirv/lib/generic/atomic/atomic_store.cl @@ -8,67 +8,57 @@ #include -// TODO: Stop manually mangling this name. Need C++ namespaces to get the exact mangling. 
- -_CLC_DEF void -_Z19__spirv_AtomicStorePU3AS1fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf( - volatile global float *p, enum Scope scope, - enum MemorySemanticsMask semantics, float val) { - _Z19__spirv_AtomicStorePU3AS1jN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEj( - (volatile global uint *)p, scope, semantics, __clc_as_uint(val)); +_CLC_OVERLOAD _CLC_DECL void __spirv_AtomicStore(global float *p, int scope, + int semantics, float val) { + __spirv_AtomicStore((global uint *)p, scope, semantics, __clc_as_uint(val)); } -_CLC_DEF void -_Z19__spirv_AtomicStorePU3AS3fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf( - volatile local float *p, enum Scope scope, - enum MemorySemanticsMask semantics, float val) { - _Z19__spirv_AtomicStorePU3AS3jN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEj( - (volatile local uint *)p, scope, semantics, __clc_as_uint(val)); +_CLC_OVERLOAD _CLC_DECL void __spirv_AtomicStore(local float *p, int scope, + int semantics, float val) { + __spirv_AtomicStore((local uint *)p, scope, semantics, __clc_as_uint(val)); } -#define FDECL(TYPE, PREFIX, AS, BYTE_SIZE, MEM_ORDER) \ -TYPE __clc__atomic_##PREFIX##store_##AS##_##BYTE_SIZE##_##MEM_ORDER(volatile AS const TYPE *, TYPE); +#define FDECL(TYPE, PREFIX, AS, BYTE_SIZE, MEM_ORDER) \ + TYPE __clc__atomic_##PREFIX##store_##AS##_##BYTE_SIZE##_##MEM_ORDER( \ + AS const TYPE *, TYPE); -#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED, SUB, PREFIX, BYTE_SIZE) \ - FDECL(TYPE, PREFIX, AS, BYTE_SIZE, unordered) \ - FDECL(TYPE, PREFIX, AS, BYTE_SIZE, release) \ - FDECL(TYPE, PREFIX, AS, BYTE_SIZE, seq_cst) \ - _CLC_DEF void \ - _Z19__spirv_AtomicStoreP##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUB##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - volatile AS TYPE *p, enum Scope scope, \ - enum MemorySemanticsMask semantics, TYPE val) { \ - if (semantics == Release) { \ - __clc__atomic_##PREFIX##store_##AS##_##BYTE_SIZE##_release(p, val); \ - } else if (semantics == SequentiallyConsistent) { \ - __clc__atomic_##PREFIX##store_##AS##_##BYTE_SIZE##_seq_cst(p, val); \ - } else { \ - __clc__atomic_##PREFIX##store_##AS##_##BYTE_SIZE##_unordered(p, val); \ - } \ +#define IMPL(TYPE, AS, PREFIX, BYTE_SIZE) \ + FDECL(TYPE, PREFIX, AS, BYTE_SIZE, unordered) \ + FDECL(TYPE, PREFIX, AS, BYTE_SIZE, release) \ + FDECL(TYPE, PREFIX, AS, BYTE_SIZE, seq_cst) \ + _CLC_OVERLOAD _CLC_DECL void __spirv_AtomicStore(AS TYPE *p, int scope, \ + int semantics, TYPE val) { \ + if (semantics == Release) { \ + __clc__atomic_##PREFIX##store_##AS##_##BYTE_SIZE##_release(p, val); \ + } else if (semantics == SequentiallyConsistent) { \ + __clc__atomic_##PREFIX##store_##AS##_##BYTE_SIZE##_seq_cst(p, val); \ + } else { \ + __clc__atomic_##PREFIX##store_##AS##_##BYTE_SIZE##_unordered(p, val); \ + } \ } -#define IMPL_AS(TYPE, TYPE_MANGLED, PREFIX, BYTE_SIZE) \ - IMPL(TYPE, TYPE_MANGLED, global, U3AS1, 1, PREFIX, BYTE_SIZE) \ - IMPL(TYPE, TYPE_MANGLED, local, U3AS3, 1, PREFIX, BYTE_SIZE) +#define IMPL_AS(TYPE, PREFIX, BYTE_SIZE) \ + IMPL(TYPE, global, PREFIX, BYTE_SIZE) \ + IMPL(TYPE, local, PREFIX, BYTE_SIZE) -IMPL_AS(int, i, , 4) -IMPL_AS(unsigned int, j, u, 4) +IMPL_AS(int, , 4) +IMPL_AS(unsigned int, u, 4) #ifdef cl_khr_int64_base_atomics -IMPL_AS(long, l, , 8) -IMPL_AS(unsigned long, m, u, 8) +IMPL_AS(long, , 8) +IMPL_AS(unsigned long, u, 8) #endif #if _CLC_GENERIC_AS_SUPPORTED -#define IMPL_GENERIC(TYPE, TYPE_MANGLED, PREFIX, BYTE_SIZE) \ - IMPL(TYPE, TYPE_MANGLED, , , 0, PREFIX, BYTE_SIZE) +#define IMPL_GENERIC(TYPE, PREFIX, BYTE_SIZE) 
IMPL(TYPE, , PREFIX, BYTE_SIZE) -IMPL_GENERIC(int, i, , 4) -IMPL_GENERIC(unsigned int, j, u, 4) +IMPL_GENERIC(int, , 4) +IMPL_GENERIC(unsigned int, u, 4) #ifdef cl_khr_int64_base_atomics -IMPL_GENERIC(long, l, , 8) -IMPL_GENERIC(unsigned long, m, u, 8) +IMPL_GENERIC(long, , 8) +IMPL_GENERIC(unsigned long, u, 8) #endif #endif //_CLC_GENERIC_AS_SUPPORTED diff --git a/libclc/libspirv/lib/generic/atomic/atomic_sub.cl b/libclc/libspirv/lib/generic/atomic/atomic_sub.cl index 9f0c3195555a9..42cdd416aa119 100644 --- a/libclc/libspirv/lib/generic/atomic/atomic_sub.cl +++ b/libclc/libspirv/lib/generic/atomic/atomic_sub.cl @@ -8,39 +8,34 @@ #include -// TODO: Stop manually mangling this name. Need C++ namespaces to get the exact mangling. - -#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED, SUB, FN_NAME) \ - _CLC_DEF TYPE \ - _Z18__spirv_AtomicISubP##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUB##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - volatile AS TYPE *p, enum Scope scope, \ - enum MemorySemanticsMask semantics, TYPE val) { \ - return FN_NAME(p, val); \ +#define IMPL(TYPE, AS, FN_NAME) \ + _CLC_OVERLOAD _CLC_DECL TYPE __spirv_AtomicISub(AS TYPE *p, int scope, \ + int semantics, TYPE val) { \ + return FN_NAME(p, val); \ } -IMPL(int, i, global, U3AS1, 1, __sync_fetch_and_sub) -IMPL(unsigned int, j, global, U3AS1, 1, __sync_fetch_and_sub) -IMPL(int, i, local, U3AS3, 1, __sync_fetch_and_sub) -IMPL(unsigned int, j, local, U3AS3, 1, __sync_fetch_and_sub) +IMPL(int, global, __sync_fetch_and_sub) +IMPL(unsigned int, global, __sync_fetch_and_sub) +IMPL(int, local, __sync_fetch_and_sub) +IMPL(unsigned int, local, __sync_fetch_and_sub) #ifdef cl_khr_int64_base_atomics -IMPL(long, l, global, U3AS1, 1, __sync_fetch_and_sub_8) -IMPL(unsigned long, m, global, U3AS1, 1, __sync_fetch_and_sub_8) -IMPL(long, l, local, U3AS3, 1, __sync_fetch_and_sub_8) -IMPL(unsigned long, m, local, U3AS3, 1, __sync_fetch_and_sub_8) +IMPL(long, global, __sync_fetch_and_sub_8) +IMPL(unsigned long, global, __sync_fetch_and_sub_8) +IMPL(long, local, __sync_fetch_and_sub_8) +IMPL(unsigned long, local, __sync_fetch_and_sub_8) #endif #if _CLC_GENERIC_AS_SUPPORTED -#define IMPL_GENERIC(TYPE, TYPE_MANGLED, FN_NAME) \ - IMPL(TYPE, TYPE_MANGLED, , , 0, FN_NAME) +#define IMPL_GENERIC(TYPE, FN_NAME) IMPL(TYPE, , FN_NAME) -IMPL_GENERIC(int, i, __sync_fetch_and_sub) -IMPL_GENERIC(unsigned int, j, __sync_fetch_and_sub) +IMPL_GENERIC(int, __sync_fetch_and_sub) +IMPL_GENERIC(unsigned int, __sync_fetch_and_sub) #ifdef cl_khr_int64_base_atomics -IMPL_GENERIC(long, l, __sync_fetch_and_sub_8) -IMPL_GENERIC(unsigned long, m, __sync_fetch_and_sub_8) +IMPL_GENERIC(long, __sync_fetch_and_sub_8) +IMPL_GENERIC(unsigned long, __sync_fetch_and_sub_8) #endif #endif //_CLC_GENERIC_AS_SUPPORTED diff --git a/libclc/libspirv/lib/generic/atomic/atomic_xchg.cl b/libclc/libspirv/lib/generic/atomic/atomic_xchg.cl index 8565a2a0da872..f71d7d5596310 100644 --- a/libclc/libspirv/lib/generic/atomic/atomic_xchg.cl +++ b/libclc/libspirv/lib/generic/atomic/atomic_xchg.cl @@ -8,57 +8,46 @@ #include -// TODO: Stop manually mangling this name. Need C++ namespaces to get the exact mangling. 
- -_CLC_DEF float -_Z22__spirv_AtomicExchangePU3AS1fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf( - volatile global float *p, enum Scope scope, - enum MemorySemanticsMask semantics, float val) { - return __clc_as_float( - _Z22__spirv_AtomicExchangePU3AS1jN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEj( - (volatile global uint *)p, scope, semantics, __clc_as_uint(val))); +_CLC_OVERLOAD _CLC_DEF float __spirv_AtomicExchange(global float *p, int scope, + int semantics, float val) { + return __clc_as_float(__spirv_AtomicExchange((global uint *)p, scope, + semantics, __clc_as_uint(val))); } -_CLC_DEF float -_Z22__spirv_AtomicExchangePU3AS3fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf( - volatile local float *p, enum Scope scope, - enum MemorySemanticsMask semantics, float val) { - return __clc_as_float( - _Z22__spirv_AtomicExchangePU3AS3jN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEj( - (volatile local uint *)p, scope, semantics, __clc_as_uint(val))); +_CLC_OVERLOAD _CLC_DEF float __spirv_AtomicExchange(local float *p, int scope, + int semantics, float val) { + return __clc_as_float(__spirv_AtomicExchange((local uint *)p, scope, + semantics, __clc_as_uint(val))); } -#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED, SUB, FN_NAME) \ - _CLC_DEF TYPE \ - _Z22__spirv_AtomicExchangeP##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUB##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - volatile AS TYPE *p, enum Scope scope, \ - enum MemorySemanticsMask semantics, TYPE val) { \ - return FN_NAME(p, val); \ +#define IMPL(TYPE, AS, FN_NAME) \ + _CLC_OVERLOAD _CLC_DEF TYPE __spirv_AtomicExchange( \ + AS TYPE *p, int scope, int semantics, TYPE val) { \ + return FN_NAME(p, val); \ } -IMPL(int, i, global, U3AS1, 1, __sync_swap_4) -IMPL(unsigned int, j, global, U3AS1, 1, __sync_swap_4) -IMPL(int, i, local, U3AS3, 1, __sync_swap_4) -IMPL(unsigned int, j, local, U3AS3, 1, __sync_swap_4) +IMPL(int, global, __sync_swap_4) +IMPL(unsigned int, global, __sync_swap_4) +IMPL(int, local, __sync_swap_4) +IMPL(unsigned int, local, __sync_swap_4) #ifdef cl_khr_int64_base_atomics -IMPL(long, l, global, U3AS1, 1, __sync_swap_8) -IMPL(unsigned long, m, global, U3AS1, 1, __sync_swap_8) -IMPL(long, l, local, U3AS3, 1, __sync_swap_8) -IMPL(unsigned long, m, local, U3AS3, 1, __sync_swap_8) +IMPL(long, global, __sync_swap_8) +IMPL(unsigned long, global, __sync_swap_8) +IMPL(long, local, __sync_swap_8) +IMPL(unsigned long, local, __sync_swap_8) #endif #if _CLC_GENERIC_AS_SUPPORTED -#define IMPL_GENERIC(TYPE, TYPE_MANGLED, FN_NAME) \ - IMPL(TYPE, TYPE_MANGLED, , , 0, FN_NAME) +#define IMPL_GENERIC(TYPE, FN_NAME) IMPL(TYPE, , FN_NAME) -IMPL_GENERIC(int, i, __sync_swap_4) -IMPL_GENERIC(unsigned int, j, __sync_swap_4) +IMPL_GENERIC(int, __sync_swap_4) +IMPL_GENERIC(unsigned int, __sync_swap_4) #ifdef cl_khr_int64_base_atomics -IMPL_GENERIC(long, l, __sync_swap_8) -IMPL_GENERIC(unsigned long, m, __sync_swap_8) +IMPL_GENERIC(long, __sync_swap_8) +IMPL_GENERIC(unsigned long, __sync_swap_8) #endif #endif //_CLC_GENERIC_AS_SUPPORTED diff --git a/libclc/libspirv/lib/generic/atomic/atomic_xor.cl b/libclc/libspirv/lib/generic/atomic/atomic_xor.cl index 6e3df26b67281..860dcb189e1ff 100644 --- a/libclc/libspirv/lib/generic/atomic/atomic_xor.cl +++ b/libclc/libspirv/lib/generic/atomic/atomic_xor.cl @@ -8,39 +8,34 @@ #include -// TODO: Stop manually mangling this name. Need C++ namespaces to get the exact mangling. 
- -#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED, SUB, FN_NAME) \ - _CLC_DEF TYPE \ - _Z17__spirv_AtomicXorP##AS_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagENS##SUB##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - volatile AS TYPE *p, enum Scope scope, \ - enum MemorySemanticsMask semantics, TYPE val) { \ - return FN_NAME(p, val); \ +#define IMPL(TYPE, AS, FN_NAME) \ + _CLC_OVERLOAD _CLC_DECL TYPE __spirv_AtomicXor(AS TYPE *p, int scope, \ + int semantics, TYPE val) { \ + return FN_NAME(p, val); \ } -IMPL(int, i, global, U3AS1, 1, __sync_fetch_and_xor) -IMPL(unsigned int, j, global, U3AS1, 1, __sync_fetch_and_xor) -IMPL(int, i, local, U3AS3, 1, __sync_fetch_and_xor) -IMPL(unsigned int, j, local, U3AS3, 1, __sync_fetch_and_xor) +IMPL(int, global, __sync_fetch_and_xor) +IMPL(unsigned int, global, __sync_fetch_and_xor) +IMPL(int, local, __sync_fetch_and_xor) +IMPL(unsigned int, local, __sync_fetch_and_xor) #ifdef cl_khr_int64_extended_atomics -IMPL(long, l, global, U3AS1, 1, __sync_fetch_and_xor_8) -IMPL(unsigned long, m, global, U3AS1, 1, __sync_fetch_and_xor_8) -IMPL(long, l, local, U3AS3, 1, __sync_fetch_and_xor_8) -IMPL(unsigned long, m, local, U3AS3, 1, __sync_fetch_and_xor_8) +IMPL(long, global, __sync_fetch_and_xor_8) +IMPL(unsigned long, global, __sync_fetch_and_xor_8) +IMPL(long, local, __sync_fetch_and_xor_8) +IMPL(unsigned long, local, __sync_fetch_and_xor_8) #endif #if _CLC_GENERIC_AS_SUPPORTED -#define IMPL_GENERIC(TYPE, TYPE_MANGLED, FN_NAME) \ - IMPL(TYPE, TYPE_MANGLED, , , 0, FN_NAME) +#define IMPL_GENERIC(TYPE, FN_NAME) IMPL(TYPE, , FN_NAME) -IMPL_GENERIC(int, i, __sync_fetch_and_xor) -IMPL_GENERIC(unsigned int, j, __sync_fetch_and_xor) +IMPL_GENERIC(int, __sync_fetch_and_xor) +IMPL_GENERIC(unsigned int, __sync_fetch_and_xor) #ifdef cl_khr_int64_base_atomics -IMPL_GENERIC(long, l, __sync_fetch_and_xor_8) -IMPL_GENERIC(unsigned long, m, __sync_fetch_and_xor_8) +IMPL_GENERIC(long, __sync_fetch_and_xor_8) +IMPL_GENERIC(unsigned long, __sync_fetch_and_xor_8) #endif #endif //_CLC_GENERIC_AS_SUPPORTED diff --git a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_add.cl b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_add.cl index d64e7ca9a6954..2ecc9762bdf46 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_add.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_add.cl @@ -10,111 +10,96 @@ #include #include -__CLC_NVVM_ATOMIC(int, i, int, i, add, _Z18__spirv_AtomicIAdd) -__CLC_NVVM_ATOMIC(uint, j, int, i, add, _Z18__spirv_AtomicIAdd) -__CLC_NVVM_ATOMIC(long, l, long, l, add, _Z18__spirv_AtomicIAdd) -__CLC_NVVM_ATOMIC(ulong, m, long, l, add, _Z18__spirv_AtomicIAdd) +__CLC_NVVM_ATOMIC(int, int, i, add, __spirv_AtomicIAdd) +__CLC_NVVM_ATOMIC(uint, int, i, add, __spirv_AtomicIAdd) +__CLC_NVVM_ATOMIC(long, long, l, add, __spirv_AtomicIAdd) +__CLC_NVVM_ATOMIC(ulong, long, l, add, __spirv_AtomicIAdd) -__CLC_NVVM_ATOMIC(float, f, float, f, add, _Z21__spirv_AtomicFAddEXT) +__CLC_NVVM_ATOMIC(float, float, f, add, __spirv_AtomicFAddEXT) #ifdef cl_khr_int64_base_atomics -#define __CLC_NVVM_ATOMIC_ADD_DOUBLE_IMPL(ADDR_SPACE, ADDR_SPACE_MANGLED, \ - ADDR_SPACE_NV, SUBSTITUTION1, \ - SUBSTITUTION2, SUBSTITUTION3) \ - long \ - _Z18__spirv_AtomicLoadP##ADDR_SPACE_MANGLED##KlN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \ - volatile ADDR_SPACE const long *, enum Scope, \ - enum MemorySemanticsMask); \ - long \ - _Z29__spirv_AtomicCompareExchange##P##ADDR_SPACE_MANGLED##lN5__spv5Scope4FlagENS##SUBSTITUTION1##_19MemorySemanticsMask4FlagES##SUBSTITUTION2##_ll( \ - volatile ADDR_SPACE 
long *, enum Scope, enum MemorySemanticsMask, \ - enum MemorySemanticsMask, long, long); \ - __attribute__((always_inline)) _CLC_DECL double \ - _Z21__spirv_AtomicFAddEXT##P##ADDR_SPACE_MANGLED##d##N5__spv5Scope4FlagENS##SUBSTITUTION3##_19MemorySemanticsMask4FlagE##d( \ - volatile ADDR_SPACE double *pointer, enum Scope scope, \ - enum MemorySemanticsMask semantics, double value) { \ - /* Semantics mask may include memory order, storage class and other info \ -Memory order is stored in the lowest 5 bits */ \ - unsigned int order = semantics & 0x1F; \ - if (__clc_nvvm_reflect_arch() >= 600) { \ - switch (order) { \ - case None: \ - __CLC_NVVM_ATOMIC_IMPL_ORDER(double, double, d, add, ADDR_SPACE, \ - ADDR_SPACE_NV, ) \ - break; \ - case Acquire: \ - if (__clc_nvvm_reflect_arch() >= 700) { \ - __CLC_NVVM_ATOMIC_IMPL_ORDER(double, double, d, add, ADDR_SPACE, \ - ADDR_SPACE_NV, _acquire) \ - } else { \ - __CLC_NVVM_ATOMIC_IMPL_ACQUIRE_FENCE(double, double, d, add, \ - ADDR_SPACE, ADDR_SPACE_NV) \ - } \ - break; \ - case Release: \ - if (__clc_nvvm_reflect_arch() >= 700) { \ - __CLC_NVVM_ATOMIC_IMPL_ORDER(double, double, d, add, ADDR_SPACE, \ - ADDR_SPACE_NV, _release) \ - } else { \ - __spirv_MemoryBarrier(scope, Release); \ - __CLC_NVVM_ATOMIC_IMPL_ORDER(double, double, d, add, ADDR_SPACE, \ - ADDR_SPACE_NV, ) \ - } \ - break; \ - case AcquireRelease: \ - if (__clc_nvvm_reflect_arch() >= 700) { \ - __CLC_NVVM_ATOMIC_IMPL_ORDER(double, double, d, add, ADDR_SPACE, \ - ADDR_SPACE_NV, _acq_rel) \ - } else { \ - __spirv_MemoryBarrier(scope, Release); \ - __CLC_NVVM_ATOMIC_IMPL_ACQUIRE_FENCE(double, double, d, add, \ - ADDR_SPACE, ADDR_SPACE_NV) \ - } \ - break; \ - case SequentiallyConsistent: \ - if (__clc_nvvm_reflect_arch() >= 700) { \ - __CLC_NVVM_FENCE_SC_SM70() \ - __CLC_NVVM_ATOMIC_IMPL_ORDER(double, double, d, add, ADDR_SPACE, \ - ADDR_SPACE_NV, _acq_rel) \ - break; \ - } \ - } \ - __builtin_trap(); \ - __builtin_unreachable(); \ - } else { \ - enum MemorySemanticsMask load_order; \ - switch (semantics) { \ - case SequentiallyConsistent: \ - load_order = SequentiallyConsistent; \ - break; \ - case Acquire: \ - case AcquireRelease: \ - load_order = Acquire; \ - break; \ - default: \ - load_order = None; \ - } \ - volatile ADDR_SPACE long *pointer_int = \ - (volatile ADDR_SPACE long *)pointer; \ - long old_int; \ - long new_val_int; \ - do { \ - old_int = \ - _Z18__spirv_AtomicLoadP##ADDR_SPACE_MANGLED##KlN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \ - pointer_int, scope, load_order); \ - double new_val = *(double *)&old_int + *(double *)&value; \ - new_val_int = *(long *)&new_val; \ - } while ( \ - _Z29__spirv_AtomicCompareExchange##P##ADDR_SPACE_MANGLED##lN5__spv5Scope4FlagENS##SUBSTITUTION1##_19MemorySemanticsMask4FlagES##SUBSTITUTION2##_ll( \ - pointer_int, scope, semantics, semantics, new_val_int, \ - old_int) != old_int); \ - return *(double *)&old_int; \ - } \ +#define __CLC_NVVM_ATOMIC_ADD_DOUBLE_IMPL(ADDR_SPACE, ADDR_SPACE_NV) \ + __attribute__((always_inline)) _CLC_OVERLOAD _CLC_DECL double \ + __spirv_AtomicFAddEXT(ADDR_SPACE double *pointer, int scope, int semantics, \ + double value) { \ + /* Semantics mask may include memory order, storage class and other info \ +Memory order is stored in the lowest 5 bits */ \ + unsigned int order = semantics & 0x1F; \ + if (__clc_nvvm_reflect_arch() >= 600) { \ + switch (order) { \ + case None: \ + __CLC_NVVM_ATOMIC_IMPL_ORDER(double, double, d, add, ADDR_SPACE, \ + ADDR_SPACE_NV, ) \ + break; \ + case Acquire: \ + if 
(__clc_nvvm_reflect_arch() >= 700) { \ + __CLC_NVVM_ATOMIC_IMPL_ORDER(double, double, d, add, ADDR_SPACE, \ + ADDR_SPACE_NV, _acquire) \ + } else { \ + __CLC_NVVM_ATOMIC_IMPL_ACQUIRE_FENCE(double, double, d, add, \ + ADDR_SPACE, ADDR_SPACE_NV) \ + } \ + break; \ + case Release: \ + if (__clc_nvvm_reflect_arch() >= 700) { \ + __CLC_NVVM_ATOMIC_IMPL_ORDER(double, double, d, add, ADDR_SPACE, \ + ADDR_SPACE_NV, _release) \ + } else { \ + __spirv_MemoryBarrier(scope, Release); \ + __CLC_NVVM_ATOMIC_IMPL_ORDER(double, double, d, add, ADDR_SPACE, \ + ADDR_SPACE_NV, ) \ + } \ + break; \ + case AcquireRelease: \ + if (__clc_nvvm_reflect_arch() >= 700) { \ + __CLC_NVVM_ATOMIC_IMPL_ORDER(double, double, d, add, ADDR_SPACE, \ + ADDR_SPACE_NV, _acq_rel) \ + } else { \ + __spirv_MemoryBarrier(scope, Release); \ + __CLC_NVVM_ATOMIC_IMPL_ACQUIRE_FENCE(double, double, d, add, \ + ADDR_SPACE, ADDR_SPACE_NV) \ + } \ + break; \ + case SequentiallyConsistent: \ + if (__clc_nvvm_reflect_arch() >= 700) { \ + __CLC_NVVM_FENCE_SC_SM70() \ + __CLC_NVVM_ATOMIC_IMPL_ORDER(double, double, d, add, ADDR_SPACE, \ + ADDR_SPACE_NV, _acq_rel) \ + break; \ + } \ + } \ + __builtin_trap(); \ + __builtin_unreachable(); \ + } else { \ + int load_order; \ + switch (semantics) { \ + case SequentiallyConsistent: \ + load_order = SequentiallyConsistent; \ + break; \ + case Acquire: \ + case AcquireRelease: \ + load_order = Acquire; \ + break; \ + default: \ + load_order = None; \ + } \ + ADDR_SPACE long *pointer_int = (ADDR_SPACE long *)pointer; \ + long old_int; \ + long new_val_int; \ + do { \ + old_int = __spirv_AtomicLoad(pointer_int, scope, load_order); \ + double new_val = *(double *)&old_int + *(double *)&value; \ + new_val_int = *(long *)&new_val; \ + } while (__spirv_AtomicCompareExchange(pointer_int, scope, semantics, \ + semantics, new_val_int, \ + old_int) != old_int); \ + return *(double *)&old_int; \ + } \ } -__CLC_NVVM_ATOMIC_ADD_DOUBLE_IMPL(, , _gen_, 0, 4, 0) -__CLC_NVVM_ATOMIC_ADD_DOUBLE_IMPL(__global, U3AS1, _global_, 1, 5, 1) -__CLC_NVVM_ATOMIC_ADD_DOUBLE_IMPL(__local, U3AS3, _shared_, 1, 5, 1) +__CLC_NVVM_ATOMIC_ADD_DOUBLE_IMPL(, _gen_) +__CLC_NVVM_ATOMIC_ADD_DOUBLE_IMPL(__global, _global_) +__CLC_NVVM_ATOMIC_ADD_DOUBLE_IMPL(__local, _shared_) #endif diff --git a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_and.cl b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_and.cl index 07190607d04ec..552523a3322f1 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_and.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_and.cl @@ -10,10 +10,10 @@ #include #include -__CLC_NVVM_ATOMIC(int, i, int, i, and, _Z17__spirv_AtomicAnd) -__CLC_NVVM_ATOMIC(long, l, long, l, and, _Z17__spirv_AtomicAnd) -__CLC_NVVM_ATOMIC(unsigned int, j, int, i, and, _Z17__spirv_AtomicAnd) -__CLC_NVVM_ATOMIC(unsigned long, m, long, l, and, _Z17__spirv_AtomicAnd) +__CLC_NVVM_ATOMIC(int, int, i, and, __spirv_AtomicAnd) +__CLC_NVVM_ATOMIC(long, long, l, and, __spirv_AtomicAnd) +__CLC_NVVM_ATOMIC(unsigned int, int, i, and, __spirv_AtomicAnd) +__CLC_NVVM_ATOMIC(unsigned long, long, l, and, __spirv_AtomicAnd) #undef __CLC_NVVM_ATOMIC_TYPES #undef __CLC_NVVM_ATOMIC diff --git a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_cmpxchg.cl b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_cmpxchg.cl index 874a3eb525ac9..ab8f06a4acbe5 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_cmpxchg.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_cmpxchg.cl @@ -72,21 +72,14 @@ _CLC_OVERLOAD _CLC_DECL void __spirv_MemoryBarrier(int, int); } \ 
} -// Type __spirv_AtomicCompareExchange(AS Type *P, __spv::Scope::Flag S, -// __spv::MemorySemanticsMask::Flag E, -// __spv::MemorySemanticsMask::Flag U, +// Type __spirv_AtomicCompareExchange(AS Type *P, int S, int E, int U, // Type V, Type C); -#define __CLC_NVVM_ATOMIC_CAS_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, \ - TYPE_MANGLED_NV, OP, OP_MANGLED, \ - ADDR_SPACE, POINTER_AND_ADDR_SPACE_MANGLED, \ - ADDR_SPACE_NV, SUBSTITUTION1, SUBSTITUTION2) \ - __attribute__((always_inline)) _CLC_DECL TYPE _Z29__spirv_\ -Atomic##OP_MANGLED##POINTER_AND_ADDR_SPACE_MANGLED##TYPE_MANGLED##N5\ -__spv5Scope4FlagENS##SUBSTITUTION1##_19Memory\ -SemanticsMask4FlagES##SUBSTITUTION2##_##TYPE_MANGLED##TYPE_MANGLED( \ - volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \ - enum MemorySemanticsMask semantics1, \ - enum MemorySemanticsMask semantics2, TYPE cmp, TYPE value) { \ +#define __CLC_NVVM_ATOMIC_CAS_IMPL(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, \ + OP_MANGLED, ADDR_SPACE, ADDR_SPACE_NV) \ + __attribute__((always_inline)) _CLC_OVERLOAD _CLC_DECL TYPE \ + __spirv_Atomic##OP_MANGLED(ADDR_SPACE TYPE *pointer, int scope, \ + int semantics1, int semantics2, TYPE cmp, \ + TYPE value) { \ /* Semantics mask may include memory order, storage class and other info \ Memory order is stored in the lowest 5 bits */ \ unsigned int order = semantics1 & 0x1F; \ @@ -135,21 +128,20 @@ Memory order is stored in the lowest 5 bits */ \ __builtin_unreachable(); \ } -#define __CLC_NVVM_ATOMIC_CAS(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, \ - OP, OP_MANGLED) \ - __CLC_NVVM_ATOMIC_CAS_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, \ - OP_MANGLED, __global, PU3AS1, _global_, 1, 5) \ - __CLC_NVVM_ATOMIC_CAS_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, \ - OP_MANGLED, __local, PU3AS3, _shared_, 1, 5) \ - __CLC_NVVM_ATOMIC_CAS_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, \ - OP_MANGLED, , P, _gen_, 0, 4) +#define __CLC_NVVM_ATOMIC_CAS(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, OP_MANGLED) \ + __CLC_NVVM_ATOMIC_CAS_IMPL(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, OP_MANGLED, \ + __global, _global_) \ + __CLC_NVVM_ATOMIC_CAS_IMPL(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, OP_MANGLED, \ + __local, _shared_) \ + __CLC_NVVM_ATOMIC_CAS_IMPL(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, OP_MANGLED, , \ + _gen_) -__CLC_NVVM_ATOMIC_CAS(int, i, int, i, cas, CompareExchange) -__CLC_NVVM_ATOMIC_CAS(long, l, long, l, cas, CompareExchange) -__CLC_NVVM_ATOMIC_CAS(unsigned int, j, int, i, cas, CompareExchange) -__CLC_NVVM_ATOMIC_CAS(unsigned long, m, long, l, cas, CompareExchange) -__CLC_NVVM_ATOMIC_CAS(float, f, float, f, cas, CompareExchange) -__CLC_NVVM_ATOMIC_CAS(double, d, double, d, cas, CompareExchange) +__CLC_NVVM_ATOMIC_CAS(int, int, i, cas, CompareExchange) +__CLC_NVVM_ATOMIC_CAS(long, long, l, cas, CompareExchange) +__CLC_NVVM_ATOMIC_CAS(unsigned int, int, i, cas, CompareExchange) +__CLC_NVVM_ATOMIC_CAS(unsigned long, long, l, cas, CompareExchange) +__CLC_NVVM_ATOMIC_CAS(float, float, f, cas, CompareExchange) +__CLC_NVVM_ATOMIC_CAS(double, double, d, cas, CompareExchange) #undef __CLC_NVVM_ATOMIC_CAS_IMPL_ORDER #undef __CLC_NVVM_ATOMIC_CAS diff --git a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_dec.cl b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_dec.cl index c3e40c9496856..aed29d765cde3 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_dec.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_dec.cl @@ -10,8 +10,8 @@ #include #include -__CLC_NVVM_ATOMIC_INCDEC(unsigned int, j, IDecrement, -1) -__CLC_NVVM_ATOMIC_INCDEC(unsigned 
long, m, IDecrement, -1) +__CLC_NVVM_ATOMIC_INCDEC(unsigned int, IDecrement, -1) +__CLC_NVVM_ATOMIC_INCDEC(unsigned long, IDecrement, -1) #undef __CLC_NVVM_ATOMIC_INCDEC_IMPL #undef __CLC_NVVM_ATOMIC_INCDEC diff --git a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_helpers.h b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_helpers.h index 76365394f3f39..b9ef9c0d846c2 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_helpers.h +++ b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_helpers.h @@ -83,14 +83,10 @@ _CLC_OVERLOAD _CLC_DECL void __spirv_MemoryBarrier(int, int); __asm__ __volatile__("fence.sc.cta;"); \ } -#define __CLC_NVVM_ATOMIC_IMPL( \ - TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, NAME_MANGLED, \ - ADDR_SPACE, POINTER_AND_ADDR_SPACE_MANGLED, ADDR_SPACE_NV, SUBSTITUTION) \ - __attribute__((always_inline)) _CLC_DECL TYPE \ - NAME_MANGLED##POINTER_AND_ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv\ -5Scope4FlagENS##SUBSTITUTION##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \ - enum MemorySemanticsMask semantics, TYPE value) { \ +#define __CLC_NVVM_ATOMIC_IMPL(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, NAME, \ + ADDR_SPACE, ADDR_SPACE_NV) \ + __attribute__((always_inline)) _CLC_OVERLOAD _CLC_DECL TYPE NAME( \ + ADDR_SPACE TYPE *pointer, int scope, int semantics, TYPE value) { \ /* Semantics mask may include memory order, storage class and other info \ Memory order is stored in the lowest 5 bits */ \ unsigned int order = semantics & 0x1F; \ @@ -140,12 +136,10 @@ Memory order is stored in the lowest 5 bits */ \ __builtin_unreachable(); \ } -#define __CLC_NVVM_ATOMIC(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, \ - NAME_MANGLED) \ - __CLC_NVVM_ATOMIC_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, \ - NAME_MANGLED, __global, PU3AS1, _global_, 1) \ - __CLC_NVVM_ATOMIC_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, \ - NAME_MANGLED, __local, PU3AS3, _shared_, 1) \ - __CLC_NVVM_ATOMIC_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, OP, \ - NAME_MANGLED, , P, _gen_, 0) +#define __CLC_NVVM_ATOMIC(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, NAME) \ + __CLC_NVVM_ATOMIC_IMPL(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, NAME, __global, \ + _global_) \ + __CLC_NVVM_ATOMIC_IMPL(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, NAME, __local, \ + _shared_) \ + __CLC_NVVM_ATOMIC_IMPL(TYPE, TYPE_NV, TYPE_MANGLED_NV, OP, NAME, , _gen_) #endif diff --git a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_inc.cl b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_inc.cl index adabb17d6698f..e92da76642c79 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_inc.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_inc.cl @@ -10,8 +10,8 @@ #include #include -__CLC_NVVM_ATOMIC_INCDEC(unsigned int, j, IIncrement, 1) -__CLC_NVVM_ATOMIC_INCDEC(unsigned long, m, IIncrement, 1) +__CLC_NVVM_ATOMIC_INCDEC(unsigned int, IIncrement, 1) +__CLC_NVVM_ATOMIC_INCDEC(unsigned long, IIncrement, 1) #undef __CLC_NVVM_ATOMIC_TYPES #undef __CLC_NVVM_ATOMIC diff --git a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_inc_dec_helpers.h b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_inc_dec_helpers.h index 8710d59e24f63..478d895c90ed7 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_inc_dec_helpers.h +++ b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_inc_dec_helpers.h @@ -12,29 +12,16 @@ #include #include -#define __CLC_NVVM_ATOMIC_INCDEC_IMPL( \ - TYPE, TYPE_MANGLED, OP_MANGLED, VAL, ADDR_SPACE, \ - POINTER_AND_ADDR_SPACE_MANGLED, SUBSTITUTION) \ - TYPE 
_Z21__spirv_\ -AtomicIAddEXT##POINTER_AND_ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv\ -5Scope4FlagENS##SUBSTITUTION##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - volatile ADDR_SPACE TYPE *, enum Scope, enum MemorySemanticsMask, TYPE); \ - __attribute__((always_inline)) _CLC_DECL TYPE _Z24__spirv_\ -Atomic##OP_MANGLED##POINTER_AND_ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv\ -5Scope4FlagENS##SUBSTITUTION##_19MemorySemanticsMask4FlagE( \ - volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \ - enum MemorySemanticsMask semantics) { \ - return _Z21__spirv_\ -AtomicIAddEXT##POINTER_AND_ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv\ -5Scope4FlagENS##SUBSTITUTION##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - pointer, scope, semantics, VAL); \ +#define __CLC_NVVM_ATOMIC_INCDEC_IMPL(TYPE, OP_MANGLED, VAL, ADDR_SPACE) \ + __attribute__((always_inline)) _CLC_OVERLOAD _CLC_DECL TYPE \ + __spirv_Atomic##OP_MANGLED(ADDR_SPACE TYPE *pointer, int scope, \ + int semantics) { \ + return __spirv_AtomicIAdd(pointer, scope, semantics, VAL); \ } -#define __CLC_NVVM_ATOMIC_INCDEC(TYPE, TYPE_MANGLED, OP_MANGLED, VAL) \ - __CLC_NVVM_ATOMIC_INCDEC_IMPL(TYPE, TYPE_MANGLED, OP_MANGLED, VAL, __global, \ - PU3AS1, 1) \ - __CLC_NVVM_ATOMIC_INCDEC_IMPL(TYPE, TYPE_MANGLED, OP_MANGLED, VAL, __local, \ - PU3AS3, 1) \ - __CLC_NVVM_ATOMIC_INCDEC_IMPL(TYPE, TYPE_MANGLED, OP_MANGLED, VAL, , P, 0) +#define __CLC_NVVM_ATOMIC_INCDEC(TYPE, OP_MANGLED, VAL) \ + __CLC_NVVM_ATOMIC_INCDEC_IMPL(TYPE, OP_MANGLED, VAL, __global) \ + __CLC_NVVM_ATOMIC_INCDEC_IMPL(TYPE, OP_MANGLED, VAL, __local) \ + __CLC_NVVM_ATOMIC_INCDEC_IMPL(TYPE, OP_MANGLED, VAL, ) #endif diff --git a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_load.cl b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_load.cl index 1ee0ce8b8d60f..da23ef23d3635 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_load.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_load.cl @@ -36,14 +36,10 @@ _CLC_OVERLOAD _CLC_DECL void __spirv_MemoryBarrier(int, int); } \ } -#define __CLC_NVVM_ATOMIC_LOAD_IMPL( \ - TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, ADDR_SPACE, \ - POINTER_AND_ADDR_SPACE_MANGLED, ADDR_SPACE_NV) \ - __attribute__((always_inline)) _CLC_DECL TYPE _Z18__spirv_\ -AtomicLoad##POINTER_AND_ADDR_SPACE_MANGLED##K##TYPE_MANGLED##N5__spv5\ -Scope4FlagENS1_19MemorySemanticsMask4FlagE( \ - const volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \ - enum MemorySemanticsMask semantics) { \ +#define __CLC_NVVM_ATOMIC_LOAD_IMPL(TYPE, TYPE_NV, TYPE_MANGLED_NV, \ + ADDR_SPACE, ADDR_SPACE_NV) \ + __attribute__((always_inline)) _CLC_OVERLOAD _CLC_DECL TYPE \ + __spirv_AtomicLoad(ADDR_SPACE TYPE *pointer, int scope, int semantics) { \ /* Semantics mask may include memory order, storage class and other info \ Memory order is stored in the lowest 5 bits */ \ unsigned int order = semantics & 0x1F; \ @@ -78,22 +74,21 @@ Memory order is stored in the lowest 5 bits */ \ __builtin_unreachable(); \ } -#define __CLC_NVVM_ATOMIC_LOAD(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV) \ - __CLC_NVVM_ATOMIC_LOAD_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, \ - __global, PU3AS1, _global_) \ - __CLC_NVVM_ATOMIC_LOAD_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, \ - __local, PU3AS3, _shared_) \ - __CLC_NVVM_ATOMIC_LOAD_IMPL(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV, , \ - P, _gen_) +#define __CLC_NVVM_ATOMIC_LOAD(TYPE, TYPE_NV, TYPE_MANGLED_NV) \ + __CLC_NVVM_ATOMIC_LOAD_IMPL(TYPE, TYPE_NV, TYPE_MANGLED_NV, __global, \ + _global_) \ + __CLC_NVVM_ATOMIC_LOAD_IMPL(TYPE, TYPE_NV, 
TYPE_MANGLED_NV, __local, \ + _shared_) \ + __CLC_NVVM_ATOMIC_LOAD_IMPL(TYPE, TYPE_NV, TYPE_MANGLED_NV, , _gen_) -__CLC_NVVM_ATOMIC_LOAD(int, i, int, i) -__CLC_NVVM_ATOMIC_LOAD(uint, j, int, i) -__CLC_NVVM_ATOMIC_LOAD(long, l, long, l) -__CLC_NVVM_ATOMIC_LOAD(ulong, m, long, l) +__CLC_NVVM_ATOMIC_LOAD(int, int, i) +__CLC_NVVM_ATOMIC_LOAD(uint, int, i) +__CLC_NVVM_ATOMIC_LOAD(long, long, l) +__CLC_NVVM_ATOMIC_LOAD(ulong, long, l) -__CLC_NVVM_ATOMIC_LOAD(float, f, float, f) +__CLC_NVVM_ATOMIC_LOAD(float, float, f) #ifdef cl_khr_int64_base_atomics -__CLC_NVVM_ATOMIC_LOAD(double, d, double, d) +__CLC_NVVM_ATOMIC_LOAD(double, double, d) #endif #undef __CLC_NVVM_ATOMIC_LOAD_TYPES diff --git a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_max.cl b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_max.cl index d35a08a7baa2e..f67a257965bb1 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_max.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_max.cl @@ -10,38 +10,20 @@ #include #include -__CLC_NVVM_ATOMIC(int, i, int, i, max, _Z18__spirv_AtomicSMax) -__CLC_NVVM_ATOMIC(long, l, long, l, max, _Z18__spirv_AtomicSMax) -__CLC_NVVM_ATOMIC(unsigned int, j, unsigned int, ui, max, - _Z18__spirv_AtomicUMax) -__CLC_NVVM_ATOMIC(unsigned long, m, unsigned long, ul, max, - _Z18__spirv_AtomicUMax) +__CLC_NVVM_ATOMIC(int, int, i, max, __spirv_AtomicSMax) +__CLC_NVVM_ATOMIC(long, long, l, max, __spirv_AtomicSMax) +__CLC_NVVM_ATOMIC(unsigned int, unsigned int, ui, max, __spirv_AtomicUMax) +__CLC_NVVM_ATOMIC(unsigned long, unsigned long, ul, max, __spirv_AtomicUMax) #undef __CLC_NVVM_ATOMIC_TYPES #undef __CLC_NVVM_ATOMIC #undef __CLC_NVVM_ATOMIC_IMPL -#define __CLC_NVVM_ATOMIC_MAX_IMPL( \ - TYPE, TYPE_MANGLED, TYPE_INT, TYPE_INT_MANGLED, OP_MANGLED, ADDR_SPACE, \ - POINTER_AND_ADDR_SPACE_MANGLED, SUBSTITUTION1, SUBSTITUTION2) \ - TYPE_INT \ - _Z18__spirv_\ -AtomicLoad##POINTER_AND_ADDR_SPACE_MANGLED##K##TYPE_INT_MANGLED##N5__spv5Scope4\ -FlagENS1_19MemorySemanticsMask4FlagE(volatile ADDR_SPACE const TYPE_INT *, \ - enum Scope, enum MemorySemanticsMask); \ - TYPE_INT \ - _Z29__spirv_\ -AtomicCompareExchange##POINTER_AND_ADDR_SPACE_MANGLED##TYPE_INT_MANGLED##N5__sp\ -v5Scope4FlagENS##SUBSTITUTION1##_19MemorySemanticsMask\ -4FlagES##SUBSTITUTION2##_##TYPE_INT_MANGLED##TYPE_INT_MANGLED( \ - volatile ADDR_SPACE TYPE_INT *, enum Scope, enum MemorySemanticsMask, \ - enum MemorySemanticsMask, TYPE_INT, TYPE_INT); \ - __attribute__((always_inline)) _CLC_DECL TYPE _Z21__spirv_\ -Atomic##OP_MANGLED##POINTER_AND_ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope\ -4FlagENS##SUBSTITUTION1##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \ - enum MemorySemanticsMask semantics, TYPE val) { \ - enum MemorySemanticsMask load_order; \ +#define __CLC_NVVM_ATOMIC_MAX_IMPL(TYPE, TYPE_INT, OP_MANGLED, ADDR_SPACE) \ + __attribute__((always_inline)) _CLC_OVERLOAD _CLC_DECL TYPE \ + __spirv_Atomic##OP_MANGLED(ADDR_SPACE TYPE *pointer, int scope, \ + int semantics, TYPE val) { \ + int load_order; \ switch (semantics) { \ case SequentiallyConsistent: \ load_order = SequentiallyConsistent; \ @@ -53,18 +35,12 @@ Atomic##OP_MANGLED##POINTER_AND_ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope\ default: \ load_order = None; \ } \ - volatile ADDR_SPACE TYPE_INT *pointer_int = \ - (volatile ADDR_SPACE TYPE_INT *)pointer; \ + ADDR_SPACE TYPE_INT *pointer_int = (ADDR_SPACE TYPE_INT *)pointer; \ TYPE_INT val_int = *(TYPE_INT *)&val; \ - TYPE_INT old_int = _Z18__spirv_\ 
-AtomicLoad##POINTER_AND_ADDR_SPACE_MANGLED##K##TYPE_INT_MANGLED##N5__spv5Scope4\ -FlagENS1_19MemorySemanticsMask4FlagE(pointer_int, scope, load_order); \ + TYPE_INT old_int = __spirv_AtomicLoad(pointer_int, scope, load_order); \ TYPE old = *(TYPE *)&old_int; \ while (val > old) { \ - TYPE_INT tmp_int = _Z29__spirv_\ -AtomicCompareExchange##POINTER_AND_ADDR_SPACE_MANGLED##TYPE_INT_MANGLED##N5__sp\ -v5Scope4FlagENS##SUBSTITUTION1##_19MemorySemanticsMask\ -4FlagES##SUBSTITUTION2##_##TYPE_INT_MANGLED##TYPE_INT_MANGLED( \ + TYPE_INT tmp_int = __spirv_AtomicCompareExchange( \ pointer_int, scope, semantics, semantics, val_int, old_int); \ if (old_int == tmp_int) { \ return *(TYPE *)&tmp_int; \ @@ -75,14 +51,10 @@ v5Scope4FlagENS##SUBSTITUTION1##_19MemorySemanticsMask\ return old; \ } -#define __CLC_NVVM_ATOMIC_MAX(TYPE, TYPE_MANGLED, TYPE_INT, TYPE_INT_MANGLED, \ - OP_MANGLED) \ - __CLC_NVVM_ATOMIC_MAX_IMPL(TYPE, TYPE_MANGLED, TYPE_INT, TYPE_INT_MANGLED, \ - OP_MANGLED, __global, PU3AS1, 1, 5) \ - __CLC_NVVM_ATOMIC_MAX_IMPL(TYPE, TYPE_MANGLED, TYPE_INT, TYPE_INT_MANGLED, \ - OP_MANGLED, __local, PU3AS3, 1, 5) \ - __CLC_NVVM_ATOMIC_MAX_IMPL(TYPE, TYPE_MANGLED, TYPE_INT, TYPE_INT_MANGLED, \ - OP_MANGLED, , P, 0, 4) +#define __CLC_NVVM_ATOMIC_MAX(TYPE, TYPE_INT, OP_MANGLED) \ + __CLC_NVVM_ATOMIC_MAX_IMPL(TYPE, TYPE_INT, OP_MANGLED, __global) \ + __CLC_NVVM_ATOMIC_MAX_IMPL(TYPE, TYPE_INT, OP_MANGLED, __local) \ + __CLC_NVVM_ATOMIC_MAX_IMPL(TYPE, TYPE_INT, OP_MANGLED, ) -__CLC_NVVM_ATOMIC_MAX(float, f, int, i, FMaxEXT) -__CLC_NVVM_ATOMIC_MAX(double, d, long, l, FMaxEXT) +__CLC_NVVM_ATOMIC_MAX(float, int, FMaxEXT) +__CLC_NVVM_ATOMIC_MAX(double, long, FMaxEXT) diff --git a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_min.cl b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_min.cl index ccb38e922742d..60fe3d0329c65 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_min.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_min.cl @@ -10,36 +10,20 @@ #include #include -__CLC_NVVM_ATOMIC(int, i, int, i, min, _Z18__spirv_AtomicSMin) -__CLC_NVVM_ATOMIC(long, l, long, l, min, _Z18__spirv_AtomicSMin) -__CLC_NVVM_ATOMIC(uint, j, uint, ui, min, _Z18__spirv_AtomicUMin) -__CLC_NVVM_ATOMIC(ulong, m, ulong, ul, min, _Z18__spirv_AtomicUMin) +__CLC_NVVM_ATOMIC(int, int, i, min, __spirv_AtomicSMin) +__CLC_NVVM_ATOMIC(long, long, l, min, __spirv_AtomicSMin) +__CLC_NVVM_ATOMIC(uint, uint, ui, min, __spirv_AtomicUMin) +__CLC_NVVM_ATOMIC(ulong, ulong, ul, min, __spirv_AtomicUMin) #undef __CLC_NVVM_ATOMIC_TYPES #undef __CLC_NVVM_ATOMIC #undef __CLC_NVVM_ATOMIC_IMPL -#define __CLC_NVVM_ATOMIC_MIN_IMPL( \ - TYPE, TYPE_MANGLED, TYPE_INT, TYPE_INT_MANGLED, OP_MANGLED, ADDR_SPACE, \ - POINTER_AND_ADDR_SPACE_MANGLED, SUBSTITUTION1, SUBSTITUTION2) \ - TYPE_INT \ - _Z18__spirv_\ -AtomicLoad##POINTER_AND_ADDR_SPACE_MANGLED##K##TYPE_INT_MANGLED##N5__spv5Scope4\ -FlagENS1_19MemorySemanticsMask4FlagE(volatile ADDR_SPACE const TYPE_INT *, \ - enum Scope, enum MemorySemanticsMask); \ - TYPE_INT \ - _Z29__spirv_\ -AtomicCompareExchange##POINTER_AND_ADDR_SPACE_MANGLED##TYPE_INT_MANGLED##N5__sp\ -v5Scope4FlagENS##SUBSTITUTION1##_19MemorySemanticsMask4\ -FlagES##SUBSTITUTION2##_##TYPE_INT_MANGLED##TYPE_INT_MANGLED( \ - volatile ADDR_SPACE TYPE_INT *, enum Scope, enum MemorySemanticsMask, \ - enum MemorySemanticsMask, TYPE_INT, TYPE_INT); \ - __attribute__((always_inline)) _CLC_DECL TYPE _Z21__spirv_\ -Atomic##OP_MANGLED##POINTER_AND_ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope\ 
-4FlagENS##SUBSTITUTION1##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \ - enum MemorySemanticsMask semantics, TYPE val) { \ - enum MemorySemanticsMask load_order; \ +#define __CLC_NVVM_ATOMIC_MIN_IMPL(TYPE, TYPE_INT, OP_MANGLED, ADDR_SPACE) \ + __attribute__((always_inline)) _CLC_OVERLOAD _CLC_DECL TYPE \ + __spirv_Atomic##OP_MANGLED(ADDR_SPACE TYPE *pointer, int scope, \ + int semantics, TYPE val) { \ + int load_order; \ switch (semantics) { \ case SequentiallyConsistent: \ load_order = SequentiallyConsistent; \ @@ -51,18 +35,12 @@ Atomic##OP_MANGLED##POINTER_AND_ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope\ default: \ load_order = None; \ } \ - volatile ADDR_SPACE TYPE_INT *pointer_int = \ - (volatile ADDR_SPACE TYPE_INT *)pointer; \ + ADDR_SPACE TYPE_INT *pointer_int = (ADDR_SPACE TYPE_INT *)pointer; \ TYPE_INT val_int = *(TYPE_INT *)&val; \ - TYPE_INT old_int = _Z18__spirv_\ -AtomicLoad##POINTER_AND_ADDR_SPACE_MANGLED##K##TYPE_INT_MANGLED##N5__spv5Scope4\ -FlagENS1_19MemorySemanticsMask4FlagE(pointer_int, scope, load_order); \ + TYPE_INT old_int = __spirv_AtomicLoad(pointer_int, scope, load_order); \ TYPE old = *(TYPE *)&old_int; \ while (val < old) { \ - TYPE_INT tmp_int = _Z29__spirv_\ -AtomicCompareExchange##POINTER_AND_ADDR_SPACE_MANGLED##TYPE_INT_MANGLED##N5__sp\ -v5Scope4FlagENS##SUBSTITUTION1##_19MemorySemanticsMask4\ -FlagES##SUBSTITUTION2##_##TYPE_INT_MANGLED##TYPE_INT_MANGLED( \ + TYPE_INT tmp_int = __spirv_AtomicCompareExchange( \ pointer_int, scope, semantics, semantics, val_int, old_int); \ if (old_int == tmp_int) { \ return *(TYPE *)&tmp_int; \ @@ -73,14 +51,10 @@ FlagES##SUBSTITUTION2##_##TYPE_INT_MANGLED##TYPE_INT_MANGLED( \ return old; \ } -#define __CLC_NVVM_ATOMIC_MIN(TYPE, TYPE_MANGLED, TYPE_INT, TYPE_INT_MANGLED, \ - OP_MANGLED) \ - __CLC_NVVM_ATOMIC_MIN_IMPL(TYPE, TYPE_MANGLED, TYPE_INT, TYPE_INT_MANGLED, \ - OP_MANGLED, __global, PU3AS1, 1, 5) \ - __CLC_NVVM_ATOMIC_MIN_IMPL(TYPE, TYPE_MANGLED, TYPE_INT, TYPE_INT_MANGLED, \ - OP_MANGLED, __local, PU3AS3, 1, 5) \ - __CLC_NVVM_ATOMIC_MIN_IMPL(TYPE, TYPE_MANGLED, TYPE_INT, TYPE_INT_MANGLED, \ - OP_MANGLED, , P, 0, 4) +#define __CLC_NVVM_ATOMIC_MIN(TYPE, TYPE_INT, OP_MANGLED) \ + __CLC_NVVM_ATOMIC_MIN_IMPL(TYPE, TYPE_INT, OP_MANGLED, __global) \ + __CLC_NVVM_ATOMIC_MIN_IMPL(TYPE, TYPE_INT, OP_MANGLED, __local) \ + __CLC_NVVM_ATOMIC_MIN_IMPL(TYPE, TYPE_INT, OP_MANGLED, ) -__CLC_NVVM_ATOMIC_MIN(float, f, int, i, FMinEXT) -__CLC_NVVM_ATOMIC_MIN(double, d, long, l, FMinEXT) +__CLC_NVVM_ATOMIC_MIN(float, int, FMinEXT) +__CLC_NVVM_ATOMIC_MIN(double, long, FMinEXT) diff --git a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_or.cl b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_or.cl index c5ade3cc281c0..780fdd529e361 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_or.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_or.cl @@ -10,10 +10,10 @@ #include #include -__CLC_NVVM_ATOMIC(int, i, int, i, or, _Z16__spirv_AtomicOr) -__CLC_NVVM_ATOMIC(long, l, long, l, or, _Z16__spirv_AtomicOr) -__CLC_NVVM_ATOMIC(unsigned int, j, int, i, or, _Z16__spirv_AtomicOr) -__CLC_NVVM_ATOMIC(unsigned long, m, long, l, or, _Z16__spirv_AtomicOr) +__CLC_NVVM_ATOMIC(int, int, i, or, __spirv_AtomicOr) +__CLC_NVVM_ATOMIC(long, long, l, or, __spirv_AtomicOr) +__CLC_NVVM_ATOMIC(unsigned int, int, i, or, __spirv_AtomicOr) +__CLC_NVVM_ATOMIC(unsigned long, long, l, or, __spirv_AtomicOr) #undef __CLC_NVVM_ATOMIC_TYPES #undef __CLC_NVVM_ATOMIC diff --git 
a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_store.cl b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_store.cl index 55e01844dd603..3382366a38ade 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_store.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_store.cl @@ -36,14 +36,11 @@ _CLC_OVERLOAD _CLC_DECL void __spirv_MemoryBarrier(int, int); } \ } -#define __CLC_NVVM_ATOMIC_STORE_IMPL( \ - TYPE, TYPE_MANGLED, SUBSTITUTION, TYPE_NV, TYPE_MANGLED_NV, ADDR_SPACE, \ - POINTER_AND_ADDR_SPACE_MANGLED, ADDR_SPACE_NV) \ - __attribute__((always_inline)) _CLC_DECL void _Z19__spirv_\ -AtomicStore##POINTER_AND_ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagEN\ -S##SUBSTITUTION##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \ - enum MemorySemanticsMask semantics, TYPE value) { \ +#define __CLC_NVVM_ATOMIC_STORE_IMPL(TYPE, TYPE_NV, TYPE_MANGLED_NV, \ + ADDR_SPACE, ADDR_SPACE_NV) \ + __attribute__((always_inline)) _CLC_OVERLOAD _CLC_DECL void \ + __spirv_AtomicStore(ADDR_SPACE TYPE *pointer, int scope, int semantics, \ + TYPE value) { \ /* Semantics mask may include memory order, storage class and other info \ Memory order is stored in the lowest 5 bits */ \ unsigned int order = semantics & 0x1F; \ @@ -82,22 +79,21 @@ Memory order is stored in the lowest 5 bits */ \ __builtin_unreachable(); \ } -#define __CLC_NVVM_ATOMIC_STORE(TYPE, TYPE_MANGLED, TYPE_NV, TYPE_MANGLED_NV) \ - __CLC_NVVM_ATOMIC_STORE_IMPL(TYPE, TYPE_MANGLED, 1, TYPE_NV, TYPE_MANGLED_NV,\ - __global, PU3AS1, _global_) \ - __CLC_NVVM_ATOMIC_STORE_IMPL(TYPE, TYPE_MANGLED, 1, TYPE_NV, TYPE_MANGLED_NV,\ - __local, PU3AS3, _shared_) \ - __CLC_NVVM_ATOMIC_STORE_IMPL(TYPE, TYPE_MANGLED, 0, TYPE_NV, TYPE_MANGLED_NV,\ - , P, _gen_) +#define __CLC_NVVM_ATOMIC_STORE(TYPE, TYPE_NV, TYPE_MANGLED_NV) \ + __CLC_NVVM_ATOMIC_STORE_IMPL(TYPE, TYPE_NV, TYPE_MANGLED_NV, __global, \ + _global_) \ + __CLC_NVVM_ATOMIC_STORE_IMPL(TYPE, TYPE_NV, TYPE_MANGLED_NV, __local, \ + _shared_) \ + __CLC_NVVM_ATOMIC_STORE_IMPL(TYPE, TYPE_NV, TYPE_MANGLED_NV, , _gen_) -__CLC_NVVM_ATOMIC_STORE(int, i, int, i) -__CLC_NVVM_ATOMIC_STORE(uint, j, int, i) -__CLC_NVVM_ATOMIC_STORE(long, l, long, l) -__CLC_NVVM_ATOMIC_STORE(ulong, m, long, l) +__CLC_NVVM_ATOMIC_STORE(int, int, i) +__CLC_NVVM_ATOMIC_STORE(uint, int, i) +__CLC_NVVM_ATOMIC_STORE(long, long, l) +__CLC_NVVM_ATOMIC_STORE(ulong, long, l) -__CLC_NVVM_ATOMIC_STORE(float, f, float, f) +__CLC_NVVM_ATOMIC_STORE(float, float, f) #ifdef cl_khr_int64_base_atomics -__CLC_NVVM_ATOMIC_STORE(double, d, double, d) +__CLC_NVVM_ATOMIC_STORE(double, double, d) #endif #undef __CLC_NVVM_ATOMIC_STORE_TYPES diff --git a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_sub.cl b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_sub.cl index 256d600585e47..1aa10828bee28 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_sub.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_sub.cl @@ -9,35 +9,24 @@ #include #include -#define __CLC_NVVM_ATOMIC_SUB_IMPL(TYPE, TYPE_MANGLED, OP_MANGLED, ADDR_SPACE, \ - POINTER_AND_ADDR_SPACE_MANGLED, \ - SUBSTITUTION) \ - TYPE _Z18__spirv_\ -AtomicIAdd##POINTER_AND_ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagEN\ -S##SUBSTITUTION##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - volatile ADDR_SPACE TYPE *, enum Scope, enum MemorySemanticsMask, TYPE); \ - __attribute__((always_inline)) _CLC_DECL TYPE _Z18__spirv_\ -Atomic##OP_MANGLED##POINTER_AND_ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope\ 
-4FlagENS##SUBSTITUTION##_19MemorySemanticsMask4FlagE##TYPE_MANGLED( \ - volatile ADDR_SPACE TYPE *pointer, enum Scope scope, \ - enum MemorySemanticsMask semantics, TYPE val) { \ - return _Z18__spirv_\ -AtomicIAdd##POINTER_AND_ADDR_SPACE_MANGLED##TYPE_MANGLED##N5__spv5Scope4FlagEN\ -S##SUBSTITUTION##_19MemorySemanticsMask4FlagE##TYPE_MANGLED(pointer, scope, \ - semantics, -val); \ +#define __CLC_NVVM_ATOMIC_SUB_IMPL(TYPE, OP_MANGLED, ADDR_SPACE) \ + _CLC_OVERLOAD _CLC_DECL TYPE __spirv_AtomicIAdd(ADDR_SPACE TYPE *, int, int, \ + TYPE); \ + __attribute__((always_inline)) _CLC_OVERLOAD _CLC_DECL TYPE \ + __spirv_Atomic##OP_MANGLED(ADDR_SPACE TYPE *pointer, int scope, \ + int semantics, TYPE val) { \ + return __spirv_AtomicIAdd(pointer, scope, semantics, -val); \ } -#define __CLC_NVVM_ATOMIC_SUB(TYPE, TYPE_MANGLED, OP_MANGLED) \ - __CLC_NVVM_ATOMIC_SUB_IMPL(TYPE, TYPE_MANGLED, OP_MANGLED, __global, PU3AS1, \ - 1) \ - __CLC_NVVM_ATOMIC_SUB_IMPL(TYPE, TYPE_MANGLED, OP_MANGLED, __local, PU3AS3, \ - 1) \ - __CLC_NVVM_ATOMIC_SUB_IMPL(TYPE, TYPE_MANGLED, OP_MANGLED, , P, 0) +#define __CLC_NVVM_ATOMIC_SUB(TYPE, OP_MANGLED) \ + __CLC_NVVM_ATOMIC_SUB_IMPL(TYPE, OP_MANGLED, __global) \ + __CLC_NVVM_ATOMIC_SUB_IMPL(TYPE, OP_MANGLED, __local) \ + __CLC_NVVM_ATOMIC_SUB_IMPL(TYPE, OP_MANGLED, ) -__CLC_NVVM_ATOMIC_SUB(int, i, ISub) -__CLC_NVVM_ATOMIC_SUB(unsigned int, j, ISub) -__CLC_NVVM_ATOMIC_SUB(long, l, ISub) -__CLC_NVVM_ATOMIC_SUB(unsigned long, m, ISub) +__CLC_NVVM_ATOMIC_SUB(int, ISub) +__CLC_NVVM_ATOMIC_SUB(unsigned int, ISub) +__CLC_NVVM_ATOMIC_SUB(long, ISub) +__CLC_NVVM_ATOMIC_SUB(unsigned long, ISub) #undef __CLC_NVVM_ATOMIC_SUB_IMPL #undef __CLC_NVVM_ATOMIC_SUB diff --git a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_xchg.cl b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_xchg.cl index 34ea69b4ecb4c..8fe85d1175bfd 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_xchg.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_xchg.cl @@ -10,12 +10,12 @@ #include #include -__CLC_NVVM_ATOMIC(int, i, int, i, xchg, _Z22__spirv_AtomicExchange) -__CLC_NVVM_ATOMIC(long, l, long, l, xchg, _Z22__spirv_AtomicExchange) -__CLC_NVVM_ATOMIC(unsigned int, j, int, i, xchg, _Z22__spirv_AtomicExchange) -__CLC_NVVM_ATOMIC(unsigned long, m, long, l, xchg, _Z22__spirv_AtomicExchange) -__CLC_NVVM_ATOMIC(float, f, float, f, xchg, _Z22__spirv_AtomicExchange) -__CLC_NVVM_ATOMIC(double, d, double, d, xchg, _Z22__spirv_AtomicExchange) +__CLC_NVVM_ATOMIC(int, int, i, xchg, __spirv_AtomicExchange) +__CLC_NVVM_ATOMIC(long, long, l, xchg, __spirv_AtomicExchange) +__CLC_NVVM_ATOMIC(unsigned int, int, i, xchg, __spirv_AtomicExchange) +__CLC_NVVM_ATOMIC(unsigned long, long, l, xchg, __spirv_AtomicExchange) +__CLC_NVVM_ATOMIC(float, float, f, xchg, __spirv_AtomicExchange) +__CLC_NVVM_ATOMIC(double, double, d, xchg, __spirv_AtomicExchange) #undef __CLC_NVVM_ATOMIC_TYPES #undef __CLC_NVVM_ATOMIC diff --git a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_xor.cl b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_xor.cl index 1e3e65d1f41e0..f5fc4961db8da 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_xor.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/atomic/atomic_xor.cl @@ -10,10 +10,10 @@ #include #include -__CLC_NVVM_ATOMIC(int, i, int, i, xor, _Z17__spirv_AtomicXor) -__CLC_NVVM_ATOMIC(long, l, long, l, xor, _Z17__spirv_AtomicXor) -__CLC_NVVM_ATOMIC(unsigned int, j, int, i, xor, _Z17__spirv_AtomicXor) -__CLC_NVVM_ATOMIC(unsigned long, m, long, l, xor, _Z17__spirv_AtomicXor) +__CLC_NVVM_ATOMIC(int, int, i, xor, 
__spirv_AtomicXor) +__CLC_NVVM_ATOMIC(long, long, l, xor, __spirv_AtomicXor) +__CLC_NVVM_ATOMIC(unsigned int, int, i, xor, __spirv_AtomicXor) +__CLC_NVVM_ATOMIC(unsigned long, long, l, xor, __spirv_AtomicXor) #undef __CLC_NVVM_ATOMIC_TYPES #undef __CLC_NVVM_ATOMIC diff --git a/libdevice/atomic.hpp b/libdevice/atomic.hpp index fd2148875bc39..ea3e725ebe48d 100644 --- a/libdevice/atomic.hpp +++ b/libdevice/atomic.hpp @@ -57,32 +57,23 @@ struct MemorySemanticsMask { }; } // namespace __spv -extern DEVICE_EXTERNAL int -__spirv_AtomicCompareExchange(int SPIR_GLOBAL *, __spv::Scope::Flag, - __spv::MemorySemanticsMask::Flag, - __spv::MemorySemanticsMask::Flag, int, int); - -extern DEVICE_EXTERNAL int -__spirv_AtomicCompareExchange(int *, __spv::Scope::Flag, - __spv::MemorySemanticsMask::Flag, - __spv::MemorySemanticsMask::Flag, int, int); - -extern DEVICE_EXTERNAL int __spirv_AtomicLoad(const int SPIR_GLOBAL *, - __spv::Scope::Flag, - __spv::MemorySemanticsMask::Flag); - -extern DEVICE_EXTERNAL void -__spirv_AtomicStore(int SPIR_GLOBAL *, __spv::Scope::Flag, - __spv::MemorySemanticsMask::Flag, int); - -extern DEVICE_EXTERNAL void -__spirv_AtomicStore(int *, __spv::Scope::Flag, __spv::MemorySemanticsMask::Flag, - int); - -extern DEVICE_EXTERNAL int __spirv_AtomicIAdd(SPIR_GLOBAL int *, - __spv::Scope::Flag, - __spv::MemorySemanticsMask::Flag, - int); +extern DEVICE_EXTERNAL int __spirv_AtomicCompareExchange(int SPIR_GLOBAL *, int, + int, int, int, + int) noexcept; + +extern DEVICE_EXTERNAL int __spirv_AtomicCompareExchange(int *, int, int, int, + int, int) noexcept; + +extern DEVICE_EXTERNAL int __spirv_AtomicLoad(const int SPIR_GLOBAL *, int, + int) noexcept; + +extern DEVICE_EXTERNAL void __spirv_AtomicStore(int SPIR_GLOBAL *, int, int, + int) noexcept; + +extern DEVICE_EXTERNAL void __spirv_AtomicStore(int *, int, int, int) noexcept; + +extern DEVICE_EXTERNAL int __spirv_AtomicIAdd(SPIR_GLOBAL int *, int, int, + int) noexcept; /// Atomically set the value in *Ptr with Desired if and only if it is Expected /// Return the value which already was in *Ptr diff --git a/sycl/include/sycl/__spirv/spirv_ops.hpp b/sycl/include/sycl/__spirv/spirv_ops.hpp index 4d99badf8d475..88c503e02efe4 100644 --- a/sycl/include/sycl/__spirv/spirv_ops.hpp +++ b/sycl/include/sycl/__spirv/spirv_ops.hpp @@ -238,69 +238,53 @@ extern __DPCPP_SYCL_EXTERNAL // Atomic SPIR-V builtins #define __SPIRV_ATOMIC_LOAD(AS, Type) \ - extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicLoad( \ - AS const Type *P, __spv::Scope::Flag S, \ - __spv::MemorySemanticsMask::Flag O); + extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicLoad(AS Type *P, int S, \ + int O) noexcept; #define __SPIRV_ATOMIC_STORE(AS, Type) \ extern __DPCPP_SYCL_EXTERNAL void __spirv_AtomicStore( \ - AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \ - Type V); + AS Type *P, int S, int O, Type V) noexcept; #define __SPIRV_ATOMIC_EXCHANGE(AS, Type) \ extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicExchange( \ - AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \ - Type V); + AS Type *P, int S, int O, Type V) noexcept; #define __SPIRV_ATOMIC_CMP_EXCHANGE(AS, Type) \ extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicCompareExchange( \ - AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag E, \ - __spv::MemorySemanticsMask::Flag U, Type V, Type C); + AS Type *P, int S, int E, int U, Type V, Type C) noexcept; #define __SPIRV_ATOMIC_IADD(AS, Type) \ extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicIAdd( \ - AS Type *P, 
__spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \ - Type V); + AS Type *P, int S, int O, Type V) noexcept; #define __SPIRV_ATOMIC_ISUB(AS, Type) \ extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicISub( \ - AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \ - Type V); + AS Type *P, int S, int O, Type V) noexcept; #define __SPIRV_ATOMIC_FADD(AS, Type) \ extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicFAddEXT( \ - AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \ - Type V); + AS Type *P, int S, int O, Type V) noexcept; #define __SPIRV_ATOMIC_SMIN(AS, Type) \ extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicSMin( \ - AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \ - Type V); + AS Type *P, int S, int O, Type V) noexcept; #define __SPIRV_ATOMIC_UMIN(AS, Type) \ extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicUMin( \ - AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \ - Type V); + AS Type *P, int S, int O, Type V) noexcept; #define __SPIRV_ATOMIC_FMIN(AS, Type) \ extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicFMinEXT( \ - AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \ - Type V); + AS Type *P, int S, int O, Type V) noexcept; #define __SPIRV_ATOMIC_SMAX(AS, Type) \ extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicSMax( \ - AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \ - Type V); + AS Type *P, int S, int O, Type V) noexcept; #define __SPIRV_ATOMIC_UMAX(AS, Type) \ extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicUMax( \ - AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \ - Type V); + AS Type *P, int S, int O, Type V) noexcept; #define __SPIRV_ATOMIC_FMAX(AS, Type) \ extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicFMaxEXT( \ - AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \ - Type V); + AS Type *P, int S, int O, Type V) noexcept; #define __SPIRV_ATOMIC_AND(AS, Type) \ - extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicAnd( \ - AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \ - Type V); + extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicAnd(AS Type *P, int S, \ + int O, Type V) noexcept; #define __SPIRV_ATOMIC_OR(AS, Type) \ - extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicOr( \ - AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \ - Type V); + extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicOr(AS Type *P, int S, int O, \ + Type V) noexcept; #define __SPIRV_ATOMIC_XOR(AS, Type) \ - extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicXor( \ - AS Type *P, __spv::Scope::Flag S, __spv::MemorySemanticsMask::Flag O, \ - Type V); + extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicXor(AS Type *P, int S, \ + int O, Type V) noexcept; #define __SPIRV_ATOMIC_FLOAT(AS, Type) \ __SPIRV_ATOMIC_FADD(AS, Type) \ @@ -335,24 +319,21 @@ extern __DPCPP_SYCL_EXTERNAL template \ typename std::enable_if_t< \ std::is_integral::value && std::is_signed::value, T> \ - __spirv_Atomic##Op(AS T *Ptr, __spv::Scope::Flag Memory, \ - __spv::MemorySemanticsMask::Flag Semantics, \ - T Value) { \ + __spirv_Atomic##Op(AS T *Ptr, int Memory, int Semantics, \ + T Value) noexcept { \ return __spirv_AtomicS##Op(Ptr, Memory, Semantics, Value); \ } \ template \ typename std::enable_if_t< \ std::is_integral::value && !std::is_signed::value, T> \ - __spirv_Atomic##Op(AS T *Ptr, __spv::Scope::Flag Memory, \ - __spv::MemorySemanticsMask::Flag Semantics, \ - T Value) { \ + __spirv_Atomic##Op(AS T *Ptr, int Memory, int 
Semantics, \ + T Value) noexcept { \ return __spirv_AtomicU##Op(Ptr, Memory, Semantics, Value); \ } \ template \ typename std::enable_if_t::value, T> \ - __spirv_Atomic##Op(AS T *Ptr, __spv::Scope::Flag Memory, \ - __spv::MemorySemanticsMask::Flag Semantics, \ - T Value) { \ + __spirv_Atomic##Op(AS T *Ptr, int Memory, int Semantics, \ + T Value) noexcept { \ return __spirv_AtomicF##Op##EXT(Ptr, Memory, Semantics, Value); \ } diff --git a/sycl/test/check_device_code/atomic_ref.cpp b/sycl/test/check_device_code/atomic_ref.cpp index 899d547497e5e..58d915f88d07b 100644 --- a/sycl/test/check_device_code/atomic_ref.cpp +++ b/sycl/test/check_device_code/atomic_ref.cpp @@ -7,7 +7,7 @@ // CHECK-SAME: ptr addrspace(4) noundef align 4 dereferenceable(4) [[I:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !sycl_fixed_targets [[META6:![0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP:%.*]] = addrspacecast ptr addrspace(4) [[I]] to ptr addrspace(1) -// CHECK-NEXT: [[CALL3_I_I:%.*]] = tail call spir_func noundef i32 @_Z18__spirv_AtomicLoadPU3AS1KiN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE(ptr addrspace(1) noundef [[TMP]], i32 noundef 1, i32 noundef 898) #[[ATTR4:[0-9]+]] +// CHECK-NEXT: [[CALL3_I_I:%.*]] = tail call spir_func noundef i32 @_Z18__spirv_AtomicLoadPU3AS1iii(ptr addrspace(1) noundef [[TMP]], i32 noundef 1, i32 noundef 898) #[[ATTR4:[0-9]+]] // CHECK-NEXT: ret i32 [[CALL3_I_I]] // SYCL_EXTERNAL auto atomic_ref_global(int &i) { diff --git a/sycl/test/check_device_code/hip/atomic/amdgpu_unsafe_atomics.cpp b/sycl/test/check_device_code/hip/atomic/amdgpu_unsafe_atomics.cpp index b7f7e1e26830e..8870cf26f3d7d 100644 --- a/sycl/test/check_device_code/hip/atomic/amdgpu_unsafe_atomics.cpp +++ b/sycl/test/check_device_code/hip/atomic/amdgpu_unsafe_atomics.cpp @@ -12,11 +12,11 @@ SYCL_EXTERNAL void intAtomicFunc(int *i) { atomicInt.fetch_and(1); atomicInt.fetch_or(1); // CHECK: void{{.*}}intAtomicFunc - // CHECK-SAFE: cmpxchg volatile + // CHECK-SAFE: cmpxchg // CHECK-SAFE-NOT: atomicrmw - // CHECK-UNSAFE: atomicrmw volatile xor - // CHECK-UNSAFE: atomicrmw volatile and - // CHECK-UNSAFE: atomicrmw volatile or + // CHECK-UNSAFE: atomicrmw xor + // CHECK-UNSAFE: atomicrmw and + // CHECK-UNSAFE: atomicrmw or // CHECK-UNSAFE-NOT: cmpxchg } @@ -25,15 +25,15 @@ SYCL_EXTERNAL void fpAtomicFunc(float *f, double *d) { sycl::access::address_space::global_space>(*f) .fetch_add(1.0f); // CHECK: void{{.*}}fpAtomicFunc - // CHECK-SAFE: atomicrmw volatile fadd + // CHECK-SAFE: atomicrmw fadd // CHECK-SAFE-NOT: amdgpu.ignore.denormal.mode - // CHECK-UNSAFE-FP: atomicrmw volatile fadd {{.*}}!amdgpu.no.fine.grained.memory{{.*}}!amdgpu.ignore.denormal.mode + // CHECK-UNSAFE-FP: atomicrmw fadd {{.*}}!amdgpu.no.fine.grained.memory{{.*}}!amdgpu.ignore.denormal.mode sycl::atomic_ref(*d) .fetch_add(1.0); // CHECK-SAFE: cmpxchg // CHECK-SAFE-NOT: llvm.amdgcn.global.atomic.fadd.f64 - // CHECK-UNSAFE-FP: atomicrmw volatile fadd {{.*}}!amdgpu.no.fine.grained.memory + // CHECK-UNSAFE-FP: atomicrmw fadd {{.*}}!amdgpu.no.fine.grained.memory // CHECK: __CLANG_OFFLOAD_BUNDLE____END__ sycl-amdgcn-amd-amdhsa- }
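For reference, a minimal caller-side sketch (not part of this patch) of how the de-mangled overloads are now selected purely by pointer type and address space once scope and semantics become plain ints. The scope/semantics literals follow the SPIR-V Scope and MemorySemanticsMask encodings and are illustrative only; the snippet assumes the usual libclc/OpenCL C device environment with the declarations from this patch in scope.

// Hypothetical usage sketch (assumed names/values, not from the patch itself).
void sketch(global int *g, local float *l) {
  // scope 1 = Device, semantics 0x10 = SequentiallyConsistent (illustrative values).
  int old = __spirv_AtomicIAdd(g, /*scope=*/1, /*semantics=*/0x10, 1);
  // The local-float store overload forwards to the uint overload via a bit-cast.
  __spirv_AtomicStore(l, /*scope=*/2, /*semantics=*/0x10, 1.0f);
  (void)old;
}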