Skip to content

Commit c3115dd

Browse files
authored
[SPIR-V] Scope Atomic ops on Workgroup memory (microsoft#6527)
Set the scope of atomic operations on Workgroup-shared variables to Workgroup rather than Device. Fixes microsoft#6508
1 parent 9ee056e commit c3115dd

File tree

4 files changed

+42
-31
lines changed

4 files changed

+42
-31
lines changed

tools/clang/lib/SPIRV/SpirvEmitter.cpp

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -9173,6 +9173,12 @@ SpirvEmitter::processIntrinsicInterlockedMethod(const CallExpr *expr,
91739173
ptr = ptrInfo;
91749174
}
91759175

9176+
// Atomic operations on memory in the Workgroup storage class should also be
9177+
// Workgroup scoped. Otherwise, default to Device scope.
9178+
spv::Scope scope = ptr->getStorageClass() == spv::StorageClass::Workgroup
9179+
? spv::Scope::Workgroup
9180+
: spv::Scope::Device;
9181+
91769182
const bool isCompareExchange =
91779183
opcode == hlsl::IntrinsicOp::IOP_InterlockedCompareExchange;
91789184
const bool isCompareStore =
@@ -9182,7 +9188,7 @@ SpirvEmitter::processIntrinsicInterlockedMethod(const CallExpr *expr,
91829188
auto *comparator = doArg(expr, 1);
91839189
auto *valueInstr = doArg(expr, 2);
91849190
auto *originalVal = spvBuilder.createAtomicCompareExchange(
9185-
baseType, ptr, spv::Scope::Device, spv::MemorySemanticsMask::MaskNone,
9191+
baseType, ptr, scope, spv::MemorySemanticsMask::MaskNone,
91869192
spv::MemorySemanticsMask::MaskNone, valueInstr, comparator, srcLoc);
91879193
if (isCompareExchange)
91889194
writeToOutputArg(originalVal, expr, 3);
@@ -9204,8 +9210,8 @@ SpirvEmitter::processIntrinsicInterlockedMethod(const CallExpr *expr,
92049210
if (atomicOp == spv::Op::OpAtomicSMin && baseType->isUnsignedIntegerType())
92059211
atomicOp = spv::Op::OpAtomicUMin;
92069212
auto *originalVal = spvBuilder.createAtomicOp(
9207-
atomicOp, baseType, ptr, spv::Scope::Device,
9208-
spv::MemorySemanticsMask::MaskNone, value, srcLoc);
9213+
atomicOp, baseType, ptr, scope, spv::MemorySemanticsMask::MaskNone,
9214+
value, srcLoc);
92099215
if (expr->getNumArgs() > 2)
92109216
writeToOutputArg(originalVal, expr, 2);
92119217
}

tools/clang/test/CodeGenSPIRV/intrinsics.64bit-interlocked-methods.cs.hlsl

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ void main()
2727
// CHECK: OpCapability Int64Atomics
2828

2929
// CHECK: [[val1_i64:%[0-9]+]] = OpLoad %long %val1_i64
30-
// CHECK-NEXT: [[atomic_add:%[0-9]+]] = OpAtomicIAdd %long %dest_i %uint_1 %uint_0 [[val1_i64]]
30+
// CHECK-NEXT: [[atomic_add:%[0-9]+]] = OpAtomicIAdd %long %dest_i %uint_2 %uint_0 [[val1_i64]]
3131
// CHECK-NEXT: OpStore %original_i_val [[atomic_add]]
3232
InterlockedAdd(dest_i, val1_i64, original_i_val);
3333

@@ -39,41 +39,41 @@ void main()
3939
InterlockedAdd(getDest()[0], val3_u64, original_u_val);
4040

4141
// CHECK: [[val3_u64_0:%[0-9]+]] = OpLoad %ulong %val3_u64
42-
// CHECK-NEXT: [[atomic_and:%[0-9]+]] = OpAtomicAnd %ulong %dest_u %uint_1 %uint_0 [[val3_u64_0]]
42+
// CHECK-NEXT: [[atomic_and:%[0-9]+]] = OpAtomicAnd %ulong %dest_u %uint_2 %uint_0 [[val3_u64_0]]
4343
// CHECK-NEXT: OpStore %original_u_val [[atomic_and]]
4444
InterlockedAnd(dest_u, val3_u64, original_u_val);
4545

4646
// CHECK: [[val1_i64_0:%[0-9]+]] = OpLoad %long %val1_i64
47-
// CHECK-NEXT: [[atomic_max:%[0-9]+]] = OpAtomicSMax %long %dest_i %uint_1 %uint_0 [[val1_i64_0]]
47+
// CHECK-NEXT: [[atomic_max:%[0-9]+]] = OpAtomicSMax %long %dest_i %uint_2 %uint_0 [[val1_i64_0]]
4848
// CHECK-NEXT: OpStore %original_i_val [[atomic_max]]
4949
InterlockedMax(dest_i, val1_i64, original_i_val);
5050

5151
// CHECK: [[val3_u64_1:%[0-9]+]] = OpLoad %ulong %val3_u64
52-
// CHECK-NEXT: [[atomic_min:%[0-9]+]] = OpAtomicUMin %ulong %dest_u %uint_1 %uint_0 [[val3_u64_1]]
52+
// CHECK-NEXT: [[atomic_min:%[0-9]+]] = OpAtomicUMin %ulong %dest_u %uint_2 %uint_0 [[val3_u64_1]]
5353
// CHECK-NEXT: OpStore %original_u_val [[atomic_min]]
5454
InterlockedMin(dest_u, val3_u64, original_u_val);
5555

5656
// CHECK: [[val2_i64:%[0-9]+]] = OpLoad %long %val2_i64
57-
// CHECK-NEXT: [[atomic_or:%[0-9]+]] = OpAtomicOr %long %dest_i %uint_1 %uint_0 [[val2_i64_0:%[0-9]+]]
57+
// CHECK-NEXT: [[atomic_or:%[0-9]+]] = OpAtomicOr %long %dest_i %uint_2 %uint_0 [[val2_i64_0:%[0-9]+]]
5858
// CHECK-NEXT: OpStore %original_i_val [[atomic_or]]
5959
InterlockedOr (dest_i, val2_i64, original_i_val);
6060

6161
// CHECK: [[val3_u64_2:%[0-9]+]] = OpLoad %ulong %val3_u64
62-
// CHECK-NEXT: [[atomic_xor:%[0-9]+]] = OpAtomicXor %ulong %dest_u %uint_1 %uint_0 [[val3_u64_2]]
62+
// CHECK-NEXT: [[atomic_xor:%[0-9]+]] = OpAtomicXor %ulong %dest_u %uint_2 %uint_0 [[val3_u64_2]]
6363
// CHECK-NEXT: OpStore %original_u_val [[atomic_xor]]
6464
InterlockedXor(dest_u, val3_u64, original_u_val);
6565

6666
// CHECK: [[val1_i64_1:%[0-9]+]] = OpLoad %long %val1_i64
6767
// CHECK-NEXT: [[val2_i64_1:%[0-9]+]] = OpLoad %long %val2_i64
68-
// CHECK-NEXT: {{%[0-9]+}} = OpAtomicCompareExchange %long %dest_i %uint_1 %uint_0 %uint_0 [[val2_i64_1]] [[val1_i64_1]]
68+
// CHECK-NEXT: {{%[0-9]+}} = OpAtomicCompareExchange %long %dest_i %uint_2 %uint_0 %uint_0 [[val2_i64_1]] [[val1_i64_1]]
6969
InterlockedCompareStore(dest_i, val1_i64, val2_i64);
7070

71-
// CHECK: [[ace:%[0-9]+]] = OpAtomicCompareExchange %ulong %dest_u %uint_1 %uint_0 %uint_0 %ulong_20 %ulong_15
71+
// CHECK: [[ace:%[0-9]+]] = OpAtomicCompareExchange %ulong %dest_u %uint_2 %uint_0 %uint_0 %ulong_20 %ulong_15
7272
// CHECK-NEXT: OpStore %original_u_val [[ace]]
7373
InterlockedCompareExchange(dest_u, 15u, 20u, original_u_val);
7474

7575
// CHECK: [[val2_i64_2:%[0-9]+]] = OpLoad %long %val2_i64
76-
// CHECK-NEXT: [[ae:%[0-9]+]] = OpAtomicExchange %long %dest_i %uint_1 %uint_0 [[val2_i64_2]]
76+
// CHECK-NEXT: [[ae:%[0-9]+]] = OpAtomicExchange %long %dest_i %uint_2 %uint_0 [[val2_i64_2]]
7777
// CHECK-NEXT: OpStore %original_i_val [[ae]]
7878
InterlockedExchange(dest_i, val2_i64, original_i_val);
7979
}

tools/clang/test/CodeGenSPIRV/intrinsics.interlocked-methods.compareexchange.output.hlsl

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -23,46 +23,46 @@ void passBuffer(RWStructuredBuffer<uint> param) {
2323
void main() {
2424
uint result;
2525
InterlockedAdd(value, 1, result);
26-
// CHECK: [[tmp:%[0-9]+]] = OpAtomicIAdd %uint %value %uint_1 %uint_0 %uint_1
26+
// CHECK: [[tmp:%[0-9]+]] = OpAtomicIAdd %uint %value %uint_2 %uint_0 %uint_1
2727
// CHECK: OpStore %result [[tmp]]
2828

2929
uint2 value2;
3030
InterlockedAdd(value, 1, value2.x);
31-
// CHECK: [[tmp:%[0-9]+]] = OpAtomicIAdd %uint %value %uint_1 %uint_0 %uint_1
31+
// CHECK: [[tmp:%[0-9]+]] = OpAtomicIAdd %uint %value %uint_2 %uint_0 %uint_1
3232
// CHECK: [[ptr:%[0-9]+]] = OpAccessChain %_ptr_Function_uint %value2 %int_0
3333
// CHECK: OpStore [[ptr]] [[tmp]]
3434

3535
S1 s1;
3636
InterlockedAdd(value, 1, s1.m0);
37-
// CHECK: [[tmp:%[0-9]+]] = OpAtomicIAdd %uint %value %uint_1 %uint_0 %uint_1
37+
// CHECK: [[tmp:%[0-9]+]] = OpAtomicIAdd %uint %value %uint_2 %uint_0 %uint_1
3838
// CHECK: [[ptr:%[0-9]+]] = OpAccessChain %_ptr_Function_uint %s1 %int_0
3939
// CHECK: OpStore [[ptr]] [[tmp]]
4040

4141
uint array[2];
4242
InterlockedAdd(value, 1, array[0]);
43-
// CHECK: [[tmp:%[0-9]+]] = OpAtomicIAdd %uint %value %uint_1 %uint_0 %uint_1
43+
// CHECK: [[tmp:%[0-9]+]] = OpAtomicIAdd %uint %value %uint_2 %uint_0 %uint_1
4444
// CHECK: [[ptr:%[0-9]+]] = OpAccessChain %_ptr_Function_uint %array %int_0
4545
// CHECK: OpStore [[ptr]] [[tmp]]
4646

4747
S2 s2;
4848
InterlockedAdd(value, 1, s2.m0[1].m0);
49-
// CHECK: [[tmp:%[0-9]+]] = OpAtomicIAdd %uint %value %uint_1 %uint_0 %uint_1
49+
// CHECK: [[tmp:%[0-9]+]] = OpAtomicIAdd %uint %value %uint_2 %uint_0 %uint_1
5050
// CHECK: [[ptr:%[0-9]+]] = OpAccessChain %_ptr_Function_uint %s2 %int_0 %int_1 %int_0
5151
// CHECK: OpStore [[ptr]] [[tmp]]
5252

5353
InterlockedAdd(value, 1, buffer[0]);
54-
// CHECK: [[tmp:%[0-9]+]] = OpAtomicIAdd %uint %value %uint_1 %uint_0 %uint_1
54+
// CHECK: [[tmp:%[0-9]+]] = OpAtomicIAdd %uint %value %uint_2 %uint_0 %uint_1
5555
// CHECK: [[ptr:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint %buffer %int_0 %uint_0
5656
// CHECK: OpStore [[ptr]] [[tmp]]
5757

5858
InterlockedAdd(value, 1, returnBuffer()[0]);
59-
// CHECK: [[tmp:%[0-9]+]] = OpAtomicIAdd %uint %value %uint_1 %uint_0 %uint_1
59+
// CHECK: [[tmp:%[0-9]+]] = OpAtomicIAdd %uint %value %uint_2 %uint_0 %uint_1
6060
// CHECK: [[buf:%[0-9]+]] = OpFunctionCall %_ptr_Uniform_type_RWStructuredBuffer_uint %returnBuffer
6161
// CHECK: [[ptr:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint [[buf]] %int_0 %uint_0
6262
// CHECK: OpStore [[ptr]] [[tmp]]
6363

6464
passBuffer(buffer);
65-
// CHECK: [[tmp:%[0-9]+]] = OpAtomicIAdd %uint %value %uint_1 %uint_0 %uint_1
65+
// CHECK: [[tmp:%[0-9]+]] = OpAtomicIAdd %uint %value %uint_2 %uint_0 %uint_1
6666
// CHECK: [[buf:%[0-9]+]] = OpLoad %_ptr_Uniform_type_RWStructuredBuffer_uint %param
6767
// CHECK: [[ptr:%[0-9]+]] = OpAccessChain %_ptr_Uniform_uint [[buf]] %int_0 %uint_0
6868
// CHECK: OpStore [[ptr]] [[tmp]]

tools/clang/test/CodeGenSPIRV/intrinsics.interlocked-methods.cs.hlsl

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -26,53 +26,58 @@ void main()
2626
//////////////////////////////////////////////////////////////////////////
2727

2828
// CHECK: [[val1_27:%[0-9]+]] = OpLoad %int %val1
29-
// CHECK-NEXT: [[iadd27:%[0-9]+]] = OpAtomicIAdd %int %dest_i %uint_1 %uint_0 [[val1_27]]
29+
// CHECK-NEXT: [[iadd27:%[0-9]+]] = OpAtomicIAdd %int %dest_i %uint_2 %uint_0 [[val1_27]]
3030
// CHECK-NEXT: OpStore %original_i_val [[iadd27]]
3131
InterlockedAdd(dest_i, val1, original_i_val);
3232

3333
// CHECK: [[buff:%[0-9]+]] = OpFunctionCall %type_buffer_image %getDest
3434
// CHECK-NEXT: OpStore %temp_var_RWBuffer [[buff]]
35-
// CHECK-NEXT: OpImageTexelPointer %_ptr_Image_uint %temp_var_RWBuffer %uint_0 %uint_0
35+
// CHECK-NEXT: [[ptr:%[0-9]+]] = OpImageTexelPointer %_ptr_Image_uint %temp_var_RWBuffer %uint_0 %uint_0
36+
// CHECK-NEXT: [[load_28:%[0-9]+]] = OpLoad %int %val1
37+
// CHECK-NEXT: [[val1_28:%[0-9]+]] = OpBitcast %uint [[load_28]]
38+
// CHECK-NEXT: [[iadd28:%[0-9]+]] = OpAtomicIAdd %uint [[ptr]] %uint_1 %uint_0 [[val1_28]]
39+
// CHECK-NEXT: [[iadd28_2:%[0-9]+]] = OpBitcast %int [[iadd28]]
40+
// CHECK-NEXT: OpStore %original_i_val [[iadd28_2]]
3641
InterlockedAdd(getDest()[0], val1, original_i_val);
3742

38-
// CHECK: [[and28:%[0-9]+]] = OpAtomicAnd %uint %dest_u %uint_1 %uint_0 %uint_10
43+
// CHECK: [[and28:%[0-9]+]] = OpAtomicAnd %uint %dest_u %uint_2 %uint_0 %uint_10
3944
// CHECK-NEXT: OpStore %original_u_val [[and28]]
4045
InterlockedAnd(dest_u, 10, original_u_val);
4146

4247
// CHECK: [[uint10:%[0-9]+]] = OpBitcast %int %uint_10
43-
// CHECK-NEXT: [[asmax29:%[0-9]+]] = OpAtomicSMax %int %dest_i %uint_1 %uint_0 [[uint10]]
48+
// CHECK-NEXT: [[asmax29:%[0-9]+]] = OpAtomicSMax %int %dest_i %uint_2 %uint_0 [[uint10]]
4449
// CHECK-NEXT: OpStore %original_i_val [[asmax29]]
4550
InterlockedMax(dest_i, 10, original_i_val);
4651

47-
// CHECK: [[umin30:%[0-9]+]] = OpAtomicUMin %uint %dest_u %uint_1 %uint_0 %uint_10
52+
// CHECK: [[umin30:%[0-9]+]] = OpAtomicUMin %uint %dest_u %uint_2 %uint_0 %uint_10
4853
// CHECK-NEXT: OpStore %original_u_val [[umin30]]
4954
InterlockedMin(dest_u, 10, original_u_val);
5055

5156
// CHECK: [[val2_31:%[0-9]+]] = OpLoad %int %val2
52-
// CHECK-NEXT: [[or31:%[0-9]+]] = OpAtomicOr %int %dest_i %uint_1 %uint_0 [[val2_31]]
57+
// CHECK-NEXT: [[or31:%[0-9]+]] = OpAtomicOr %int %dest_i %uint_2 %uint_0 [[val2_31]]
5358
// CHECK-NEXT: OpStore %original_i_val [[or31]]
5459
InterlockedOr (dest_i, val2, original_i_val);
5560

56-
// CHECK: [[xor32:%[0-9]+]] = OpAtomicXor %uint %dest_u %uint_1 %uint_0 %uint_10
61+
// CHECK: [[xor32:%[0-9]+]] = OpAtomicXor %uint %dest_u %uint_2 %uint_0 %uint_10
5762
// CHECK-NEXT: OpStore %original_u_val [[xor32]]
5863
InterlockedXor(dest_u, 10, original_u_val);
5964

6065
// CHECK: [[val1_33:%[0-9]+]] = OpLoad %int %val1
6166
// CHECK-NEXT: [[val2_33:%[0-9]+]] = OpLoad %int %val2
62-
// CHECK-NEXT: {{%[0-9]+}} = OpAtomicCompareExchange %int %dest_i %uint_1 %uint_0 %uint_0 [[val2_33]] [[val1_33]]
67+
// CHECK-NEXT: {{%[0-9]+}} = OpAtomicCompareExchange %int %dest_i %uint_2 %uint_0 %uint_0 [[val2_33]] [[val1_33]]
6368
InterlockedCompareStore(dest_i, val1, val2);
6469

65-
// CHECK: [[ace34:%[0-9]+]] = OpAtomicCompareExchange %uint %dest_u %uint_1 %uint_0 %uint_0 %uint_20 %uint_15
70+
// CHECK: [[ace34:%[0-9]+]] = OpAtomicCompareExchange %uint %dest_u %uint_2 %uint_0 %uint_0 %uint_20 %uint_15
6671
// CHECK-NEXT: OpStore %original_u_val [[ace34]]
6772
InterlockedCompareExchange(dest_u, 15, 20, original_u_val);
6873

6974
// CHECK: [[val2_35:%[0-9]+]] = OpLoad %int %val2
70-
// CHECK-NEXT: [[ace35:%[0-9]+]] = OpAtomicExchange %int %dest_i %uint_1 %uint_0 [[val2_35]]
75+
// CHECK-NEXT: [[ace35:%[0-9]+]] = OpAtomicExchange %int %dest_i %uint_2 %uint_0 [[val2_35]]
7176
// CHECK-NEXT: OpStore %original_i_val [[ace35]]
7277
InterlockedExchange(dest_i, val2, original_i_val);
7378

7479
// CHECK: [[val_f:%[0-9]+]] = OpLoad %float %val_f1
75-
// CHECK-NEXT: [[ace36:%[0-9]+]] = OpAtomicExchange %float %dest_f %uint_1 %uint_0 [[val_f]]
80+
// CHECK-NEXT: [[ace36:%[0-9]+]] = OpAtomicExchange %float %dest_f %uint_2 %uint_0 [[val_f]]
7681
// CHECK-NEXT: OpStore %original_f_val [[ace36]]
7782
InterlockedExchange(dest_f, val_f1, original_f_val);
7883
}

0 commit comments

Comments
 (0)