Skip to content

Commit 10450dc

Browse files
[NVPTX] Add support for local volatile memory operations
1 parent 0d04789 commit 10450dc

File tree

7 files changed

+83
-81
lines changed

7 files changed

+83
-81
lines changed

llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -645,15 +645,17 @@ getOperationOrderings(MemSDNode *N, const NVPTXSubtarget *Subtarget) {
645645
// Calling "example" in CUDA C++ compiled for sm_60- exhibits undefined
646646
// behavior due to lack of Independent Forward Progress. Lowering these
647647
// to weak memory operations in sm_60- is therefore fine.
648-
//
649648
// TODO: lower atomic and volatile operations to memory locations
650649
// in local, const, and param to two PTX instructions in sm_70+:
651650
// - the "weak" memory instruction we are currently lowering to, and
652651
// - some other instruction that preserves the side-effect, e.g.,
653652
// a dead dummy volatile load.
654-
if (CodeAddrSpace == NVPTX::AddressSpace::Local ||
655-
CodeAddrSpace == NVPTX::AddressSpace::Const ||
656-
CodeAddrSpace == NVPTX::AddressSpace::Param) {
653+
654+
if (CodeAddrSpace == NVPTX::AddressSpace::Const ||
655+
CodeAddrSpace == NVPTX::AddressSpace::Param ||
656+
(CodeAddrSpace == NVPTX::AddressSpace::Local
657+
&& (!N->isVolatile() || Ordering != AtomicOrdering::NotAtomic))) {
658+
// Non-atomic volatile operations on local memory are excluded from this early return so they are not lowered as weak operations.
657659
return NVPTX::Ordering::NotAtomic;
658660
}
659661

@@ -677,12 +679,13 @@ getOperationOrderings(MemSDNode *N, const NVPTXSubtarget *Subtarget) {
677679
// from .generic, .global, or .shared. The behavior of PTX volatile and PTX
678680
// atomics is undefined if the generic address does not refer to a .global or
679681
// .shared memory location.
680-
bool AddrGenericOrGlobalOrShared =
682+
bool AddrGenericOrGlobalOrSharedorLocal =
681683
(CodeAddrSpace == NVPTX::AddressSpace::Generic ||
682684
CodeAddrSpace == NVPTX::AddressSpace::Global ||
683685
CodeAddrSpace == NVPTX::AddressSpace::Shared ||
684-
CodeAddrSpace == NVPTX::AddressSpace::SharedCluster);
685-
if (!AddrGenericOrGlobalOrShared)
686+
CodeAddrSpace == NVPTX::AddressSpace::SharedCluster ||
687+
CodeAddrSpace == NVPTX::AddressSpace::Local);
688+
if (!AddrGenericOrGlobalOrSharedorLocal)
686689
return NVPTX::Ordering::NotAtomic;
687690

688691
bool UseRelaxedMMIO =

llvm/test/CodeGen/NVPTX/forward-ld-param.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ define i32 @test_modify_param(ptr byval([10 x i32]) %a, i32 %b, i32 %c ) {
8585
; CHECK-NEXT: mov.b64 %rd1, test_modify_param_param_0;
8686
; CHECK-NEXT: ld.param.b32 %r1, [test_modify_param_param_1];
8787
; CHECK-NEXT: ld.param.b32 %r2, [test_modify_param_param_2];
88-
; CHECK-NEXT: st.local.b32 [%rd1+2], %r1;
88+
; CHECK-NEXT: st.volatile.local.b32 [%rd1+2], %r1;
8989
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
9090
; CHECK-NEXT: ret;
9191
%p2 = getelementptr i8, ptr %a, i32 2

llvm/test/CodeGen/NVPTX/load-store-scalars.ll

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2643,9 +2643,9 @@ define void @local_volatile_i8(ptr addrspace(5) %a) {
26432643
; CHECK-EMPTY:
26442644
; CHECK-NEXT: // %bb.0:
26452645
; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_i8_param_0];
2646-
; CHECK-NEXT: ld.local.b8 %rs1, [%rd1];
2646+
; CHECK-NEXT: ld.volatile.local.b8 %rs1, [%rd1];
26472647
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
2648-
; CHECK-NEXT: st.local.b8 [%rd1], %rs2;
2648+
; CHECK-NEXT: st.volatile.local.b8 [%rd1], %rs2;
26492649
; CHECK-NEXT: ret;
26502650
%a.load = load volatile i8, ptr addrspace(5) %a
26512651
%a.add = add i8 %a.load, 1
@@ -2661,9 +2661,9 @@ define void @local_volatile_i16(ptr addrspace(5) %a) {
26612661
; CHECK-EMPTY:
26622662
; CHECK-NEXT: // %bb.0:
26632663
; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_i16_param_0];
2664-
; CHECK-NEXT: ld.local.b16 %rs1, [%rd1];
2664+
; CHECK-NEXT: ld.volatile.local.b16 %rs1, [%rd1];
26652665
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
2666-
; CHECK-NEXT: st.local.b16 [%rd1], %rs2;
2666+
; CHECK-NEXT: st.volatile.local.b16 [%rd1], %rs2;
26672667
; CHECK-NEXT: ret;
26682668
%a.load = load volatile i16, ptr addrspace(5) %a
26692669
%a.add = add i16 %a.load, 1
@@ -2679,9 +2679,9 @@ define void @local_volatile_i32(ptr addrspace(5) %a) {
26792679
; CHECK-EMPTY:
26802680
; CHECK-NEXT: // %bb.0:
26812681
; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_i32_param_0];
2682-
; CHECK-NEXT: ld.local.b32 %r1, [%rd1];
2682+
; CHECK-NEXT: ld.volatile.local.b32 %r1, [%rd1];
26832683
; CHECK-NEXT: add.s32 %r2, %r1, 1;
2684-
; CHECK-NEXT: st.local.b32 [%rd1], %r2;
2684+
; CHECK-NEXT: st.volatile.local.b32 [%rd1], %r2;
26852685
; CHECK-NEXT: ret;
26862686
%a.load = load volatile i32, ptr addrspace(5) %a
26872687
%a.add = add i32 %a.load, 1
@@ -2696,9 +2696,9 @@ define void @local_volatile_i64(ptr addrspace(5) %a) {
26962696
; CHECK-EMPTY:
26972697
; CHECK-NEXT: // %bb.0:
26982698
; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_i64_param_0];
2699-
; CHECK-NEXT: ld.local.b64 %rd2, [%rd1];
2699+
; CHECK-NEXT: ld.volatile.local.b64 %rd2, [%rd1];
27002700
; CHECK-NEXT: add.s64 %rd3, %rd2, 1;
2701-
; CHECK-NEXT: st.local.b64 [%rd1], %rd3;
2701+
; CHECK-NEXT: st.volatile.local.b64 [%rd1], %rd3;
27022702
; CHECK-NEXT: ret;
27032703
%a.load = load volatile i64, ptr addrspace(5) %a
27042704
%a.add = add i64 %a.load, 1
@@ -2714,9 +2714,9 @@ define void @local_volatile_float(ptr addrspace(5) %a) {
27142714
; CHECK-EMPTY:
27152715
; CHECK-NEXT: // %bb.0:
27162716
; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_float_param_0];
2717-
; CHECK-NEXT: ld.local.b32 %r1, [%rd1];
2717+
; CHECK-NEXT: ld.volatile.local.b32 %r1, [%rd1];
27182718
; CHECK-NEXT: add.rn.f32 %r2, %r1, 0f3F800000;
2719-
; CHECK-NEXT: st.local.b32 [%rd1], %r2;
2719+
; CHECK-NEXT: st.volatile.local.b32 [%rd1], %r2;
27202720
; CHECK-NEXT: ret;
27212721
%a.load = load volatile float, ptr addrspace(5) %a
27222722
%a.add = fadd float %a.load, 1.
@@ -2731,9 +2731,9 @@ define void @local_volatile_double(ptr addrspace(5) %a) {
27312731
; CHECK-EMPTY:
27322732
; CHECK-NEXT: // %bb.0:
27332733
; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_double_param_0];
2734-
; CHECK-NEXT: ld.local.b64 %rd2, [%rd1];
2734+
; CHECK-NEXT: ld.volatile.local.b64 %rd2, [%rd1];
27352735
; CHECK-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000;
2736-
; CHECK-NEXT: st.local.b64 [%rd1], %rd3;
2736+
; CHECK-NEXT: st.volatile.local.b64 [%rd1], %rd3;
27372737
; CHECK-NEXT: ret;
27382738
%a.load = load volatile double, ptr addrspace(5) %a
27392739
%a.add = fadd double %a.load, 1.

llvm/test/CodeGen/NVPTX/load-store-sm-90.ll

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1550,7 +1550,6 @@ define void @shared_seq_cst_volatile_cluster(ptr addrspace(3) %a, ptr addrspace(
15501550
}
15511551

15521552
;; local statespace
1553-
15541553
; CHECK-LABEL: local_unordered_cluster
15551554
define void @local_unordered_cluster(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr {
15561555
; CHECK-LABEL: local_unordered_cluster(

llvm/test/CodeGen/NVPTX/load-store-vectors-256.ll

Lines changed: 32 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1280,11 +1280,11 @@ define void @local_volatile_32xi8(ptr addrspace(5) %a, ptr addrspace(5) %b) {
12801280
; CHECK-EMPTY:
12811281
; CHECK-NEXT: // %bb.0:
12821282
; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_32xi8_param_0];
1283-
; CHECK-NEXT: ld.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1];
1284-
; CHECK-NEXT: ld.local.v4.b32 {%r5, %r6, %r7, %r8}, [%rd1+16];
1283+
; CHECK-NEXT: ld.volatile.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1];
1284+
; CHECK-NEXT: ld.volatile.local.v4.b32 {%r5, %r6, %r7, %r8}, [%rd1+16];
12851285
; CHECK-NEXT: ld.param.b64 %rd2, [local_volatile_32xi8_param_1];
1286-
; CHECK-NEXT: st.local.v4.b32 [%rd2+16], {%r5, %r6, %r7, %r8};
1287-
; CHECK-NEXT: st.local.v4.b32 [%rd2], {%r1, %r2, %r3, %r4};
1286+
; CHECK-NEXT: st.volatile.local.v4.b32 [%rd2+16], {%r5, %r6, %r7, %r8};
1287+
; CHECK-NEXT: st.volatile.local.v4.b32 [%rd2], {%r1, %r2, %r3, %r4};
12881288
; CHECK-NEXT: ret;
12891289
%a.load = load volatile <32 x i8>, ptr addrspace(5) %a
12901290
store volatile <32 x i8> %a.load, ptr addrspace(5) %b
@@ -1299,11 +1299,11 @@ define void @local_volatile_16xi16(ptr addrspace(5) %a, ptr addrspace(5) %b) {
12991299
; CHECK-EMPTY:
13001300
; CHECK-NEXT: // %bb.0:
13011301
; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_16xi16_param_0];
1302-
; CHECK-NEXT: ld.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1];
1303-
; CHECK-NEXT: ld.local.v4.b32 {%r5, %r6, %r7, %r8}, [%rd1+16];
1302+
; CHECK-NEXT: ld.volatile.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1];
1303+
; CHECK-NEXT: ld.volatile.local.v4.b32 {%r5, %r6, %r7, %r8}, [%rd1+16];
13041304
; CHECK-NEXT: ld.param.b64 %rd2, [local_volatile_16xi16_param_1];
1305-
; CHECK-NEXT: st.local.v4.b32 [%rd2+16], {%r5, %r6, %r7, %r8};
1306-
; CHECK-NEXT: st.local.v4.b32 [%rd2], {%r1, %r2, %r3, %r4};
1305+
; CHECK-NEXT: st.volatile.local.v4.b32 [%rd2+16], {%r5, %r6, %r7, %r8};
1306+
; CHECK-NEXT: st.volatile.local.v4.b32 [%rd2], {%r1, %r2, %r3, %r4};
13071307
; CHECK-NEXT: ret;
13081308
%a.load = load volatile <16 x i16>, ptr addrspace(5) %a
13091309
store volatile <16 x i16> %a.load, ptr addrspace(5) %b
@@ -1318,11 +1318,11 @@ define void @local_volatile_16xhalf(ptr addrspace(5) %a, ptr addrspace(5) %b) {
13181318
; CHECK-EMPTY:
13191319
; CHECK-NEXT: // %bb.0:
13201320
; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_16xhalf_param_0];
1321-
; CHECK-NEXT: ld.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1];
1322-
; CHECK-NEXT: ld.local.v4.b32 {%r5, %r6, %r7, %r8}, [%rd1+16];
1321+
; CHECK-NEXT: ld.volatile.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1];
1322+
; CHECK-NEXT: ld.volatile.local.v4.b32 {%r5, %r6, %r7, %r8}, [%rd1+16];
13231323
; CHECK-NEXT: ld.param.b64 %rd2, [local_volatile_16xhalf_param_1];
1324-
; CHECK-NEXT: st.local.v4.b32 [%rd2+16], {%r5, %r6, %r7, %r8};
1325-
; CHECK-NEXT: st.local.v4.b32 [%rd2], {%r1, %r2, %r3, %r4};
1324+
; CHECK-NEXT: st.volatile.local.v4.b32 [%rd2+16], {%r5, %r6, %r7, %r8};
1325+
; CHECK-NEXT: st.volatile.local.v4.b32 [%rd2], {%r1, %r2, %r3, %r4};
13261326
; CHECK-NEXT: ret;
13271327
%a.load = load volatile <16 x half>, ptr addrspace(5) %a
13281328
store volatile <16 x half> %a.load, ptr addrspace(5) %b
@@ -1337,11 +1337,11 @@ define void @local_volatile_16xbfloat(ptr addrspace(5) %a, ptr addrspace(5) %b)
13371337
; CHECK-EMPTY:
13381338
; CHECK-NEXT: // %bb.0:
13391339
; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_16xbfloat_param_0];
1340-
; CHECK-NEXT: ld.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1];
1341-
; CHECK-NEXT: ld.local.v4.b32 {%r5, %r6, %r7, %r8}, [%rd1+16];
1340+
; CHECK-NEXT: ld.volatile.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1];
1341+
; CHECK-NEXT: ld.volatile.local.v4.b32 {%r5, %r6, %r7, %r8}, [%rd1+16];
13421342
; CHECK-NEXT: ld.param.b64 %rd2, [local_volatile_16xbfloat_param_1];
1343-
; CHECK-NEXT: st.local.v4.b32 [%rd2+16], {%r5, %r6, %r7, %r8};
1344-
; CHECK-NEXT: st.local.v4.b32 [%rd2], {%r1, %r2, %r3, %r4};
1343+
; CHECK-NEXT: st.volatile.local.v4.b32 [%rd2+16], {%r5, %r6, %r7, %r8};
1344+
; CHECK-NEXT: st.volatile.local.v4.b32 [%rd2], {%r1, %r2, %r3, %r4};
13451345
; CHECK-NEXT: ret;
13461346
%a.load = load volatile <16 x bfloat>, ptr addrspace(5) %a
13471347
store volatile <16 x bfloat> %a.load, ptr addrspace(5) %b
@@ -1356,11 +1356,11 @@ define void @local_volatile_8xi32(ptr addrspace(5) %a, ptr addrspace(5) %b) {
13561356
; CHECK-EMPTY:
13571357
; CHECK-NEXT: // %bb.0:
13581358
; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_8xi32_param_0];
1359-
; CHECK-NEXT: ld.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1];
1360-
; CHECK-NEXT: ld.local.v4.b32 {%r5, %r6, %r7, %r8}, [%rd1+16];
1359+
; CHECK-NEXT: ld.volatile.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1];
1360+
; CHECK-NEXT: ld.volatile.local.v4.b32 {%r5, %r6, %r7, %r8}, [%rd1+16];
13611361
; CHECK-NEXT: ld.param.b64 %rd2, [local_volatile_8xi32_param_1];
1362-
; CHECK-NEXT: st.local.v4.b32 [%rd2+16], {%r5, %r6, %r7, %r8};
1363-
; CHECK-NEXT: st.local.v4.b32 [%rd2], {%r1, %r2, %r3, %r4};
1362+
; CHECK-NEXT: st.volatile.local.v4.b32 [%rd2+16], {%r5, %r6, %r7, %r8};
1363+
; CHECK-NEXT: st.volatile.local.v4.b32 [%rd2], {%r1, %r2, %r3, %r4};
13641364
; CHECK-NEXT: ret;
13651365
%a.load = load volatile <8 x i32>, ptr addrspace(5) %a
13661366
store volatile <8 x i32> %a.load, ptr addrspace(5) %b
@@ -1374,11 +1374,11 @@ define void @local_volatile_4xi64(ptr addrspace(5) %a, ptr addrspace(5) %b) {
13741374
; CHECK-EMPTY:
13751375
; CHECK-NEXT: // %bb.0:
13761376
; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_4xi64_param_0];
1377-
; CHECK-NEXT: ld.local.v2.b64 {%rd2, %rd3}, [%rd1];
1378-
; CHECK-NEXT: ld.local.v2.b64 {%rd4, %rd5}, [%rd1+16];
1377+
; CHECK-NEXT: ld.volatile.local.v2.b64 {%rd2, %rd3}, [%rd1];
1378+
; CHECK-NEXT: ld.volatile.local.v2.b64 {%rd4, %rd5}, [%rd1+16];
13791379
; CHECK-NEXT: ld.param.b64 %rd6, [local_volatile_4xi64_param_1];
1380-
; CHECK-NEXT: st.local.v2.b64 [%rd6+16], {%rd4, %rd5};
1381-
; CHECK-NEXT: st.local.v2.b64 [%rd6], {%rd2, %rd3};
1380+
; CHECK-NEXT: st.volatile.local.v2.b64 [%rd6+16], {%rd4, %rd5};
1381+
; CHECK-NEXT: st.volatile.local.v2.b64 [%rd6], {%rd2, %rd3};
13821382
; CHECK-NEXT: ret;
13831383
%a.load = load volatile <4 x i64>, ptr addrspace(5) %a
13841384
store volatile <4 x i64> %a.load, ptr addrspace(5) %b
@@ -1392,11 +1392,11 @@ define void @local_volatile_8xfloat(ptr addrspace(5) %a, ptr addrspace(5) %b) {
13921392
; CHECK-EMPTY:
13931393
; CHECK-NEXT: // %bb.0:
13941394
; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_8xfloat_param_0];
1395-
; CHECK-NEXT: ld.local.v2.b64 {%rd2, %rd3}, [%rd1];
1396-
; CHECK-NEXT: ld.local.v2.b64 {%rd4, %rd5}, [%rd1+16];
1395+
; CHECK-NEXT: ld.volatile.local.v2.b64 {%rd2, %rd3}, [%rd1];
1396+
; CHECK-NEXT: ld.volatile.local.v2.b64 {%rd4, %rd5}, [%rd1+16];
13971397
; CHECK-NEXT: ld.param.b64 %rd6, [local_volatile_8xfloat_param_1];
1398-
; CHECK-NEXT: st.local.v2.b64 [%rd6+16], {%rd4, %rd5};
1399-
; CHECK-NEXT: st.local.v2.b64 [%rd6], {%rd2, %rd3};
1398+
; CHECK-NEXT: st.volatile.local.v2.b64 [%rd6+16], {%rd4, %rd5};
1399+
; CHECK-NEXT: st.volatile.local.v2.b64 [%rd6], {%rd2, %rd3};
14001400
; CHECK-NEXT: ret;
14011401
%a.load = load volatile <8 x float>, ptr addrspace(5) %a
14021402
store volatile <8 x float> %a.load, ptr addrspace(5) %b
@@ -1410,11 +1410,11 @@ define void @local_volatile_4xdouble(ptr addrspace(5) %a, ptr addrspace(5) %b) {
14101410
; CHECK-EMPTY:
14111411
; CHECK-NEXT: // %bb.0:
14121412
; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_4xdouble_param_0];
1413-
; CHECK-NEXT: ld.local.v2.b64 {%rd2, %rd3}, [%rd1];
1414-
; CHECK-NEXT: ld.local.v2.b64 {%rd4, %rd5}, [%rd1+16];
1413+
; CHECK-NEXT: ld.volatile.local.v2.b64 {%rd2, %rd3}, [%rd1];
1414+
; CHECK-NEXT: ld.volatile.local.v2.b64 {%rd4, %rd5}, [%rd1+16];
14151415
; CHECK-NEXT: ld.param.b64 %rd6, [local_volatile_4xdouble_param_1];
1416-
; CHECK-NEXT: st.local.v2.b64 [%rd6+16], {%rd4, %rd5};
1417-
; CHECK-NEXT: st.local.v2.b64 [%rd6], {%rd2, %rd3};
1416+
; CHECK-NEXT: st.volatile.local.v2.b64 [%rd6+16], {%rd4, %rd5};
1417+
; CHECK-NEXT: st.volatile.local.v2.b64 [%rd6], {%rd2, %rd3};
14181418
; CHECK-NEXT: ret;
14191419
%a.load = load volatile <4 x double>, ptr addrspace(5) %a
14201420
store volatile <4 x double> %a.load, ptr addrspace(5) %b

0 commit comments

Comments
 (0)