diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index 65e7c56774547..84762eb1bf71f 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -645,15 +645,17 @@ getOperationOrderings(MemSDNode *N, const NVPTXSubtarget *Subtarget) { // Calling "example" in CUDA C++ compiled for sm_60- exhibits undefined // behavior due to lack of Independent Forward Progress. Lowering these // to weak memory operations in sm_60- is therefore fine. - // // TODO: lower atomic and volatile operations to memory locations // in local, const, and param to two PTX instructions in sm_70+: // - the "weak" memory instruction we are currently lowering to, and // - some other instruction that preserves the side-effect, e.g., // a dead dummy volatile load. - if (CodeAddrSpace == NVPTX::AddressSpace::Local || - CodeAddrSpace == NVPTX::AddressSpace::Const || - CodeAddrSpace == NVPTX::AddressSpace::Param) { + + if (CodeAddrSpace == NVPTX::AddressSpace::Const || + CodeAddrSpace == NVPTX::AddressSpace::Param || + (CodeAddrSpace == NVPTX::AddressSpace::Local && + (!N->isVolatile() || Ordering != AtomicOrdering::NotAtomic))) { + // Volatile non-atomic local operations are excluded above and keep their volatile lowering. return NVPTX::Ordering::NotAtomic; } @@ -677,12 +679,13 @@ getOperationOrderings(MemSDNode *N, const NVPTXSubtarget *Subtarget) { // from .generic, .global, or .shared. The behavior of PTX volatile and PTX // atomics is undefined if the generic address does not refer to a .global or // .shared memory location. 
- bool AddrGenericOrGlobalOrShared = + bool AddrGenericOrGlobalOrSharedOrLocal = (CodeAddrSpace == NVPTX::AddressSpace::Generic || CodeAddrSpace == NVPTX::AddressSpace::Global || CodeAddrSpace == NVPTX::AddressSpace::Shared || - CodeAddrSpace == NVPTX::AddressSpace::SharedCluster); - if (!AddrGenericOrGlobalOrShared) + CodeAddrSpace == NVPTX::AddressSpace::SharedCluster || + CodeAddrSpace == NVPTX::AddressSpace::Local); + if (!AddrGenericOrGlobalOrSharedOrLocal) return NVPTX::Ordering::NotAtomic; bool UseRelaxedMMIO = diff --git a/llvm/test/CodeGen/NVPTX/forward-ld-param.ll b/llvm/test/CodeGen/NVPTX/forward-ld-param.ll index ed8f6b4511079..f53fc3a27de15 100644 --- a/llvm/test/CodeGen/NVPTX/forward-ld-param.ll +++ b/llvm/test/CodeGen/NVPTX/forward-ld-param.ll @@ -85,7 +85,7 @@ define i32 @test_modify_param(ptr byval([10 x i32]) %a, i32 %b, i32 %c ) { ; CHECK-NEXT: mov.b64 %rd1, test_modify_param_param_0; ; CHECK-NEXT: ld.param.b32 %r1, [test_modify_param_param_1]; ; CHECK-NEXT: ld.param.b32 %r2, [test_modify_param_param_2]; -; CHECK-NEXT: st.local.b32 [%rd1+2], %r1; +; CHECK-NEXT: st.volatile.local.b32 [%rd1+2], %r1; ; CHECK-NEXT: st.param.b32 [func_retval0], %r2; ; CHECK-NEXT: ret; %p2 = getelementptr i8, ptr %a, i32 2 diff --git a/llvm/test/CodeGen/NVPTX/load-store-scalars.ll b/llvm/test/CodeGen/NVPTX/load-store-scalars.ll index bac59be5158ea..58ca8d613b09b 100644 --- a/llvm/test/CodeGen/NVPTX/load-store-scalars.ll +++ b/llvm/test/CodeGen/NVPTX/load-store-scalars.ll @@ -2643,9 +2643,9 @@ define void @local_volatile_i8(ptr addrspace(5) %a) { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_i8_param_0]; -; CHECK-NEXT: ld.local.b8 %rs1, [%rd1]; +; CHECK-NEXT: ld.volatile.local.b8 %rs1, [%rd1]; ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; -; CHECK-NEXT: st.local.b8 [%rd1], %rs2; +; CHECK-NEXT: st.volatile.local.b8 [%rd1], %rs2; ; CHECK-NEXT: ret; %a.load = load volatile i8, ptr addrspace(5) %a %a.add = add i8 %a.load, 1 @@ -2661,9 
+2661,9 @@ define void @local_volatile_i16(ptr addrspace(5) %a) { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_i16_param_0]; -; CHECK-NEXT: ld.local.b16 %rs1, [%rd1]; +; CHECK-NEXT: ld.volatile.local.b16 %rs1, [%rd1]; ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; -; CHECK-NEXT: st.local.b16 [%rd1], %rs2; +; CHECK-NEXT: st.volatile.local.b16 [%rd1], %rs2; ; CHECK-NEXT: ret; %a.load = load volatile i16, ptr addrspace(5) %a %a.add = add i16 %a.load, 1 @@ -2679,9 +2679,9 @@ define void @local_volatile_i32(ptr addrspace(5) %a) { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_i32_param_0]; -; CHECK-NEXT: ld.local.b32 %r1, [%rd1]; +; CHECK-NEXT: ld.volatile.local.b32 %r1, [%rd1]; ; CHECK-NEXT: add.s32 %r2, %r1, 1; -; CHECK-NEXT: st.local.b32 [%rd1], %r2; +; CHECK-NEXT: st.volatile.local.b32 [%rd1], %r2; ; CHECK-NEXT: ret; %a.load = load volatile i32, ptr addrspace(5) %a %a.add = add i32 %a.load, 1 @@ -2696,9 +2696,9 @@ define void @local_volatile_i64(ptr addrspace(5) %a) { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_i64_param_0]; -; CHECK-NEXT: ld.local.b64 %rd2, [%rd1]; +; CHECK-NEXT: ld.volatile.local.b64 %rd2, [%rd1]; ; CHECK-NEXT: add.s64 %rd3, %rd2, 1; -; CHECK-NEXT: st.local.b64 [%rd1], %rd3; +; CHECK-NEXT: st.volatile.local.b64 [%rd1], %rd3; ; CHECK-NEXT: ret; %a.load = load volatile i64, ptr addrspace(5) %a %a.add = add i64 %a.load, 1 @@ -2714,9 +2714,9 @@ define void @local_volatile_float(ptr addrspace(5) %a) { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_float_param_0]; -; CHECK-NEXT: ld.local.b32 %r1, [%rd1]; +; CHECK-NEXT: ld.volatile.local.b32 %r1, [%rd1]; ; CHECK-NEXT: add.rn.f32 %r2, %r1, 0f3F800000; -; CHECK-NEXT: st.local.b32 [%rd1], %r2; +; CHECK-NEXT: st.volatile.local.b32 [%rd1], %r2; ; CHECK-NEXT: ret; %a.load = load volatile float, ptr addrspace(5) %a %a.add = fadd float %a.load, 1. 
@@ -2731,9 +2731,9 @@ define void @local_volatile_double(ptr addrspace(5) %a) { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_double_param_0]; -; CHECK-NEXT: ld.local.b64 %rd2, [%rd1]; +; CHECK-NEXT: ld.volatile.local.b64 %rd2, [%rd1]; ; CHECK-NEXT: add.rn.f64 %rd3, %rd2, 0d3FF0000000000000; -; CHECK-NEXT: st.local.b64 [%rd1], %rd3; +; CHECK-NEXT: st.volatile.local.b64 [%rd1], %rd3; ; CHECK-NEXT: ret; %a.load = load volatile double, ptr addrspace(5) %a %a.add = fadd double %a.load, 1. diff --git a/llvm/test/CodeGen/NVPTX/load-store-sm-90.ll b/llvm/test/CodeGen/NVPTX/load-store-sm-90.ll index ed170e92917f5..91a70e4468154 100644 --- a/llvm/test/CodeGen/NVPTX/load-store-sm-90.ll +++ b/llvm/test/CodeGen/NVPTX/load-store-sm-90.ll @@ -1550,7 +1550,6 @@ define void @shared_seq_cst_volatile_cluster(ptr addrspace(3) %a, ptr addrspace( } ;; local statespace - ; CHECK-LABEL: local_unordered_cluster define void @local_unordered_cluster(ptr addrspace(5) %a, ptr addrspace(5) %b, ptr addrspace(5) %c, ptr addrspace(5) %d, ptr addrspace(5) %e) local_unnamed_addr { ; CHECK-LABEL: local_unordered_cluster( diff --git a/llvm/test/CodeGen/NVPTX/load-store-vectors-256.ll b/llvm/test/CodeGen/NVPTX/load-store-vectors-256.ll index 68c53cde7f9ac..4423efcca1ff4 100644 --- a/llvm/test/CodeGen/NVPTX/load-store-vectors-256.ll +++ b/llvm/test/CodeGen/NVPTX/load-store-vectors-256.ll @@ -1280,11 +1280,11 @@ define void @local_volatile_32xi8(ptr addrspace(5) %a, ptr addrspace(5) %b) { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_32xi8_param_0]; -; CHECK-NEXT: ld.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1]; -; CHECK-NEXT: ld.local.v4.b32 {%r5, %r6, %r7, %r8}, [%rd1+16]; +; CHECK-NEXT: ld.volatile.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1]; +; CHECK-NEXT: ld.volatile.local.v4.b32 {%r5, %r6, %r7, %r8}, [%rd1+16]; ; CHECK-NEXT: ld.param.b64 %rd2, [local_volatile_32xi8_param_1]; -; CHECK-NEXT: st.local.v4.b32 [%rd2+16], 
{%r5, %r6, %r7, %r8}; -; CHECK-NEXT: st.local.v4.b32 [%rd2], {%r1, %r2, %r3, %r4}; +; CHECK-NEXT: st.volatile.local.v4.b32 [%rd2+16], {%r5, %r6, %r7, %r8}; +; CHECK-NEXT: st.volatile.local.v4.b32 [%rd2], {%r1, %r2, %r3, %r4}; ; CHECK-NEXT: ret; %a.load = load volatile <32 x i8>, ptr addrspace(5) %a store volatile <32 x i8> %a.load, ptr addrspace(5) %b @@ -1299,11 +1299,11 @@ define void @local_volatile_16xi16(ptr addrspace(5) %a, ptr addrspace(5) %b) { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_16xi16_param_0]; -; CHECK-NEXT: ld.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1]; -; CHECK-NEXT: ld.local.v4.b32 {%r5, %r6, %r7, %r8}, [%rd1+16]; +; CHECK-NEXT: ld.volatile.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1]; +; CHECK-NEXT: ld.volatile.local.v4.b32 {%r5, %r6, %r7, %r8}, [%rd1+16]; ; CHECK-NEXT: ld.param.b64 %rd2, [local_volatile_16xi16_param_1]; -; CHECK-NEXT: st.local.v4.b32 [%rd2+16], {%r5, %r6, %r7, %r8}; -; CHECK-NEXT: st.local.v4.b32 [%rd2], {%r1, %r2, %r3, %r4}; +; CHECK-NEXT: st.volatile.local.v4.b32 [%rd2+16], {%r5, %r6, %r7, %r8}; +; CHECK-NEXT: st.volatile.local.v4.b32 [%rd2], {%r1, %r2, %r3, %r4}; ; CHECK-NEXT: ret; %a.load = load volatile <16 x i16>, ptr addrspace(5) %a store volatile <16 x i16> %a.load, ptr addrspace(5) %b @@ -1318,11 +1318,11 @@ define void @local_volatile_16xhalf(ptr addrspace(5) %a, ptr addrspace(5) %b) { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_16xhalf_param_0]; -; CHECK-NEXT: ld.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1]; -; CHECK-NEXT: ld.local.v4.b32 {%r5, %r6, %r7, %r8}, [%rd1+16]; +; CHECK-NEXT: ld.volatile.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1]; +; CHECK-NEXT: ld.volatile.local.v4.b32 {%r5, %r6, %r7, %r8}, [%rd1+16]; ; CHECK-NEXT: ld.param.b64 %rd2, [local_volatile_16xhalf_param_1]; -; CHECK-NEXT: st.local.v4.b32 [%rd2+16], {%r5, %r6, %r7, %r8}; -; CHECK-NEXT: st.local.v4.b32 [%rd2], {%r1, %r2, %r3, %r4}; +; CHECK-NEXT: 
st.volatile.local.v4.b32 [%rd2+16], {%r5, %r6, %r7, %r8}; +; CHECK-NEXT: st.volatile.local.v4.b32 [%rd2], {%r1, %r2, %r3, %r4}; ; CHECK-NEXT: ret; %a.load = load volatile <16 x half>, ptr addrspace(5) %a store volatile <16 x half> %a.load, ptr addrspace(5) %b @@ -1337,11 +1337,11 @@ define void @local_volatile_16xbfloat(ptr addrspace(5) %a, ptr addrspace(5) %b) ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_16xbfloat_param_0]; -; CHECK-NEXT: ld.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1]; -; CHECK-NEXT: ld.local.v4.b32 {%r5, %r6, %r7, %r8}, [%rd1+16]; +; CHECK-NEXT: ld.volatile.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1]; +; CHECK-NEXT: ld.volatile.local.v4.b32 {%r5, %r6, %r7, %r8}, [%rd1+16]; ; CHECK-NEXT: ld.param.b64 %rd2, [local_volatile_16xbfloat_param_1]; -; CHECK-NEXT: st.local.v4.b32 [%rd2+16], {%r5, %r6, %r7, %r8}; -; CHECK-NEXT: st.local.v4.b32 [%rd2], {%r1, %r2, %r3, %r4}; +; CHECK-NEXT: st.volatile.local.v4.b32 [%rd2+16], {%r5, %r6, %r7, %r8}; +; CHECK-NEXT: st.volatile.local.v4.b32 [%rd2], {%r1, %r2, %r3, %r4}; ; CHECK-NEXT: ret; %a.load = load volatile <16 x bfloat>, ptr addrspace(5) %a store volatile <16 x bfloat> %a.load, ptr addrspace(5) %b @@ -1356,11 +1356,11 @@ define void @local_volatile_8xi32(ptr addrspace(5) %a, ptr addrspace(5) %b) { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_8xi32_param_0]; -; CHECK-NEXT: ld.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1]; -; CHECK-NEXT: ld.local.v4.b32 {%r5, %r6, %r7, %r8}, [%rd1+16]; +; CHECK-NEXT: ld.volatile.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1]; +; CHECK-NEXT: ld.volatile.local.v4.b32 {%r5, %r6, %r7, %r8}, [%rd1+16]; ; CHECK-NEXT: ld.param.b64 %rd2, [local_volatile_8xi32_param_1]; -; CHECK-NEXT: st.local.v4.b32 [%rd2+16], {%r5, %r6, %r7, %r8}; -; CHECK-NEXT: st.local.v4.b32 [%rd2], {%r1, %r2, %r3, %r4}; +; CHECK-NEXT: st.volatile.local.v4.b32 [%rd2+16], {%r5, %r6, %r7, %r8}; +; CHECK-NEXT: st.volatile.local.v4.b32 [%rd2], 
{%r1, %r2, %r3, %r4}; ; CHECK-NEXT: ret; %a.load = load volatile <8 x i32>, ptr addrspace(5) %a store volatile <8 x i32> %a.load, ptr addrspace(5) %b @@ -1374,11 +1374,11 @@ define void @local_volatile_4xi64(ptr addrspace(5) %a, ptr addrspace(5) %b) { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_4xi64_param_0]; -; CHECK-NEXT: ld.local.v2.b64 {%rd2, %rd3}, [%rd1]; -; CHECK-NEXT: ld.local.v2.b64 {%rd4, %rd5}, [%rd1+16]; +; CHECK-NEXT: ld.volatile.local.v2.b64 {%rd2, %rd3}, [%rd1]; +; CHECK-NEXT: ld.volatile.local.v2.b64 {%rd4, %rd5}, [%rd1+16]; ; CHECK-NEXT: ld.param.b64 %rd6, [local_volatile_4xi64_param_1]; -; CHECK-NEXT: st.local.v2.b64 [%rd6+16], {%rd4, %rd5}; -; CHECK-NEXT: st.local.v2.b64 [%rd6], {%rd2, %rd3}; +; CHECK-NEXT: st.volatile.local.v2.b64 [%rd6+16], {%rd4, %rd5}; +; CHECK-NEXT: st.volatile.local.v2.b64 [%rd6], {%rd2, %rd3}; ; CHECK-NEXT: ret; %a.load = load volatile <4 x i64>, ptr addrspace(5) %a store volatile <4 x i64> %a.load, ptr addrspace(5) %b @@ -1392,11 +1392,11 @@ define void @local_volatile_8xfloat(ptr addrspace(5) %a, ptr addrspace(5) %b) { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_8xfloat_param_0]; -; CHECK-NEXT: ld.local.v2.b64 {%rd2, %rd3}, [%rd1]; -; CHECK-NEXT: ld.local.v2.b64 {%rd4, %rd5}, [%rd1+16]; +; CHECK-NEXT: ld.volatile.local.v2.b64 {%rd2, %rd3}, [%rd1]; +; CHECK-NEXT: ld.volatile.local.v2.b64 {%rd4, %rd5}, [%rd1+16]; ; CHECK-NEXT: ld.param.b64 %rd6, [local_volatile_8xfloat_param_1]; -; CHECK-NEXT: st.local.v2.b64 [%rd6+16], {%rd4, %rd5}; -; CHECK-NEXT: st.local.v2.b64 [%rd6], {%rd2, %rd3}; +; CHECK-NEXT: st.volatile.local.v2.b64 [%rd6+16], {%rd4, %rd5}; +; CHECK-NEXT: st.volatile.local.v2.b64 [%rd6], {%rd2, %rd3}; ; CHECK-NEXT: ret; %a.load = load volatile <8 x float>, ptr addrspace(5) %a store volatile <8 x float> %a.load, ptr addrspace(5) %b @@ -1410,11 +1410,11 @@ define void @local_volatile_4xdouble(ptr addrspace(5) %a, ptr 
addrspace(5) %b) { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_4xdouble_param_0]; -; CHECK-NEXT: ld.local.v2.b64 {%rd2, %rd3}, [%rd1]; -; CHECK-NEXT: ld.local.v2.b64 {%rd4, %rd5}, [%rd1+16]; +; CHECK-NEXT: ld.volatile.local.v2.b64 {%rd2, %rd3}, [%rd1]; +; CHECK-NEXT: ld.volatile.local.v2.b64 {%rd4, %rd5}, [%rd1+16]; ; CHECK-NEXT: ld.param.b64 %rd6, [local_volatile_4xdouble_param_1]; -; CHECK-NEXT: st.local.v2.b64 [%rd6+16], {%rd4, %rd5}; -; CHECK-NEXT: st.local.v2.b64 [%rd6], {%rd2, %rd3}; +; CHECK-NEXT: st.volatile.local.v2.b64 [%rd6+16], {%rd4, %rd5}; +; CHECK-NEXT: st.volatile.local.v2.b64 [%rd6], {%rd2, %rd3}; ; CHECK-NEXT: ret; %a.load = load volatile <4 x double>, ptr addrspace(5) %a store volatile <4 x double> %a.load, ptr addrspace(5) %b diff --git a/llvm/test/CodeGen/NVPTX/load-store-vectors.ll b/llvm/test/CodeGen/NVPTX/load-store-vectors.ll index 7e013390a39db..37e66894110ca 100644 --- a/llvm/test/CodeGen/NVPTX/load-store-vectors.ll +++ b/llvm/test/CodeGen/NVPTX/load-store-vectors.ll @@ -2846,10 +2846,10 @@ define void @local_volatile_2xi8(ptr addrspace(5) %a) { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_2xi8_param_0]; -; CHECK-NEXT: ld.local.v2.b8 {%rs1, %rs2}, [%rd1]; +; CHECK-NEXT: ld.volatile.local.v2.b8 {%rs1, %rs2}, [%rd1]; ; CHECK-NEXT: add.s16 %rs3, %rs2, 1; ; CHECK-NEXT: add.s16 %rs4, %rs1, 1; -; CHECK-NEXT: st.local.v2.b8 [%rd1], {%rs4, %rs3}; +; CHECK-NEXT: st.volatile.local.v2.b8 [%rd1], {%rs4, %rs3}; ; CHECK-NEXT: ret; %a.load = load volatile <2 x i8>, ptr addrspace(5) %a %a.add = add <2 x i8> %a.load, @@ -2866,7 +2866,7 @@ define void @local_volatile_4xi8(ptr addrspace(5) %a) { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_4xi8_param_0]; -; CHECK-NEXT: ld.local.b32 %r1, [%rd1]; +; CHECK-NEXT: ld.volatile.local.b32 %r1, [%rd1]; ; CHECK-NEXT: prmt.b32 %r2, %r1, 0, 0x7773U; ; CHECK-NEXT: cvt.u16.u32 %rs1, %r2; ; 
CHECK-NEXT: add.s16 %rs2, %rs1, 1; @@ -2886,7 +2886,7 @@ define void @local_volatile_4xi8(ptr addrspace(5) %a) { ; CHECK-NEXT: cvt.u32.u16 %r10, %rs8; ; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 0x3340U; ; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 0x5410U; -; CHECK-NEXT: st.local.b32 [%rd1], %r12; +; CHECK-NEXT: st.volatile.local.b32 [%rd1], %r12; ; CHECK-NEXT: ret; %a.load = load volatile <4 x i8>, ptr addrspace(5) %a %a.add = add <4 x i8> %a.load, @@ -2903,7 +2903,7 @@ define void @local_volatile_8xi8(ptr addrspace(5) %a) { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_8xi8_param_0]; -; CHECK-NEXT: ld.local.v2.b32 {%r1, %r2}, [%rd1]; +; CHECK-NEXT: ld.volatile.local.v2.b32 {%r1, %r2}, [%rd1]; ; CHECK-NEXT: prmt.b32 %r3, %r2, 0, 0x7773U; ; CHECK-NEXT: cvt.u16.u32 %rs1, %r3; ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; @@ -2942,7 +2942,7 @@ define void @local_volatile_8xi8(ptr addrspace(5) %a) { ; CHECK-NEXT: cvt.u32.u16 %r22, %rs16; ; CHECK-NEXT: prmt.b32 %r23, %r22, %r20, 0x3340U; ; CHECK-NEXT: prmt.b32 %r24, %r23, %r18, 0x5410U; -; CHECK-NEXT: st.local.v2.b32 [%rd1], {%r24, %r13}; +; CHECK-NEXT: st.volatile.local.v2.b32 [%rd1], {%r24, %r13}; ; CHECK-NEXT: ret; %a.load = load volatile <8 x i8>, ptr addrspace(5) %a %a.add = add <8 x i8> %a.load, @@ -2959,7 +2959,7 @@ define void @local_volatile_16xi8(ptr addrspace(5) %a) { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_16xi8_param_0]; -; CHECK-NEXT: ld.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1]; +; CHECK-NEXT: ld.volatile.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1]; ; CHECK-NEXT: prmt.b32 %r5, %r4, 0, 0x7773U; ; CHECK-NEXT: cvt.u16.u32 %rs1, %r5; ; CHECK-NEXT: add.s16 %rs2, %rs1, 1; @@ -3036,7 +3036,7 @@ define void @local_volatile_16xi8(ptr addrspace(5) %a) { ; CHECK-NEXT: cvt.u32.u16 %r46, %rs32; ; CHECK-NEXT: prmt.b32 %r47, %r46, %r44, 0x3340U; ; CHECK-NEXT: prmt.b32 %r48, %r47, %r42, 0x5410U; -; CHECK-NEXT: st.local.v4.b32 [%rd1], {%r48, %r37, 
%r26, %r15}; +; CHECK-NEXT: st.volatile.local.v4.b32 [%rd1], {%r48, %r37, %r26, %r15}; ; CHECK-NEXT: ret; %a.load = load volatile <16 x i8>, ptr addrspace(5) %a %a.add = add <16 x i8> %a.load, @@ -3052,10 +3052,10 @@ define void @local_volatile_2xi16(ptr addrspace(5) %a) { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_2xi16_param_0]; -; CHECK-NEXT: ld.local.v2.b16 {%rs1, %rs2}, [%rd1]; +; CHECK-NEXT: ld.volatile.local.v2.b16 {%rs1, %rs2}, [%rd1]; ; CHECK-NEXT: add.s16 %rs3, %rs2, 1; ; CHECK-NEXT: add.s16 %rs4, %rs1, 1; -; CHECK-NEXT: st.local.v2.b16 [%rd1], {%rs4, %rs3}; +; CHECK-NEXT: st.volatile.local.v2.b16 [%rd1], {%rs4, %rs3}; ; CHECK-NEXT: ret; %a.load = load volatile <2 x i16>, ptr addrspace(5) %a %a.add = add <2 x i16> %a.load, @@ -3071,12 +3071,12 @@ define void @local_volatile_4xi16(ptr addrspace(5) %a) { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_4xi16_param_0]; -; CHECK-NEXT: ld.local.v4.b16 {%rs1, %rs2, %rs3, %rs4}, [%rd1]; +; CHECK-NEXT: ld.volatile.local.v4.b16 {%rs1, %rs2, %rs3, %rs4}, [%rd1]; ; CHECK-NEXT: add.s16 %rs5, %rs4, 1; ; CHECK-NEXT: add.s16 %rs6, %rs3, 1; ; CHECK-NEXT: add.s16 %rs7, %rs2, 1; ; CHECK-NEXT: add.s16 %rs8, %rs1, 1; -; CHECK-NEXT: st.local.v4.b16 [%rd1], {%rs8, %rs7, %rs6, %rs5}; +; CHECK-NEXT: st.volatile.local.v4.b16 [%rd1], {%rs8, %rs7, %rs6, %rs5}; ; CHECK-NEXT: ret; %a.load = load volatile <4 x i16>, ptr addrspace(5) %a %a.add = add <4 x i16> %a.load, @@ -3093,7 +3093,7 @@ define void @local_volatile_8xi16(ptr addrspace(5) %a) { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_8xi16_param_0]; -; CHECK-NEXT: ld.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1]; +; CHECK-NEXT: ld.volatile.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1]; ; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r4; ; CHECK-NEXT: add.s16 %rs3, %rs2, 1; ; CHECK-NEXT: add.s16 %rs4, %rs1, 1; @@ -3110,7 +3110,7 @@ define void @local_volatile_8xi16(ptr 
addrspace(5) %a) { ; CHECK-NEXT: add.s16 %rs15, %rs14, 1; ; CHECK-NEXT: add.s16 %rs16, %rs13, 1; ; CHECK-NEXT: mov.b32 %r8, {%rs16, %rs15}; -; CHECK-NEXT: st.local.v4.b32 [%rd1], {%r8, %r7, %r6, %r5}; +; CHECK-NEXT: st.volatile.local.v4.b32 [%rd1], {%r8, %r7, %r6, %r5}; ; CHECK-NEXT: ret; %a.load = load volatile <8 x i16>, ptr addrspace(5) %a %a.add = add <8 x i16> %a.load, @@ -3126,10 +3126,10 @@ define void @local_volatile_2xi32(ptr addrspace(5) %a) { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_2xi32_param_0]; -; CHECK-NEXT: ld.local.v2.b32 {%r1, %r2}, [%rd1]; +; CHECK-NEXT: ld.volatile.local.v2.b32 {%r1, %r2}, [%rd1]; ; CHECK-NEXT: add.s32 %r3, %r2, 1; ; CHECK-NEXT: add.s32 %r4, %r1, 1; -; CHECK-NEXT: st.local.v2.b32 [%rd1], {%r4, %r3}; +; CHECK-NEXT: st.volatile.local.v2.b32 [%rd1], {%r4, %r3}; ; CHECK-NEXT: ret; %a.load = load volatile <2 x i32>, ptr addrspace(5) %a %a.add = add <2 x i32> %a.load, @@ -3145,12 +3145,12 @@ define void @local_volatile_4xi32(ptr addrspace(5) %a) { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_4xi32_param_0]; -; CHECK-NEXT: ld.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1]; +; CHECK-NEXT: ld.volatile.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1]; ; CHECK-NEXT: add.s32 %r5, %r4, 1; ; CHECK-NEXT: add.s32 %r6, %r3, 1; ; CHECK-NEXT: add.s32 %r7, %r2, 1; ; CHECK-NEXT: add.s32 %r8, %r1, 1; -; CHECK-NEXT: st.local.v4.b32 [%rd1], {%r8, %r7, %r6, %r5}; +; CHECK-NEXT: st.volatile.local.v4.b32 [%rd1], {%r8, %r7, %r6, %r5}; ; CHECK-NEXT: ret; %a.load = load volatile <4 x i32>, ptr addrspace(5) %a %a.add = add <4 x i32> %a.load, @@ -3165,10 +3165,10 @@ define void @local_volatile_2xi64(ptr addrspace(5) %a) { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_2xi64_param_0]; -; CHECK-NEXT: ld.local.v2.b64 {%rd2, %rd3}, [%rd1]; +; CHECK-NEXT: ld.volatile.local.v2.b64 {%rd2, %rd3}, [%rd1]; ; CHECK-NEXT: add.s64 %rd4, %rd3, 1; ; 
CHECK-NEXT: add.s64 %rd5, %rd2, 1; -; CHECK-NEXT: st.local.v2.b64 [%rd1], {%rd5, %rd4}; +; CHECK-NEXT: st.volatile.local.v2.b64 [%rd1], {%rd5, %rd4}; ; CHECK-NEXT: ret; %a.load = load volatile <2 x i64>, ptr addrspace(5) %a %a.add = add <2 x i64> %a.load, @@ -3184,10 +3184,10 @@ define void @local_volatile_2xfloat(ptr addrspace(5) %a) { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_2xfloat_param_0]; -; CHECK-NEXT: ld.local.v2.b32 {%r1, %r2}, [%rd1]; +; CHECK-NEXT: ld.volatile.local.v2.b32 {%r1, %r2}, [%rd1]; ; CHECK-NEXT: add.rn.f32 %r3, %r2, 0f3F800000; ; CHECK-NEXT: add.rn.f32 %r4, %r1, 0f3F800000; -; CHECK-NEXT: st.local.v2.b32 [%rd1], {%r4, %r3}; +; CHECK-NEXT: st.volatile.local.v2.b32 [%rd1], {%r4, %r3}; ; CHECK-NEXT: ret; %a.load = load volatile <2 x float>, ptr addrspace(5) %a %a.add = fadd <2 x float> %a.load, @@ -3203,12 +3203,12 @@ define void @local_volatile_4xfloat(ptr addrspace(5) %a) { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_4xfloat_param_0]; -; CHECK-NEXT: ld.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1]; +; CHECK-NEXT: ld.volatile.local.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1]; ; CHECK-NEXT: add.rn.f32 %r5, %r4, 0f3F800000; ; CHECK-NEXT: add.rn.f32 %r6, %r3, 0f3F800000; ; CHECK-NEXT: add.rn.f32 %r7, %r2, 0f3F800000; ; CHECK-NEXT: add.rn.f32 %r8, %r1, 0f3F800000; -; CHECK-NEXT: st.local.v4.b32 [%rd1], {%r8, %r7, %r6, %r5}; +; CHECK-NEXT: st.volatile.local.v4.b32 [%rd1], {%r8, %r7, %r6, %r5}; ; CHECK-NEXT: ret; %a.load = load volatile <4 x float>, ptr addrspace(5) %a %a.add = fadd <4 x float> %a.load, @@ -3223,10 +3223,10 @@ define void @local_volatile_2xdouble(ptr addrspace(5) %a) { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [local_volatile_2xdouble_param_0]; -; CHECK-NEXT: ld.local.v2.b64 {%rd2, %rd3}, [%rd1]; +; CHECK-NEXT: ld.volatile.local.v2.b64 {%rd2, %rd3}, [%rd1]; ; CHECK-NEXT: add.rn.f64 %rd4, %rd3, 0d3FF0000000000000; ; CHECK-NEXT: 
add.rn.f64 %rd5, %rd2, 0d3FF0000000000000; -; CHECK-NEXT: st.local.v2.b64 [%rd1], {%rd5, %rd4}; +; CHECK-NEXT: st.volatile.local.v2.b64 [%rd1], {%rd5, %rd4}; ; CHECK-NEXT: ret; %a.load = load volatile <2 x double>, ptr addrspace(5) %a %a.add = fadd <2 x double> %a.load, diff --git a/llvm/test/CodeGen/NVPTX/local-stack-frame.ll b/llvm/test/CodeGen/NVPTX/local-stack-frame.ll index 5c3017310d0a3..5b8018d8b32a7 100644 --- a/llvm/test/CodeGen/NVPTX/local-stack-frame.ll +++ b/llvm/test/CodeGen/NVPTX/local-stack-frame.ll @@ -18,7 +18,7 @@ define void @foo(i32 %a) { ; PTX32-NEXT: mov.b32 %SPL, __local_depot0; ; PTX32-NEXT: ld.param.b32 %r1, [foo_param_0]; ; PTX32-NEXT: add.u32 %r3, %SPL, 0; -; PTX32-NEXT: st.local.b32 [%r3], %r1; +; PTX32-NEXT: st.volatile.local.b32 [%r3], %r1; ; PTX32-NEXT: ret; ; ; PTX64-LABEL: foo( @@ -33,7 +33,7 @@ define void @foo(i32 %a) { ; PTX64-NEXT: mov.b64 %SPL, __local_depot0; ; PTX64-NEXT: ld.param.b32 %r1, [foo_param_0]; ; PTX64-NEXT: add.u64 %rd2, %SPL, 0; -; PTX64-NEXT: st.local.b32 [%rd2], %r1; +; PTX64-NEXT: st.volatile.local.b32 [%rd2], %r1; ; PTX64-NEXT: ret; %local = alloca i32, align 4 store volatile i32 %a, ptr %local