Skip to content

Commit d37e7ec

Browse files
authored
[LowerMemIntrinsics] Respect the volatile argument of llvm.memmove (#97545)
Previously, we ignored whether a memmove intrinsic was volatile when lowering it to loops in the IR. This change generates volatile loads and stores in that case (similar to how memcpy is handled) and adds tests for volatile memmoves and memcpys.
1 parent 5fd5b8a commit d37e7ec

File tree

2 files changed

+67
-5
lines changed

2 files changed

+67
-5
lines changed

llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -422,10 +422,10 @@ static void createMemMoveLoop(Instruction *InsertBefore, Value *SrcAddr,
422422
LoopPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_ptr");
423423
Value *Element = LoopBuilder.CreateAlignedLoad(
424424
EltTy, LoopBuilder.CreateInBoundsGEP(EltTy, SrcAddr, IndexPtr),
425-
PartSrcAlign, "element");
425+
PartSrcAlign, SrcIsVolatile, "element");
426426
LoopBuilder.CreateAlignedStore(
427427
Element, LoopBuilder.CreateInBoundsGEP(EltTy, DstAddr, IndexPtr),
428-
PartDstAlign);
428+
PartDstAlign, DstIsVolatile);
429429
LoopBuilder.CreateCondBr(
430430
LoopBuilder.CreateICmpEQ(IndexPtr, ConstantInt::get(TypeOfCopyLen, 0)),
431431
ExitBB, LoopBB);
@@ -440,10 +440,11 @@ static void createMemMoveLoop(Instruction *InsertBefore, Value *SrcAddr,
440440
IRBuilder<> FwdLoopBuilder(FwdLoopBB);
441441
PHINode *FwdCopyPhi = FwdLoopBuilder.CreatePHI(TypeOfCopyLen, 0, "index_ptr");
442442
Value *SrcGEP = FwdLoopBuilder.CreateInBoundsGEP(EltTy, SrcAddr, FwdCopyPhi);
443-
Value *FwdElement =
444-
FwdLoopBuilder.CreateAlignedLoad(EltTy, SrcGEP, PartSrcAlign, "element");
443+
Value *FwdElement = FwdLoopBuilder.CreateAlignedLoad(
444+
EltTy, SrcGEP, PartSrcAlign, SrcIsVolatile, "element");
445445
Value *DstGEP = FwdLoopBuilder.CreateInBoundsGEP(EltTy, DstAddr, FwdCopyPhi);
446-
FwdLoopBuilder.CreateAlignedStore(FwdElement, DstGEP, PartDstAlign);
446+
FwdLoopBuilder.CreateAlignedStore(FwdElement, DstGEP, PartDstAlign,
447+
DstIsVolatile);
447448
Value *FwdIndexPtr = FwdLoopBuilder.CreateAdd(
448449
FwdCopyPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_increment");
449450
FwdLoopBuilder.CreateCondBr(FwdLoopBuilder.CreateICmpEQ(FwdIndexPtr, CopyLen),

llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics.ll

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1776,6 +1776,67 @@ entry:
17761776
ret void
17771777
}
17781778

1779+
define amdgpu_kernel void @memmove_volatile(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
1780+
; MAX1024-LABEL: @memmove_volatile(
1781+
; MAX1024-NEXT: call void @llvm.memmove.p1.p1.i64(ptr addrspace(1) [[DST:%.*]], ptr addrspace(1) [[SRC:%.*]], i64 64, i1 true)
1782+
; MAX1024-NEXT: ret void
1783+
;
1784+
; ALL-LABEL: @memmove_volatile(
1785+
; ALL-NEXT: [[COMPARE_SRC_DST:%.*]] = icmp ult ptr addrspace(1) [[SRC:%.*]], [[DST:%.*]]
1786+
; ALL-NEXT: [[COMPARE_N_TO_0:%.*]] = icmp eq i64 64, 0
1787+
; ALL-NEXT: br i1 [[COMPARE_SRC_DST]], label [[COPY_BACKWARDS:%.*]], label [[COPY_FORWARD:%.*]]
1788+
; ALL: copy_backwards:
1789+
; ALL-NEXT: br i1 [[COMPARE_N_TO_0]], label [[MEMMOVE_DONE:%.*]], label [[COPY_BACKWARDS_LOOP:%.*]]
1790+
; ALL: copy_backwards_loop:
1791+
; ALL-NEXT: [[TMP1:%.*]] = phi i64 [ [[INDEX_PTR:%.*]], [[COPY_BACKWARDS_LOOP]] ], [ 64, [[COPY_BACKWARDS]] ]
1792+
; ALL-NEXT: [[INDEX_PTR]] = sub i64 [[TMP1]], 1
1793+
; ALL-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 [[INDEX_PTR]]
1794+
; ALL-NEXT: [[ELEMENT:%.*]] = load volatile i8, ptr addrspace(1) [[TMP2]], align 1
1795+
; ALL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i64 [[INDEX_PTR]]
1796+
; ALL-NEXT: store volatile i8 [[ELEMENT]], ptr addrspace(1) [[TMP3]], align 1
1797+
; ALL-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_PTR]], 0
1798+
; ALL-NEXT: br i1 [[TMP4]], label [[MEMMOVE_DONE]], label [[COPY_BACKWARDS_LOOP]]
1799+
; ALL: copy_forward:
1800+
; ALL-NEXT: br i1 [[COMPARE_N_TO_0]], label [[MEMMOVE_DONE]], label [[COPY_FORWARD_LOOP:%.*]]
1801+
; ALL: copy_forward_loop:
1802+
; ALL-NEXT: [[INDEX_PTR1:%.*]] = phi i64 [ [[INDEX_INCREMENT:%.*]], [[COPY_FORWARD_LOOP]] ], [ 0, [[COPY_FORWARD]] ]
1803+
; ALL-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 [[INDEX_PTR1]]
1804+
; ALL-NEXT: [[ELEMENT2:%.*]] = load volatile i8, ptr addrspace(1) [[TMP5]], align 1
1805+
; ALL-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i64 [[INDEX_PTR1]]
1806+
; ALL-NEXT: store volatile i8 [[ELEMENT2]], ptr addrspace(1) [[TMP6]], align 1
1807+
; ALL-NEXT: [[INDEX_INCREMENT]] = add i64 [[INDEX_PTR1]], 1
1808+
; ALL-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_INCREMENT]], 64
1809+
; ALL-NEXT: br i1 [[TMP7]], label [[MEMMOVE_DONE]], label [[COPY_FORWARD_LOOP]]
1810+
; ALL: memmove_done:
1811+
; ALL-NEXT: ret void
1812+
;
1813+
call void @llvm.memmove.p1.p1.i64(ptr addrspace(1) %dst, ptr addrspace(1) %src, i64 64, i1 true)
1814+
ret void
1815+
}
1816+
1817+
define amdgpu_kernel void @memcpy_volatile(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
1818+
; MAX1024-LABEL: @memcpy_volatile(
1819+
; MAX1024-NEXT: call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) [[DST:%.*]], ptr addrspace(1) [[SRC:%.*]], i64 64, i1 true)
1820+
; MAX1024-NEXT: ret void
1821+
;
1822+
; ALL-LABEL: @memcpy_volatile(
1823+
; ALL-NEXT: br label [[LOAD_STORE_LOOP:%.*]]
1824+
; ALL: load-store-loop:
1825+
; ALL-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ]
1826+
; ALL-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]]
1827+
; ALL-NEXT: [[TMP2:%.*]] = load volatile <4 x i32>, ptr addrspace(1) [[TMP1]], align 1
1828+
; ALL-NEXT: [[TMP3:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]]
1829+
; ALL-NEXT: store volatile <4 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 1
1830+
; ALL-NEXT: [[TMP4]] = add i64 [[LOOP_INDEX]], 1
1831+
; ALL-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 4
1832+
; ALL-NEXT: br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
1833+
; ALL: memcpy-split:
1834+
; ALL-NEXT: ret void
1835+
;
1836+
call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) %dst, ptr addrspace(1) %src, i64 64, i1 true)
1837+
ret void
1838+
}
1839+
17791840
declare i64 @llvm.umin.i64(i64, i64)
17801841

17811842
attributes #0 = { nounwind }

0 commit comments

Comments (0)