@@ -1776,6 +1776,67 @@ entry:
1776
1776
ret void
1777
1777
}
1778
1778
1779
; Test: a 64-byte volatile memmove between two global (addrspace 1) pointers.
; The trailing `i1 true` operand of the intrinsic call is the isvolatile flag.
;   MAX1024 checks: the llvm.memmove call is expected to be left unexpanded.
;   ALL checks: the call is expected to be expanded into byte-wise (i8) copy
;   loops - backward when src < dst (unsigned compare), forward otherwise -
;   with every generated load and store marked volatile.
; NOTE(review): the RUN lines that define the MAX1024/ALL prefixes are outside
; this excerpt; the CHECK lines look auto-generated, so presumably they should
; be regenerated rather than hand-edited - confirm.
+ define amdgpu_kernel void @memmove_volatile (ptr addrspace (1 ) %dst , ptr addrspace (1 ) %src ) #0 {
1780
+ ; MAX1024-LABEL: @memmove_volatile(
1781
+ ; MAX1024-NEXT: call void @llvm.memmove.p1.p1.i64(ptr addrspace(1) [[DST:%.*]], ptr addrspace(1) [[SRC:%.*]], i64 64, i1 true)
1782
+ ; MAX1024-NEXT: ret void
1783
+ ;
1784
+ ; ALL-LABEL: @memmove_volatile(
1785
+ ; ALL-NEXT: [[COMPARE_SRC_DST:%.*]] = icmp ult ptr addrspace(1) [[SRC:%.*]], [[DST:%.*]]
1786
+ ; ALL-NEXT: [[COMPARE_N_TO_0:%.*]] = icmp eq i64 64, 0
1787
+ ; ALL-NEXT: br i1 [[COMPARE_SRC_DST]], label [[COPY_BACKWARDS:%.*]], label [[COPY_FORWARD:%.*]]
1788
+ ; ALL: copy_backwards:
1789
+ ; ALL-NEXT: br i1 [[COMPARE_N_TO_0]], label [[MEMMOVE_DONE:%.*]], label [[COPY_BACKWARDS_LOOP:%.*]]
1790
+ ; ALL: copy_backwards_loop:
1791
+ ; ALL-NEXT: [[TMP1:%.*]] = phi i64 [ [[INDEX_PTR:%.*]], [[COPY_BACKWARDS_LOOP]] ], [ 64, [[COPY_BACKWARDS]] ]
1792
+ ; ALL-NEXT: [[INDEX_PTR]] = sub i64 [[TMP1]], 1
1793
+ ; ALL-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 [[INDEX_PTR]]
1794
+ ; ALL-NEXT: [[ELEMENT:%.*]] = load volatile i8, ptr addrspace(1) [[TMP2]], align 1
1795
+ ; ALL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i64 [[INDEX_PTR]]
1796
+ ; ALL-NEXT: store volatile i8 [[ELEMENT]], ptr addrspace(1) [[TMP3]], align 1
1797
+ ; ALL-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_PTR]], 0
1798
+ ; ALL-NEXT: br i1 [[TMP4]], label [[MEMMOVE_DONE]], label [[COPY_BACKWARDS_LOOP]]
1799
+ ; ALL: copy_forward:
1800
+ ; ALL-NEXT: br i1 [[COMPARE_N_TO_0]], label [[MEMMOVE_DONE]], label [[COPY_FORWARD_LOOP:%.*]]
1801
+ ; ALL: copy_forward_loop:
1802
+ ; ALL-NEXT: [[INDEX_PTR1:%.*]] = phi i64 [ [[INDEX_INCREMENT:%.*]], [[COPY_FORWARD_LOOP]] ], [ 0, [[COPY_FORWARD]] ]
1803
+ ; ALL-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 [[INDEX_PTR1]]
1804
+ ; ALL-NEXT: [[ELEMENT2:%.*]] = load volatile i8, ptr addrspace(1) [[TMP5]], align 1
1805
+ ; ALL-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i64 [[INDEX_PTR1]]
1806
+ ; ALL-NEXT: store volatile i8 [[ELEMENT2]], ptr addrspace(1) [[TMP6]], align 1
1807
+ ; ALL-NEXT: [[INDEX_INCREMENT]] = add i64 [[INDEX_PTR1]], 1
1808
+ ; ALL-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_INCREMENT]], 64
1809
+ ; ALL-NEXT: br i1 [[TMP7]], label [[MEMMOVE_DONE]], label [[COPY_FORWARD_LOOP]]
1810
+ ; ALL: memmove_done:
1811
+ ; ALL-NEXT: ret void
1812
+ ;
1813
+ call void @llvm.memmove.p1.p1.i64 (ptr addrspace (1 ) %dst , ptr addrspace (1 ) %src , i64 64 , i1 true )
1814
+ ret void
1815
+ }
1816
+
1817
; Test: a 64-byte volatile memcpy between two global (addrspace 1) pointers.
; The trailing `i1 true` operand of the intrinsic call is the isvolatile flag.
;   MAX1024 checks: the llvm.memcpy call is expected to be left unexpanded.
;   ALL checks: the call is expected to be expanded into a single load/store
;   loop that moves one <4 x i32> per iteration for 4 iterations (64 bytes),
;   with the generated load and store marked volatile and `align 1`.
; NOTE(review): the RUN lines that define the MAX1024/ALL prefixes are outside
; this excerpt; the CHECK lines look auto-generated, so presumably they should
; be regenerated rather than hand-edited - confirm.
+ define amdgpu_kernel void @memcpy_volatile (ptr addrspace (1 ) %dst , ptr addrspace (1 ) %src ) #0 {
1818
+ ; MAX1024-LABEL: @memcpy_volatile(
1819
+ ; MAX1024-NEXT: call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) [[DST:%.*]], ptr addrspace(1) [[SRC:%.*]], i64 64, i1 true)
1820
+ ; MAX1024-NEXT: ret void
1821
+ ;
1822
+ ; ALL-LABEL: @memcpy_volatile(
1823
+ ; ALL-NEXT: br label [[LOAD_STORE_LOOP:%.*]]
1824
+ ; ALL: load-store-loop:
1825
+ ; ALL-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ]
1826
+ ; ALL-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]]
1827
+ ; ALL-NEXT: [[TMP2:%.*]] = load volatile <4 x i32>, ptr addrspace(1) [[TMP1]], align 1
1828
+ ; ALL-NEXT: [[TMP3:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]]
1829
+ ; ALL-NEXT: store volatile <4 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 1
1830
+ ; ALL-NEXT: [[TMP4]] = add i64 [[LOOP_INDEX]], 1
1831
+ ; ALL-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 4
1832
+ ; ALL-NEXT: br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
1833
+ ; ALL: memcpy-split:
1834
+ ; ALL-NEXT: ret void
1835
+ ;
1836
+ call void @llvm.memcpy.p1.p1.i64 (ptr addrspace (1 ) %dst , ptr addrspace (1 ) %src , i64 64 , i1 true )
1837
+ ret void
1838
+ }
1839
+
1779
1840
; Declaration of the llvm.umin.i64 intrinsic (two i64 operands, i64 result).
; NOTE(review): its users are not within this excerpt - presumably earlier
; tests in the file; confirm before removing.
declare i64 @llvm.umin.i64 (i64 , i64 )
1780
1841
1781
1842
; Attribute group #0 (nounwind), referenced as `#0` by the kernel definitions
; in this file.
attributes #0 = { nounwind }
0 commit comments