@@ -48,7 +48,8 @@ define amdgpu_kernel void @memset_global_to_flat_no_md(ptr addrspace(1) %global.
4848define  amdgpu_kernel void  @memcpy_flat_to_flat_replace_src_with_group (ptr  %dest , ptr  addrspace (3 ) %src.group.ptr , i64  %size ) #0  {
4949; CHECK-LABEL: define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group( 
5050; CHECK-SAME: ptr [[DEST:%.*]], ptr addrspace(3) [[SRC_GROUP_PTR:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] { 
51- ; CHECK-NEXT:    call void @llvm.memcpy.p0.p3.i64(ptr align 4 [[DEST]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]] 
51+ ; CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr [[DEST]] to ptr addrspace(1) 
52+ ; CHECK-NEXT:    call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) align 4 [[TMP1]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]] 
5253; CHECK-NEXT:    ret void 
5354; 
5455  %cast.src  = addrspacecast ptr  addrspace (3 ) %src.group.ptr  to  ptr 
@@ -59,7 +60,8 @@ define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group(ptr %dest,
5960define  amdgpu_kernel void  @memcpy_inline_flat_to_flat_replace_src_with_group (ptr  %dest , ptr  addrspace (3 ) %src.group.ptr ) #0  {
6061; CHECK-LABEL: define amdgpu_kernel void @memcpy_inline_flat_to_flat_replace_src_with_group( 
6162; CHECK-SAME: ptr [[DEST:%.*]], ptr addrspace(3) [[SRC_GROUP_PTR:%.*]]) #[[ATTR0]] { 
62- ; CHECK-NEXT:    call void @llvm.memcpy.inline.p0.p3.i64(ptr align 4 [[DEST]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 42, i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]] 
63+ ; CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr [[DEST]] to ptr addrspace(1) 
64+ ; CHECK-NEXT:    call void @llvm.memcpy.inline.p1.p3.i64(ptr addrspace(1) align 4 [[TMP1]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 42, i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]] 
6365; CHECK-NEXT:    ret void 
6466; 
6567  %cast.src  = addrspacecast ptr  addrspace (3 ) %src.group.ptr  to  ptr 
@@ -70,7 +72,8 @@ define amdgpu_kernel void @memcpy_inline_flat_to_flat_replace_src_with_group(ptr
7072define  amdgpu_kernel void  @memcpy_flat_to_flat_replace_dest_with_group (ptr  addrspace (3 ) %dest.group.ptr , ptr  %src.ptr , i64  %size ) #0  {
7173; CHECK-LABEL: define amdgpu_kernel void @memcpy_flat_to_flat_replace_dest_with_group( 
7274; CHECK-SAME: ptr addrspace(3) [[DEST_GROUP_PTR:%.*]], ptr [[SRC_PTR:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] { 
73- ; CHECK-NEXT:    call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) align 4 [[DEST_GROUP_PTR]], ptr align 4 [[SRC_PTR]], i64 [[SIZE]], i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]] 
75+ ; CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr [[SRC_PTR]] to ptr addrspace(1) 
76+ ; CHECK-NEXT:    call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) align 4 [[DEST_GROUP_PTR]], ptr addrspace(1) align 4 [[TMP1]], i64 [[SIZE]], i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]] 
7477; CHECK-NEXT:    ret void 
7578; 
7679  %cast.dest  = addrspacecast ptr  addrspace (3 ) %dest.group.ptr  to  ptr 
@@ -116,7 +119,8 @@ define amdgpu_kernel void @memcpy_group_to_flat_replace_dest_global(ptr addrspac
116119define  amdgpu_kernel void  @memcpy_flat_to_flat_replace_src_with_group_tbaa_struct (ptr  %dest , ptr  addrspace (3 ) %src.group.ptr , i64  %size ) #0  {
117120; CHECK-LABEL: define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group_tbaa_struct( 
118121; CHECK-SAME: ptr [[DEST:%.*]], ptr addrspace(3) [[SRC_GROUP_PTR:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] { 
119- ; CHECK-NEXT:    call void @llvm.memcpy.p0.p3.i64(ptr align 4 [[DEST]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false), !tbaa.struct [[TBAA_STRUCT8:![0-9]+]] 
122+ ; CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr [[DEST]] to ptr addrspace(1) 
123+ ; CHECK-NEXT:    call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) align 4 [[TMP1]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false), !tbaa.struct [[TBAA_STRUCT8:![0-9]+]] 
120124; CHECK-NEXT:    ret void 
121125; 
122126  %cast.src  = addrspacecast ptr  addrspace (3 ) %src.group.ptr  to  ptr 
@@ -127,7 +131,8 @@ define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group_tbaa_struc
127131define  amdgpu_kernel void  @memcpy_flat_to_flat_replace_src_with_group_no_md (ptr  %dest , ptr  addrspace (3 ) %src.group.ptr , i64  %size ) #0  {
128132; CHECK-LABEL: define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group_no_md( 
129133; CHECK-SAME: ptr [[DEST:%.*]], ptr addrspace(3) [[SRC_GROUP_PTR:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] { 
130- ; CHECK-NEXT:    call void @llvm.memcpy.p0.p3.i64(ptr align 4 [[DEST]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false) 
134+ ; CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr [[DEST]] to ptr addrspace(1) 
135+ ; CHECK-NEXT:    call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) align 4 [[TMP1]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false) 
131136; CHECK-NEXT:    ret void 
132137; 
133138  %cast.src  = addrspacecast ptr  addrspace (3 ) %src.group.ptr  to  ptr 
@@ -138,8 +143,10 @@ define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group_no_md(ptr
138143define  amdgpu_kernel void  @multiple_memcpy_flat_to_flat_replace_src_with_group_no_md (ptr  %dest0 , ptr  %dest1 , ptr  addrspace (3 ) %src.group.ptr , i64  %size ) #0  {
139144; CHECK-LABEL: define amdgpu_kernel void @multiple_memcpy_flat_to_flat_replace_src_with_group_no_md( 
140145; CHECK-SAME: ptr [[DEST0:%.*]], ptr [[DEST1:%.*]], ptr addrspace(3) [[SRC_GROUP_PTR:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] { 
141- ; CHECK-NEXT:    call void @llvm.memcpy.p0.p3.i64(ptr align 4 [[DEST0]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false) 
142- ; CHECK-NEXT:    call void @llvm.memcpy.p0.p3.i64(ptr align 4 [[DEST1]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false) 
146+ ; CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr [[DEST0]] to ptr addrspace(1) 
147+ ; CHECK-NEXT:    [[TMP2:%.*]] = addrspacecast ptr [[DEST1]] to ptr addrspace(1) 
148+ ; CHECK-NEXT:    call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) align 4 [[TMP1]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false) 
149+ ; CHECK-NEXT:    call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) align 4 [[TMP2]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false) 
143150; CHECK-NEXT:    ret void 
144151; 
145152  %cast.src  = addrspacecast ptr  addrspace (3 ) %src.group.ptr  to  ptr 
@@ -162,7 +169,8 @@ define amdgpu_kernel void @memcpy_group_flat_to_flat_self(ptr addrspace(3) %grou
162169define  amdgpu_kernel void  @memmove_flat_to_flat_replace_src_with_group (ptr  %dest , ptr  addrspace (3 ) %src.group.ptr , i64  %size ) #0  {
163170; CHECK-LABEL: define amdgpu_kernel void @memmove_flat_to_flat_replace_src_with_group( 
164171; CHECK-SAME: ptr [[DEST:%.*]], ptr addrspace(3) [[SRC_GROUP_PTR:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] { 
165- ; CHECK-NEXT:    call void @llvm.memmove.p0.p3.i64(ptr align 4 [[DEST]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]] 
172+ ; CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr [[DEST]] to ptr addrspace(1) 
173+ ; CHECK-NEXT:    call void @llvm.memmove.p1.p3.i64(ptr addrspace(1) align 4 [[TMP1]], ptr addrspace(3) align 4 [[SRC_GROUP_PTR]], i64 [[SIZE]], i1 false), !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META6]] 
166174; CHECK-NEXT:    ret void 
167175; 
168176  %cast.src  = addrspacecast ptr  addrspace (3 ) %src.group.ptr  to  ptr 
0 commit comments