diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
index 8d3eac6868318..5a6868f96d970 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
@@ -80,10 +80,14 @@ AliasResult AMDGPUAAResult::alias(const MemoryLocation &LocA,
     } else if (const Argument *Arg = dyn_cast<Argument>(ObjA)) {
       const Function *F = Arg->getParent();
       switch (F->getCallingConv()) {
-      case CallingConv::AMDGPU_KERNEL:
+      case CallingConv::AMDGPU_KERNEL: {
         // In the kernel function, kernel arguments won't alias to (local)
         // variables in shared or private address space.
-        return AliasResult::NoAlias;
+        const auto *ObjB =
+            getUnderlyingObject(B.Ptr->stripPointerCastsForAliasAnalysis());
+        return ObjA != ObjB && isIdentifiedObject(ObjB) ? AliasResult::NoAlias
+                                                        : AliasResult::MayAlias;
+      }
       default:
         // TODO: In the regular function, if that local variable in the
         // location B is not captured, that argument pointer won't alias to it
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-alias-analysis.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-alias-analysis.ll
index a13eb5c6d085f..6b935a8768d3d 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-alias-analysis.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-alias-analysis.ll
@@ -318,3 +318,20 @@ define void @test_9_9(ptr addrspace(9) %p, ptr addrspace(9) %p1) {
   load i8, ptr addrspace(9) %p1
   ret void
 }
+
+; CHECK-LABEL: Function: test_kernel_arg_local_ptr
+; CHECK: MayAlias: i32 addrspace(3)* %arg, i32 addrspace(3)* %arg1
+; CHECK: MayAlias: i32 addrspace(3)* %arg, i32* %arg2
+; CHECK: MayAlias: i32 addrspace(3)* %arg1, i32* %arg2
+define amdgpu_kernel void @test_kernel_arg_local_ptr(ptr addrspace(3) %arg) {
+entry:
+  %load1 = load i32, ptr addrspace(3) %arg, align 4
+  %arg.plus.1 = getelementptr inbounds nuw i8, ptr addrspace(3) %arg, i64 1
+  %arg1 = getelementptr inbounds nuw i8, ptr addrspace(3) %arg.plus.1, i64 -1
+  %load2 = load i32, ptr addrspace(3) %arg1, align 4
+  %arg.plus.4 = getelementptr inbounds nuw i8, ptr addrspace(3) %arg, i64 4
+  %acast = addrspacecast ptr addrspace(3) %arg.plus.4 to ptr
+  %arg2 = getelementptr inbounds i8, ptr %acast, i64 -4
+  %load3 = load i32, ptr %arg2, align 4
+  ret void
+}