Skip to content

Commit f92db6d

Browse files
committed
[HIP] Relax conditions for address space cast in builtin args
Allow (implicit) address space casting between LLVM-equivalent target address spaces. Reviewed By: yaxunl, tra Differential Revision: https://reviews.llvm.org/D111734
1 parent 2a2432e commit f92db6d

File tree

3 files changed

+39
-3
lines changed

3 files changed

+39
-3
lines changed

clang/lib/Sema/SemaExpr.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6545,9 +6545,13 @@ ExprResult Sema::BuildCallExpr(Scope *Scope, Expr *Fn, SourceLocation LParenLoc,
65456545
auto ArgPtTy = ArgTy->getPointeeType();
65466546
auto ArgAS = ArgPtTy.getAddressSpace();
65476547

6548-
// Only allow implicit casting from a non-default address space pointee
6549-
// type to a default address space pointee type
6550-
if (ArgAS != LangAS::Default || ParamAS == LangAS::Default)
6548+
// Add address space cast if target address spaces are different
6549+
bool NeedImplicitASC =
6550+
ParamAS != LangAS::Default && // Pointer params in generic AS don't need special handling.
6551+
( ArgAS == LangAS::Default || // We do allow implicit conversion from generic AS
6552+
// or from specific AS which has target AS matching that of Param.
6553+
getASTContext().getTargetAddressSpace(ArgAS) == getASTContext().getTargetAddressSpace(ParamAS));
6554+
if (!NeedImplicitASC)
65516555
continue;
65526556

65536557
// First, ensure that the Arg is an RValue.
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx90a -x hip \
2+
// RUN: -aux-triple x86_64-unknown-linux-gnu -fcuda-is-device -emit-llvm %s \
3+
// RUN: -o - | FileCheck %s
4+
5+
#define __device__ __attribute__((device))
6+
typedef __attribute__((address_space(3))) float *LP;
7+
8+
// CHECK-LABEL: test_ds_atomic_add_f32
9+
// CHECK: %[[ADDR_ADDR:.*]] = alloca float*, align 8, addrspace(5)
10+
// CHECK: %[[ADDR_ADDR_ASCAST_PTR:.*]] = addrspacecast float* addrspace(5)* %[[ADDR_ADDR]] to float**
11+
// CHECK: store float* %addr, float** %[[ADDR_ADDR_ASCAST_PTR]], align 8
12+
// CHECK: %[[ADDR_ADDR_ASCAST:.*]] = load float*, float** %[[ADDR_ADDR_ASCAST_PTR]], align 8
13+
// CHECK: %[[AS_CAST:.*]] = addrspacecast float* %[[ADDR_ADDR_ASCAST]] to float addrspace(3)*
14+
// CHECK: %3 = call contract float @llvm.amdgcn.ds.fadd.f32(float addrspace(3)* %[[AS_CAST]]
15+
// CHECK: %4 = load float*, float** %rtn.ascast, align 8
16+
// CHECK: store float %3, float* %4, align 4
17+
__device__ void test_ds_atomic_add_f32(float *addr, float val) {
18+
float *rtn;
19+
*rtn = __builtin_amdgcn_ds_faddf((LP)addr, val, 0, 0, 0);
20+
}
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx90a -x hip \
2+
// RUN: -aux-triple x86_64-unknown-linux-gnu -fcuda-is-device %s \
3+
// RUN: -fsyntax-only -verify
4+
// expected-no-diagnostics
5+
6+
#define __device__ __attribute__((device))
7+
typedef __attribute__((address_space(3))) float *LP;
8+
9+
__device__ void test_ds_atomic_add_f32(float *addr, float val) {
10+
float *rtn;
11+
*rtn = __builtin_amdgcn_ds_faddf((LP)addr, val, 0, 0, 0);
12+
}

0 commit comments

Comments
 (0)