Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions clang/lib/CodeGen/CGDecl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1563,11 +1563,10 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) {
// The named return value optimization: allocate this variable in the
// return slot, so that we can elide the copy when returning this
// variable (C++0x [class.copy]p34).
address = ReturnValue;
AllocaAddr =
RawAddress(ReturnValue.emitRawPointer(*this),
ReturnValue.getElementType(), ReturnValue.getAlignment());
;
address = MaybeCastAllocaAddressSpace(AllocaAddr, Ty.getAddressSpace());

if (const RecordType *RecordTy = Ty->getAs<RecordType>()) {
const auto *RD = RecordTy->getOriginalDecl()->getDefinitionOrSelf();
Expand Down
23 changes: 15 additions & 8 deletions clang/lib/CodeGen/CGExpr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,13 +108,9 @@ CodeGenFunction::CreateTempAllocaWithoutCast(llvm::Type *Ty, CharUnits Align,
return RawAddress(Alloca, Ty, Align, KnownNonNull);
}

RawAddress CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, LangAS DestLangAS,
CharUnits Align, const Twine &Name,
llvm::Value *ArraySize,
RawAddress *AllocaAddr) {
RawAddress Alloca = CreateTempAllocaWithoutCast(Ty, Align, Name, ArraySize);
if (AllocaAddr)
*AllocaAddr = Alloca;
RawAddress CodeGenFunction::MaybeCastAllocaAddressSpace(
RawAddress Alloca, LangAS DestLangAS, llvm::Value *ArraySize) {

llvm::Value *V = Alloca.getPointer();
// Alloca always returns a pointer in alloca address space, which may
// be different from the type defined by the language. For example,
Expand All @@ -134,7 +130,18 @@ RawAddress CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, LangAS DestLangAS,
/*IsNonNull=*/true);
}

return RawAddress(V, Ty, Align, KnownNonNull);
return RawAddress(V, Alloca.getElementType(), Alloca.getAlignment(),
KnownNonNull);
}

RawAddress CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, LangAS DestLangAS,
CharUnits Align, const Twine &Name,
llvm::Value *ArraySize,
RawAddress *AllocaAddr) {
RawAddress Alloca = CreateTempAllocaWithoutCast(Ty, Align, Name, ArraySize);
if (AllocaAddr)
*AllocaAddr = Alloca;
return MaybeCastAllocaAddressSpace(Alloca, DestLangAS, ArraySize);
}

/// CreateTempAlloca - This creates an alloca and inserts it into the entry
Expand Down
7 changes: 7 additions & 0 deletions clang/lib/CodeGen/CodeGenFunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -2804,6 +2804,13 @@ class CodeGenFunction : public CodeGenTypeCache {
AllocaTracker Tracker;
};

private:
/// If \p Alloca is not in the same address space as \p DestLangAS, insert an
/// address space cast and return a new RawAddress based on this value.
RawAddress MaybeCastAllocaAddressSpace(RawAddress Alloca, LangAS DestLangAS,
llvm::Value *ArraySize = nullptr);

public:
/// CreateTempAlloca - This creates an alloca and inserts it into the entry
/// block if \p ArraySize is nullptr, otherwise inserts it at the current
/// insertion point of the builder. The caller is responsible for setting an
Expand Down
7 changes: 3 additions & 4 deletions clang/test/CodeGenCXX/sret_cast_with_nonzero_alloca_as.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,15 @@ struct X { int z[17]; };
// CHECK-NEXT: [[Y_ADDR:%.*]] = alloca i8, align 1, addrspace(5)
// CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr
// CHECK-NEXT: [[Y_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[Y_ADDR]] to ptr
// CHECK-NEXT: [[AGG_RESULT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AGG_RESULT]] to ptr
// CHECK-NEXT: store i8 [[X]], ptr [[X_ADDR_ASCAST]], align 1
// CHECK-NEXT: store i8 [[Y]], ptr [[Y_ADDR_ASCAST]], align 1
// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[X_ADDR_ASCAST]], align 1
// CHECK-NEXT: [[AGG_RESULT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AGG_RESULT]] to ptr
// CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT_ASCAST]], i64 1
// CHECK-NEXT: store i8 [[TMP0]], ptr [[ADD_PTR]], align 1
// CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[Y_ADDR_ASCAST]], align 1
// CHECK-NEXT: [[AGG_RESULT_ASCAST1:%.*]] = addrspacecast ptr addrspace(5) [[AGG_RESULT]] to ptr
// CHECK-NEXT: [[ADD_PTR2:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT_ASCAST1]], i64 2
// CHECK-NEXT: store i8 [[TMP1]], ptr [[ADD_PTR2]], align 1
// CHECK-NEXT: [[ADD_PTR1:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT_ASCAST]], i64 2
// CHECK-NEXT: store i8 [[TMP1]], ptr [[ADD_PTR1]], align 1
// CHECK-NEXT: ret void
//
X foo(char x, char y) {
Expand Down
46 changes: 46 additions & 0 deletions clang/test/CodeGenHIP/store-addr-space.hip
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --functions "bar" --version 5
// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -emit-llvm -fcuda-is-device \
// RUN: -o - %s | FileCheck --check-prefix=AMDGCN --enable-var-scope %s

struct Foo {
unsigned long long val;
//
__attribute__((device)) inline Foo() { val = 0; }
__attribute__((device)) inline Foo(const Foo &src) { val = src.val; }
__attribute__((device)) inline Foo(const volatile Foo &src) { val = src.val; }
};

// AMDGCN-LABEL: define dso_local void @_Z3barPK3Foo(
// AMDGCN-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_FOO:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[SRC_PTR:%.*]]) #[[ATTR0:[0-9]+]] {
// AMDGCN-NEXT: [[ENTRY:.*:]]
// AMDGCN-NEXT: [[RESULT_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
// AMDGCN-NEXT: [[SRC_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN-NEXT: [[DST:%.*]] = alloca [[UNION_ANON:%.*]], align 8, addrspace(5)
// AMDGCN-NEXT: [[RESULT_PTR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RESULT_PTR]] to ptr
// AMDGCN-NEXT: [[SRC_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SRC_PTR_ADDR]] to ptr
// AMDGCN-NEXT: [[AGG_RESULT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AGG_RESULT]] to ptr
// AMDGCN-NEXT: [[DST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DST]] to ptr
// AMDGCN-NEXT: store ptr addrspace(5) [[AGG_RESULT]], ptr [[RESULT_PTR_ASCAST]], align 4
// AMDGCN-NEXT: store ptr [[SRC_PTR]], ptr [[SRC_PTR_ADDR_ASCAST]], align 8
// AMDGCN-NEXT: call void @_ZN3FooC1Ev(ptr noundef nonnull align 8 dereferenceable(8) [[AGG_RESULT_ASCAST]]) #[[ATTR1:[0-9]+]]
// AMDGCN-NEXT: store ptr [[AGG_RESULT_ASCAST]], ptr [[DST_ASCAST]], align 8
// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SRC_PTR_ADDR_ASCAST]], align 8
// AMDGCN-NEXT: [[VAL:%.*]] = getelementptr inbounds nuw [[STRUCT_FOO]], ptr [[TMP0]], i32 0, i32 0
// AMDGCN-NEXT: [[TMP1:%.*]] = load i64, ptr [[VAL]], align 8
// AMDGCN-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DST_ASCAST]], align 8
// AMDGCN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 0
// AMDGCN-NEXT: store i64 [[TMP1]], ptr [[ARRAYIDX]], align 8
// AMDGCN-NEXT: ret void
//
__attribute__((device)) Foo bar(const Foo *const src_ptr) {
Foo result;

union {
Foo* const ptr;
unsigned long long * const ptr64;
} dst = {&result};

dst.ptr64[0] = src_ptr->val;
return result;
}
1 change: 1 addition & 0 deletions clang/test/CodeGenOpenCL/addr-space-struct-arg.cl
Original file line number Diff line number Diff line change
Expand Up @@ -647,6 +647,7 @@ kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) {
// AMDGCN20-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4, addrspace(5)
// AMDGCN20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN20-NEXT: [[IN1:%.*]] = addrspacecast ptr addrspace(5) [[IN]] to ptr
// AMDGCN20-NEXT: [[RETVAL_ASCAST_ASCAST:%.*]] = addrspacecast ptr [[RETVAL_ASCAST]] to ptr addrspace(5)
// AMDGCN20-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr [[IN1]], i32 0, i32 0
// AMDGCN20-NEXT: store [9 x i32] [[IN_COERCE]], ptr [[COERCE_DIVE]], align 4
// AMDGCN20-NEXT: [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr [[RETVAL_ASCAST]], align 4
Expand Down
1 change: 1 addition & 0 deletions clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) {
// AMDGCN-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4, addrspace(5)
// AMDGCN-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN-NEXT: [[IN1:%.*]] = addrspacecast ptr addrspace(5) [[IN]] to ptr
// AMDGCN-NEXT: [[RETVAL_ASCAST_ASCAST:%.*]] = addrspacecast ptr [[RETVAL_ASCAST]] to ptr addrspace(5)
// AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr [[IN1]], i32 0, i32 0
// AMDGCN-NEXT: store [9 x i32] [[IN_COERCE]], ptr [[COERCE_DIVE]], align 4
// AMDGCN-NEXT: [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr [[RETVAL_ASCAST]], align 4
Expand Down