Skip to content
Merged
6 changes: 3 additions & 3 deletions clang/lib/Basic/Targets/AMDGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -260,9 +260,9 @@ AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
void AMDGPUTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) {
TargetInfo::adjust(Diags, Opts);
// ToDo: There are still a few places using default address space as private
// address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
// can be removed from the following line.
setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
// address space in OpenCL, which needs to be cleaned up, then the references
// to OpenCL can be removed from the following line.
setAddressSpaceMap((Opts.OpenCL && !Opts.OpenCLGenericAddressSpace) ||
!isAMDGCN(getTriple()));
}

Expand Down
3 changes: 2 additions & 1 deletion clang/lib/CodeGen/CGBlocks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1397,7 +1397,8 @@ void CodeGenFunction::setBlockContextParameter(const ImplicitParamDecl *D,
DI->setLocation(D->getLocation());
DI->EmitDeclareOfBlockLiteralArgVariable(
*BlockInfo, D->getName(), argNum,
cast<llvm::AllocaInst>(alloc.getPointer()), Builder);
cast<llvm::AllocaInst>(alloc.getPointer()->stripPointerCasts()),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would hope alloc would be the direct alloca reference to begin with

Builder);
}
}

Expand Down
11 changes: 9 additions & 2 deletions clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5857,8 +5857,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
/*IndexTypeQuals=*/0);
auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes");
llvm::Value *TmpPtr = Tmp.getPointer();
// The EmitLifetime* pair expect a naked Alloca as their last argument,
// however for cases where the default AS is not the Alloca AS, Tmp is
// actually the Alloca ascasted to the default AS, hence the
// stripPointerCasts()
llvm::Value *Alloca = TmpPtr->stripPointerCasts();
llvm::Value *TmpSize = EmitLifetimeStart(
CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), TmpPtr);
CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), Alloca);
llvm::Value *ElemPtr;
// Each of the following arguments specifies the size of the corresponding
// argument passed to the enqueued block.
Expand All @@ -5874,7 +5879,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
Builder.CreateAlignedStore(
V, GEP, CGM.getDataLayout().getPrefTypeAlign(SizeTy));
}
return std::tie(ElemPtr, TmpSize, TmpPtr);
// Return the Alloca itself rather than a potential ascast as this is only
// used by the paired EmitLifetimeEnd.
return std::tie(ElemPtr, TmpSize, Alloca);
};

// Could have events and/or varargs.
Expand Down
1,521 changes: 1,440 additions & 81 deletions clang/test/CodeGenOpenCL/addr-space-struct-arg.cl

Large diffs are not rendered by default.

126 changes: 85 additions & 41 deletions clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl
Original file line number Diff line number Diff line change
@@ -1,67 +1,111 @@
// RUN: %clang_cc1 -O0 -cl-std=CL1.2 -triple amdgcn---amdgizcl -emit-llvm %s -o - | FileCheck -check-prefixes=CHECK,CL12 %s
// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn---amdgizcl -emit-llvm %s -o - | FileCheck -check-prefixes=CHECK,CL20 %s
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 -O0 -cl-std=CL1.2 -triple amdgcn---amdgizcl -emit-llvm %s -o - | FileCheck -check-prefixes=CL12 %s
// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn---amdgizcl -emit-llvm %s -o - | FileCheck -check-prefixes=CL20 %s

// CL12-LABEL: define{{.*}} void @func1(ptr addrspace(5) noundef %x)
// CL20-LABEL: define{{.*}} void @func1(ptr noundef %x)
// CL12-LABEL: define dso_local void @func1(
// CL12-SAME: ptr addrspace(5) noundef [[X:%.*]]) #[[ATTR0:[0-9]+]] {
// CL12-NEXT: [[ENTRY:.*:]]
// CL12-NEXT: [[X_ADDR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
// CL12-NEXT: store ptr addrspace(5) [[X]], ptr addrspace(5) [[X_ADDR]], align 4
// CL12-NEXT: [[TMP0:%.*]] = load ptr addrspace(5), ptr addrspace(5) [[X_ADDR]], align 4
// CL12-NEXT: store i32 1, ptr addrspace(5) [[TMP0]], align 4
// CL12-NEXT: ret void
//
// CL20-LABEL: define dso_local void @func1(
// CL20-SAME: ptr noundef [[X:%.*]]) #[[ATTR0:[0-9]+]] {
// CL20-NEXT: [[ENTRY:.*:]]
// CL20-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// CL20-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr
// CL20-NEXT: store ptr [[X]], ptr [[X_ADDR_ASCAST]], align 8
// CL20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR_ASCAST]], align 8
// CL20-NEXT: store i32 1, ptr [[TMP0]], align 4
// CL20-NEXT: ret void
//
void func1(int *x) {
// CL12: %[[x_addr:.*]] = alloca ptr addrspace(5){{.*}}addrspace(5)
// CL12: store ptr addrspace(5) %x, ptr addrspace(5) %[[x_addr]]
// CL12: %[[r0:.*]] = load ptr addrspace(5), ptr addrspace(5) %[[x_addr]]
// CL12: store i32 1, ptr addrspace(5) %[[r0]]
// CL20: %[[x_addr:.*]] = alloca ptr{{.*}}addrspace(5)
// CL20: store ptr %x, ptr addrspace(5) %[[x_addr]]
// CL20: %[[r0:.*]] = load ptr, ptr addrspace(5) %[[x_addr]]
// CL20: store i32 1, ptr %[[r0]]
*x = 1;
}

// CHECK-LABEL: define{{.*}} void @func2()
// CL12-LABEL: define dso_local void @func2(
// CL12-SAME: ) #[[ATTR0]] {
// CL12-NEXT: [[ENTRY:.*:]]
// CL12-NEXT: [[LV1:%.*]] = alloca i32, align 4, addrspace(5)
// CL12-NEXT: [[LV2:%.*]] = alloca i32, align 4, addrspace(5)
// CL12-NEXT: [[LA:%.*]] = alloca [100 x i32], align 4, addrspace(5)
// CL12-NEXT: [[LP1:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
// CL12-NEXT: [[LP2:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
// CL12-NEXT: [[LVC:%.*]] = alloca i32, align 4, addrspace(5)
// CL12-NEXT: store i32 1, ptr addrspace(5) [[LV1]], align 4
// CL12-NEXT: store i32 2, ptr addrspace(5) [[LV2]], align 4
// CL12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr addrspace(5) [[LA]], i64 0, i64 0
// CL12-NEXT: store i32 3, ptr addrspace(5) [[ARRAYIDX]], align 4
// CL12-NEXT: store ptr addrspace(5) [[LV1]], ptr addrspace(5) [[LP1]], align 4
// CL12-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], ptr addrspace(5) [[LA]], i64 0, i64 0
// CL12-NEXT: store ptr addrspace(5) [[ARRAYDECAY]], ptr addrspace(5) [[LP2]], align 4
// CL12-NEXT: call void @func1(ptr addrspace(5) noundef [[LV1]]) #[[ATTR2:[0-9]+]]
// CL12-NEXT: store i32 4, ptr addrspace(5) [[LVC]], align 4
// CL12-NEXT: store i32 4, ptr addrspace(5) [[LV1]], align 4
// CL12-NEXT: ret void
//
// CL20-LABEL: define dso_local void @func2(
// CL20-SAME: ) #[[ATTR0]] {
// CL20-NEXT: [[ENTRY:.*:]]
// CL20-NEXT: [[LV1:%.*]] = alloca i32, align 4, addrspace(5)
// CL20-NEXT: [[LV2:%.*]] = alloca i32, align 4, addrspace(5)
// CL20-NEXT: [[LA:%.*]] = alloca [100 x i32], align 4, addrspace(5)
// CL20-NEXT: [[LP1:%.*]] = alloca ptr, align 8, addrspace(5)
// CL20-NEXT: [[LP2:%.*]] = alloca ptr, align 8, addrspace(5)
// CL20-NEXT: [[LVC:%.*]] = alloca i32, align 4, addrspace(5)
// CL20-NEXT: [[LV1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LV1]] to ptr
// CL20-NEXT: [[LV2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LV2]] to ptr
// CL20-NEXT: [[LA_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LA]] to ptr
// CL20-NEXT: [[LP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LP1]] to ptr
// CL20-NEXT: [[LP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LP2]] to ptr
// CL20-NEXT: [[LVC_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LVC]] to ptr
// CL20-NEXT: store i32 1, ptr [[LV1_ASCAST]], align 4
// CL20-NEXT: store i32 2, ptr [[LV2_ASCAST]], align 4
// CL20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[LA_ASCAST]], i64 0, i64 0
// CL20-NEXT: store i32 3, ptr [[ARRAYIDX]], align 4
// CL20-NEXT: store ptr [[LV1_ASCAST]], ptr [[LP1_ASCAST]], align 8
// CL20-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], ptr [[LA_ASCAST]], i64 0, i64 0
// CL20-NEXT: store ptr [[ARRAYDECAY]], ptr [[LP2_ASCAST]], align 8
// CL20-NEXT: call void @func1(ptr noundef [[LV1_ASCAST]]) #[[ATTR2:[0-9]+]]
// CL20-NEXT: store i32 4, ptr [[LVC_ASCAST]], align 4
// CL20-NEXT: store i32 4, ptr [[LV1_ASCAST]], align 4
// CL20-NEXT: ret void
//
void func2(void) {
// CHECK: %lv1 = alloca i32, align 4, addrspace(5)
// CHECK: %lv2 = alloca i32, align 4, addrspace(5)
// CHECK: %la = alloca [100 x i32], align 4, addrspace(5)
// CL12: %lp1 = alloca ptr addrspace(5), align 4, addrspace(5)
// CL12: %lp2 = alloca ptr addrspace(5), align 4, addrspace(5)
// CL20: %lp1 = alloca ptr, align 8, addrspace(5)
// CL20: %lp2 = alloca ptr, align 8, addrspace(5)
// CHECK: %lvc = alloca i32, align 4, addrspace(5)

// CHECK: store i32 1, ptr addrspace(5) %lv1
int lv1;
lv1 = 1;
// CHECK: store i32 2, ptr addrspace(5) %lv2
int lv2 = 2;

// CHECK: %[[arrayidx:.*]] = getelementptr inbounds [100 x i32], ptr addrspace(5) %la, i64 0, i64 0
// CHECK: store i32 3, ptr addrspace(5) %[[arrayidx]], align 4
int la[100];
la[0] = 3;

// CL12: store ptr addrspace(5) %lv1, ptr addrspace(5) %lp1, align 4
// CL20: %[[r0:.*]] = addrspacecast ptr addrspace(5) %lv1 to ptr
// CL20: store ptr %[[r0]], ptr addrspace(5) %lp1, align 8
int *lp1 = &lv1;

// CHECK: %[[arraydecay:.*]] = getelementptr inbounds [100 x i32], ptr addrspace(5) %la, i64 0, i64 0
// CL12: store ptr addrspace(5) %[[arraydecay]], ptr addrspace(5) %lp2, align 4
// CL20: %[[r1:.*]] = addrspacecast ptr addrspace(5) %[[arraydecay]] to ptr
// CL20: store ptr %[[r1]], ptr addrspace(5) %lp2, align 8
int *lp2 = la;

// CL12: call void @func1(ptr addrspace(5) noundef %lv1)
// CL20: %[[r2:.*]] = addrspacecast ptr addrspace(5) %lv1 to ptr
// CL20: call void @func1(ptr noundef %[[r2]])
func1(&lv1);

// CHECK: store i32 4, ptr addrspace(5) %lvc
// CHECK: store i32 4, ptr addrspace(5) %lv1
const int lvc = 4;
lv1 = lvc;
}

// CHECK-LABEL: define{{.*}} void @func3()
// CHECK: %a = alloca [16 x [1 x float]], align 4, addrspace(5)
// CHECK: call void @llvm.memset.p5.i64(ptr addrspace(5) align 4 %a, i8 0, i64 64, i1 false)
// CL12-LABEL: define dso_local void @func3(
// CL12-SAME: ) #[[ATTR0]] {
// CL12-NEXT: [[ENTRY:.*:]]
// CL12-NEXT: [[A:%.*]] = alloca [16 x [1 x float]], align 4, addrspace(5)
// CL12-NEXT: call void @llvm.memset.p5.i64(ptr addrspace(5) align 4 [[A]], i8 0, i64 64, i1 false)
// CL12-NEXT: ret void
//
// CL20-LABEL: define dso_local void @func3(
// CL20-SAME: ) #[[ATTR0]] {
// CL20-NEXT: [[ENTRY:.*:]]
// CL20-NEXT: [[A:%.*]] = alloca [16 x [1 x float]], align 4, addrspace(5)
// CL20-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A]] to ptr
// CL20-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[A_ASCAST]], i8 0, i64 64, i1 false)
// CL20-NEXT: ret void
//
void func3(void) {
float a[16][1] = {{0.}};
}
Loading
Loading