Skip to content

Commit ca91962

Browse files
committed
Private is a terrible default address space (AS), and it is outright wrong for OpenCL (OCL) 2.0; fix the latter.
1 parent 34cdd67 commit ca91962

20 files changed

+3355
-800
lines changed

clang/lib/Basic/Targets/AMDGPU.cpp

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -260,9 +260,9 @@ AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
260260
void AMDGPUTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) {
261261
TargetInfo::adjust(Diags, Opts);
262262
// ToDo: There are still a few places using default address space as private
263-
// address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
264-
// can be removed from the following line.
265-
setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
263+
// address space in OpenCL, which needs to be cleaned up, then the references
264+
// to OpenCL can be removed from the following line.
265+
setAddressSpaceMap((Opts.OpenCL && !Opts.OpenCLGenericAddressSpace) ||
266266
!isAMDGCN(getTriple()));
267267
}
268268

clang/lib/CodeGen/CGBlocks.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1397,7 +1397,8 @@ void CodeGenFunction::setBlockContextParameter(const ImplicitParamDecl *D,
13971397
DI->setLocation(D->getLocation());
13981398
DI->EmitDeclareOfBlockLiteralArgVariable(
13991399
*BlockInfo, D->getName(), argNum,
1400-
cast<llvm::AllocaInst>(alloc.getPointer()), Builder);
1400+
cast<llvm::AllocaInst>(alloc.getPointer()->stripPointerCasts()),
1401+
Builder);
14011402
}
14021403
}
14031404

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5858,7 +5858,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
58585858
auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes");
58595859
llvm::Value *TmpPtr = Tmp.getPointer();
58605860
llvm::Value *TmpSize = EmitLifetimeStart(
5861-
CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), TmpPtr);
5861+
CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()),
5862+
TmpPtr->stripPointerCasts());
58625863
llvm::Value *ElemPtr;
58635864
// Each of the following arguments specifies the size of the corresponding
58645865
// argument passed to the enqueued block.
@@ -5903,7 +5904,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
59035904
auto Call = RValue::get(
59045905
EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Args));
59055906
if (TmpSize)
5906-
EmitLifetimeEnd(TmpSize, TmpPtr);
5907+
EmitLifetimeEnd(TmpSize, TmpPtr->stripPointerCasts());
59075908
return Call;
59085909
}
59095910
// Any calls now have event arguments passed.

clang/test/CodeGenOpenCL/addr-space-struct-arg.cl

Lines changed: 1440 additions & 81 deletions
Large diffs are not rendered by default.
Lines changed: 85 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -1,67 +1,111 @@
1-
// RUN: %clang_cc1 -O0 -cl-std=CL1.2 -triple amdgcn---amdgizcl -emit-llvm %s -o - | FileCheck -check-prefixes=CHECK,CL12 %s
2-
// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn---amdgizcl -emit-llvm %s -o - | FileCheck -check-prefixes=CHECK,CL20 %s
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
2+
// RUN: %clang_cc1 -O0 -cl-std=CL1.2 -triple amdgcn---amdgizcl -emit-llvm %s -o - | FileCheck -check-prefixes=CL12 %s
3+
// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn---amdgizcl -emit-llvm %s -o - | FileCheck -check-prefixes=CL20 %s
34

4-
// CL12-LABEL: define{{.*}} void @func1(ptr addrspace(5) noundef %x)
5-
// CL20-LABEL: define{{.*}} void @func1(ptr noundef %x)
5+
// CL12-LABEL: define dso_local void @func1(
6+
// CL12-SAME: ptr addrspace(5) noundef [[X:%.*]]) #[[ATTR0:[0-9]+]] {
7+
// CL12-NEXT: [[ENTRY:.*:]]
8+
// CL12-NEXT: [[X_ADDR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
9+
// CL12-NEXT: store ptr addrspace(5) [[X]], ptr addrspace(5) [[X_ADDR]], align 4
10+
// CL12-NEXT: [[TMP0:%.*]] = load ptr addrspace(5), ptr addrspace(5) [[X_ADDR]], align 4
11+
// CL12-NEXT: store i32 1, ptr addrspace(5) [[TMP0]], align 4
12+
// CL12-NEXT: ret void
13+
//
14+
// CL20-LABEL: define dso_local void @func1(
15+
// CL20-SAME: ptr noundef [[X:%.*]]) #[[ATTR0:[0-9]+]] {
16+
// CL20-NEXT: [[ENTRY:.*:]]
17+
// CL20-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
18+
// CL20-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr
19+
// CL20-NEXT: store ptr [[X]], ptr [[X_ADDR_ASCAST]], align 8
20+
// CL20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR_ASCAST]], align 8
21+
// CL20-NEXT: store i32 1, ptr [[TMP0]], align 4
22+
// CL20-NEXT: ret void
23+
//
624
void func1(int *x) {
7-
// CL12: %[[x_addr:.*]] = alloca ptr addrspace(5){{.*}}addrspace(5)
8-
// CL12: store ptr addrspace(5) %x, ptr addrspace(5) %[[x_addr]]
9-
// CL12: %[[r0:.*]] = load ptr addrspace(5), ptr addrspace(5) %[[x_addr]]
10-
// CL12: store i32 1, ptr addrspace(5) %[[r0]]
11-
// CL20: %[[x_addr:.*]] = alloca ptr{{.*}}addrspace(5)
12-
// CL20: store ptr %x, ptr addrspace(5) %[[x_addr]]
13-
// CL20: %[[r0:.*]] = load ptr, ptr addrspace(5) %[[x_addr]]
14-
// CL20: store i32 1, ptr %[[r0]]
1525
*x = 1;
1626
}
1727

18-
// CHECK-LABEL: define{{.*}} void @func2()
28+
// CL12-LABEL: define dso_local void @func2(
29+
// CL12-SAME: ) #[[ATTR0]] {
30+
// CL12-NEXT: [[ENTRY:.*:]]
31+
// CL12-NEXT: [[LV1:%.*]] = alloca i32, align 4, addrspace(5)
32+
// CL12-NEXT: [[LV2:%.*]] = alloca i32, align 4, addrspace(5)
33+
// CL12-NEXT: [[LA:%.*]] = alloca [100 x i32], align 4, addrspace(5)
34+
// CL12-NEXT: [[LP1:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
35+
// CL12-NEXT: [[LP2:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
36+
// CL12-NEXT: [[LVC:%.*]] = alloca i32, align 4, addrspace(5)
37+
// CL12-NEXT: store i32 1, ptr addrspace(5) [[LV1]], align 4
38+
// CL12-NEXT: store i32 2, ptr addrspace(5) [[LV2]], align 4
39+
// CL12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr addrspace(5) [[LA]], i64 0, i64 0
40+
// CL12-NEXT: store i32 3, ptr addrspace(5) [[ARRAYIDX]], align 4
41+
// CL12-NEXT: store ptr addrspace(5) [[LV1]], ptr addrspace(5) [[LP1]], align 4
42+
// CL12-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], ptr addrspace(5) [[LA]], i64 0, i64 0
43+
// CL12-NEXT: store ptr addrspace(5) [[ARRAYDECAY]], ptr addrspace(5) [[LP2]], align 4
44+
// CL12-NEXT: call void @func1(ptr addrspace(5) noundef [[LV1]]) #[[ATTR2:[0-9]+]]
45+
// CL12-NEXT: store i32 4, ptr addrspace(5) [[LVC]], align 4
46+
// CL12-NEXT: store i32 4, ptr addrspace(5) [[LV1]], align 4
47+
// CL12-NEXT: ret void
48+
//
49+
// CL20-LABEL: define dso_local void @func2(
50+
// CL20-SAME: ) #[[ATTR0]] {
51+
// CL20-NEXT: [[ENTRY:.*:]]
52+
// CL20-NEXT: [[LV1:%.*]] = alloca i32, align 4, addrspace(5)
53+
// CL20-NEXT: [[LV2:%.*]] = alloca i32, align 4, addrspace(5)
54+
// CL20-NEXT: [[LA:%.*]] = alloca [100 x i32], align 4, addrspace(5)
55+
// CL20-NEXT: [[LP1:%.*]] = alloca ptr, align 8, addrspace(5)
56+
// CL20-NEXT: [[LP2:%.*]] = alloca ptr, align 8, addrspace(5)
57+
// CL20-NEXT: [[LVC:%.*]] = alloca i32, align 4, addrspace(5)
58+
// CL20-NEXT: [[LV1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LV1]] to ptr
59+
// CL20-NEXT: [[LV2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LV2]] to ptr
60+
// CL20-NEXT: [[LA_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LA]] to ptr
61+
// CL20-NEXT: [[LP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LP1]] to ptr
62+
// CL20-NEXT: [[LP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LP2]] to ptr
63+
// CL20-NEXT: [[LVC_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LVC]] to ptr
64+
// CL20-NEXT: store i32 1, ptr [[LV1_ASCAST]], align 4
65+
// CL20-NEXT: store i32 2, ptr [[LV2_ASCAST]], align 4
66+
// CL20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[LA_ASCAST]], i64 0, i64 0
67+
// CL20-NEXT: store i32 3, ptr [[ARRAYIDX]], align 4
68+
// CL20-NEXT: store ptr [[LV1_ASCAST]], ptr [[LP1_ASCAST]], align 8
69+
// CL20-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], ptr [[LA_ASCAST]], i64 0, i64 0
70+
// CL20-NEXT: store ptr [[ARRAYDECAY]], ptr [[LP2_ASCAST]], align 8
71+
// CL20-NEXT: call void @func1(ptr noundef [[LV1_ASCAST]]) #[[ATTR2:[0-9]+]]
72+
// CL20-NEXT: store i32 4, ptr [[LVC_ASCAST]], align 4
73+
// CL20-NEXT: store i32 4, ptr [[LV1_ASCAST]], align 4
74+
// CL20-NEXT: ret void
75+
//
1976
void func2(void) {
20-
// CHECK: %lv1 = alloca i32, align 4, addrspace(5)
21-
// CHECK: %lv2 = alloca i32, align 4, addrspace(5)
22-
// CHECK: %la = alloca [100 x i32], align 4, addrspace(5)
23-
// CL12: %lp1 = alloca ptr addrspace(5), align 4, addrspace(5)
24-
// CL12: %lp2 = alloca ptr addrspace(5), align 4, addrspace(5)
25-
// CL20: %lp1 = alloca ptr, align 8, addrspace(5)
26-
// CL20: %lp2 = alloca ptr, align 8, addrspace(5)
27-
// CHECK: %lvc = alloca i32, align 4, addrspace(5)
28-
29-
// CHECK: store i32 1, ptr addrspace(5) %lv1
3077
int lv1;
3178
lv1 = 1;
32-
// CHECK: store i32 2, ptr addrspace(5) %lv2
3379
int lv2 = 2;
3480

35-
// CHECK: %[[arrayidx:.*]] = getelementptr inbounds [100 x i32], ptr addrspace(5) %la, i64 0, i64 0
36-
// CHECK: store i32 3, ptr addrspace(5) %[[arrayidx]], align 4
3781
int la[100];
3882
la[0] = 3;
3983

40-
// CL12: store ptr addrspace(5) %lv1, ptr addrspace(5) %lp1, align 4
41-
// CL20: %[[r0:.*]] = addrspacecast ptr addrspace(5) %lv1 to ptr
42-
// CL20: store ptr %[[r0]], ptr addrspace(5) %lp1, align 8
4384
int *lp1 = &lv1;
4485

45-
// CHECK: %[[arraydecay:.*]] = getelementptr inbounds [100 x i32], ptr addrspace(5) %la, i64 0, i64 0
46-
// CL12: store ptr addrspace(5) %[[arraydecay]], ptr addrspace(5) %lp2, align 4
47-
// CL20: %[[r1:.*]] = addrspacecast ptr addrspace(5) %[[arraydecay]] to ptr
48-
// CL20: store ptr %[[r1]], ptr addrspace(5) %lp2, align 8
4986
int *lp2 = la;
5087

51-
// CL12: call void @func1(ptr addrspace(5) noundef %lv1)
52-
// CL20: %[[r2:.*]] = addrspacecast ptr addrspace(5) %lv1 to ptr
53-
// CL20: call void @func1(ptr noundef %[[r2]])
5488
func1(&lv1);
5589

56-
// CHECK: store i32 4, ptr addrspace(5) %lvc
57-
// CHECK: store i32 4, ptr addrspace(5) %lv1
5890
const int lvc = 4;
5991
lv1 = lvc;
6092
}
6193

62-
// CHECK-LABEL: define{{.*}} void @func3()
63-
// CHECK: %a = alloca [16 x [1 x float]], align 4, addrspace(5)
64-
// CHECK: call void @llvm.memset.p5.i64(ptr addrspace(5) align 4 %a, i8 0, i64 64, i1 false)
94+
// CL12-LABEL: define dso_local void @func3(
95+
// CL12-SAME: ) #[[ATTR0]] {
96+
// CL12-NEXT: [[ENTRY:.*:]]
97+
// CL12-NEXT: [[A:%.*]] = alloca [16 x [1 x float]], align 4, addrspace(5)
98+
// CL12-NEXT: call void @llvm.memset.p5.i64(ptr addrspace(5) align 4 [[A]], i8 0, i64 64, i1 false)
99+
// CL12-NEXT: ret void
100+
//
101+
// CL20-LABEL: define dso_local void @func3(
102+
// CL20-SAME: ) #[[ATTR0]] {
103+
// CL20-NEXT: [[ENTRY:.*:]]
104+
// CL20-NEXT: [[A:%.*]] = alloca [16 x [1 x float]], align 4, addrspace(5)
105+
// CL20-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A]] to ptr
106+
// CL20-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[A_ASCAST]], i8 0, i64 64, i1 false)
107+
// CL20-NEXT: ret void
108+
//
65109
void func3(void) {
66110
float a[16][1] = {{0.}};
67111
}

0 commit comments

Comments
 (0)