|
| 1 | +From 39a3ac0065c23d1e2d55dfd8792cc28a146a4307 Mon Sep 17 00:00:00 2001 |
| 2 | +From: Alexey Bader < [email protected]> |
| 3 | +Date: Tue, 19 Feb 2019 15:19:06 +0000 |
| 4 | +Subject: [PATCH 1/2] [OpenCL] Change type of block pointer for OpenCL |
| 5 | + |
| 6 | +Summary: |
| 7 | + |
| 8 | +For some reason OpenCL blocks in LLVM IR are represented as function pointers. |
| 9 | +These pointers do not point to any real function and never get called. Actually |
| 10 | +they point to some structure, which in turn contains a pointer to the real block |
| 11 | +invoke function. |
| 12 | +This patch changes the representation of OpenCL blocks in LLVM IR from function |
| 13 | +pointers to pointers to `%struct.__opencl_block_literal_generic`. |
| 14 | +Such a representation avoids unnecessary bitcasts and simplifies |
| 15 | +further processing (e.g. translation to SPIR-V) of the module for targets |
| 16 | +which do not support function pointers. |
| 17 | + |
| 18 | +Patch by: Alexey Sotkin. |
| 19 | + |
| 20 | +Reviewers: Anastasia, yaxunl, svenvh |
| 21 | + |
| 22 | +Reviewed By: Anastasia |
| 23 | + |
| 24 | +Subscribers: alexbatashev, cfe-commits |
| 25 | + |
| 26 | +Tags: #clang |
| 27 | + |
| 28 | +Differential Revision: https://reviews.llvm.org/D58277 |
| 29 | + |
| 30 | +git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@354337 91177308-0d34-0410-b5e6-96231b3b80d8 |
| 31 | +--- |
| 32 | + lib/CodeGen/CodeGenTypes.cpp | 4 +++- |
| 33 | + test/CodeGenOpenCL/blocks.cl | 18 ++++++++---------- |
| 34 | + test/CodeGenOpenCL/cl20-device-side-enqueue.cl | 18 +++++++++--------- |
| 35 | + 3 files changed, 20 insertions(+), 20 deletions(-) |
| 36 | + |
| 37 | +diff --git a/lib/CodeGen/CodeGenTypes.cpp b/lib/CodeGen/CodeGenTypes.cpp |
| 38 | +index 2acf1ac..93b3ebf 100644 |
| 39 | +--- a/lib/CodeGen/CodeGenTypes.cpp |
| 40 | ++++ b/lib/CodeGen/CodeGenTypes.cpp |
| 41 | +@@ -637,7 +637,9 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { |
| 42 | + |
| 43 | + case Type::BlockPointer: { |
| 44 | + const QualType FTy = cast<BlockPointerType>(Ty)->getPointeeType(); |
| 45 | +- llvm::Type *PointeeType = ConvertTypeForMem(FTy); |
| 46 | ++ llvm::Type *PointeeType = CGM.getLangOpts().OpenCL |
| 47 | ++ ? CGM.getGenericBlockLiteralType() |
| 48 | ++ : ConvertTypeForMem(FTy); |
| 49 | + unsigned AS = Context.getTargetAddressSpace(FTy); |
| 50 | + ResultType = llvm::PointerType::get(PointeeType, AS); |
| 51 | + break; |
| 52 | +diff --git a/test/CodeGenOpenCL/blocks.cl b/test/CodeGenOpenCL/blocks.cl |
| 53 | +index 675240c..19aacc3 100644 |
| 54 | +--- a/test/CodeGenOpenCL/blocks.cl |
| 55 | ++++ b/test/CodeGenOpenCL/blocks.cl |
| 56 | +@@ -35,11 +35,10 @@ void foo(){ |
| 57 | + // SPIR: %[[block_captured:.*]] = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 }>, <{ i32, i32, i8 addrspace(4)*, i32 }>* %[[block]], i32 0, i32 3 |
| 58 | + // SPIR: %[[i_value:.*]] = load i32, i32* %i |
| 59 | + // SPIR: store i32 %[[i_value]], i32* %[[block_captured]], |
| 60 | +- // SPIR: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 }>* %[[block]] to i32 ()* |
| 61 | +- // SPIR: %[[blk_gen_ptr:.*]] = addrspacecast i32 ()* %[[blk_ptr]] to i32 () addrspace(4)* |
| 62 | +- // SPIR: store i32 () addrspace(4)* %[[blk_gen_ptr]], i32 () addrspace(4)** %[[block_B:.*]], |
| 63 | +- // SPIR: %[[blk_gen_ptr:.*]] = load i32 () addrspace(4)*, i32 () addrspace(4)** %[[block_B]] |
| 64 | +- // SPIR: %[[block_literal:.*]] = bitcast i32 () addrspace(4)* %[[blk_gen_ptr]] to %struct.__opencl_block_literal_generic addrspace(4)* |
| 65 | ++ // SPIR: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 }>* %[[block]] to %struct.__opencl_block_literal_generic* |
| 66 | ++ // SPIR: %[[blk_gen_ptr:.*]] = addrspacecast %struct.__opencl_block_literal_generic* %[[blk_ptr]] to %struct.__opencl_block_literal_generic addrspace(4)* |
| 67 | ++ // SPIR: store %struct.__opencl_block_literal_generic addrspace(4)* %[[blk_gen_ptr]], %struct.__opencl_block_literal_generic addrspace(4)** %[[block_B:.*]], |
| 68 | ++ // SPIR: %[[block_literal:.*]] = load %struct.__opencl_block_literal_generic addrspace(4)*, %struct.__opencl_block_literal_generic addrspace(4)** %[[block_B]] |
| 69 | + // SPIR: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]], i32 0, i32 2 |
| 70 | + // SPIR: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]] to i8 addrspace(4)* |
| 71 | + // SPIR: %[[invoke_func_ptr:.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %[[invoke_addr]] |
| 72 | +@@ -50,11 +49,10 @@ void foo(){ |
| 73 | + // AMDGCN: %[[block_captured:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]], i32 0, i32 3 |
| 74 | + // AMDGCN: %[[i_value:.*]] = load i32, i32 addrspace(5)* %i |
| 75 | + // AMDGCN: store i32 %[[i_value]], i32 addrspace(5)* %[[block_captured]], |
| 76 | +- // AMDGCN: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]] to i32 () addrspace(5)* |
| 77 | +- // AMDGCN: %[[blk_gen_ptr:.*]] = addrspacecast i32 () addrspace(5)* %[[blk_ptr]] to i32 ()* |
| 78 | +- // AMDGCN: store i32 ()* %[[blk_gen_ptr]], i32 ()* addrspace(5)* %[[block_B:.*]], |
| 79 | +- // AMDGCN: %[[blk_gen_ptr:.*]] = load i32 ()*, i32 ()* addrspace(5)* %[[block_B]] |
| 80 | +- // AMDGCN: %[[block_literal:.*]] = bitcast i32 ()* %[[blk_gen_ptr]] to %struct.__opencl_block_literal_generic* |
| 81 | ++ // AMDGCN: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]] to %struct.__opencl_block_literal_generic addrspace(5)* |
| 82 | ++ // AMDGCN: %[[blk_gen_ptr:.*]] = addrspacecast %struct.__opencl_block_literal_generic addrspace(5)* %[[blk_ptr]] to %struct.__opencl_block_literal_generic* |
| 83 | ++ // AMDGCN: store %struct.__opencl_block_literal_generic* %[[blk_gen_ptr]], %struct.__opencl_block_literal_generic* addrspace(5)* %[[block_B:.*]], |
| 84 | ++ // AMDGCN: %[[block_literal:.*]] = load %struct.__opencl_block_literal_generic*, %struct.__opencl_block_literal_generic* addrspace(5)* %[[block_B]] |
| 85 | + // AMDGCN: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic* %[[block_literal]], i32 0, i32 2 |
| 86 | + // AMDGCN: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic* %[[block_literal]] to i8* |
| 87 | + // AMDGCN: %[[invoke_func_ptr:.*]] = load i8*, i8** %[[invoke_addr]] |
| 88 | +diff --git a/test/CodeGenOpenCL/cl20-device-side-enqueue.cl b/test/CodeGenOpenCL/cl20-device-side-enqueue.cl |
| 89 | +index 4732194..8445016 100644 |
| 90 | +--- a/test/CodeGenOpenCL/cl20-device-side-enqueue.cl |
| 91 | ++++ b/test/CodeGenOpenCL/cl20-device-side-enqueue.cl |
| 92 | +@@ -11,7 +11,7 @@ typedef struct {int a;} ndrange_t; |
| 93 | + |
| 94 | + // For a block global variable, first emit the block literal as a global variable, then emit the block variable itself. |
| 95 | + // COMMON: [[BL_GLOBAL:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* [[INV_G:@[^ ]+]] to i8*) to i8 addrspace(4)*) } |
| 96 | +-// COMMON: @block_G = addrspace(1) constant void (i8 addrspace(3)*) addrspace(4)* addrspacecast (void (i8 addrspace(3)*) addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to void (i8 addrspace(3)*) addrspace(1)*) to void (i8 addrspace(3)*) addrspace(4)*) |
| 97 | ++// COMMON: @block_G = addrspace(1) constant %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*) |
| 98 | + |
| 99 | + // For anonymous blocks without captures, emit block literals as global variable. |
| 100 | + // COMMON: [[BLG1:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* {{@[^ ]+}} to i8*) to i8 addrspace(4)*) } |
| 101 | +@@ -77,9 +77,9 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) { |
| 102 | + // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue |
| 103 | + // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags |
| 104 | + // COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL1:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke |
| 105 | +- // B32: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block to void ()* |
| 106 | +- // B64: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 }>* %block to void ()* |
| 107 | +- // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* [[BL]] to i8 addrspace(4)* |
| 108 | ++ // B32: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block to %struct.__opencl_block_literal_generic* |
| 109 | ++ // B64: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 }>* %block to %struct.__opencl_block_literal_generic* |
| 110 | ++ // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* [[BL]] to i8 addrspace(4)* |
| 111 | + // COMMON-LABEL: call i32 @__enqueue_kernel_basic( |
| 112 | + // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval [[NDR]]{{([0-9]+)?}}, |
| 113 | + // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVLK1:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), |
| 114 | +@@ -95,8 +95,8 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) { |
| 115 | + // COMMON: [[WAIT_EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** %event_wait_list to %opencl.clk_event_t{{.*}}* addrspace(4)* |
| 116 | + // COMMON: [[EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** %clk_event to %opencl.clk_event_t{{.*}}* addrspace(4)* |
| 117 | + // COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL2:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke |
| 118 | +- // COMMON: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32{{.*}}, i32{{.*}}, i32{{.*}} }>* %block3 to void ()* |
| 119 | +- // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* [[BL]] to i8 addrspace(4)* |
| 120 | ++ // COMMON: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32{{.*}}, i32{{.*}}, i32{{.*}} }>* %block3 to %struct.__opencl_block_literal_generic* |
| 121 | ++ // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* [[BL]] to i8 addrspace(4)* |
| 122 | + // COMMON-LABEL: call i32 @__enqueue_kernel_basic_events |
| 123 | + // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}}* addrspace(4)* [[WAIT_EVNT]], %opencl.clk_event_t{{.*}}* addrspace(4)* [[EVNT]], |
| 124 | + // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVLK2:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), |
| 125 | +@@ -300,13 +300,13 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) { |
| 126 | + // Emits global block literal [[BLG8]] and invoke function [[INVG8]]. |
| 127 | + // The full type of these expressions are long (and repeated elsewhere), so we |
| 128 | + // capture it as part of the regex for convenience and clarity. |
| 129 | +- // COMMON: store void () addrspace(4)* addrspacecast (void () addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to void () addrspace(1)*) to void () addrspace(4)*), void () addrspace(4)** %block_A |
| 130 | ++ // COMMON: store %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), %struct.__opencl_block_literal_generic addrspace(4)** %block_A |
| 131 | + void (^const block_A)(void) = ^{ |
| 132 | + return; |
| 133 | + }; |
| 134 | + |
| 135 | + // Emits global block literal [[BLG9]] and invoke function [[INVG9]]. |
| 136 | +- // COMMON: store void (i8 addrspace(3)*) addrspace(4)* addrspacecast (void (i8 addrspace(3)*) addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG9]] to void (i8 addrspace(3)*) addrspace(1)*) to void (i8 addrspace(3)*) addrspace(4)*), void (i8 addrspace(3)*) addrspace(4)** %block_B |
| 137 | ++ // COMMON: store %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG9]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), %struct.__opencl_block_literal_generic addrspace(4)** %block_B |
| 138 | + void (^const block_B)(local void *) = ^(local void *a) { |
| 139 | + return; |
| 140 | + }; |
| 141 | +@@ -346,7 +346,7 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) { |
| 142 | + // COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL3:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke |
| 143 | + // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue |
| 144 | + // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags |
| 145 | +- // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* {{.*}} to i8 addrspace(4)* |
| 146 | ++ // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* {{.*}} to i8 addrspace(4)* |
| 147 | + // COMMON-LABEL: call i32 @__enqueue_kernel_basic( |
| 148 | + // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval [[NDR]]{{([0-9]+)?}}, |
| 149 | + // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVLK3:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), |
| 150 | +-- |
| 151 | +1.8.3.1 |
| 152 | + |
0 commit comments