1- From 177cce531fd3665bb964a03db51890e0241e3e72 Mon Sep 17 00:00:00 2001
1+ From e80206b25bfc4120351bc7c42ac856d6b7257f01 Mon Sep 17 00:00:00 2001
22From: Alexey Sotkin <
[email protected] >
33Date: Thu, 21 Feb 2019 17:14:36 +0300
44Subject: [PATCH] Update LowerOpenCL pass to handle new blocks represntation in
@@ -9,8 +9,8 @@ Subject: [PATCH] Update LowerOpenCL pass to handle new blocks represntation in
99 test/global_block.ll | 71 ++++-----
1010 test/literal-struct.ll | 31 ++--
1111 test/transcoding/block_w_struct_return.ll | 47 +++---
12- test/transcoding/enqueue_kernel.ll | 237 ++++++++++++++++------------
13- 5 files changed, 235 insertions(+), 400 deletions(-)
12+ test/transcoding/enqueue_kernel.ll | 248 ++++++++++++++++- ------------
13+ 5 files changed, 235 insertions(+), 411 deletions(-)
1414
1515diff --git a/lib/SPIRV/SPIRVLowerOCLBlocks.cpp b/lib/SPIRV/SPIRVLowerOCLBlocks.cpp
1616index c80bf04..b42a4ec 100644
@@ -602,10 +602,10 @@ index a68820f..ebd2c5f 100644
602602+ !5 = !{!"int*"}
603603+ !6 = !{!""}
604604diff --git a/test/transcoding/enqueue_kernel.ll b/test/transcoding/enqueue_kernel.ll
605- index 1f0b360..761043e 100644
605+ index 23b230a..c164d37 100644
606606--- a/test/transcoding/enqueue_kernel.ll
607607+++ b/test/transcoding/enqueue_kernel.ll
608- @@ -51 ,11 +51 ,12 @@
608+ @@ -57 ,11 +57 ,12 @@
609609 ; ModuleID = 'enqueue_kernel.cl'
610610 source_filename = "enqueue_kernel.cl"
611611 target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
@@ -619,7 +619,7 @@ index 1f0b360..761043e 100644
619619
620620 ; CHECK-SPIRV: EntryPoint {{[0-9]+}} [[BlockKer1:[0-9]+]] "__device_side_enqueue_block_invoke_kernel"
621621 ; CHECK-SPIRV: EntryPoint {{[0-9]+}} [[BlockKer2:[0-9]+]] "__device_side_enqueue_block_invoke_2_kernel"
622- @@ -66 ,89 +67 ,123 @@ target triple = "spir-unknown-unknown"
622+ @@ -73 ,89 +74 ,123 @@ target triple = "spir-unknown-unknown"
623623
624624 ; CHECK-SPIRV: TypeInt [[Int32Ty:[0-9]+]] 32
625625 ; CHECK-SPIRV: TypeInt [[Int8Ty:[0-9]+]] 8
@@ -649,10 +649,10 @@ index 1f0b360..761043e 100644
649649- ; CHECK-LLVM: [[BlockTy2:%[0-9a-z\.]+]] = type <{ i32, i32, i32 addrspace(1)*, i32, i8 }>
650650- ; CHECK-LLVM: [[BlockTy3:%[0-9a-z\.]+]] = type <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>
651651- ; CHECK-LLVM: [[BlockTy4:%[0-9a-z\.]+]] = type <{ i32, i32 }>
652- + ; CHECK-LLVM: [[BlockTy1:%[0-9a-z\. ]+]] = type { i32, i32, i8 addrspace(4)* }
653- + ; CHECK-LLVM: [[BlockTy2:%[0-9a-z\. ]+]] = type <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>
654- + ; CHECK-LLVM: [[BlockTy3:%[0-9a-z\. ]+]] = type <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>
655- + ; CHECK-LLVM: [[BlockTy4:%[0-9a-z\. ]+]] = type <{ i32, i32, i8 addrspace(4)* }>
652+ + ; CHECK-LLVM: [[BlockTy1:%[0-9 ]+]] = type { i32, i32, i8 addrspace(4)* }
653+ + ; CHECK-LLVM: [[BlockTy2:%[0-9 ]+]] = type <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>
654+ + ; CHECK-LLVM: [[BlockTy3:%[0-9 ]+]] = type <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>
655+ + ; CHECK-LLVM: [[BlockTy4:%[0-9 ]+]] = type <{ i32, i32, i8 addrspace(4)* }>
656656
657657- ; CHECK-LLVM: @__block_literal_global = internal addrspace(1) constant [[BlockTy1]] { i32 8, i32 4 }, align 4
658658- ; CHECK-LLVM: @__block_literal_global.1 = internal addrspace(1) constant [[BlockTy1]] { i32 8, i32 4 }, align 4
@@ -740,7 +740,7 @@ index 1f0b360..761043e 100644
740740+ ; CHECK-LLVM: [[Block2:%[0-9]+]] = bitcast [[BlockTy2]]* %block to %struct.__opencl_block_literal_generic*
741741+ ; CHECK-LLVM: [[Block2Ptr:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* [[Block2]] to i8 addrspace(4)*
742742 ; CHECK-LLVM: [[BlockInv2:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_kernel to i8 addrspace(4)*
743- - ; CHECK-LLVM: call i32 @__enqueue_kernel_basic_events (%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 0, %opencl.clk_event_t* addrspace(4)* null, %opencl.clk_event_t* addrspace(4)* null , i8 addrspace(4)* [[BlockInv2]], i8 addrspace(4)* [[Block2]])
743+ - ; CHECK-LLVM: call i32 @__enqueue_kernel_basic (%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i8 addrspace(4)* [[BlockInv2]], i8 addrspace(4)* [[Block2]])
744744-
745745- %5 = call i32 @__enqueue_kernel_basic(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* byval %ndrange, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* %4)
746746- %6 = addrspacecast %opencl.clk_event_t** %event_wait_list to %opencl.clk_event_t* addrspace(4)*
@@ -787,7 +787,7 @@ index 1f0b360..761043e 100644
787787
788788 ; CHECK-SPIRV: PtrCastToGeneric [[EventPtrTy]] [[Event1:[0-9]+]]
789789 ; CHECK-SPIRV: PtrCastToGeneric [[EventPtrTy]] [[Event2:[0-9]+]]
790- @@ -158 ,16 +193 ,24 @@ entry:
790+ @@ -165 ,16 +200 ,24 @@ entry:
791791 ; [[ConstInt2]] [[Event1]] [[Event2]]
792792 ; [[BlockKer2]] [[BlockLit2]] [[ConstInt20]] [[ConstInt8]]
793793
@@ -821,7 +821,7 @@ index 1f0b360..761043e 100644
821821
822822 ; CHECK-SPIRV: PtrAccessChain [[Int32LocPtrTy]] [[LocalBuf31:[0-9]+]]
823823 ; CHECK-SPIRV: Bitcast {{[0-9]+}} [[BlockLit3Tmp:[0-9]+]] [[BlockGlb1:[0-9]+]]
824- @@ -182 ,14 +225 ,18 @@ entry:
824+ @@ -189 ,14 +232 ,18 @@ entry:
825825 ; CHECK-LLVM: [[BlockInv0:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_3_kernel to i8 addrspace(4)*
826826 ; CHECK-LLVM: call i32 @__enqueue_kernel_events_varargs(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t* addrspace(4)* {{.*}}, %opencl.clk_event_t* addrspace(4)* {{.*}}, i8 addrspace(4)* [[BlockInv0]], i8 addrspace(4)* [[Block0]], i32 1, i32* {{.*}})
827827
@@ -848,11 +848,22 @@ index 1f0b360..761043e 100644
848848
849849 ; CHECK-SPIRV: PtrAccessChain [[Int32LocPtrTy]] [[LocalBuf41:[0-9]+]]
850850 ; CHECK-SPIRV: PtrAccessChain [[Int32LocPtrTy]] [[LocalBuf42:[0-9]+]]
851- @@ -206,24 +253 ,27 @@ entry:
851+ @@ -213,35 +260 ,27 @@ entry:
852852 ; CHECK-LLVM: [[BlockInv1:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_4_kernel to i8 addrspace(4)*
853- ; CHECK-LLVM: call i32 @__enqueue_kernel_events_varargs (%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 0, %opencl.clk_event_t* addrspace(4)* null, %opencl.clk_event_t* addrspace(4)* null , i8 addrspace(4)* [[BlockInv1]], i8 addrspace(4)* [[Block1]], i32 3, i32* {{.*}})
853+ ; CHECK-LLVM: call i32 @__enqueue_kernel_varargs (%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i8 addrspace(4)* [[BlockInv1]], i8 addrspace(4)* [[Block1]], i32 3, i32* {{.*}})
854854
855855- %20 = call i32 @__enqueue_kernel_varargs(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* %ndrange, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_4_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global.1 to i8 addrspace(1)*) to i8 addrspace(4)*), i32 3, i32* %17)
856+ -
857+ - ; CHECK-SPIRV: PtrCastToGeneric [[Int8PtrGenTy]] [[BlockLit2:[0-9]+]]
858+ - ; CHECK-SPIRV: EnqueueKernel [[Int32Ty]] {{[0-9]+}} {{[0-9]+}} {{[0-9]+}} {{[0-9]+}}
859+ - ; [[ConstInt0]] [[EventNull]] [[Event1]]
860+ - ; [[BlockKer5]] [[BlockLit5]] [[ConstInt20]] [[ConstInt8]]
861+ -
862+ - ; CHECK-LLVM: [[BlockInv5:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_5_kernel to i8 addrspace(4)*
863+ - ; CHECK-LLVM: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 0, %opencl.clk_event_t* addrspace(4)* null, %opencl.clk_event_t* addrspace(4)* {{.*}}, i8 addrspace(4)* [[BlockInv5]], i8 addrspace(4)* [[Block3]])
864+ -
865+ - %21 = call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* %ndrange, i32 0, %opencl.clk_event_t* addrspace(4)* null, %opencl.clk_event_t* addrspace(4)* %7, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_5_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* %9)
866+ -
856867+ %40 = call i32 @__enqueue_kernel_varargs(%opencl.queue_t* %33, i32 %34, %struct.ndrange_t* %tmp12, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_4_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global.1 to i8 addrspace(1)*) to i8 addrspace(4)*), i32 3, i32* %37)
857868 ret void
858869 }
@@ -883,7 +894,7 @@ index 1f0b360..761043e 100644
883894 %2 = load i32, i32 addrspace(4)* %block.capture.addr2, align 4
884895 %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %1, i32 %2
885896 store i32 %conv, i32 addrspace(1)* %arrayidx, align 4
886- @@ -243 ,19 +293 ,19 @@ declare i32 @__enqueue_kernel_basic(%opencl.queue_t*, i32, %struct.ndrange_t*, i
897+ @@ -261 ,19 +300 ,19 @@ declare i32 @__enqueue_kernel_basic(%opencl.queue_t*, i32, %struct.ndrange_t*, i
887898 define internal spir_func void @__device_side_enqueue_block_invoke_2(i8 addrspace(4)* %.block_descriptor) #2 {
888899 entry:
889900 %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4
@@ -910,7 +921,7 @@ index 1f0b360..761043e 100644
910921 %4 = load i32, i32 addrspace(4)* %block.capture.addr3, align 4
911922 %arrayidx4 = getelementptr inbounds i32, i32 addrspace(1)* %3, i32 %4
912923 store i32 %2, i32 addrspace(1)* %arrayidx4, align 4
913- @@ -276 ,11 +326 ,11 @@ define internal spir_func void @__device_side_enqueue_block_invoke_3(i8 addrspac
924+ @@ -294 ,11 +333 ,11 @@ define internal spir_func void @__device_side_enqueue_block_invoke_3(i8 addrspac
914925 entry:
915926 %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4
916927 %p.addr = alloca i8 addrspace(3)*, align 4
@@ -925,7 +936,7 @@ index 1f0b360..761043e 100644
925936 ret void
926937 }
927938
928- @@ -300 ,13 +350 ,13 @@ entry:
939+ @@ -318 ,13 +357 ,13 @@ entry:
929940 %p1.addr = alloca i8 addrspace(3)*, align 4
930941 %p2.addr = alloca i8 addrspace(3)*, align 4
931942 %p3.addr = alloca i8 addrspace(3)*, align 4
@@ -942,9 +953,9 @@ index 1f0b360..761043e 100644
942953 ret void
943954 }
944955
945- @@ -329,27 +379,20 @@ declare i32 @__enqueue_kernel_varargs(%opencl.queue_t*, i32, %struct.ndrange_t*,
946- ; CHECK-LLVM-DAG: define spir_kernel void @__device_side_enqueue_block_invoke_3_kernel(i8 addrspace(4)*, i8 addrspace(3)*)
956+ @@ -379,27 +418,20 @@ entry:
947957 ; CHECK-LLVM-DAG: define spir_kernel void @__device_side_enqueue_block_invoke_4_kernel(i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)
958+ ; CHECK-LLVM-DAG: define spir_kernel void @__device_side_enqueue_block_invoke_5_kernel(i8 addrspace(4)*)
948959
949960- attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
950961+ attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
0 commit comments