|
5 | 5 | ; are properly handled by the LowerWGScope pass. Check that WG-shared local "shadow" variables are created |
6 | 6 | ; and that, before each PFWI invocation, the leader WI stores its private copy of the variable into the shadow, |
7 | 7 | ; and then all WIs load the shadow value into their private copies ("materialize" the private copy). |
| 8 | +; Also check that an indirect call to a function marked with parallel_for_work_item is treated |
| 9 | +; the same as a direct call. |
8 | 10 |
|
9 | 11 | %struct.bar = type { i8 } |
10 | 12 | %struct.zot = type { %struct.widget, %struct.widget, %struct.widget, %struct.foo } |
@@ -54,6 +56,7 @@ define internal spir_func void @wibble(ptr addrspace(4) %arg, ptr byval(%struct. |
54 | 56 | ; CHECK-NEXT: call void @_Z22__spirv_ControlBarrieriii(i32 2, i32 2, i32 272) #[[ATTR0]] |
55 | 57 | ; CHECK-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[ARG1]] to ptr addrspace(4) |
56 | 58 | ; CHECK-NEXT: call spir_func void @bar(ptr addrspace(4) [[TMP9]], ptr byval([[STRUCT_FOO_0]]) align 1 [[TMP1]]) |
| 59 | +; CHECK-NEXT: call spir_func void @foo(ptr addrspace(4) [[TMP9]], ptr byval([[STRUCT_FOO_0]]) align 1 [[TMP1]]) |
57 | 60 | ; CHECK-NEXT: ret void |
58 | 61 | ; |
59 | 62 | bb: |
|
62 | 65 | store ptr addrspace(4) %arg, ptr %0, align 8 |
63 | 66 | %2 = addrspacecast ptr %arg1 to ptr addrspace(4) |
64 | 67 | call spir_func void @bar(ptr addrspace(4) %2, ptr byval(%struct.foo.0) align 1 %1) |
| 68 | + call spir_func void @foo(ptr addrspace(4) %2, ptr byval(%struct.foo.0) align 1 %1) |
| 69 | + ret void |
| 70 | +} |
| 71 | + |
; @foo is an internal spir_func tagged with !work_group_scope !0. The function
; body just above calls it right after its own call to @bar, passing the same
; two arguments. @foo itself keeps %arg in a private stack slot, casts the byval
; pointer %arg1 to addrspace(4), and forwards to @bar.
; NOTE(review): @bar is presumably the parallel_for_work_item function that the
; file header refers to; its definition is not visible here -- confirm.
;
; The FileCheck lines below expect the transformed @foo to:
;   1. let the work-item whose local invocation index is 0 (block "leader")
;      memcpy %arg1 into @ArgShadow.4 in addrspace(3); then, in "merge", after a
;      barrier, memcpy @ArgShadow.4 back into %arg1;
;   2. perform the store of %arg into its stack slot only in "wg_leader";
;   3. in "TestMat" (index == 0) copy the struct alloca to @WGCopy.3 and the
;      pointer slot to @WGCopy.2, and in "LeaderMat", after a barrier, copy both
;      back into the private allocas;
;   4. keep the call to @bar in "LeaderMat", after one more barrier, followed
;      directly by ret.
| 72 | +define internal spir_func void @foo(ptr addrspace(4) %arg, ptr byval(%struct.foo.0) align 1 %arg1) align 2 !work_group_scope !0 { |
| 73 | +; CHECK: bb: |
| 74 | +; CHECK-NEXT: [[TMP0:%.*]] = alloca ptr addrspace(4), align 8 |
| 75 | +; CHECK-NEXT: [[TMP1:%.*]] = alloca [[STRUCT_FOO_0:%.*]], align 1 |
| 76 | +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr addrspace(1) @__spirv_BuiltInLocalInvocationIndex, align 4 |
| 77 | +; CHECK-NEXT: call void @_Z22__spirv_ControlBarrieriii(i32 2, i32 2, i32 272) #[[ATTR0]] |
| 78 | +; CHECK-NEXT: [[CMPZ3:%.*]] = icmp eq i64 [[TMP2]], 0 |
| 79 | +; CHECK-NEXT: br i1 [[CMPZ3]], label [[LEADER:%.*]], label [[MERGE:%.*]] |
| 80 | +; CHECK: leader: |
| 81 | +; CHECK-NEXT: call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) align 8 @ArgShadow.4, ptr align 1 [[ARG1:%.*]], i64 1, i1 false) |
| 82 | +; CHECK-NEXT: br label [[MERGE]] |
| 83 | +; CHECK: merge: |
| 84 | +; CHECK-NEXT: call void @_Z22__spirv_ControlBarrieriii(i32 2, i32 2, i32 272) #[[ATTR0]] |
| 85 | +; CHECK-NEXT: call void @llvm.memcpy.p0.p3.i64(ptr align 1 [[ARG1]], ptr addrspace(3) align 8 @ArgShadow.4, i64 1, i1 false) |
| 86 | +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr addrspace(1) @__spirv_BuiltInLocalInvocationIndex, align 4 |
| 87 | +; CHECK-NEXT: call void @_Z22__spirv_ControlBarrieriii(i32 2, i32 2, i32 272) #[[ATTR0]] |
| 88 | +; CHECK-NEXT: [[CMPZ:%.*]] = icmp eq i64 [[TMP3]], 0 |
| 89 | +; CHECK-NEXT: br i1 [[CMPZ]], label [[WG_LEADER:%.*]], label [[WG_CF:%.*]] |
| 90 | +; CHECK: wg_leader: |
| 91 | +; CHECK-NEXT: store ptr addrspace(4) [[ARG:%.*]], ptr [[TMP0]], align 8 |
| 92 | +; CHECK-NEXT: br label [[WG_CF]] |
| 93 | +; CHECK: wg_cf: |
| 94 | +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr addrspace(1) @__spirv_BuiltInLocalInvocationIndex, align 4 |
| 95 | +; CHECK-NEXT: call void @_Z22__spirv_ControlBarrieriii(i32 2, i32 2, i32 272) #[[ATTR0]] |
| 96 | +; CHECK-NEXT: [[CMPZ2:%.*]] = icmp eq i64 [[TMP4]], 0 |
| 97 | +; CHECK-NEXT: br i1 [[CMPZ2]], label [[TESTMAT:%.*]], label [[LEADERMAT:%.*]] |
| 98 | +; CHECK: TestMat: |
| 99 | +; CHECK-NEXT: call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) align 8 @WGCopy.3, ptr align 1 [[TMP1]], i64 1, i1 false) |
| 100 | +; CHECK-NEXT: [[MAT_LD:%.*]] = load ptr addrspace(4), ptr [[TMP0]], align 8 |
| 101 | +; CHECK-NEXT: store ptr addrspace(4) [[MAT_LD]], ptr addrspace(3) @WGCopy.2, align 8 |
| 102 | +; CHECK-NEXT: br label [[LEADERMAT]] |
| 103 | +; CHECK: LeaderMat: |
| 104 | +; CHECK-NEXT: call void @_Z22__spirv_ControlBarrieriii(i32 2, i32 2, i32 272) #[[ATTR0]] |
| 105 | +; CHECK-NEXT: [[MAT_LD1:%.*]] = load ptr addrspace(4), ptr addrspace(3) @WGCopy.2, align 8 |
| 106 | +; CHECK-NEXT: store ptr addrspace(4) [[MAT_LD1]], ptr [[TMP0]], align 8 |
| 107 | +; CHECK-NEXT: call void @llvm.memcpy.p0.p3.i64(ptr align 1 [[TMP1]], ptr addrspace(3) align 8 @WGCopy.3, i64 1, i1 false) |
| 108 | +; CHECK-NEXT: call void @_Z22__spirv_ControlBarrieriii(i32 2, i32 2, i32 272) #[[ATTR0]] |
| 109 | +; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr [[ARG1]] to ptr addrspace(4) |
| 110 | +; CHECK-NEXT: call spir_func void @bar(ptr addrspace(4) [[TMP5]], ptr byval([[STRUCT_FOO_0]]) align 1 [[TMP1]]) |
| 111 | +; CHECK-NEXT: ret void |
| 112 | +; |
; Input IR fed to the pass: a single basic block with no explicit barriers.
| 113 | +bb: |
; Private per-work-item storage: a slot for the incoming pointer and a
; %struct.foo.0 temporary that is later handed to @bar by value.
| 114 | + %1 = alloca ptr addrspace(4), align 8 |
| 115 | + %2 = alloca %struct.foo.0, align 1 |
; Save %arg into its stack slot.
| 116 | + store ptr addrspace(4) %arg, ptr %1, align 8 |
; View the byval argument through addrspace(4) so it can be passed as @bar's
; first operand.
| 117 | + %3 = addrspacecast ptr %arg1 to ptr addrspace(4) |
| 118 | + call spir_func void @bar(ptr addrspace(4) %3, ptr byval(%struct.foo.0) align 1 %2) |
65 | 119 | ret void |
66 | 120 | } |
67 | 121 |
|
|
0 commit comments