|
6 | 6 | ; CHECK-DAG: %[[#uint:]] = OpTypeInt 32 0 |
7 | 7 | ; CHECK-DAG: %[[#scope:]] = OpConstant %[[#uint]] 3 |
8 | 8 | ; CHECK-DAG: %[[#f32:]] = OpTypeFloat 32 |
9 | | -; CHECK-DAG: %[[#expr:]] = OpFunctionParameter %[[#f32]] |
| 9 | +; CHECK-DAG: %[[#bool:]] = OpTypeBool |
| 10 | +; CHECK-DAG: %[[#v4_bool:]] = OpTypeVector %[[#bool]] 4 |
| 11 | +; CHECK-DAG: %[[#fexpr:]] = OpFunctionParameter %[[#f32]] |
| 12 | +; CHECK-DAG: %[[#iexpr:]] = OpFunctionParameter %[[#uint]] |
10 | 13 | ; CHECK-DAG: %[[#idx:]] = OpFunctionParameter %[[#uint]] |
| 14 | +; CHECK-DAG: %[[#vbexpr:]] = OpFunctionParameter %[[#v4_bool]] |
11 | 15 |
|
| | +; test_1 calls the wave.read.lane.at intrinsic once each for a float, an i32 and a <4 x i1> value, all indexed by %idx and all tied to the single convergence token %0 from the entry block. Each CHECK line sits directly above the call it verifies and expects an OpGroupNonUniformShuffle with operands (result type, value, lane index, scope constant). |
12 | | -define spir_func void @test_1(float %expr, i32 %idx) #0 { |
| 16 | +define spir_func void @test_1(float %fexpr, i32 %iexpr, <4 x i1> %vbexpr, i32 %idx) #0 { |
13 | 17 | entry: |
14 | 18 | %0 = call token @llvm.experimental.convergence.entry() |
15 | | -; CHECK: %[[#ret:]] = OpGroupNonUniformShuffle %[[#f32]] %[[#expr]] %[[#idx]] %[[#scope]] |
16 | | - %1 = call float @llvm.spv.wave.read.lane.at(float %expr, i32 %idx) [ "convergencectrl"(token %0) ] |
| 19 | +; CHECK: %[[#fret:]] = OpGroupNonUniformShuffle %[[#f32]] %[[#fexpr]] %[[#idx]] %[[#scope]] |
| 20 | + %1 = call float @llvm.spv.wave.read.lane.at.f32(float %fexpr, i32 %idx) [ "convergencectrl"(token %0) ] |
| 21 | +; CHECK: %[[#iret:]] = OpGroupNonUniformShuffle %[[#uint]] %[[#iexpr]] %[[#idx]] %[[#scope]] |
| 22 | + %2 = call i32 @llvm.spv.wave.read.lane.at.i32(i32 %iexpr, i32 %idx) [ "convergencectrl"(token %0) ] |
| 23 | +; CHECK: %[[#vbret:]] = OpGroupNonUniformShuffle %[[#v4_bool]] %[[#vbexpr]] %[[#idx]] %[[#scope]] |
| 24 | + %3 = call <4 x i1> @llvm.spv.wave.read.lane.at.v4i1(<4 x i1> %vbexpr, i32 %idx) [ "convergencectrl"(token %0) ] |
17 | 25 | ret void |
18 | 26 | } |
19 | 27 |
|
20 | | -declare i32 @__hlsl_wave_get_lane_index() #1 |
| | +; Declare the overloaded intrinsics that @test_1 actually calls (one per tested type) rather than unrelated, never-referenced helper symbols. |
| 28 | +declare float @llvm.spv.wave.read.lane.at.f32(float, i32) #1 |
| 29 | +declare i32 @llvm.spv.wave.read.lane.at.i32(i32, i32) #1 |
| 30 | +declare <4 x i1> @llvm.spv.wave.read.lane.at.v4i1(<4 x i1>, i32) #1 |
21 | 31 |
|
22 | 32 | attributes #0 = { convergent norecurse "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } |
23 | 33 | attributes #1 = { convergent } |
|
0 commit comments