|
| 1 | +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1200 < %s | FileCheck %s |
| 2 | + |
| 3 | +; CHECK-LABEL: .shader_functions: |
| 4 | + |
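| 5 | +; Make sure that .vgpr_count doesn't include the %inactive.vgpr registers just because they are passed as arguments. |
| 6 | +; The shader is free to use any of the VGPRs mapped to a %inactive.vgpr as long as it only touches its active lanes; |
| 7 | +; in that case, the VGPR should be included in the .vgpr_count. Here the shader clobbers v12 (see the inline asm), so the expected count is 0xd (13, i.e. v0-v12). |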
| 8 | +; CHECK-LABEL: _miss_1: |
| 9 | +; CHECK: .vgpr_count:{{.*}}0xd{{$}} |
| 10 | + |
| 11 | +define amdgpu_cs_chain void @_miss_1(ptr inreg %next.callee, i32 inreg %global.table, i32 inreg %max.outgoing.vgpr.count, |
| 12 | + i32 %vcr, { i32 } %system.data, |
| 13 | + i32 %inactive.vgpr, i32 %inactive.vgpr1, i32 %inactive.vgpr2, i32 %inactive.vgpr3, |
| 14 | + i32 %inactive.vgpr4, i32 %inactive.vgpr5, i32 %inactive.vgpr6, i32 %inactive.vgpr7, |
| 15 | + i32 %inactive.vgpr8, i32 %inactive.vgpr9) |
| 16 | + local_unnamed_addr { |
| 17 | +entry: |
| 18 | + %system.data.value = extractvalue { i32 } %system.data, 0 |
| 19 | + %dead.val = call i32 @llvm.amdgcn.dead.i32() ; placeholder fed to every inactive-slot phi on the %shader edge |
| 20 | + %is.whole.wave = call i1 @llvm.amdgcn.init.whole.wave() |
| 21 | + br i1 %is.whole.wave, label %shader, label %tail |
| 22 | +; Taken when %is.whole.wave is true: recompute the vcr / system-data values and clobber v12 via inline asm. |
| 23 | +shader: |
| 24 | + %system.data.extract = extractvalue { i32 } %system.data, 0 |
| 25 | + %data.mul = mul i32 %system.data.extract, 2 |
| 26 | + %data.add = add i32 %data.mul, 1 |
| 27 | + call void asm sideeffect "; use VGPR for %inactive.vgpr2", "~{v12}"() |
| 28 | + br label %tail |
| 29 | +; Merge point: from %entry every value is the unmodified incoming argument; from %shader the inactive slots carry %dead.val. |
| 30 | +tail: |
| 31 | + %final.vcr = phi i32 [ %vcr, %entry ], [ %data.mul, %shader ] |
| 32 | + %final.sys.data = phi i32 [ %system.data.value, %entry ], [ %data.add, %shader ] |
| 33 | + %final.inactive0 = phi i32 [ %inactive.vgpr, %entry ], [ %dead.val, %shader ] |
| 34 | + %final.inactive1 = phi i32 [ %inactive.vgpr1, %entry ], [ %dead.val, %shader ] |
| 35 | + %final.inactive2 = phi i32 [ %inactive.vgpr2, %entry ], [ %dead.val, %shader ] |
| 36 | + %final.inactive3 = phi i32 [ %inactive.vgpr3, %entry ], [ %dead.val, %shader ] |
| 37 | + %final.inactive4 = phi i32 [ %inactive.vgpr4, %entry ], [ %dead.val, %shader ] |
| 38 | + %final.inactive5 = phi i32 [ %inactive.vgpr5, %entry ], [ %dead.val, %shader ] |
| 39 | + %final.inactive6 = phi i32 [ %inactive.vgpr6, %entry ], [ %dead.val, %shader ] |
| 40 | + %final.inactive7 = phi i32 [ %inactive.vgpr7, %entry ], [ %dead.val, %shader ] |
| 41 | + %final.inactive8 = phi i32 [ %inactive.vgpr8, %entry ], [ %dead.val, %shader ] |
| 42 | + %final.inactive9 = phi i32 [ %inactive.vgpr9, %entry ], [ %dead.val, %shader ] |
| 43 | +; Pack the merged values into the 12-field struct: field 0 = vcr, field 1 = system data, fields 2-11 = inactive slots 0-9. |
| 44 | + %struct.init = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } poison, i32 %final.vcr, 0 |
| 45 | + %struct.with.data = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %struct.init, i32 %final.sys.data, 1 |
| 46 | + %struct.with.inactive0 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %struct.with.data, i32 %final.inactive0, 2 |
| 47 | + %struct.with.inactive1 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %struct.with.inactive0, i32 %final.inactive1, 3 |
| 48 | + %struct.with.inactive2 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %struct.with.inactive1, i32 %final.inactive2, 4 |
| 49 | + %struct.with.inactive3 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %struct.with.inactive2, i32 %final.inactive3, 5 |
| 50 | + %struct.with.inactive4 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %struct.with.inactive3, i32 %final.inactive4, 6 |
| 51 | + %struct.with.inactive5 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %struct.with.inactive4, i32 %final.inactive5, 7 |
| 52 | + %struct.with.inactive6 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %struct.with.inactive5, i32 %final.inactive6, 8 |
| 53 | + %struct.with.inactive7 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %struct.with.inactive6, i32 %final.inactive7, 9 |
| 54 | + %struct.with.inactive8 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %struct.with.inactive7, i32 %final.inactive8, 10 |
| 55 | + %final.struct = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %struct.with.inactive8, i32 %final.inactive9, 11 |
| 56 | +; Build the <4 x i32> inreg operand: global table, max outgoing VGPR count, merged system data, constant 0. |
| 57 | + %vec.global = insertelement <4 x i32> poison, i32 %global.table, i64 0 |
| 58 | + %vec.max.vgpr = insertelement <4 x i32> %vec.global, i32 %max.outgoing.vgpr.count, i64 1 |
| 59 | + %vec.sys.data = insertelement <4 x i32> %vec.max.vgpr, i32 %final.sys.data, i64 2 |
| 60 | + %final.vec = insertelement <4 x i32> %vec.sys.data, i32 0, i64 3 |
| 61 | +; Chain to %next.callee; after the immarg operand (1) come three variadic operands, the last being @retry_vgpr_alloc.v4i32. |
| 62 | + call void (ptr, i32, <4 x i32>, { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }, i32, ...) |
| 63 | + @llvm.amdgcn.cs.chain.p0.i32.v4i32.sl_i32i32i32i32i32i32i32i32i32i32i32i32s( |
| 64 | + ptr %next.callee, i32 0, <4 x i32> inreg %final.vec, |
| 65 | + { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %final.struct, |
| 66 | + i32 1, i32 %max.outgoing.vgpr.count, i32 -1, ptr @retry_vgpr_alloc.v4i32) |
| 67 | + unreachable |
| 68 | +} |
| 69 | + |
| 70 | +declare i32 @llvm.amdgcn.dead.i32() |
| 71 | +declare i1 @llvm.amdgcn.init.whole.wave() |
| 72 | +declare void @llvm.amdgcn.cs.chain.p0.i32.v4i32.sl_i32i32i32i32i32i32i32i32i32i32i32i32s(ptr, i32, <4 x i32>, { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }, i32 immarg, ...) |
| 73 | +; Only referenced as the final variadic operand of the chain call in @_miss_1; no body is needed for this test. |
| 74 | +declare amdgpu_cs_chain void @retry_vgpr_alloc.v4i32(<4 x i32> inreg) |
| 75 | +; Module-level PAL metadata supplied as a MsgPack blob; the string below carries a .shader_functions entry for _miss_1. |
| 76 | +!amdgpu.pal.metadata.msgpack = !{!0} |
| 77 | + |
| 78 | +!0 = !{!"\82\B0amdpal.pipelines\91\8B\A4.api\A6Vulkan\B2.compute_registers\85\AB.tg_size_en\C3\AA.tgid_x_en\C3\AA.tgid_y_en\C3\AA.tgid_z_en\C3\AF.tidig_comp_cnt\00\B0.hardware_stages\81\A3.cs\8D\AF.checksum_value\00\AB.debug_mode\00\AB.float_mode\CC\C0\A9.image_op\C2\AC.mem_ordered\C3\AB.sgpr_limitj\B7.threadgroup_dimensions\93 \01\01\AD.trap_present\00\B2.user_data_reg_map\90\AB.user_sgprs\10\AB.vgpr_limit\CD\01\00\AF.wavefront_size \AF.wg_round_robin\C2\B7.internal_pipeline_hash\92\CF|{2&\DCC\85M\CFep\8A\EDR\DE\D6\E1\B1.shader_functions\81\A7_miss_1\82\B4.frontend_stack_size\00\B4.outgoing_vgpr_countP\A8.shaders\81\A8.compute\82\B0.api_shader_hash\92\00\00\B1.hardware_mapping\91\A3.cs\B0.spill_threshold\CD\FF\FF\A5.type\A2Cs\B0.user_data_limit\01\A9.uses_cps\C3\AF.xgl_cache_info\82\B3.128_bit_cache_hash\92\CF\B4\AF\9D\0B\07\88\03\02\CF\01o\C9\CAf?)\DA\AD.llpc_version\A476.0\AEamdpal.version\92\03\00"} |
0 commit comments