|
| 1 | +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 |
| 2 | +# RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx1031 -run-pass=si-fold-operands -o - %s | FileCheck %s |
| 3 | +--- | |
| | + ; Embedded LLVM IR module. The machine function below refers back to it for |
| | + ; its block names (bb, bb3, bb5) and for the @global address that appears in |
| | + ; the memory operand of its store instruction. |
| 4 | + %struct.bar = type { %struct.bar.0, %struct.bar.0, %struct.bar.0 } |
| 5 | + %struct.bar.0 = type { %struct.blam } |
| 6 | + %struct.blam = type { i32, i32, i32, i32 } |
| 7 | + |
| 8 | + @global = external addrspace(3) global %struct.bar |
| 9 | + |
| | + ; @snork builds a <4 x i32> whose lanes 0 and 3 hold %trunc and whose lanes |
| | + ; 1 and 2 are zero, stores it at byte offset 32 from @global, reloads it from |
| | + ; the same address, and branches to %bb3 only when lanes 0 and 3 of the |
| | + ; reloaded vector are both zero; otherwise it goes straight to %bb5. |
| 10 | + define void @snork() { |
| 11 | + bb: |
| 12 | + %call = call float @llvm.amdgcn.rcp.f32(float 0.000000e+00) |
| 13 | + %fmul = fmul ninf float %call, 0.000000e+00 |
| 14 | + %fptoui = fptoui float %fmul to i32 |
| 15 | + %zext = zext i32 %fptoui to i64 |
| 16 | + %mul = mul i64 2, %zext |
| 17 | + %trunc = trunc i64 %mul to i32 |
| 18 | + %0 = insertelement <4 x i32> poison, i32 %trunc, i32 0 |
| 19 | + %1 = insertelement <4 x i32> %0, i32 0, i32 1 |
| 20 | + %2 = insertelement <4 x i32> %1, i32 0, i32 2 |
| 21 | + %3 = insertelement <4 x i32> %2, i32 %trunc, i32 3 |
| 22 | + store <4 x i32> %3, ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) @global, i32 32), align 16 |
| 23 | + %load = load <4 x i32>, ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) @global, i32 32), align 16 |
| 24 | + %extractelement = extractelement <4 x i32> %load, i64 0 |
| 25 | + %icmp = icmp ne i32 %extractelement, 0 |
| 26 | + %extractelement1 = extractelement <4 x i32> %load, i64 3 |
| 27 | + %icmp2 = icmp ne i32 %extractelement1, 0 |
| 28 | + %select = select i1 %icmp, i1 true, i1 %icmp2 |
| 29 | + %select.inv = xor i1 %select, true |
| 30 | + br i1 %select.inv, label %bb3, label %bb5, !amdgpu.uniform !0 |
| 31 | + |
| 32 | + bb3: ; preds = %bb |
| 33 | + %and = and <4 x i32> %load, splat (i32 1) |
| 34 | + br label %bb5, !amdgpu.uniform !0 |
| 35 | + |
| 36 | + bb5: ; preds = %bb3, %bb |
| 37 | + ret void |
| 38 | + } |
| 39 | + |
| 40 | + ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) |
| 41 | + declare float @llvm.amdgcn.rcp.f32(float) |
| 42 | + |
| 43 | + !0 = !{} |
| 44 | +... |
| 45 | +--- |
| | +# Input for the si-fold-operands run in the RUN line. %10 is a REG_SEQUENCE |
| | +# whose four lanes are all %9 (S_MOV_B32 0), and %15 = S_OR_B32 of %10.sub0 and |
| | +# %10.sub3 feeds the S_CMP_LG_U32 that controls the branch. The CHECK lines |
| | +# expect the S_OR_B32 to be gone after the pass and the compare to read |
| | +# "S_CMP_LG_U32 0, 0", while the REG_SEQUENCE, COPY and DS_WRITE_B128_gfx9 stay. |
| 46 | +name: snork |
| 47 | +alignment: 1 |
| 48 | +tracksRegLiveness: true |
| 49 | +noPhis: false |
| 50 | +isSSA: true |
| 51 | +noVRegs: false |
| 52 | +hasFakeUses: false |
| 53 | +registers: |
| 54 | + - { id: 0, class: sgpr_128 } |
| 55 | + - { id: 1, class: sgpr_64 } |
| 56 | + - { id: 2, class: sgpr_64 } |
| 57 | + - { id: 3, class: sgpr_64 } |
| 58 | + - { id: 4, class: sgpr_64 } |
| 59 | + - { id: 5, class: sgpr_32 } |
| 60 | + - { id: 6, class: sgpr_32 } |
| 61 | + - { id: 7, class: sgpr_32 } |
| 62 | + - { id: 8, class: sgpr_32 } |
| 63 | + - { id: 9, class: sreg_32 } |
| 64 | + - { id: 10, class: sgpr_128 } |
| 65 | + - { id: 11, class: vgpr_32 } |
| 66 | + - { id: 12, class: vreg_128 } |
| 67 | + - { id: 13, class: sreg_32 } |
| 68 | + - { id: 14, class: sreg_32 } |
| 69 | + - { id: 15, class: sreg_32 } |
| 70 | +frameInfo: |
| 71 | + maxAlignment: 1 |
| 72 | +machineFunctionInfo: |
| 73 | + maxKernArgAlign: 1 |
| 74 | + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' |
| 75 | + frameOffsetReg: '$sgpr33' |
| 76 | + stackPtrOffsetReg: '$sgpr32' |
| 77 | + argumentInfo: |
| 78 | + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } |
| 79 | + dispatchPtr: { reg: '$sgpr4_sgpr5' } |
| 80 | + queuePtr: { reg: '$sgpr6_sgpr7' } |
| 81 | + dispatchID: { reg: '$sgpr10_sgpr11' } |
| 82 | + workGroupIDX: { reg: '$sgpr12' } |
| 83 | + workGroupIDY: { reg: '$sgpr13' } |
| 84 | + workGroupIDZ: { reg: '$sgpr14' } |
| 85 | + LDSKernelId: { reg: '$sgpr15' } |
| 86 | + implicitArgPtr: { reg: '$sgpr8_sgpr9' } |
| 87 | + workItemIDX: { reg: '$vgpr31', mask: 1023 } |
| 88 | + workItemIDY: { reg: '$vgpr31', mask: 1047552 } |
| 89 | + workItemIDZ: { reg: '$vgpr31', mask: 1072693248 } |
| 90 | + occupancy: 16 |
| 91 | + sgprForEXECCopy: '$sgpr105' |
| 92 | +body: | |
| 93 | + ; CHECK-LABEL: name: snork |
| 94 | + ; CHECK: bb.0.bb: |
| 95 | + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) |
| 96 | + ; CHECK-NEXT: {{ $}} |
| 97 | + ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 |
| 98 | + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1, [[S_MOV_B32_]], %subreg.sub2, [[S_MOV_B32_]], %subreg.sub3 |
| 99 | + ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 target-flags(amdgpu-abs32-lo) @global, implicit $exec |
| 100 | + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY [[REG_SEQUENCE]] |
| 101 | + ; CHECK-NEXT: DS_WRITE_B128_gfx9 killed [[V_MOV_B32_e32_]], [[COPY]], 32, 0, implicit $exec :: (store (s128) into `ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) @global, i32 32)`, addrspace 3) |
| 102 | + ; CHECK-NEXT: S_CMP_LG_U32 0, 0, implicit-def $scc |
| 103 | + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc |
| 104 | + ; CHECK-NEXT: S_BRANCH %bb.1 |
| 105 | + ; CHECK-NEXT: {{ $}} |
| 106 | + ; CHECK-NEXT: bb.1.bb3: |
| 107 | + ; CHECK-NEXT: successors: %bb.2(0x80000000) |
| 108 | + ; CHECK-NEXT: {{ $}} |
| 109 | + ; CHECK-NEXT: bb.2.bb5: |
| 110 | + ; CHECK-NEXT: SI_RETURN |
| 111 | + bb.0.bb: |
| 112 | + successors: %bb.1, %bb.2 |
| 113 | +
|
| 114 | + %9:sreg_32 = S_MOV_B32 0 |
| 115 | + %10:sgpr_128 = REG_SEQUENCE %9, %subreg.sub0, %9, %subreg.sub1, %9, %subreg.sub2, %9, %subreg.sub3 |
| 116 | + %11:vgpr_32 = V_MOV_B32_e32 target-flags(amdgpu-abs32-lo) @global, implicit $exec |
| 117 | + %12:vreg_128 = COPY %10 |
| 118 | + DS_WRITE_B128_gfx9 killed %11, %12, 32, 0, implicit $exec :: (store (s128) into `ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) @global, i32 32)`, addrspace 3) |
| 119 | + %15:sreg_32 = S_OR_B32 %10.sub0, %10.sub3, implicit-def dead $scc |
| 120 | + S_CMP_LG_U32 killed %15, 0, implicit-def $scc |
| 121 | + S_CBRANCH_SCC1 %bb.2, implicit $scc |
| 122 | + S_BRANCH %bb.1 |
| 123 | +
|
| 124 | + bb.1.bb3: |
| 125 | +
|
| 126 | + bb.2.bb5: |
| 127 | + SI_RETURN |
| 128 | +... |
0 commit comments