Skip to content

Commit 34c61fc

Browse files
committed
fixup! AMDGPU: Fix assert when multi operands to update after folding imm
1 parent be9c98f commit 34c61fc

File tree

1 file changed

+5
-77
lines changed

1 file changed

+5
-77
lines changed

llvm/test/CodeGen/AMDGPU/bug-multi-operands-to-update-after-fold.mir

Lines changed: 5 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -1,93 +1,21 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
22
# RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx1031 -run-pass=si-fold-operands -o - %s | FileCheck %s
33
--- |
4-
%struct.bar = type { %struct.bar.0, %struct.bar.0, %struct.bar.0 }
5-
%struct.bar.0 = type { %struct.blam }
6-
%struct.blam = type { i32, i32, i32, i32 }
7-
8-
@global = external addrspace(3) global %struct.bar
4+
@global = external addrspace(3) global i32
95

106
define void @snork() {
117
bb:
12-
%call = call float @llvm.amdgcn.rcp.f32(float 0.000000e+00)
13-
%fmul = fmul ninf float %call, 0.000000e+00
14-
%fptoui = fptoui float %fmul to i32
15-
%zext = zext i32 %fptoui to i64
16-
%mul = mul i64 2, %zext
17-
%trunc = trunc i64 %mul to i32
18-
%0 = insertelement <4 x i32> poison, i32 %trunc, i32 0
19-
%1 = insertelement <4 x i32> %0, i32 0, i32 1
20-
%2 = insertelement <4 x i32> %1, i32 0, i32 2
21-
%3 = insertelement <4 x i32> %2, i32 %trunc, i32 3
22-
store <4 x i32> %3, ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) @global, i32 32), align 16
23-
%load = load <4 x i32>, ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) @global, i32 32), align 16
24-
%extractelement = extractelement <4 x i32> %load, i64 0
25-
%icmp = icmp ne i32 %extractelement, 0
26-
%extractelement1 = extractelement <4 x i32> %load, i64 3
27-
%icmp2 = icmp ne i32 %extractelement1, 0
28-
%select = select i1 %icmp, i1 true, i1 %icmp2
29-
%select.inv = xor i1 %select, true
30-
br i1 %select.inv, label %bb3, label %bb5, !amdgpu.uniform !0
8+
br label %bb3
319

32-
bb3: ; preds = %bb
33-
%and = and <4 x i32> %load, splat (i32 1)
34-
br label %bb5, !amdgpu.uniform !0
10+
bb3:
11+
br label %bb5
3512

36-
bb5: ; preds = %bb3, %bb
13+
bb5:
3714
ret void
3815
}
39-
40-
declare float @llvm.amdgcn.rcp.f32(float)
41-
42-
!0 = !{}
4316
...
4417
---
4518
name: snork
46-
alignment: 1
47-
tracksRegLiveness: true
48-
noPhis: false
49-
isSSA: true
50-
noVRegs: false
51-
hasFakeUses: false
52-
registers:
53-
- { id: 0, class: sgpr_128 }
54-
- { id: 1, class: sgpr_64 }
55-
- { id: 2, class: sgpr_64 }
56-
- { id: 3, class: sgpr_64 }
57-
- { id: 4, class: sgpr_64 }
58-
- { id: 5, class: sgpr_32 }
59-
- { id: 6, class: sgpr_32 }
60-
- { id: 7, class: sgpr_32 }
61-
- { id: 8, class: sgpr_32 }
62-
- { id: 9, class: sreg_32 }
63-
- { id: 10, class: sgpr_128 }
64-
- { id: 11, class: vgpr_32 }
65-
- { id: 12, class: vreg_128 }
66-
- { id: 13, class: sreg_32 }
67-
- { id: 14, class: sreg_32 }
68-
- { id: 15, class: sreg_32 }
69-
frameInfo:
70-
maxAlignment: 1
71-
machineFunctionInfo:
72-
maxKernArgAlign: 1
73-
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
74-
frameOffsetReg: '$sgpr33'
75-
stackPtrOffsetReg: '$sgpr32'
76-
argumentInfo:
77-
privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
78-
dispatchPtr: { reg: '$sgpr4_sgpr5' }
79-
queuePtr: { reg: '$sgpr6_sgpr7' }
80-
dispatchID: { reg: '$sgpr10_sgpr11' }
81-
workGroupIDX: { reg: '$sgpr12' }
82-
workGroupIDY: { reg: '$sgpr13' }
83-
workGroupIDZ: { reg: '$sgpr14' }
84-
LDSKernelId: { reg: '$sgpr15' }
85-
implicitArgPtr: { reg: '$sgpr8_sgpr9' }
86-
workItemIDX: { reg: '$vgpr31', mask: 1023 }
87-
workItemIDY: { reg: '$vgpr31', mask: 1047552 }
88-
workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
89-
occupancy: 16
90-
sgprForEXECCopy: '$sgpr105'
9119
body: |
9220
; CHECK-LABEL: name: snork
9321
; CHECK: bb.0.bb:

0 commit comments

Comments
 (0)