Skip to content

Commit 0c5e402

Browse files
committed
[AMDGPU] Do not fold into v_accvgpr_mov/write/read
In SIFoldOperands, leave copies for moving between agpr and vgpr registers. The register coalescer is able to handle the copies more efficiently than v_accvgpr_mov, v_accvgpr_write, and v_accvgpr_read. Otherwise, the compiler generates unnecessary instructions such as v_accvgpr_mov a0, a0.
1 parent 79e859e commit 0c5e402

File tree

3 files changed

+415
-13
lines changed

3 files changed

+415
-13
lines changed

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1096,21 +1096,8 @@ void SIFoldOperandsImpl::foldOperand(
10961096
B.addImm(Defs[I].second);
10971097
}
10981098
LLVM_DEBUG(dbgs() << "Folded " << *UseMI);
1099-
return;
11001099
}
11011100

1102-
if (Size != 4)
1103-
return;
1104-
1105-
Register Reg0 = UseMI->getOperand(0).getReg();
1106-
Register Reg1 = UseMI->getOperand(1).getReg();
1107-
if (TRI->isAGPR(*MRI, Reg0) && TRI->isVGPR(*MRI, Reg1))
1108-
UseMI->setDesc(TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64));
1109-
else if (TRI->isVGPR(*MRI, Reg0) && TRI->isAGPR(*MRI, Reg1))
1110-
UseMI->setDesc(TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64));
1111-
else if (ST->hasGFX90AInsts() && TRI->isAGPR(*MRI, Reg0) &&
1112-
TRI->isAGPR(*MRI, Reg1))
1113-
UseMI->setDesc(TII->get(AMDGPU::V_ACCVGPR_MOV_B32));
11141101
return;
11151102
}
11161103

Lines changed: 234 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,234 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx942 -verify-machineinstrs --run-pass si-fold-operands %s -o - | FileCheck %s
3+
# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx942 -verify-machineinstrs -start-before=si-fold-operands -stop-after=register-coalescer %s -o - | FileCheck %s --check-prefixes=COALESCE
4+
# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx908 -verify-machineinstrs -start-before=si-fold-operands -stop-after=register-coalescer %s -o - | FileCheck %s --check-prefixes=GFX908-COALESCE
5+
6+
---
7+
name: test
8+
tracksRegLiveness: true
9+
body: |
10+
; CHECK-LABEL: name: test
11+
; CHECK: bb.0:
12+
; CHECK-NEXT: successors: %bb.1(0x80000000)
13+
; CHECK-NEXT: liveins: $sgpr4_sgpr5
14+
; CHECK-NEXT: {{ $}}
15+
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5
16+
; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]](p4), 0, 0 :: (dereferenceable invariant load (s32), align 16, addrspace 4)
17+
; CHECK-NEXT: S_BITCMP1_B32 killed [[S_LOAD_DWORD_IMM]], 0, implicit-def $scc
18+
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
19+
; CHECK-NEXT: [[S_CSELECT_B64_:%[0-9]+]]:sreg_64_xexec = S_CSELECT_B64 -1, 0, implicit $scc
20+
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
21+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:agpr_32 = COPY [[V_MOV_B32_e32_]]
22+
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
23+
; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[S_CSELECT_B64_]], implicit $exec
24+
; CHECK-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 [[V_CNDMASK_B32_e64_]], 1, implicit $exec
25+
; CHECK-NEXT: {{ $}}
26+
; CHECK-NEXT: bb.1:
27+
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
28+
; CHECK-NEXT: {{ $}}
29+
; CHECK-NEXT: [[PHI:%[0-9]+]]:agpr_32 = PHI [[COPY1]], %bb.0, %24, %bb.3
30+
; CHECK-NEXT: [[PHI1:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_]], %bb.0, %11, %bb.3
31+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]]
32+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:agpr_32 = COPY [[PHI]]
33+
; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -1
34+
; CHECK-NEXT: $vcc = S_AND_B64 $exec, [[V_CMP_NE_U32_e64_]], implicit-def $scc
35+
; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.3, implicit $vcc
36+
; CHECK-NEXT: S_BRANCH %bb.2
37+
; CHECK-NEXT: {{ $}}
38+
; CHECK-NEXT: bb.2:
39+
; CHECK-NEXT: successors: %bb.3(0x80000000)
40+
; CHECK-NEXT: {{ $}}
41+
; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PHI1]], 1, implicit-def dead $scc
42+
; CHECK-NEXT: [[S_ASHR_I32_:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[PHI1]], 31, implicit-def dead $scc
43+
; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 killed [[S_ASHR_I32_]], killed [[S_OR_B32_]], implicit-def dead $scc
44+
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1, [[V_MOV_B32_e32_]], %subreg.sub2, [[V_MOV_B32_e32_]], %subreg.sub3
45+
; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1
46+
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
47+
; CHECK-NEXT: [[COPY5:%[0-9]+]]:areg_128_align2 = COPY [[REG_SEQUENCE]]
48+
; CHECK-NEXT: [[V_MFMA_F32_16X16X16F16_e64_:%[0-9]+]]:areg_128_align2 = V_MFMA_F32_16X16X16F16_e64 [[COPY4]], [[COPY4]], killed [[COPY5]], 0, 0, 0, implicit $mode, implicit $exec
49+
; CHECK-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0
50+
; CHECK-NEXT: {{ $}}
51+
; CHECK-NEXT: bb.3:
52+
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.1(0x40000000)
53+
; CHECK-NEXT: {{ $}}
54+
; CHECK-NEXT: [[PHI2:%[0-9]+]]:sreg_32 = PHI [[DEF]], %bb.1, [[S_AND_B32_]], %bb.2
55+
; CHECK-NEXT: [[PHI3:%[0-9]+]]:agpr_32 = PHI [[COPY3]], %bb.1, [[V_MFMA_F32_16X16X16F16_e64_]].sub0, %bb.2
56+
; CHECK-NEXT: [[PHI4:%[0-9]+]]:sreg_64_xexec = PHI [[S_MOV_B64_]], %bb.1, [[S_MOV_B64_1]], %bb.2
57+
; CHECK-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[PHI4]], implicit $exec
58+
; CHECK-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 [[V_CNDMASK_B32_e64_1]], 1, implicit $exec
59+
; CHECK-NEXT: $vcc = S_AND_B64 $exec, [[V_CMP_NE_U32_e64_1]], implicit-def $scc
60+
; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
61+
; CHECK-NEXT: S_BRANCH %bb.4
62+
; CHECK-NEXT: {{ $}}
63+
; CHECK-NEXT: bb.4:
64+
; CHECK-NEXT: successors: %bb.5(0x80000000)
65+
; CHECK-NEXT: {{ $}}
66+
; CHECK-NEXT: bb.5:
67+
; CHECK-NEXT: S_ENDPGM 0
68+
;
69+
; COALESCE-LABEL: name: test
70+
; COALESCE: bb.0:
71+
; COALESCE-NEXT: successors: %bb.1(0x80000000)
72+
; COALESCE-NEXT: liveins: $sgpr4_sgpr5
73+
; COALESCE-NEXT: {{ $}}
74+
; COALESCE-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5
75+
; COALESCE-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]](p4), 0, 0 :: (dereferenceable invariant load (s32), align 16, addrspace 4)
76+
; COALESCE-NEXT: S_BITCMP1_B32 [[S_LOAD_DWORD_IMM]], 0, implicit-def $scc
77+
; COALESCE-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub0:sgpr_64 = S_MOV_B32 0
78+
; COALESCE-NEXT: [[S_CSELECT_B64_:%[0-9]+]]:sreg_64_xexec = S_CSELECT_B64 -1, 0, implicit killed $scc
79+
; COALESCE-NEXT: undef [[V_MOV_B32_e32_:%[0-9]+]].sub1:vreg_128_align2 = V_MOV_B32_e32 0, implicit $exec
80+
; COALESCE-NEXT: undef [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]].sub0:areg_128_align2 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec
81+
; COALESCE-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[S_CSELECT_B64_]], implicit $exec
82+
; COALESCE-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 1, [[V_CNDMASK_B32_e64_]], implicit $exec
83+
; COALESCE-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
84+
; COALESCE-NEXT: {{ $}}
85+
; COALESCE-NEXT: bb.1:
86+
; COALESCE-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
87+
; COALESCE-NEXT: {{ $}}
88+
; COALESCE-NEXT: [[V_MOV_B32_e32_:%[0-9]+]].sub0:vreg_128_align2 = COPY [[V_ACCVGPR_WRITE_B32_e64_]].sub0
89+
; COALESCE-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 -1
90+
; COALESCE-NEXT: $vcc = S_AND_B64 $exec, [[V_CMP_NE_U32_e64_]], implicit-def dead $scc
91+
; COALESCE-NEXT: S_CBRANCH_VCCNZ %bb.3, implicit killed $vcc
92+
; COALESCE-NEXT: S_BRANCH %bb.2
93+
; COALESCE-NEXT: {{ $}}
94+
; COALESCE-NEXT: bb.2:
95+
; COALESCE-NEXT: successors: %bb.3(0x80000000)
96+
; COALESCE-NEXT: {{ $}}
97+
; COALESCE-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_MOV_B32_1]], 1, implicit-def dead $scc
98+
; COALESCE-NEXT: [[S_ASHR_I32_:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[S_MOV_B32_1]], 31, implicit-def dead $scc
99+
; COALESCE-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_ASHR_I32_]], [[S_OR_B32_]], implicit-def dead $scc
100+
; COALESCE-NEXT: [[V_MOV_B32_e32_:%[0-9]+]].sub2:vreg_128_align2 = COPY [[V_MOV_B32_e32_]].sub1
101+
; COALESCE-NEXT: [[V_MOV_B32_e32_:%[0-9]+]].sub3:vreg_128_align2 = COPY [[V_MOV_B32_e32_]].sub1
102+
; COALESCE-NEXT: [[S_MOV_B32_:%[0-9]+]].sub1:sgpr_64 = COPY [[S_MOV_B32_]].sub0
103+
; COALESCE-NEXT: [[COPY1:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B32_]]
104+
; COALESCE-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[V_MOV_B32_e32_]]
105+
; COALESCE-NEXT: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:areg_128_align2 = V_MFMA_F32_16X16X16F16_e64 [[COPY1]], [[COPY1]], [[COPY2]], 0, 0, 0, implicit $mode, implicit $exec
106+
; COALESCE-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 0
107+
; COALESCE-NEXT: {{ $}}
108+
; COALESCE-NEXT: bb.3:
109+
; COALESCE-NEXT: successors: %bb.4(0x40000000), %bb.1(0x40000000)
110+
; COALESCE-NEXT: {{ $}}
111+
; COALESCE-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[S_MOV_B64_]], implicit $exec
112+
; COALESCE-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 1, [[V_CNDMASK_B32_e64_1]], implicit $exec
113+
; COALESCE-NEXT: $vcc = S_AND_B64 $exec, [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc
114+
; COALESCE-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc
115+
; COALESCE-NEXT: S_BRANCH %bb.4
116+
; COALESCE-NEXT: {{ $}}
117+
; COALESCE-NEXT: bb.4:
118+
; COALESCE-NEXT: successors: %bb.5(0x80000000)
119+
; COALESCE-NEXT: {{ $}}
120+
; COALESCE-NEXT: bb.5:
121+
; COALESCE-NEXT: S_ENDPGM 0
122+
;
123+
; GFX908-COALESCE-LABEL: name: test
124+
; GFX908-COALESCE: bb.0:
125+
; GFX908-COALESCE-NEXT: successors: %bb.1(0x80000000)
126+
; GFX908-COALESCE-NEXT: liveins: $sgpr4_sgpr5
127+
; GFX908-COALESCE-NEXT: {{ $}}
128+
; GFX908-COALESCE-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5
129+
; GFX908-COALESCE-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]](p4), 0, 0 :: (dereferenceable invariant load (s32), align 16, addrspace 4)
130+
; GFX908-COALESCE-NEXT: S_BITCMP1_B32 [[S_LOAD_DWORD_IMM]], 0, implicit-def $scc
131+
; GFX908-COALESCE-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub0:sgpr_64 = S_MOV_B32 0
132+
; GFX908-COALESCE-NEXT: [[S_CSELECT_B64_:%[0-9]+]]:sreg_64_xexec = S_CSELECT_B64 -1, 0, implicit killed $scc
133+
; GFX908-COALESCE-NEXT: undef [[V_MOV_B32_e32_:%[0-9]+]].sub1:vreg_128_align2 = V_MOV_B32_e32 0, implicit $exec
134+
; GFX908-COALESCE-NEXT: undef [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]].sub0:areg_128_align2 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec
135+
; GFX908-COALESCE-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[S_CSELECT_B64_]], implicit $exec
136+
; GFX908-COALESCE-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 1, [[V_CNDMASK_B32_e64_]], implicit $exec
137+
; GFX908-COALESCE-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
138+
; GFX908-COALESCE-NEXT: {{ $}}
139+
; GFX908-COALESCE-NEXT: bb.1:
140+
; GFX908-COALESCE-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
141+
; GFX908-COALESCE-NEXT: {{ $}}
142+
; GFX908-COALESCE-NEXT: [[V_MOV_B32_e32_:%[0-9]+]].sub0:vreg_128_align2 = COPY [[V_ACCVGPR_WRITE_B32_e64_]].sub0
143+
; GFX908-COALESCE-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 -1
144+
; GFX908-COALESCE-NEXT: $vcc = S_AND_B64 $exec, [[V_CMP_NE_U32_e64_]], implicit-def dead $scc
145+
; GFX908-COALESCE-NEXT: S_CBRANCH_VCCNZ %bb.3, implicit killed $vcc
146+
; GFX908-COALESCE-NEXT: S_BRANCH %bb.2
147+
; GFX908-COALESCE-NEXT: {{ $}}
148+
; GFX908-COALESCE-NEXT: bb.2:
149+
; GFX908-COALESCE-NEXT: successors: %bb.3(0x80000000)
150+
; GFX908-COALESCE-NEXT: {{ $}}
151+
; GFX908-COALESCE-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_MOV_B32_1]], 1, implicit-def dead $scc
152+
; GFX908-COALESCE-NEXT: [[S_ASHR_I32_:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[S_MOV_B32_1]], 31, implicit-def dead $scc
153+
; GFX908-COALESCE-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_ASHR_I32_]], [[S_OR_B32_]], implicit-def dead $scc
154+
; GFX908-COALESCE-NEXT: [[V_MOV_B32_e32_:%[0-9]+]].sub2:vreg_128_align2 = COPY [[V_MOV_B32_e32_]].sub1
155+
; GFX908-COALESCE-NEXT: [[V_MOV_B32_e32_:%[0-9]+]].sub3:vreg_128_align2 = COPY [[V_MOV_B32_e32_]].sub1
156+
; GFX908-COALESCE-NEXT: [[S_MOV_B32_:%[0-9]+]].sub1:sgpr_64 = COPY [[S_MOV_B32_]].sub0
157+
; GFX908-COALESCE-NEXT: [[COPY1:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B32_]]
158+
; GFX908-COALESCE-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[V_MOV_B32_e32_]]
159+
; GFX908-COALESCE-NEXT: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:areg_128_align2 = V_MFMA_F32_16X16X16F16_e64 [[COPY1]], [[COPY1]], [[COPY2]], 0, 0, 0, implicit $mode, implicit $exec
160+
; GFX908-COALESCE-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 0
161+
; GFX908-COALESCE-NEXT: {{ $}}
162+
; GFX908-COALESCE-NEXT: bb.3:
163+
; GFX908-COALESCE-NEXT: successors: %bb.4(0x40000000), %bb.1(0x40000000)
164+
; GFX908-COALESCE-NEXT: {{ $}}
165+
; GFX908-COALESCE-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[S_MOV_B64_]], implicit $exec
166+
; GFX908-COALESCE-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 1, [[V_CNDMASK_B32_e64_1]], implicit $exec
167+
; GFX908-COALESCE-NEXT: $vcc = S_AND_B64 $exec, [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc
168+
; GFX908-COALESCE-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc
169+
; GFX908-COALESCE-NEXT: S_BRANCH %bb.4
170+
; GFX908-COALESCE-NEXT: {{ $}}
171+
; GFX908-COALESCE-NEXT: bb.4:
172+
; GFX908-COALESCE-NEXT: successors: %bb.5(0x80000000)
173+
; GFX908-COALESCE-NEXT: {{ $}}
174+
; GFX908-COALESCE-NEXT: bb.5:
175+
; GFX908-COALESCE-NEXT: S_ENDPGM 0
176+
bb.0:
177+
successors: %bb.1
178+
liveins: $sgpr4_sgpr5
179+
180+
%521:sgpr_64(p4) = COPY $sgpr4_sgpr5
181+
%655:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %521(p4), 0, 0 :: (dereferenceable invariant load (s32), align 16, addrspace 4)
182+
S_BITCMP1_B32 killed %655, 0, implicit-def $scc
183+
%526:sgpr_32 = S_MOV_B32 0
184+
%690:sreg_64_xexec = S_CSELECT_B64 -1, 0, implicit $scc
185+
%815:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
186+
%658:sreg_32 = IMPLICIT_DEF
187+
%660:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %690, implicit $exec
188+
%689:sreg_64_xexec = V_CMP_NE_U32_e64 %660, 1, implicit $exec
189+
190+
bb.1:
191+
successors: %bb.2, %bb.3
192+
193+
%125:vgpr_32 = PHI %815, %bb.0, %384, %bb.3
194+
%130:sreg_32 = PHI %526, %bb.0, %260, %bb.3
195+
%820:agpr_32 = COPY %125
196+
%659:sreg_64 = S_MOV_B64 -1
197+
$vcc = S_AND_B64 $exec, %689, implicit-def $scc
198+
S_CBRANCH_VCCNZ %bb.3, implicit $vcc
199+
S_BRANCH %bb.2
200+
201+
bb.2:
202+
successors: %bb.3
203+
204+
%665:sreg_32 = S_OR_B32 %130, 1, implicit-def dead $scc
205+
%667:sreg_32 = S_ASHR_I32 %130, 31, implicit-def dead $scc
206+
%131:sreg_32 = S_AND_B32 killed %667, killed %665, implicit-def dead $scc
207+
%685:vreg_128_align2 = REG_SEQUENCE %125, %subreg.sub0, %815, %subreg.sub1, %815, %subreg.sub2, %815, %subreg.sub3
208+
%671:sreg_64 = REG_SEQUENCE %526, %subreg.sub0, %526, %subreg.sub1
209+
%673:vreg_64_align2 = COPY %671
210+
%675:areg_128_align2 = COPY %685
211+
%672:areg_128_align2 = V_MFMA_F32_16X16X16F16_e64 %673, %673, killed %675, 0, 0, 0, implicit $mode, implicit $exec
212+
%255:vgpr_32 = COPY %672.sub0
213+
%663:sreg_64 = S_MOV_B64 0
214+
215+
bb.3:
216+
successors: %bb.4, %bb.1
217+
218+
%260:sreg_32 = PHI %658, %bb.1, %131, %bb.2
219+
%821:agpr_32 = PHI %820, %bb.1, %672.sub0, %bb.2
220+
%389:sreg_64_xexec = PHI %659, %bb.1, %663, %bb.2
221+
%384:vgpr_32 = COPY %821
222+
%676:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %389, implicit $exec
223+
%684:sreg_64_xexec = V_CMP_NE_U32_e64 %676, 1, implicit $exec
224+
$vcc = S_AND_B64 $exec, %684, implicit-def $scc
225+
S_CBRANCH_VCCNZ %bb.1, implicit $vcc
226+
S_BRANCH %bb.4
227+
228+
bb.4:
229+
successors: %bb.5
230+
231+
bb.5:
232+
S_ENDPGM 0
233+
234+
...

0 commit comments

Comments
 (0)