22# RUN: llc %s -mtriple=amdgcn -mcpu=gfx900 -run-pass=si-peephole-sdwa -o - | FileCheck %s
33
44---
5- name : change-compare-dest-to-vcc # carry-in def is a comparison that can write to VCC
5+ name : cndmask_b32 # can be directly converted to SDWA without a copy to VCC
6+ tracksRegLiveness : true
7+ body : |
8+ bb.0:
9+ liveins: $vgpr0, $vgpr1, $vcc
10+
11+ ; CHECK-LABEL: name: cndmask_b32
12+ ; CHECK: liveins: $vgpr0, $vgpr1, $vcc
13+ ; CHECK-NEXT: {{ $}}
14+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
15+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
16+ ; CHECK-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 16, [[COPY]], implicit $exec
17+ ; CHECK-NEXT: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 16, [[COPY1]], implicit $exec
18+ ; CHECK-NEXT: [[V_CNDMASK_B32_sdwa:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_sdwa 0, [[COPY]], 0, [[COPY1]], 0, 6, 0, 5, 5, implicit $vcc, implicit $exec
19+ ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_sdwa]]
20+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0
21+ %0:vgpr_32 = COPY $vgpr1
22+ %1:vgpr_32 = COPY $vgpr0
23+ %2:vgpr_32 = V_LSHRREV_B32_e64 16, %0, implicit $exec
24+ %3:vgpr_32 = V_LSHRREV_B32_e64 16, %1, implicit $exec
25+ %4:vgpr_32 = V_CNDMASK_B32_e32 killed %2, killed %3, implicit $exec, implicit $vcc
26+ $vgpr0 = COPY %4
27+ SI_RETURN implicit $vgpr0
28+
29+ ...
30+
31+ # For SDWA conversion of V_CNDMASK, the carry-in operand must be
32+ # available in VCC. This is achieved by introducing a COPY
33+ # instruction. Comparison instructions could be changed to VOP2 form
34+ # instead, but we prefer to use a COPY.
35+
36+ ---
37+ name : carry-compare
638tracksRegLiveness : true
739body : |
840 bb.0:
941 liveins: $vgpr0
1042
11- ; CHECK-LABEL: name: change-compare-dest-to-vcc
43+ ; CHECK-LABEL: name: carry-compare
1244 ; CHECK: liveins: $vgpr0
1345 ; CHECK-NEXT: {{ $}}
1446 ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
1547 ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
16- ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 undef [[DEF]], 1, implicit $exec
17- ; CHECK-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 16, undef [[DEF1]], implicit $exec
48+ ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[DEF]], 1, implicit $exec
49+ ; CHECK-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 16, [[DEF1]], implicit $exec
1850 ; CHECK-NEXT: $vcc = COPY killed [[V_CMP_EQ_U32_e64_]]
1951 ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
20- ; CHECK-NEXT: [[V_CNDMASK_B32_sdwa:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_sdwa 0, [[V_MOV_B32_e32_]], 0, undef [[DEF1]], 0, 6, 0, 6, 5, implicit $vcc, implicit $exec
52+ ; CHECK-NEXT: [[V_CNDMASK_B32_sdwa:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_sdwa 0, [[V_MOV_B32_e32_]], 0, [[DEF1]], 0, 6, 0, 6, 5, implicit $vcc, implicit $exec
2153 ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_sdwa]]
2254 ; CHECK-NEXT: SI_RETURN implicit $vgpr0
2355 %0:vgpr_32 = IMPLICIT_DEF
@@ -30,22 +62,22 @@ body: |
3062 ...
3163
3264---
33- name : change-compare-class-dest-to-vcc # check that non-compare instr V_CMP_CLASS is also handled
65+ name : carry-compare-class
3466tracksRegLiveness : true
3567body : |
3668 bb.0:
3769 liveins: $vgpr0
3870
39- ; CHECK-LABEL: name: change-compare-class-dest-to-vcc
71+ ; CHECK-LABEL: name: carry-compare-class
4072 ; CHECK: liveins: $vgpr0
4173 ; CHECK-NEXT: {{ $}}
4274 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF
4375 ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
44- ; CHECK-NEXT: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F32_e64 2, undef [[DEF]], 1, implicit $exec
45- ; CHECK-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 16, undef [[DEF1]], implicit $exec
76+ ; CHECK-NEXT: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_CLASS_F32_e64 2, [[DEF]], 1, implicit $exec
77+ ; CHECK-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 16, [[DEF1]], implicit $exec
4678 ; CHECK-NEXT: $vcc = COPY killed [[V_CMP_CLASS_F32_e64_]]
4779 ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
48- ; CHECK-NEXT: [[V_CNDMASK_B32_sdwa:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_sdwa 0, [[V_MOV_B32_e32_]], 0, undef [[DEF1]], 0, 6, 0, 6, 5, implicit $vcc, implicit $exec
80+ ; CHECK-NEXT: [[V_CNDMASK_B32_sdwa:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_sdwa 0, [[V_MOV_B32_e32_]], 0, [[DEF1]], 0, 6, 0, 6, 5, implicit $vcc, implicit $exec
4981 ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_sdwa]]
5082 ; CHECK-NEXT: SI_RETURN implicit $vgpr0
5183 %0:sreg_32_xm0_xexec = IMPLICIT_DEF
@@ -59,20 +91,20 @@ body: |
5991
6092...
6193---
62- name : carry-copy-non-compare # copy of carry-in necessary because def. instr. cannot be changed to write to VCC
94+ name : carry-non-compare
6395tracksRegLiveness : true
6496body : |
6597 bb.0:
6698 liveins: $vgpr0
6799
68- ; CHECK-LABEL: name: carry-copy-non-compare
100+ ; CHECK-LABEL: name: carry-non-compare
69101 ; CHECK: liveins: $vgpr0
70102 ; CHECK-NEXT: {{ $}}
71103 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
72104 ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
73- ; CHECK-NEXT: [[V_PK_MAX_F16_:%[0-9]+]]:vgpr_32 = V_PK_MAX_F16 8, undef [[DEF1]], 8, undef [[DEF1]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
105+ ; CHECK-NEXT: [[V_PK_MAX_F16_:%[0-9]+]]:vgpr_32 = V_PK_MAX_F16 8, [[DEF1]], 8, [[DEF1]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
74106 ; CHECK-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 16, [[V_PK_MAX_F16_]], implicit $exec
75- ; CHECK-NEXT: $vcc = COPY killed undef [[DEF]]
107+ ; CHECK-NEXT: $vcc = COPY killed [[DEF]]
76108 ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
77109 ; CHECK-NEXT: [[V_CNDMASK_B32_sdwa:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_sdwa 0, [[V_MOV_B32_e32_]], 0, [[V_PK_MAX_F16_]], 0, 6, 0, 6, 5, implicit $vcc, implicit $exec
78110 ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_sdwa]]
@@ -88,20 +120,20 @@ body: |
88120
89121...
90122---
91- name : carry-copy-multiuse # copy of carry-in necessary because of second use
123+ name : carry-multiuse
92124tracksRegLiveness : true
93125body : |
94126 bb.0:
95127 liveins: $vgpr0, $vgpr1
96128
97- ; CHECK-LABEL: name: carry-copy-multiuse
129+ ; CHECK-LABEL: name: carry-multiuse
98130 ; CHECK: liveins: $vgpr0, $vgpr1
99131 ; CHECK-NEXT: {{ $}}
100132 ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
101133 ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
102134 ; CHECK-NEXT: [[V_PK_MAX_F16_:%[0-9]+]]:vgpr_32 = V_PK_MAX_F16 8, undef [[DEF1]], 8, undef [[DEF1]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
103135 ; CHECK-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 16, [[V_PK_MAX_F16_]], implicit $exec
104- ; CHECK-NEXT: $vcc = COPY killed undef [[DEF]]
136+ ; CHECK-NEXT: $vcc = COPY [[DEF]]
105137 ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
106138 ; CHECK-NEXT: [[V_CNDMASK_B32_sdwa:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_sdwa 0, [[V_MOV_B32_e32_]], 0, [[V_PK_MAX_F16_]], 0, 6, 0, 6, 5, implicit $vcc, implicit $exec
107139 ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_sdwa]]
@@ -111,7 +143,7 @@ body: |
111143 %1:vgpr_32 = IMPLICIT_DEF
112144 %2:vgpr_32 = V_PK_MAX_F16 8, undef %1, 8, undef %1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
113145 %3:vgpr_32 = V_LSHRREV_B32_e64 16, %2, implicit $exec
114- %4:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, %3, killed %0, implicit $exec
146+ %4:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, %3, %0, implicit $exec
115147 $vgpr0 = COPY %4
116148 $vgpr1 = COPY %0
117149 SI_RETURN implicit $vgpr0
@@ -130,9 +162,9 @@ body: |
130162 ; CHECK-NEXT: {{ $}}
131163 ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
132164 ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
133- ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 undef [[DEF]], 1, implicit $exec
134- ; CHECK-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 16, undef [[DEF1]], implicit $exec
135- ; CHECK-NEXT: V_CMP_EQ_U32_e32 1, undef [[DEF1]], implicit-def $vcc, implicit $exec
165+ ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[DEF]], 1, implicit $exec
166+ ; CHECK-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 16, [[DEF1]], implicit $exec
167+ ; CHECK-NEXT: V_CMP_EQ_U32_e32 1, [[DEF1]], implicit-def $vcc, implicit $exec
136168 ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, [[V_LSHRREV_B32_e64_]], [[V_CMP_EQ_U32_e64_]], implicit $exec
137169 ; CHECK-NEXT: [[V_CNDMASK_B32_e32_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e32 killed [[V_LSHRREV_B32_e64_]], killed [[V_LSHRREV_B32_e64_]], implicit $vcc, implicit $exec
138170 ; CHECK-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]]
@@ -150,13 +182,13 @@ body: |
150182
151183...
152184---
153- name : cannot-shrink-source-mods # cannot shrink because of source modifiers
185+ name : cannot-shrink-with-source-mods
154186tracksRegLiveness : true
155187body : |
156188 bb.0:
157189 liveins: $vgpr0
158190
159- ; CHECK-LABEL: name: cannot-shrink-source-mods
191+ ; CHECK-LABEL: name: cannot-shrink-with-source-mods
160192 ; CHECK: liveins: $vgpr0
161193 ; CHECK-NEXT: {{ $}}
162194 ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
0 commit comments