|
3 | 3 | ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx801 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s
|
4 | 4 | ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
|
5 | 5 | ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
|
6 |
| -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s |
| 6 | +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11,GFX11-TRUE16 %s |
| 7 | +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11,GFX11-FAKE16 %s |
7 | 8 | ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12 %s
|
8 | 9 |
|
9 | 10 | define amdgpu_ps i16 @s_mul_i16(i16 inreg %num, i16 inreg %den) {
|
@@ -65,11 +66,23 @@ define i16 @v_mul_i16(i16 %num, i16 %den) {
|
65 | 66 | ; GFX9-NEXT: v_mul_lo_u16_e32 v0, v0, v1
|
66 | 67 | ; GFX9-NEXT: s_setpc_b64 s[30:31]
|
67 | 68 | ;
|
68 |
| -; GFX10PLUS-LABEL: v_mul_i16: |
69 |
| -; GFX10PLUS: ; %bb.0: |
70 |
| -; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
71 |
| -; GFX10PLUS-NEXT: v_mul_lo_u16 v0, v0, v1 |
72 |
| -; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] |
| 69 | +; GFX10-LABEL: v_mul_i16: |
| 70 | +; GFX10: ; %bb.0: |
| 71 | +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 72 | +; GFX10-NEXT: v_mul_lo_u16 v0, v0, v1 |
| 73 | +; GFX10-NEXT: s_setpc_b64 s[30:31] |
| 74 | +; |
| 75 | +; GFX11-TRUE16-LABEL: v_mul_i16: |
| 76 | +; GFX11-TRUE16: ; %bb.0: |
| 77 | +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 78 | +; GFX11-TRUE16-NEXT: v_mul_lo_u16 v0.l, v0.l, v1.l |
| 79 | +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] |
| 80 | +; |
| 81 | +; GFX11-FAKE16-LABEL: v_mul_i16: |
| 82 | +; GFX11-FAKE16: ; %bb.0: |
| 83 | +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 84 | +; GFX11-FAKE16-NEXT: v_mul_lo_u16 v0, v0, v1 |
| 85 | +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] |
73 | 86 | ;
|
74 | 87 | ; GFX12-LABEL: v_mul_i16:
|
75 | 88 | ; GFX12: ; %bb.0:
|
@@ -147,12 +160,26 @@ define zeroext i16 @v_mul_i16_zeroext(i16 zeroext %num, i16 zeroext %den) {
|
147 | 160 | ; GFX9-NEXT: v_mul_lo_u16_e32 v0, v0, v1
|
148 | 161 | ; GFX9-NEXT: s_setpc_b64 s[30:31]
|
149 | 162 | ;
|
150 |
| -; GFX10PLUS-LABEL: v_mul_i16_zeroext: |
151 |
| -; GFX10PLUS: ; %bb.0: |
152 |
| -; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
153 |
| -; GFX10PLUS-NEXT: v_mul_lo_u16 v0, v0, v1 |
154 |
| -; GFX10PLUS-NEXT: v_and_b32_e32 v0, 0xffff, v0 |
155 |
| -; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] |
| 163 | +; GFX10-LABEL: v_mul_i16_zeroext: |
| 164 | +; GFX10: ; %bb.0: |
| 165 | +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 166 | +; GFX10-NEXT: v_mul_lo_u16 v0, v0, v1 |
| 167 | +; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0 |
| 168 | +; GFX10-NEXT: s_setpc_b64 s[30:31] |
| 169 | +; |
| 170 | +; GFX11-TRUE16-LABEL: v_mul_i16_zeroext: |
| 171 | +; GFX11-TRUE16: ; %bb.0: |
| 172 | +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 173 | +; GFX11-TRUE16-NEXT: v_mul_lo_u16 v0.l, v0.l, v1.l |
| 174 | +; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 |
| 175 | +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] |
| 176 | +; |
| 177 | +; GFX11-FAKE16-LABEL: v_mul_i16_zeroext: |
| 178 | +; GFX11-FAKE16: ; %bb.0: |
| 179 | +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 180 | +; GFX11-FAKE16-NEXT: v_mul_lo_u16 v0, v0, v1 |
| 181 | +; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 |
| 182 | +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] |
156 | 183 | ;
|
157 | 184 | ; GFX12-LABEL: v_mul_i16_zeroext:
|
158 | 185 | ; GFX12: ; %bb.0:
|
@@ -236,12 +263,26 @@ define signext i16 @v_mul_i16_signext(i16 signext %num, i16 signext %den) {
|
236 | 263 | ; GFX9-NEXT: v_bfe_i32 v0, v0, 0, 16
|
237 | 264 | ; GFX9-NEXT: s_setpc_b64 s[30:31]
|
238 | 265 | ;
|
239 |
| -; GFX10PLUS-LABEL: v_mul_i16_signext: |
240 |
| -; GFX10PLUS: ; %bb.0: |
241 |
| -; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
242 |
| -; GFX10PLUS-NEXT: v_mul_lo_u16 v0, v0, v1 |
243 |
| -; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 16 |
244 |
| -; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] |
| 266 | +; GFX10-LABEL: v_mul_i16_signext: |
| 267 | +; GFX10: ; %bb.0: |
| 268 | +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 269 | +; GFX10-NEXT: v_mul_lo_u16 v0, v0, v1 |
| 270 | +; GFX10-NEXT: v_bfe_i32 v0, v0, 0, 16 |
| 271 | +; GFX10-NEXT: s_setpc_b64 s[30:31] |
| 272 | +; |
| 273 | +; GFX11-TRUE16-LABEL: v_mul_i16_signext: |
| 274 | +; GFX11-TRUE16: ; %bb.0: |
| 275 | +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 276 | +; GFX11-TRUE16-NEXT: v_mul_lo_u16 v0.l, v0.l, v1.l |
| 277 | +; GFX11-TRUE16-NEXT: v_bfe_i32 v0, v0, 0, 16 |
| 278 | +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] |
| 279 | +; |
| 280 | +; GFX11-FAKE16-LABEL: v_mul_i16_signext: |
| 281 | +; GFX11-FAKE16: ; %bb.0: |
| 282 | +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 283 | +; GFX11-FAKE16-NEXT: v_mul_lo_u16 v0, v0, v1 |
| 284 | +; GFX11-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 16 |
| 285 | +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] |
245 | 286 | ;
|
246 | 287 | ; GFX12-LABEL: v_mul_i16_signext:
|
247 | 288 | ; GFX12: ; %bb.0:
|
|
0 commit comments