|
1 | 1 | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
2 | 2 |
|
3 | | -; RUN: llc -mtriple=amdgcn -amdgpu-codegenprepare-break-large-phis-threshold=4096 < %s | FileCheck -check-prefix=GCN %s |
4 | | -; RUN: llc -mtriple=amdgcn -mcpu=tonga -amdgpu-codegenprepare-break-large-phis-threshold=4096 < %s | FileCheck -check-prefixes=VI %s |
5 | | -; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -amdgpu-codegenprepare-break-large-phis-threshold=4096 < %s | FileCheck -check-prefixes=GFX9 %s |
6 | | -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-codegenprepare-break-large-phis-threshold=4096 < %s | FileCheck -check-prefixes=GFX11 %s |
| 3 | +; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s |
| 4 | +; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=VI %s |
| 5 | +; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s |
| 6 | +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s |
7 | 7 |
|
8 | 8 | define <4 x float> @bitcast_v4i32_to_v4f32(<4 x i32> %a, i32 %b) { |
9 | 9 | ; GCN-LABEL: bitcast_v4i32_to_v4f32: |
@@ -312,9 +312,10 @@ define <4 x i32> @bitcast_v2i64_to_v4i32(<2 x i64> %a, i32 %b) { |
312 | 312 | ; GFX11-NEXT: s_and_not1_saveexec_b32 s0, s0 |
313 | 313 | ; GFX11-NEXT: ; %bb.1: ; %cmp.true |
314 | 314 | ; GFX11-NEXT: v_add_co_u32 v2, vcc_lo, v2, 3 |
315 | | -; GFX11-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo |
| 315 | +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) |
| 316 | +; GFX11-NEXT: v_add_co_ci_u32_e64 v3, null, 0, v3, vcc_lo |
316 | 317 | ; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, 3 |
317 | | -; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| 318 | +; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
318 | 319 | ; GFX11-NEXT: ; %bb.2: ; %end |
319 | 320 | ; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s0 |
320 | 321 | ; GFX11-NEXT: s_setpc_b64 s[30:31] |
@@ -2309,9 +2310,10 @@ define <4 x float> @bitcast_v2i64_to_v4f32(<2 x i64> %a, i32 %b) { |
2309 | 2310 | ; GFX11-NEXT: s_and_not1_saveexec_b32 s0, s0 |
2310 | 2311 | ; GFX11-NEXT: ; %bb.1: ; %cmp.true |
2311 | 2312 | ; GFX11-NEXT: v_add_co_u32 v2, vcc_lo, v2, 3 |
2312 | | -; GFX11-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo |
| 2313 | +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) |
| 2314 | +; GFX11-NEXT: v_add_co_ci_u32_e64 v3, null, 0, v3, vcc_lo |
2313 | 2315 | ; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, 3 |
2314 | | -; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| 2316 | +; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
2315 | 2317 | ; GFX11-NEXT: ; %bb.2: ; %end |
2316 | 2318 | ; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s0 |
2317 | 2319 | ; GFX11-NEXT: s_setpc_b64 s[30:31] |
@@ -4215,9 +4217,10 @@ define <2 x double> @bitcast_v2i64_to_v2f64(<2 x i64> %a, i32 %b) { |
4215 | 4217 | ; GFX11-NEXT: s_and_not1_saveexec_b32 s0, s0 |
4216 | 4218 | ; GFX11-NEXT: ; %bb.1: ; %cmp.true |
4217 | 4219 | ; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, 3 |
4218 | | -; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| 4220 | +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) |
| 4221 | +; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
4219 | 4222 | ; GFX11-NEXT: v_add_co_u32 v2, vcc_lo, v2, 3 |
4220 | | -; GFX11-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo |
| 4223 | +; GFX11-NEXT: v_add_co_ci_u32_e64 v3, null, 0, v3, vcc_lo |
4221 | 4224 | ; GFX11-NEXT: ; %bb.2: ; %end |
4222 | 4225 | ; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s0 |
4223 | 4226 | ; GFX11-NEXT: s_setpc_b64 s[30:31] |
@@ -4395,9 +4398,10 @@ define <8 x i16> @bitcast_v2i64_to_v8i16(<2 x i64> %a, i32 %b) { |
4395 | 4398 | ; GFX11-NEXT: s_and_not1_saveexec_b32 s0, s0 |
4396 | 4399 | ; GFX11-NEXT: ; %bb.1: ; %cmp.true |
4397 | 4400 | ; GFX11-NEXT: v_add_co_u32 v2, vcc_lo, v2, 3 |
4398 | | -; GFX11-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo |
| 4401 | +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) |
| 4402 | +; GFX11-NEXT: v_add_co_ci_u32_e64 v3, null, 0, v3, vcc_lo |
4399 | 4403 | ; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, 3 |
4400 | | -; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| 4404 | +; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
4401 | 4405 | ; GFX11-NEXT: ; %bb.2: ; %end |
4402 | 4406 | ; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s0 |
4403 | 4407 | ; GFX11-NEXT: s_setpc_b64 s[30:31] |
@@ -4660,9 +4664,10 @@ define <8 x half> @bitcast_v2i64_to_v8f16(<2 x i64> %a, i32 %b) { |
4660 | 4664 | ; GFX11-NEXT: s_and_not1_saveexec_b32 s0, s0 |
4661 | 4665 | ; GFX11-NEXT: ; %bb.1: ; %cmp.true |
4662 | 4666 | ; GFX11-NEXT: v_add_co_u32 v2, vcc_lo, v2, 3 |
4663 | | -; GFX11-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo |
| 4667 | +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) |
| 4668 | +; GFX11-NEXT: v_add_co_ci_u32_e64 v3, null, 0, v3, vcc_lo |
4664 | 4669 | ; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, 3 |
4665 | | -; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| 4670 | +; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
4666 | 4671 | ; GFX11-NEXT: ; %bb.2: ; %end |
4667 | 4672 | ; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s0 |
4668 | 4673 | ; GFX11-NEXT: s_setpc_b64 s[30:31] |
@@ -4936,9 +4941,10 @@ define <8 x bfloat> @bitcast_v2i64_to_v8bf16(<2 x i64> %a, i32 %b) { |
4936 | 4941 | ; GFX11-NEXT: s_and_not1_saveexec_b32 s0, s0 |
4937 | 4942 | ; GFX11-NEXT: ; %bb.1: ; %cmp.true |
4938 | 4943 | ; GFX11-NEXT: v_add_co_u32 v2, vcc_lo, v2, 3 |
4939 | | -; GFX11-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo |
| 4944 | +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) |
| 4945 | +; GFX11-NEXT: v_add_co_ci_u32_e64 v3, null, 0, v3, vcc_lo |
4940 | 4946 | ; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, 3 |
4941 | | -; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo |
| 4947 | +; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo |
4942 | 4948 | ; GFX11-NEXT: ; %bb.2: ; %end |
4943 | 4949 | ; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s0 |
4944 | 4950 | ; GFX11-NEXT: s_setpc_b64 s[30:31] |
@@ -5512,9 +5518,10 @@ define <16 x i8> @bitcast_v2i64_to_v16i8(<2 x i64> %a, i32 %b) { |
5512 | 5518 | ; GFX11-NEXT: s_cbranch_execz .LBB34_4 |
5513 | 5519 | ; GFX11-NEXT: ; %bb.3: ; %cmp.true |
5514 | 5520 | ; GFX11-NEXT: v_add_co_u32 v16, vcc_lo, v16, 3 |
5515 | | -; GFX11-NEXT: v_add_co_ci_u32_e32 v17, vcc_lo, 0, v17, vcc_lo |
| 5521 | +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) |
| 5522 | +; GFX11-NEXT: v_add_co_ci_u32_e64 v17, null, 0, v17, vcc_lo |
5516 | 5523 | ; GFX11-NEXT: v_add_co_u32 v18, vcc_lo, v18, 3 |
5517 | | -; GFX11-NEXT: v_add_co_ci_u32_e32 v19, vcc_lo, 0, v19, vcc_lo |
| 5524 | +; GFX11-NEXT: v_add_co_ci_u32_e64 v19, null, 0, v19, vcc_lo |
5518 | 5525 | ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4) |
5519 | 5526 | ; GFX11-NEXT: v_lshrrev_b64 v[11:12], 24, v[16:17] |
5520 | 5527 | ; GFX11-NEXT: v_lshrrev_b32_e32 v15, 24, v17 |
|
0 commit comments