Skip to content

Commit 453e627

Browse files
committed
Marked V_MAD_*/V_FMA_* as commutable.
1 parent 1caeb5d commit 453e627

File tree

8 files changed

+52
-40
lines changed

8 files changed

+52
-40
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2824,7 +2824,7 @@ MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
28242824
if (isOperandLegal(MI, Src1Idx, &Src0))
28252825
CommutedMI = swapRegAndNonRegOperand(MI, Src1, Src0);
28262826
} else {
2827-
CommutedMI = swapNonRegOperands(MI, Src1, Src0);
2827+
CommutedMI = swapNonRegOperands(MI, Src0, Src1);
28282828
}
28292829

28302830
if (CommutedMI) {

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -332,7 +332,9 @@ let isCommutable = 1, SchedRW = [WriteIntMul, WriteSALU] in {
332332
let FPDPRounding = 1 in {
333333
let Predicates = [Has16BitInsts, isGFX8Only] in {
334334
defm V_DIV_FIXUP_F16 : VOP3Inst <"v_div_fixup_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, AMDGPUdiv_fixup>;
335-
defm V_FMA_F16 : VOP3Inst <"v_fma_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, any_fma>;
335+
let isCommutable = 1 in {
336+
defm V_FMA_F16 : VOP3Inst <"v_fma_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, any_fma>;
337+
} // End isCommutable = 1
336338
} // End Predicates = [Has16BitInsts, isGFX8Only]
337339

338340
let SubtargetPredicate = isGFX9Plus in {
@@ -344,10 +346,14 @@ let FPDPRounding = 1 in {
344346

345347
let SubtargetPredicate = Has16BitInsts, isCommutable = 1 in {
346348

347-
defm V_MAD_U16 : VOP3Inst <"v_mad_u16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_CLAMP>>;
348-
defm V_MAD_I16 : VOP3Inst <"v_mad_i16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_CLAMP>>;
349+
let isCommutable = 1 in {
350+
defm V_MAD_U16 : VOP3Inst <"v_mad_u16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_CLAMP>>;
351+
defm V_MAD_I16 : VOP3Inst <"v_mad_i16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_CLAMP>>;
352+
} // End isCommutable = 1
349353
let FPDPRounding = 1 in {
350-
defm V_MAD_F16 : VOP3Inst <"v_mad_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, any_fmad>;
354+
let isCommutable = 1 in{
355+
defm V_MAD_F16 : VOP3Inst <"v_mad_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, any_fmad>;
356+
} // End isCommutable = 1
351357
let Uses = [MODE, M0, EXEC] in {
352358
let OtherPredicates = [isNotGFX90APlus] in
353359
// For some reason the intrinsic operands are in a different order
@@ -639,8 +645,10 @@ let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in {
639645
defm V_ADD_I16 : VOP3Inst <"v_add_i16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>>;
640646
defm V_SUB_I16 : VOP3Inst <"v_sub_i16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>>;
641647

642-
defm V_MAD_U32_U16 : VOP3Inst <"v_mad_u32_u16", VOP3_Profile<VOP_I32_I16_I16_I32, VOP3_OPSEL>>;
643-
defm V_MAD_I32_I16 : VOP3Inst <"v_mad_i32_i16", VOP3_Profile<VOP_I32_I16_I16_I32, VOP3_OPSEL>>;
648+
let isCommutable = 1 in{
649+
defm V_MAD_U32_U16 : VOP3Inst <"v_mad_u32_u16", VOP3_Profile<VOP_I32_I16_I16_I32, VOP3_OPSEL>>;
650+
defm V_MAD_I32_I16 : VOP3Inst <"v_mad_i32_i16", VOP3_Profile<VOP_I32_I16_I16_I32, VOP3_OPSEL>>;
651+
} // End isCommutable = 1
644652

645653
defm V_CVT_PKNORM_I16_F16 : VOP3Inst <"v_cvt_pknorm_i16_f16", VOP3_Profile<VOP_B32_F16_F16, VOP3_OPSEL>>;
646654
defm V_CVT_PKNORM_U16_F16 : VOP3Inst <"v_cvt_pknorm_u16_f16", VOP3_Profile<VOP_B32_F16_F16, VOP3_OPSEL>>;
@@ -871,7 +879,9 @@ let SubtargetPredicate = isGFX10Plus in {
871879
def : PermlanePat<int_amdgcn_permlanex16, V_PERMLANEX16_B32_e64, vt>;
872880
}
873881

874-
defm V_ADD_NC_U16 : VOP3Inst <"v_add_nc_u16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>, add>;
882+
let isCommutable = 1 in {
883+
defm V_ADD_NC_U16 : VOP3Inst <"v_add_nc_u16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>, add>;
884+
}
875885
defm V_SUB_NC_U16 : VOP3Inst <"v_sub_nc_u16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>, sub>;
876886

877887
def : OpSelBinOpClampPat<uaddsat, V_ADD_NC_U16_e64>;

llvm/test/CodeGen/AMDGPU/cmp_shrink.mir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,6 @@ name: not_shrink_icmp
77
body: |
88
bb.0:
99
; GCN-LABEL: name: not_shrink_icmp
10-
; GCN: S_CMP_GT_I32 1, 65, implicit-def $scc
10+
; GCN: S_CMP_LT_I32 65, 1, implicit-def $scc
1111
S_CMP_GT_I32 1, 65, implicit-def $scc
1212
...
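(file path header missing from this capture — presumably llvm/test/CodeGen/AMDGPU/commute-op-sel.mir; verify against the commit)

Lines changed: 8 additions & 6 deletions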
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,19 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
12
# RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -run-pass=machine-cse -verify-machineinstrs %s -o - 2>&1 | FileCheck --check-prefix=GCN %s
23

3-
# GCN-LABEL: name: test_machine_cse_op_sel
4-
# GCN: %2:vgpr_32 = V_ADD_NC_U16_e64 0, %0, 0, %1, 1, 0, implicit $mode, implicit $exec
5-
# GCN: %3:vgpr_32 = V_ADD_NC_U16_e64 0, %1, 0, %0, 1, 0, implicit $mode, implicit $exec
6-
# GCN: DS_WRITE2_B32_gfx9 undef %4:vgpr_32, %2, %3, 0, 1, 0, implicit $exec
74
---
85
name: test_machine_cse_op_sel
96
body: |
107
bb.0:
8+
; GCN-LABEL: name: test_machine_cse_op_sel
9+
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
10+
; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
11+
; GCN-NEXT: [[V_ADD_NC_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_NC_U16_e64 0, [[DEF]], 0, [[DEF1]], 0, 0, implicit $mode, implicit $exec
12+
; GCN-NEXT: DS_WRITE2_B32_gfx9 undef %4:vgpr_32, [[V_ADD_NC_U16_e64_]], [[V_ADD_NC_U16_e64_]], 0, 1, 0, implicit $exec
1113
%0:vgpr_32 = IMPLICIT_DEF
1214
%1:vgpr_32 = IMPLICIT_DEF
13-
%2:vgpr_32 = V_ADD_NC_U16_e64 0, %0, 0, %1, 1, 0, implicit $mode, implicit $exec
14-
%3:vgpr_32 = V_ADD_NC_U16_e64 0, %1, 0, %0, 1, 0, implicit $mode, implicit $exec
15+
%2:vgpr_32 = V_ADD_NC_U16_e64 0, %0, 0, %1, 0, 0, implicit $mode, implicit $exec
16+
%3:vgpr_32 = V_ADD_NC_U16_e64 0, %1, 0, %0, 0, 0, implicit $mode, implicit $exec
1517
DS_WRITE2_B32_gfx9 undef %4:vgpr_32, %2, %3, 0, 1, 0, implicit $exec
1618
...
1719

llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1669,8 +1669,8 @@ define amdgpu_kernel void @load_v4i8_to_v4f32_2_uses(ptr addrspace(1) noalias %o
16691669
; GFX10-NEXT: v_or_b32_sdwa v2, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
16701670
; GFX10-NEXT: v_mov_b32_e32 v4, 0
16711671
; GFX10-NEXT: v_cvt_f32_ubyte3_e32 v3, v0
1672-
; GFX10-NEXT: v_add_nc_u16 v1, v1, 0x900
1673-
; GFX10-NEXT: v_add_nc_u16 v5, v2, 0x900
1672+
; GFX10-NEXT: v_add_nc_u16 v1, 0x900, v1
1673+
; GFX10-NEXT: v_add_nc_u16 v5, 0x900, v2
16741674
; GFX10-NEXT: v_cvt_f32_ubyte2_e32 v2, v0
16751675
; GFX10-NEXT: v_lshlrev_b32_e32 v6, 16, v1
16761676
; GFX10-NEXT: v_cvt_f32_ubyte1_e32 v1, v0
@@ -1733,10 +1733,10 @@ define amdgpu_kernel void @load_v4i8_to_v4f32_2_uses(ptr addrspace(1) noalias %o
17331733
; GFX11-NEXT: v_mov_b32_e32 v4, 0
17341734
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
17351735
; GFX11-NEXT: v_or_b32_e32 v1, v1, v3
1736-
; GFX11-NEXT: v_add_nc_u16 v2, v2, 0x900
1736+
; GFX11-NEXT: v_add_nc_u16 v2, 0x900, v2
17371737
; GFX11-NEXT: v_cvt_f32_ubyte3_e32 v3, v0
17381738
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
1739-
; GFX11-NEXT: v_add_nc_u16 v1, v1, 0x900
1739+
; GFX11-NEXT: v_add_nc_u16 v1, 0x900, v1
17401740
; GFX11-NEXT: v_and_b32_e32 v5, 0xffff, v2
17411741
; GFX11-NEXT: v_cvt_f32_ubyte2_e32 v2, v0
17421742
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_3)

llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -920,13 +920,13 @@ body: |
920920
; MUBUFW64-NEXT: {{ $}}
921921
; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
922922
; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
923-
; MUBUFW64-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
924-
; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 12, killed $vgpr1, implicit-def $vcc, implicit $exec
923+
; MUBUFW64-NEXT: $vgpr1 = V_MOV_B32_e32 12, implicit $exec
924+
; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 0, killed $vgpr1, implicit-def $vcc, implicit $exec
925925
; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
926926
;
927927
; FLATSCRW64-LABEL: name: v_add_co_u32_e32__inline_imm__fi_offset0__kernel__live_vcc
928-
; FLATSCRW64: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
929-
; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 12, killed $vgpr1, implicit-def $vcc, implicit $exec
928+
; FLATSCRW64: $vgpr1 = V_MOV_B32_e32 12, implicit $exec
929+
; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 0, killed $vgpr1, implicit-def $vcc, implicit $exec
930930
; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0, implicit $vcc
931931
renamable $vgpr0 = V_ADD_CO_U32_e32 12, %stack.0, implicit-def $vcc, implicit $exec
932932
SI_RETURN implicit $vgpr0, implicit $vcc

llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.bf16.ll

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -399,7 +399,7 @@ define i1 @posnormal_bf16(bfloat %x) nounwind {
399399
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
400400
; GFX10CHECK-NEXT: v_and_b32_e32 v1, 0x7fff, v0
401401
; GFX10CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, -1, v0
402-
; GFX10CHECK-NEXT: v_add_nc_u16 v1, v1, 0xff80
402+
; GFX10CHECK-NEXT: v_add_nc_u16 v1, 0xff80, v1
403403
; GFX10CHECK-NEXT: v_cmp_gt_u16_e64 s4, 0x7f00, v1
404404
; GFX10CHECK-NEXT: s_and_b32 s4, s4, vcc_lo
405405
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
@@ -410,7 +410,7 @@ define i1 @posnormal_bf16(bfloat %x) nounwind {
410410
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
411411
; GFX11CHECK-NEXT: v_and_b32_e32 v1, 0x7fff, v0
412412
; GFX11CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, -1, v0
413-
; GFX11CHECK-NEXT: v_add_nc_u16 v1, v1, 0xff80
413+
; GFX11CHECK-NEXT: v_add_nc_u16 v1, 0xff80, v1
414414
; GFX11CHECK-NEXT: v_cmp_gt_u16_e64 s0, 0x7f00, v1
415415
; GFX11CHECK-NEXT: s_and_b32 s0, s0, vcc_lo
416416
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
@@ -464,7 +464,7 @@ define i1 @negnormal_bf16(bfloat %x) nounwind {
464464
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
465465
; GFX10CHECK-NEXT: v_and_b32_e32 v1, 0x7fff, v0
466466
; GFX10CHECK-NEXT: v_cmp_gt_i16_e32 vcc_lo, 0, v0
467-
; GFX10CHECK-NEXT: v_add_nc_u16 v1, v1, 0xff80
467+
; GFX10CHECK-NEXT: v_add_nc_u16 v1, 0xff80, v1
468468
; GFX10CHECK-NEXT: v_cmp_gt_u16_e64 s4, 0x7f00, v1
469469
; GFX10CHECK-NEXT: s_and_b32 s4, s4, vcc_lo
470470
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
@@ -475,7 +475,7 @@ define i1 @negnormal_bf16(bfloat %x) nounwind {
475475
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
476476
; GFX11CHECK-NEXT: v_and_b32_e32 v1, 0x7fff, v0
477477
; GFX11CHECK-NEXT: v_cmp_gt_i16_e32 vcc_lo, 0, v0
478-
; GFX11CHECK-NEXT: v_add_nc_u16 v1, v1, 0xff80
478+
; GFX11CHECK-NEXT: v_add_nc_u16 v1, 0xff80, v1
479479
; GFX11CHECK-NEXT: v_cmp_gt_u16_e64 s0, 0x7f00, v1
480480
; GFX11CHECK-NEXT: s_and_b32 s0, s0, vcc_lo
481481
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
@@ -1350,7 +1350,7 @@ define i1 @isnormal_bf16(bfloat %x) {
13501350
; GFX10CHECK: ; %bb.0:
13511351
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13521352
; GFX10CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
1353-
; GFX10CHECK-NEXT: v_add_nc_u16 v0, v0, 0xff80
1353+
; GFX10CHECK-NEXT: v_add_nc_u16 v0, 0xff80, v0
13541354
; GFX10CHECK-NEXT: v_cmp_gt_u16_e32 vcc_lo, 0x7f00, v0
13551355
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
13561356
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
@@ -1359,7 +1359,7 @@ define i1 @isnormal_bf16(bfloat %x) {
13591359
; GFX11CHECK: ; %bb.0:
13601360
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13611361
; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
1362-
; GFX11CHECK-NEXT: v_add_nc_u16 v0, v0, 0xff80
1362+
; GFX11CHECK-NEXT: v_add_nc_u16 v0, 0xff80, v0
13631363
; GFX11CHECK-NEXT: v_cmp_gt_u16_e32 vcc_lo, 0x7f00, v0
13641364
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
13651365
; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
@@ -1404,7 +1404,7 @@ define i1 @not_isnormal_bf16(bfloat %x) {
14041404
; GFX10CHECK: ; %bb.0:
14051405
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14061406
; GFX10CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
1407-
; GFX10CHECK-NEXT: v_add_nc_u16 v0, v0, 0xff80
1407+
; GFX10CHECK-NEXT: v_add_nc_u16 v0, 0xff80, v0
14081408
; GFX10CHECK-NEXT: v_cmp_lt_u16_e32 vcc_lo, 0x7eff, v0
14091409
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
14101410
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
@@ -1413,7 +1413,7 @@ define i1 @not_isnormal_bf16(bfloat %x) {
14131413
; GFX11CHECK: ; %bb.0:
14141414
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14151415
; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
1416-
; GFX11CHECK-NEXT: v_add_nc_u16 v0, v0, 0xff80
1416+
; GFX11CHECK-NEXT: v_add_nc_u16 v0, 0xff80, v0
14171417
; GFX11CHECK-NEXT: v_cmp_lt_u16_e32 vcc_lo, 0x7eff, v0
14181418
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
14191419
; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
@@ -1466,7 +1466,7 @@ define i1 @not_is_plus_normal_bf16(bfloat %x) {
14661466
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14671467
; GFX10CHECK-NEXT: v_and_b32_e32 v1, 0x7fff, v0
14681468
; GFX10CHECK-NEXT: v_cmp_gt_i16_e32 vcc_lo, 0, v0
1469-
; GFX10CHECK-NEXT: v_add_nc_u16 v1, v1, 0xff80
1469+
; GFX10CHECK-NEXT: v_add_nc_u16 v1, 0xff80, v1
14701470
; GFX10CHECK-NEXT: v_cmp_lt_u16_e64 s4, 0x7eff, v1
14711471
; GFX10CHECK-NEXT: s_or_b32 s4, s4, vcc_lo
14721472
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
@@ -1477,7 +1477,7 @@ define i1 @not_is_plus_normal_bf16(bfloat %x) {
14771477
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14781478
; GFX11CHECK-NEXT: v_and_b32_e32 v1, 0x7fff, v0
14791479
; GFX11CHECK-NEXT: v_cmp_gt_i16_e32 vcc_lo, 0, v0
1480-
; GFX11CHECK-NEXT: v_add_nc_u16 v1, v1, 0xff80
1480+
; GFX11CHECK-NEXT: v_add_nc_u16 v1, 0xff80, v1
14811481
; GFX11CHECK-NEXT: v_cmp_lt_u16_e64 s0, 0x7eff, v1
14821482
; GFX11CHECK-NEXT: s_or_b32 s0, s0, vcc_lo
14831483
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
@@ -1531,7 +1531,7 @@ define i1 @not_is_neg_normal_bf16(bfloat %x) {
15311531
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15321532
; GFX10CHECK-NEXT: v_and_b32_e32 v1, 0x7fff, v0
15331533
; GFX10CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, -1, v0
1534-
; GFX10CHECK-NEXT: v_add_nc_u16 v1, v1, 0xff80
1534+
; GFX10CHECK-NEXT: v_add_nc_u16 v1, 0xff80, v1
15351535
; GFX10CHECK-NEXT: v_cmp_lt_u16_e64 s4, 0x7eff, v1
15361536
; GFX10CHECK-NEXT: s_or_b32 s4, s4, vcc_lo
15371537
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4
@@ -1542,7 +1542,7 @@ define i1 @not_is_neg_normal_bf16(bfloat %x) {
15421542
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15431543
; GFX11CHECK-NEXT: v_and_b32_e32 v1, 0x7fff, v0
15441544
; GFX11CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, -1, v0
1545-
; GFX11CHECK-NEXT: v_add_nc_u16 v1, v1, 0xff80
1545+
; GFX11CHECK-NEXT: v_add_nc_u16 v1, 0xff80, v1
15461546
; GFX11CHECK-NEXT: v_cmp_lt_u16_e64 s0, 0x7eff, v1
15471547
; GFX11CHECK-NEXT: s_or_b32 s0, s0, vcc_lo
15481548
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
@@ -2571,7 +2571,7 @@ define i1 @not_iszero_or_qnan_bf16(bfloat %x) {
25712571
; GFX10CHECK-NEXT: v_cmp_gt_i16_e32 vcc_lo, 0x7fc0, v0
25722572
; GFX10CHECK-NEXT: v_cmp_lt_i16_e64 s4, 0x7f80, v0
25732573
; GFX10CHECK-NEXT: v_cmp_eq_u16_e64 s5, 0x7f80, v0
2574-
; GFX10CHECK-NEXT: v_add_nc_u16 v0, v0, 0xff80
2574+
; GFX10CHECK-NEXT: v_add_nc_u16 v0, 0xff80, v0
25752575
; GFX10CHECK-NEXT: v_cmp_gt_u16_e64 s6, 0x7f, v1
25762576
; GFX10CHECK-NEXT: s_and_b32 s4, s4, vcc_lo
25772577
; GFX10CHECK-NEXT: v_cmp_gt_u16_e32 vcc_lo, 0x7f00, v0
@@ -2589,7 +2589,7 @@ define i1 @not_iszero_or_qnan_bf16(bfloat %x) {
25892589
; GFX11CHECK-NEXT: v_cmp_gt_i16_e32 vcc_lo, 0x7fc0, v0
25902590
; GFX11CHECK-NEXT: v_cmp_lt_i16_e64 s0, 0x7f80, v0
25912591
; GFX11CHECK-NEXT: v_cmp_eq_u16_e64 s1, 0x7f80, v0
2592-
; GFX11CHECK-NEXT: v_add_nc_u16 v0, v0, 0xff80
2592+
; GFX11CHECK-NEXT: v_add_nc_u16 v0, 0xff80, v0
25932593
; GFX11CHECK-NEXT: v_cmp_gt_u16_e64 s2, 0x7f, v1
25942594
; GFX11CHECK-NEXT: s_and_b32 s0, s0, vcc_lo
25952595
; GFX11CHECK-NEXT: v_cmp_gt_u16_e32 vcc_lo, 0x7f00, v0
@@ -2671,7 +2671,7 @@ define i1 @not_iszero_or_snan_bf16(bfloat %x) {
26712671
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
26722672
; GFX10CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
26732673
; GFX10CHECK-NEXT: v_add_nc_u16 v1, v0, -1
2674-
; GFX10CHECK-NEXT: v_add_nc_u16 v2, v0, 0xff80
2674+
; GFX10CHECK-NEXT: v_add_nc_u16 v2, 0xff80, v0
26752675
; GFX10CHECK-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x7f80, v0
26762676
; GFX10CHECK-NEXT: v_cmp_lt_i16_e64 s5, 0x7fbf, v0
26772677
; GFX10CHECK-NEXT: v_cmp_gt_u16_e64 s4, 0x7f, v1
@@ -2687,7 +2687,7 @@ define i1 @not_iszero_or_snan_bf16(bfloat %x) {
26872687
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
26882688
; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
26892689
; GFX11CHECK-NEXT: v_add_nc_u16 v1, v0, -1
2690-
; GFX11CHECK-NEXT: v_add_nc_u16 v2, v0, 0xff80
2690+
; GFX11CHECK-NEXT: v_add_nc_u16 v2, 0xff80, v0
26912691
; GFX11CHECK-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x7f80, v0
26922692
; GFX11CHECK-NEXT: v_cmp_lt_i16_e64 s1, 0x7fbf, v0
26932693
; GFX11CHECK-NEXT: v_cmp_gt_u16_e64 s0, 0x7f, v1

llvm/test/CodeGen/AMDGPU/widen-smrd-loads.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -409,7 +409,7 @@ define amdgpu_kernel void @no_widen_i16_constant_divergent_load(ptr addrspace(4)
409409
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
410410
; GFX11-NEXT: global_load_u16 v0, v0, s[0:1]
411411
; GFX11-NEXT: s_waitcnt vmcnt(0)
412-
; GFX11-NEXT: v_add_nc_u16 v2, v0, 0x3e7
412+
; GFX11-NEXT: v_add_nc_u16 v2, 0x3e7, v0
413413
; GFX11-NEXT: v_mov_b32_e32 v0, 0
414414
; GFX11-NEXT: v_mov_b32_e32 v1, 0
415415
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)

0 commit comments

Comments
 (0)