Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,9 @@ class CombinerHelper {
bool matchCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal);
void applyCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal);

// Transform a G_SUB with constant on the RHS to G_ADD.
bool matchCombineSubToAdd(MachineInstr &MI, BuildFnTy &MatchInfo);

// Transform a G_SHL with an extended source into a narrower shift if
// possible.
bool matchCombineShlOfExtend(MachineInstr &MI, RegisterImmPair &MatchData);
Expand Down
13 changes: 11 additions & 2 deletions llvm/include/llvm/Target/GlobalISel/Combine.td
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,14 @@ def mul_to_shl : GICombineRule<
[{ return Helper.matchCombineMulToShl(*${mi}, ${matchinfo}); }]),
(apply [{ Helper.applyCombineMulToShl(*${mi}, ${matchinfo}); }])>;

// (sub x, C) -> (add x, -C)
def sub_to_add : GICombineRule<
(defs root:$d, build_fn_matchinfo:$matchinfo),
(match (G_CONSTANT $c, $imm),
(G_SUB $d, $op1, $c):$mi,
[{ return Helper.matchCombineSubToAdd(*${mi}, ${matchinfo}); }]),
(apply [{ Helper.applyBuildFnNoErase(*${mi}, ${matchinfo}); }])>;

// shl ([asz]ext x), y => zext (shl x, y), if shift does not overflow int
def reduce_shl_of_extend_matchdata : GIDefMatchData<"RegisterImmPair">;
def reduce_shl_of_extend : GICombineRule<
Expand Down Expand Up @@ -1903,8 +1911,9 @@ def bitreverse_shift : GICombineGroup<[bitreverse_shl, bitreverse_lshr]>;
def select_combines : GICombineGroup<[select_undef_cmp, select_constant_cmp,
select_to_iminmax, match_selects]>;

def trivial_combines : GICombineGroup<[copy_prop, mul_to_shl, add_p2i_to_ptradd,
mul_by_neg_one, idempotent_prop]>;
def trivial_combines : GICombineGroup<[copy_prop, mul_to_shl, sub_to_add,
add_p2i_to_ptradd, mul_by_neg_one,
idempotent_prop]>;

def fma_combines : GICombineGroup<[combine_fadd_fmul_to_fmad_or_fma,
combine_fadd_fpext_fmul_to_fmad_or_fma, combine_fadd_fma_fmul_to_fmad_or_fma,
Expand Down
19 changes: 19 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2041,6 +2041,25 @@ void CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
Observer.changedInstr(MI);
}

bool CombinerHelper::matchCombineSubToAdd(MachineInstr &MI,
BuildFnTy &MatchInfo) {
GSub &Sub = cast<GSub>(MI);

LLT Ty = MRI.getType(Sub.getReg(0));

APInt Imm = getIConstantFromReg(Sub.getRHSReg(), MRI);

MatchInfo = [=, &MI](MachineIRBuilder &B) {
auto NegCst = B.buildConstant(Ty, -Imm);
Observer.changingInstr(MI);
MI.setDesc(B.getTII().get(TargetOpcode::G_ADD));
MI.getOperand(2).setReg(NegCst.getReg(0));
MI.clearFlag(MachineInstr::MIFlag::NoUWrap);
Observer.changedInstr(MI);
};
return true;
}

// shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source
bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
RegisterImmPair &MatchData) {
Expand Down
9 changes: 0 additions & 9 deletions llvm/lib/Target/RISCV/RISCVGISel.td
Original file line number Diff line number Diff line change
Expand Up @@ -96,15 +96,6 @@ def gi_sh2add_uw_op : GIComplexOperandMatcher<s32, "selectSHXADD_UWOp<2>">,
def gi_sh3add_uw_op : GIComplexOperandMatcher<s32, "selectSHXADD_UWOp<3>">,
GIComplexPatternEquiv<sh3add_uw_op>;

// FIXME: Canonicalize (sub X, C) -> (add X, -C) earlier.
def : Pat<(XLenVT (sub GPR:$rs1, simm12Plus1:$imm)),
(ADDI GPR:$rs1, (NegImm simm12Plus1:$imm))>;

let Predicates = [IsRV64] in {
def : Pat<(i32 (sub GPR:$rs1, simm12Plus1i32:$imm)),
(ADDIW GPR:$rs1, (i64 (NegImm $imm)))>;
}

// Ptr type used in patterns with GlobalISelEmitter
def PtrVT : PtrValueTypeByHwMode<XLenVT, 0>;

Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AArch64/GlobalISel/combine-integer.mir
Original file line number Diff line number Diff line change
Expand Up @@ -308,8 +308,8 @@ body: |
; CHECK: liveins: $w0, $w1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %a:_(s64) = COPY $x0
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 71
; CHECK-NEXT: %sub:_(s64) = G_SUB %a, [[C]]
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -71
; CHECK-NEXT: %sub:_(s64) = G_ADD %a, [[C]]
; CHECK-NEXT: $x0 = COPY %sub(s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
%a:_(s64) = COPY $x0
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AArch64/GlobalISel/combine-narrow-binop.mir
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,8 @@ body: |
; CHECK-LABEL: name: test_combine_trunc_sub_i128
; CHECK: %lhs:_(s128) = COPY $q0
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %lhs(s128)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
; CHECK-NEXT: %small:_(s32) = G_SUB [[TRUNC]], [[C]]
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -5
; CHECK-NEXT: %small:_(s32) = G_ADD [[TRUNC]], [[C]]
; CHECK-NEXT: $w0 = COPY %small(s32)
%lhs:_(s128) = COPY $q0
%rhs:_(s128) = G_CONSTANT i128 5
Expand All @@ -103,8 +103,8 @@ body: |
bb.1:
; CHECK-LABEL: name: test_combine_trunc_sub_i128_multi_use
; CHECK: %lhs:_(s128) = COPY $q0
; CHECK-NEXT: %rhs:_(s128) = G_CONSTANT i128 5
; CHECK-NEXT: %res:_(s128) = G_SUB %lhs, %rhs
; CHECK-NEXT: [[C:%[0-9]+]]:_(s128) = G_CONSTANT i128 -5
; CHECK-NEXT: %res:_(s128) = G_ADD %lhs, [[C]]
; CHECK-NEXT: %small:_(s32) = G_TRUNC %res(s128)
; CHECK-NEXT: $q0 = COPY %res(s128)
; CHECK-NEXT: $w0 = COPY %small(s32)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ body: |
%11:_(s8) = G_CONSTANT i8 1
; CHECK: [[T3:%[0-9]+]]:_(s8) = G_TRUNC [[T0]](s32)
%7:_(s8) = G_SUB %2, %11
; CHECK: [[T4:%[0-9]+]]:_(s8) = G_SUB [[T3]], {{.*}}
; CHECK: [[T4:%[0-9]+]]:_(s8) = G_ADD [[T3]], {{.*}}
G_BR %bb.3.exit
bb.3.exit:
; CHECK: bb.3.exit:
Expand Down Expand Up @@ -197,7 +197,7 @@ body: |
%7:_(s8) = G_CONSTANT i8 1
; CHECK: [[T3:%[0-9]+]]:_(s8) = G_TRUNC [[T0]](s32)
%8:_(s8) = G_SUB %2, %7
; CHECK: [[T4:%[0-9]+]]:_(s8) = G_SUB [[T3]], {{.*}}
; CHECK: [[T4:%[0-9]+]]:_(s8) = G_ADD [[T3]], {{.*}}
G_BR %bb.3.exit
bb.3.exit:
; CHECK: bb.3.exit:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -289,8 +289,8 @@ body: |
; CHECK: liveins: $w0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %x:_(s32) = COPY $w0
; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: %op:_(s32) = G_SUB %x, %cst
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
; CHECK-NEXT: %op:_(s32) = G_ADD %x, [[C]]
; CHECK-NEXT: $w0 = COPY %op(s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
%x:_(s32) = COPY $w0
Expand Down
10 changes: 5 additions & 5 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1669,7 +1669,7 @@ define i65 @v_ashr_i65(i65 %value, i65 %amount) {
; GFX6-NEXT: v_sub_i32_e32 v8, vcc, 64, v3
; GFX6-NEXT: v_lshr_b64 v[6:7], v[0:1], v3
; GFX6-NEXT: v_lshl_b64 v[8:9], v[4:5], v8
; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 64, v3
; GFX6-NEXT: v_add_i32_e32 v2, vcc, 0xffffffc0, v3
; GFX6-NEXT: v_ashr_i64 v[10:11], v[4:5], v3
; GFX6-NEXT: v_or_b32_e32 v6, v6, v8
; GFX6-NEXT: v_ashrrev_i32_e32 v8, 31, v5
Expand All @@ -1692,7 +1692,7 @@ define i65 @v_ashr_i65(i65 %value, i65 %amount) {
; GFX8-NEXT: v_sub_u32_e32 v8, vcc, 64, v3
; GFX8-NEXT: v_lshrrev_b64 v[6:7], v3, v[0:1]
; GFX8-NEXT: v_lshlrev_b64 v[8:9], v8, v[4:5]
; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 64, v3
; GFX8-NEXT: v_add_u32_e32 v2, vcc, 0xffffffc0, v3
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is worse. We already have the patterns to undo this, but I guess they didn't import.

; GFX8-NEXT: v_ashrrev_i64 v[10:11], v3, v[4:5]
; GFX8-NEXT: v_or_b32_e32 v6, v6, v8
; GFX8-NEXT: v_ashrrev_i32_e32 v8, 31, v5
Expand All @@ -1715,7 +1715,7 @@ define i65 @v_ashr_i65(i65 %value, i65 %amount) {
; GFX9-NEXT: v_sub_u32_e32 v8, 64, v3
; GFX9-NEXT: v_lshrrev_b64 v[6:7], v3, v[0:1]
; GFX9-NEXT: v_lshlrev_b64 v[8:9], v8, v[4:5]
; GFX9-NEXT: v_subrev_u32_e32 v2, 64, v3
; GFX9-NEXT: v_add_u32_e32 v2, 0xffffffc0, v3
; GFX9-NEXT: v_ashrrev_i64 v[10:11], v3, v[4:5]
; GFX9-NEXT: v_or_b32_e32 v6, v6, v8
; GFX9-NEXT: v_ashrrev_i32_e32 v8, 31, v5
Expand All @@ -1735,7 +1735,7 @@ define i65 @v_ashr_i65(i65 %value, i65 %amount) {
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_bfe_i32 v4, v2, 0, 1
; GFX10-NEXT: v_sub_nc_u32_e32 v2, 64, v3
; GFX10-NEXT: v_subrev_nc_u32_e32 v10, 64, v3
; GFX10-NEXT: v_add_nc_u32_e32 v10, 0xffffffc0, v3
; GFX10-NEXT: v_lshrrev_b64 v[6:7], v3, v[0:1]
; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v3
; GFX10-NEXT: v_ashrrev_i32_e32 v5, 31, v4
Expand All @@ -1758,7 +1758,7 @@ define i65 @v_ashr_i65(i65 %value, i65 %amount) {
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_bfe_i32 v4, v2, 0, 1
; GFX11-NEXT: v_sub_nc_u32_e32 v2, 64, v3
; GFX11-NEXT: v_subrev_nc_u32_e32 v10, 64, v3
; GFX11-NEXT: v_add_nc_u32_e32 v10, 0xffffffc0, v3
; GFX11-NEXT: v_lshrrev_b64 v[6:7], v3, v[0:1]
; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v3
; GFX11-NEXT: v_ashrrev_i32_e32 v5, 31, v4
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1438,7 +1438,7 @@ define float @v_test_sitofp_i64_byte_to_f32(i64 %arg0) {
; SI-NEXT: v_ashrrev_i32_e32 v2, 31, v0
; SI-NEXT: v_ffbh_i32_e32 v3, 0
; SI-NEXT: v_add_i32_e32 v2, vcc, 32, v2
; SI-NEXT: v_subrev_i32_e32 v3, vcc, 1, v3
; SI-NEXT: v_add_i32_e32 v3, vcc, -1, v3
; SI-NEXT: v_mov_b32_e32 v1, 0
; SI-NEXT: v_min_u32_e32 v2, v3, v2
; SI-NEXT: v_lshl_b64 v[0:1], v[0:1], v2
Expand All @@ -1456,7 +1456,7 @@ define float @v_test_sitofp_i64_byte_to_f32(i64 %arg0) {
; VI-NEXT: v_ashrrev_i32_e32 v2, 31, v0
; VI-NEXT: v_ffbh_i32_e32 v3, 0
; VI-NEXT: v_add_u32_e32 v2, vcc, 32, v2
; VI-NEXT: v_subrev_u32_e32 v3, vcc, 1, v3
; VI-NEXT: v_add_u32_e32 v3, vcc, -1, v3
; VI-NEXT: v_mov_b32_e32 v1, 0
; VI-NEXT: v_min_u32_e32 v2, v3, v2
; VI-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
Expand Down
7 changes: 3 additions & 4 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4101,7 +4101,7 @@ define float @v_fdiv_f32_constrhs0_dynamic_25ulp(float %x) #0 {
; GFX10-NEXT: v_rcp_f32_e32 v1, 0x3f40e400
; GFX10-NEXT: v_frexp_mant_f32_e32 v2, v0
; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v0, v0
; GFX10-NEXT: v_subrev_nc_u32_e32 v0, 14, v0
; GFX10-NEXT: v_add_nc_u32_e32 v0, -14, v0
; GFX10-NEXT: v_mul_f32_e32 v1, v2, v1
; GFX10-NEXT: v_ldexp_f32 v0, v1, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
Expand All @@ -4112,10 +4112,9 @@ define float @v_fdiv_f32_constrhs0_dynamic_25ulp(float %x) #0 {
; GFX11-NEXT: v_rcp_f32_e32 v1, 0x3f40e400
; GFX11-NEXT: v_frexp_mant_f32_e32 v2, v0
; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_subrev_nc_u32_e32 v0, 14, v0
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_mul_f32_e32 v1, v2, v1
; GFX11-NEXT: v_dual_mul_f32 v1, v2, v1 :: v_dual_add_nc_u32 v0, -14, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_ldexp_f32 v0, v1, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
; EG-LABEL: v_fdiv_f32_constrhs0_dynamic_25ulp:
Expand Down
Loading
Loading