Skip to content

Commit 716eee0

Browse files
committed
resolved comments
1 parent 74bcf6c commit 716eee0

File tree

9 files changed

+488
-337
lines changed

9 files changed

+488
-337
lines changed

llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -81,9 +81,11 @@ struct ShiftOfShiftedLogic {
8181
uint64_t ValSum;
8282
};
8383

84-
struct ShiftOfTruncOfShift {
84+
struct LshrOfTruncOfLshr {
85+
bool Mask = false;
86+
APInt MaskVal;
8587
Register Src;
86-
uint64_t ShiftAmt;
88+
APInt ShiftAmt;
8789
LLT ShiftAmtTy;
8890
LLT InnerShiftTy;
8991
};
@@ -345,11 +347,10 @@ class CombinerHelper {
345347

346348
bool matchCommuteShift(MachineInstr &MI, BuildFnTy &MatchInfo) const;
347349

348-
/// Fold (shift (trunc (shift x, C1)), C2) -> trunc (shift x, (C1 + C2))
349-
bool matchShiftOfTruncOfShift(MachineInstr &MI,
350-
ShiftOfTruncOfShift &MatchInfo) const;
351-
void applyShiftOfTruncOfShift(MachineInstr &MI,
352-
ShiftOfTruncOfShift &MatchInfo) const;
350+
/// Fold (lshr (trunc (lshr x, C1)), C2) -> trunc (lshr x, (C1 + C2))
351+
bool matchLshrOfTruncOfLshr(MachineInstr &MI, LshrOfTruncOfLshr &MatchInfo, MachineInstr &ShiftMI, MachineInstr &TruncMI) const;
352+
void applyLshrOfTruncOfLshr(MachineInstr &MI,
353+
LshrOfTruncOfLshr &MatchInfo) const;
353354

354355
/// Transform a multiply by a power-of-2 value to a left shift.
355356
bool matchCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal) const;

llvm/include/llvm/Target/GlobalISel/Combine.td

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -396,18 +396,21 @@ def commute_shift : GICombineRule<
396396
[{ return Helper.matchCommuteShift(*${d}, ${matchinfo}); }]),
397397
(apply [{ Helper.applyBuildFn(*${d}, ${matchinfo}); }])>;
398398

399-
// Fold (shift (trunc (shift x, C1)), C2) -> trunc (shift x, (C1 + C2))
400-
def shift_right_op : GICombinePatFrag<
401-
(outs root:$dst), (ins),
402-
!foreach(op,
403-
[G_LSHR, G_ASHR],
404-
(pattern (op $dst, $shifted, $amt)))>;
405-
def shift_of_trunc_of_shift_matchdata : GIDefMatchData<"ShiftOfTruncOfShift">;
406-
def shift_of_trunc_of_shift : GICombineRule<
407-
(defs root:$dst, shift_of_trunc_of_shift_matchdata:$matchinfo),
408-
(match (shift_right_op $dst):$root,
409-
[{ return Helper.matchShiftOfTruncOfShift(*${root}, ${matchinfo}); }]),
410-
(apply [{ Helper.applyShiftOfTruncOfShift(*${root}, ${matchinfo}); }])>;
399+
// Fold (lshr (trunc (lshr x, C1)), C2) -> trunc (lshr x, (C1 + C2))
400+
def lshr_of_trunc_of_lshr_matchdata : GIDefMatchData<"LshrOfTruncOfLshr">;
401+
//def lshr_of_trunc_of_lshr : GICombineRule<
402+
// (defs root:$root, lshr_of_trunc_of_lshr_matchdata:$matchinfo),
403+
// (match (G_LSHR $dst, $x, $y):$root,
404+
// [{ return Helper.matchLshrOfTruncOfLshr(*${root}, ${matchinfo}); }]),
405+
// (apply [{ Helper.applyLshrOfTruncOfLshr(*${root}, ${matchinfo}); }])>;
406+
407+
def lshr_of_trunc_of_lshr : GICombineRule<
408+
(defs root:$root, lshr_of_trunc_of_lshr_matchdata:$matchinfo),
409+
(match (G_LSHR $d1, $x, $y):$Shift,
410+
(G_TRUNC $d2, $d1):$Trunc,
411+
(G_LSHR $dst, $d2, $z):$root,
412+
[{ return Helper.matchLshrOfTruncOfLshr(*${root}, ${matchinfo}, *${Shift}, *${Trunc}); }]),
413+
(apply [{ Helper.applyLshrOfTruncOfLshr(*${root}, ${matchinfo}); }])>;
411414

412415
def narrow_binop_feeding_and : GICombineRule<
413416
(defs root:$root, build_fn_matchinfo:$matchinfo),
@@ -2147,7 +2150,7 @@ def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines,
21472150
simplify_neg_minmax, combine_concat_vector,
21482151
sext_trunc, zext_trunc, prefer_sign_combines, shuffle_combines,
21492152
combine_use_vector_truncate, merge_combines, overflow_combines,
2150-
truncsat_combines, shift_of_trunc_of_shift]>;
2153+
truncsat_combines, lshr_of_trunc_of_lshr]>;
21512154

21522155
// A combine group used for prelegalizer combiners at -O0. The combines in
21532156
// this group have been selected based on experiments to balance code size and

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

Lines changed: 37 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -2094,57 +2094,63 @@ bool CombinerHelper::matchCommuteShift(MachineInstr &MI,
20942094
return true;
20952095
}
20962096

2097-
bool CombinerHelper::matchShiftOfTruncOfShift(
2098-
MachineInstr &MI, ShiftOfTruncOfShift &MatchInfo) const {
2097+
bool CombinerHelper::matchLshrOfTruncOfLshr(
2098+
MachineInstr &MI, LshrOfTruncOfLshr &MatchInfo, MachineInstr &ShiftMI, MachineInstr &TruncMI) const {
20992099
unsigned ShiftOpcode = MI.getOpcode();
2100-
assert(ShiftOpcode == TargetOpcode::G_LSHR ||
2101-
ShiftOpcode == TargetOpcode::G_ASHR);
2100+
assert(ShiftOpcode == TargetOpcode::G_LSHR);
21022101

21032102
Register N0 = MI.getOperand(1).getReg();
21042103
Register N1 = MI.getOperand(2).getReg();
21052104
unsigned OpSizeInBits = MRI.getType(N0).getScalarSizeInBits();
21062105

2107-
APInt N1C;
2108-
Register InnerShift;
2109-
if (!mi_match(N1, MRI, m_ICstOrSplat(N1C)) ||
2110-
!mi_match(N0, MRI, m_GTrunc(m_Reg(InnerShift))))
2106+
APInt N1C, N001C;
2107+
if (!mi_match(N1, MRI, m_ICstOrSplat(N1C)))
21112108
return false;
2112-
2113-
auto *InnerMI = MRI.getVRegDef(InnerShift);
2114-
if (InnerMI->getOpcode() != ShiftOpcode)
2115-
return false;
2116-
2117-
APInt N001C;
2118-
auto N001 = InnerMI->getOperand(2).getReg();
2109+
auto N001 = ShiftMI.getOperand(2).getReg();
21192110
if (!mi_match(N001, MRI, m_ICstOrSplat(N001C)))
21202111
return false;
21212112

2122-
uint64_t c1 = N001C.getZExtValue();
2123-
uint64_t c2 = N1C.getZExtValue();
2113+
if (N001C.getBitWidth() > N1C.getBitWidth())
2114+
N1C = N1C.zext(N001C.getBitWidth());
2115+
else
2116+
N001C = N001C.zext(N1C.getBitWidth());
2117+
2118+
Register InnerShift = ShiftMI.getOperand(0).getReg();
21242119
LLT InnerShiftTy = MRI.getType(InnerShift);
21252120
uint64_t InnerShiftSize = InnerShiftTy.getScalarSizeInBits();
2126-
if (!(c1 + OpSizeInBits == InnerShiftSize) || !(c1 + c2 < InnerShiftSize))
2127-
return false;
2121+
if ((N1C + N001C).ult(InnerShiftSize)) {
2122+
MatchInfo.Src = ShiftMI.getOperand(1).getReg();
2123+
MatchInfo.ShiftAmt = N1C + N001C;
2124+
MatchInfo.ShiftAmtTy = MRI.getType(N001);
2125+
MatchInfo.InnerShiftTy = InnerShiftTy;
21282126

2129-
MatchInfo.Src = InnerMI->getOperand(1).getReg();
2130-
MatchInfo.ShiftAmt = c1 + c2;
2131-
MatchInfo.ShiftAmtTy = MRI.getType(N001);
2132-
MatchInfo.InnerShiftTy = InnerShiftTy;
2133-
return true;
2127+
if ((N001C + OpSizeInBits) == InnerShiftSize)
2128+
return true;
2129+
if (MRI.hasOneUse(N0) && MRI.hasOneUse(InnerShift)) {
2130+
MatchInfo.Mask = true;
2131+
MatchInfo.MaskVal = APInt(N1C.getBitWidth(), OpSizeInBits) - N1C;
2132+
return true;
2133+
}
2134+
}
2135+
return false;
21342136
}
21352137

2136-
void CombinerHelper::applyShiftOfTruncOfShift(
2137-
MachineInstr &MI, ShiftOfTruncOfShift &MatchInfo) const {
2138+
void CombinerHelper::applyLshrOfTruncOfLshr(
2139+
MachineInstr &MI, LshrOfTruncOfLshr &MatchInfo) const {
21382140
unsigned ShiftOpcode = MI.getOpcode();
2139-
assert(ShiftOpcode == TargetOpcode::G_LSHR ||
2140-
ShiftOpcode == TargetOpcode::G_ASHR);
2141+
assert(ShiftOpcode == TargetOpcode::G_LSHR);
21412142

21422143
Register Dst = MI.getOperand(0).getReg();
21432144
auto ShiftAmt =
21442145
Builder.buildConstant(MatchInfo.ShiftAmtTy, MatchInfo.ShiftAmt);
2145-
auto Shift = Builder.buildInstr(ShiftOpcode, {MatchInfo.InnerShiftTy},
2146-
{MatchInfo.Src, ShiftAmt});
2147-
Builder.buildTrunc(Dst, Shift);
2146+
auto Shift = Builder.buildLShr(MatchInfo.InnerShiftTy, MatchInfo.Src, ShiftAmt);
2147+
if (MatchInfo.Mask == true) {
2148+
APInt MaskVal = APInt::getLowBitsSet(MatchInfo.InnerShiftTy.getScalarSizeInBits(), MatchInfo.MaskVal.getZExtValue());
2149+
auto Mask = Builder.buildConstant(MatchInfo.ShiftAmtTy, MaskVal);
2150+
auto And = Builder.buildAnd(MatchInfo.InnerShiftTy, Shift, Mask);
2151+
Builder.buildTrunc(Dst, And);
2152+
} else
2153+
Builder.buildTrunc(Dst, Shift);
21482154
MI.eraseFromParent();
21492155
}
21502156

llvm/lib/Target/AArch64/AArch64Combine.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -369,5 +369,5 @@ def AArch64PostLegalizerCombiner
369369
commute_constant_to_rhs, extract_vec_elt_combines,
370370
push_freeze_to_prevent_poison_from_propagating,
371371
combine_mul_cmlt, combine_use_vector_truncate,
372-
extmultomull, truncsat_combines, shift_of_trunc_of_shift]> {
372+
extmultomull, truncsat_combines, lshr_of_trunc_of_lshr]> {
373373
}

llvm/test/CodeGen/AArch64/combine-sdiv.ll

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1684,14 +1684,24 @@ define i32 @combine_i32_sdiv_const7(i32 %x) {
16841684
}
16851685

16861686
define i32 @combine_i32_sdiv_const100(i32 %x) {
1687-
; CHECK-LABEL: combine_i32_sdiv_const100:
1688-
; CHECK: // %bb.0:
1689-
; CHECK-NEXT: mov w8, #34079 // =0x851f
1690-
; CHECK-NEXT: movk w8, #20971, lsl #16
1691-
; CHECK-NEXT: smull x8, w0, w8
1692-
; CHECK-NEXT: asr x8, x8, #37
1693-
; CHECK-NEXT: add w0, w8, w8, lsr #31
1694-
; CHECK-NEXT: ret
1687+
; CHECK-SD-LABEL: combine_i32_sdiv_const100:
1688+
; CHECK-SD: // %bb.0:
1689+
; CHECK-SD-NEXT: mov w8, #34079 // =0x851f
1690+
; CHECK-SD-NEXT: movk w8, #20971, lsl #16
1691+
; CHECK-SD-NEXT: smull x8, w0, w8
1692+
; CHECK-SD-NEXT: asr x8, x8, #37
1693+
; CHECK-SD-NEXT: add w0, w8, w8, lsr #31
1694+
; CHECK-SD-NEXT: ret
1695+
;
1696+
; CHECK-GI-LABEL: combine_i32_sdiv_const100:
1697+
; CHECK-GI: // %bb.0:
1698+
; CHECK-GI-NEXT: mov w8, #34079 // =0x851f
1699+
; CHECK-GI-NEXT: movk w8, #20971, lsl #16
1700+
; CHECK-GI-NEXT: smull x8, w0, w8
1701+
; CHECK-GI-NEXT: asr x8, x8, #32
1702+
; CHECK-GI-NEXT: asr w8, w8, #5
1703+
; CHECK-GI-NEXT: add w0, w8, w8, lsr #31
1704+
; CHECK-GI-NEXT: ret
16951705
%1 = sdiv i32 %x, 100
16961706
ret i32 %1
16971707
}
Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=aarch64-none-eabi -global-isel=0 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3+
; RUN: llc -mtriple=aarch64-none-eabi -global-isel=1 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
4+
5+
define i32 @s32_test1(i64 %a) {
6+
; CHECK-LABEL: s32_test1:
7+
; CHECK: // %bb.0:
8+
; CHECK-NEXT: lsr x0, x0, #48
9+
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
10+
; CHECK-NEXT: ret
11+
%r = lshr i64 %a, 32
12+
%ret = trunc i64 %r to i32
13+
%x = lshr i32 %ret, 16
14+
ret i32 %x
15+
}
16+
17+
define i32 @s32_test2(i64 %a) {
18+
; CHECK-LABEL: s32_test2:
19+
; CHECK: // %bb.0:
20+
; CHECK-NEXT: ubfx x0, x0, #32, #16
21+
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
22+
; CHECK-NEXT: ret
23+
%r = lshr i64 %a, 16
24+
%ret = trunc i64 %r to i32
25+
%x = lshr i32 %ret, 16
26+
ret i32 %x
27+
}
28+
29+
define <8 x i8> @v8s8_test1(<8 x i16> %a) {
30+
; CHECK-LABEL: v8s8_test1:
31+
; CHECK: // %bb.0:
32+
; CHECK-NEXT: ushr v0.8h, v0.8h, #12
33+
; CHECK-NEXT: xtn v0.8b, v0.8h
34+
; CHECK-NEXT: ret
35+
%r = lshr <8 x i16> %a, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
36+
%ret = trunc <8 x i16> %r to <8 x i8>
37+
%x = lshr <8 x i8> %ret, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
38+
ret <8 x i8> %x
39+
}
40+
41+
define <8 x i8> @v8s8_test2(<8 x i16> %a) {
42+
; CHECK-SD-LABEL: v8s8_test2:
43+
; CHECK-SD: // %bb.0:
44+
; CHECK-SD-NEXT: ushr v0.8h, v0.8h, #8
45+
; CHECK-SD-NEXT: bic v0.8h, #240
46+
; CHECK-SD-NEXT: xtn v0.8b, v0.8h
47+
; CHECK-SD-NEXT: ret
48+
;
49+
; CHECK-GI-LABEL: v8s8_test2:
50+
; CHECK-GI: // %bb.0:
51+
; CHECK-GI-NEXT: movi v1.8h, #15
52+
; CHECK-GI-NEXT: ushr v0.8h, v0.8h, #8
53+
; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
54+
; CHECK-GI-NEXT: xtn v0.8b, v0.8h
55+
; CHECK-GI-NEXT: ret
56+
%r = lshr <8 x i16> %a, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
57+
%ret = trunc <8 x i16> %r to <8 x i8>
58+
%x = lshr <8 x i8> %ret, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
59+
ret <8 x i8> %x
60+
}
61+
62+
define <4 x i16> @v4s16_test1(<4 x i32> %a) {
63+
; CHECK-LABEL: v4s16_test1:
64+
; CHECK: // %bb.0:
65+
; CHECK-NEXT: ushr v0.4s, v0.4s, #24
66+
; CHECK-NEXT: xtn v0.4h, v0.4s
67+
; CHECK-NEXT: ret
68+
%r = lshr <4 x i32> %a, <i32 16, i32 16, i32 16, i32 16>
69+
%ret = trunc <4 x i32> %r to <4 x i16>
70+
%x = lshr <4 x i16> %ret, <i16 8, i16 8, i16 8, i16 8>
71+
ret <4 x i16> %x
72+
}
73+
74+
define <4 x i16> @v4s16_test2(<4 x i32> %a) {
75+
; CHECK-SD-LABEL: v4s16_test2:
76+
; CHECK-SD: // %bb.0:
77+
; CHECK-SD-NEXT: shrn v0.4h, v0.4s, #16
78+
; CHECK-SD-NEXT: bic v0.4h, #255, lsl #8
79+
; CHECK-SD-NEXT: ret
80+
;
81+
; CHECK-GI-LABEL: v4s16_test2:
82+
; CHECK-GI: // %bb.0:
83+
; CHECK-GI-NEXT: movi v1.2d, #0x0000ff000000ff
84+
; CHECK-GI-NEXT: ushr v0.4s, v0.4s, #16
85+
; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
86+
; CHECK-GI-NEXT: xtn v0.4h, v0.4s
87+
; CHECK-GI-NEXT: ret
88+
%r = lshr <4 x i32> %a, <i32 8, i32 8, i32 8, i32 8>
89+
%ret = trunc <4 x i32> %r to <4 x i16>
90+
%x = lshr <4 x i16> %ret, <i16 8, i16 8, i16 8, i16 8>
91+
ret <4 x i16> %x
92+
}
93+
94+
define <2 x i32> @v2s32_test1(<2 x i64> %a) {
95+
; CHECK-LABEL: v2s32_test1:
96+
; CHECK: // %bb.0:
97+
; CHECK-NEXT: ushr v0.2d, v0.2d, #48
98+
; CHECK-NEXT: xtn v0.2s, v0.2d
99+
; CHECK-NEXT: ret
100+
%r = lshr <2 x i64> %a, <i64 32, i64 32>
101+
%ret = trunc <2 x i64> %r to <2 x i32>
102+
%x = lshr <2 x i32> %ret, <i32 16, i32 16>
103+
ret <2 x i32> %x
104+
}
105+
106+
define <2 x i32> @v2s32_test2(<2 x i64> %a) {
107+
; CHECK-SD-LABEL: v2s32_test2:
108+
; CHECK-SD: // %bb.0:
109+
; CHECK-SD-NEXT: movi d1, #0x00ffff0000ffff
110+
; CHECK-SD-NEXT: shrn v0.2s, v0.2d, #32
111+
; CHECK-SD-NEXT: and v0.8b, v0.8b, v1.8b
112+
; CHECK-SD-NEXT: ret
113+
;
114+
; CHECK-GI-LABEL: v2s32_test2:
115+
; CHECK-GI: // %bb.0:
116+
; CHECK-GI-NEXT: movi v1.2d, #0x0000000000ffff
117+
; CHECK-GI-NEXT: ushr v0.2d, v0.2d, #32
118+
; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
119+
; CHECK-GI-NEXT: xtn v0.2s, v0.2d
120+
; CHECK-GI-NEXT: ret
121+
%r = lshr <2 x i64> %a, <i64 16, i64 16>
122+
%ret = trunc <2 x i64> %r to <2 x i32>
123+
%x = lshr <2 x i32> %ret, <i32 16, i32 16>
124+
ret <2 x i32> %x
125+
}

0 commit comments

Comments
 (0)