Skip to content

Commit 22a7ebb

Browse files
[AArch64][GlobalISel] Add combine for build_vector(unmerge, unmerge, undef, undef)
1 parent dda95d9 commit 22a7ebb

File tree

4 files changed

+120
-51
lines changed

4 files changed

+120
-51
lines changed

llvm/lib/Target/AArch64/AArch64Combine.td

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -332,6 +332,13 @@ def combine_mul_cmlt : GICombineRule<
332332
(apply [{ applyCombineMulCMLT(*${root}, MRI, B, ${matchinfo}); }])
333333
>;
334334

335+
// Combine rule rooted at a G_BUILD_VECTOR. The C++ predicate
// matchCombineBuildUnmerge decides whether the build_vector is made of the
// results of a G_UNMERGE_VALUES padded with G_IMPLICIT_DEF, and records the
// unmerge source register in $unmergeSrc. applyCombineBuildUnmerge then
// replaces the root with a G_CONCAT_VECTORS of that source and an undef
// vector.
def combine_build_unmerge : GICombineRule<
336+
(defs root:$root, register_matchinfo:$unmergeSrc),
337+
(match (wip_match_opcode G_BUILD_VECTOR):$root,
338+
[{ return matchCombineBuildUnmerge(*${root}, MRI, ${unmergeSrc}); }]),
339+
(apply [{ applyCombineBuildUnmerge(*${root}, MRI, B, ${unmergeSrc}); }])
340+
>;
341+
335342
// Post-legalization combines which should happen at all optimization levels.
336343
// (E.g. ones that facilitate matching for the selector) For example, matching
337344
// pseudos.
@@ -366,7 +373,7 @@ def AArch64PostLegalizerCombiner
366373
select_to_minmax, or_to_bsp, combine_concat_vector,
367374
commute_constant_to_rhs, extract_vec_elt_combines,
368375
push_freeze_to_prevent_poison_from_propagating,
369-
combine_mul_cmlt, combine_use_vector_truncate,
376+
combine_mul_cmlt, combine_use_vector_truncate, combine_build_unmerge,
370377
extmultomull, truncsat_combines, lshr_of_trunc_of_lshr,
371378
funnel_shift_from_or_shift_constants_are_legal]> {
372379
}

llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp

Lines changed: 95 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
#include "llvm/CodeGen/TargetOpcodes.h"
4040
#include "llvm/CodeGen/TargetPassConfig.h"
4141
#include "llvm/Support/Debug.h"
42+
#include <set>
4243

4344
#define GET_GICOMBINER_DEPS
4445
#include "AArch64GenPostLegalizeGICombiner.inc"
@@ -133,6 +134,99 @@ bool isZeroExtended(Register R, MachineRegisterInfo &MRI) {
133134
return MRI.getVRegDef(R)->getOpcode() == TargetOpcode::G_ZEXT;
134135
}
135136

137+
// This pattern aims to match the following shape to avoid extra mov
138+
// instructions
139+
// G_BUILD_VECTOR(
140+
// G_UNMERGE_VALUES(src, 0)
141+
// G_UNMERGE_VALUES(src, 1)
142+
// G_IMPLICIT_DEF
143+
// G_IMPLICIT_DEF
144+
// )
145+
// ->
146+
// G_CONCAT_VECTORS(
147+
// undef
148+
// src
149+
// )
150+
bool matchCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
151+
Register &UnmergeSrc) {
152+
assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
153+
154+
unsigned UnmergeInstrCount = 0;
155+
unsigned UndefInstrCount = 0;
156+
157+
unsigned UnmergeEltCount = 0;
158+
unsigned UnmergeEltSize = 0;
159+
160+
Register UnmergeSrcTemp;
161+
162+
std::set<int> KnownRegs;
163+
164+
for (auto Use : MI.all_uses()) {
165+
auto *Def = getDefIgnoringCopies(Use.getReg(), MRI);
166+
167+
if (!Def) {
168+
return false;
169+
}
170+
171+
unsigned Opcode = Def->getOpcode();
172+
173+
switch (Opcode) {
174+
default:
175+
return false;
176+
case TargetOpcode::G_IMPLICIT_DEF:
177+
++UndefInstrCount;
178+
break;
179+
case TargetOpcode::G_UNMERGE_VALUES:
180+
++UnmergeInstrCount;
181+
182+
UnmergeEltSize = MRI.getType(Use.getReg()).getScalarSizeInBits();
183+
UnmergeEltCount = Def->getNumDefs();
184+
if (UnmergeEltCount < 2 || (UnmergeEltSize * UnmergeEltCount != 64 &&
185+
UnmergeEltSize * UnmergeEltCount != 128)) {
186+
return false;
187+
}
188+
189+
// Unmerge should only use one register so we can use the last one
190+
for (auto UnmergeUse : Def->all_uses())
191+
UnmergeSrcTemp = UnmergeUse.getReg();
192+
193+
// Track unique sources for the G_UNMERGE_VALUES
194+
unsigned RegId = UnmergeSrcTemp.id();
195+
if (KnownRegs.find(RegId) != KnownRegs.end())
196+
continue;
197+
198+
KnownRegs.insert(RegId);
199+
200+
// We know the unmerge is a valid target now so store the register.
201+
UnmergeSrc = UnmergeSrcTemp;
202+
203+
break;
204+
}
205+
}
206+
207+
// Only want to match patterns that pad half of a vector with undefined. We
208+
// also want to ensure that these values come from a single unmerge and all
209+
// unmerged values are consumed.
210+
if (UndefInstrCount != UnmergeInstrCount ||
211+
UnmergeEltCount != UnmergeInstrCount || KnownRegs.size() != 1) {
212+
return false;
213+
}
214+
215+
return true;
216+
}
217+
218+
// Rewrites a G_BUILD_VECTOR accepted by matchCombineBuildUnmerge into
//   G_CONCAT_VECTORS(UnmergeSrc, undef)
// where the undef operand has the same type as UnmergeSrc.
//
// \param MI         The G_BUILD_VECTOR being replaced; erased on return.
// \param MRI        Register info used to create the undef vector register.
// \param B          Builder used to emit the new instructions at \p MI.
// \param UnmergeSrc Source register recorded by the matcher.
void applyCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
                              MachineIRBuilder &B, Register &UnmergeSrc) {
  assert(UnmergeSrc && "Expected there to be one matching G_UNMERGE_VALUES");

  // Emit the replacement right where the build_vector lives, keeping its
  // debug location.
  B.setInstrAndDebugLoc(MI);

  // Materialise the undefined upper half with the source's own type.
  const Register PaddingVec =
      MRI.createGenericVirtualRegister(MRI.getType(UnmergeSrc));
  B.buildUndef(PaddingVec);

  // The new instruction defines the build_vector's original result, so the
  // old instruction can simply be dropped afterwards.
  B.buildConcatVectors(MI.getOperand(0), {UnmergeSrc, PaddingVec});
  MI.eraseFromParent();
}
229+
136230
bool matchAArch64MulConstCombine(
137231
MachineInstr &MI, MachineRegisterInfo &MRI,
138232
std::function<void(MachineIRBuilder &B, Register DstReg)> &ApplyFn) {
@@ -890,4 +984,4 @@ namespace llvm {
890984
FunctionPass *createAArch64PostLegalizerCombiner(bool IsOptNone) {
891985
return new AArch64PostLegalizerCombiner(IsOptNone);
892986
}
893-
} // end namespace llvm
987+
} // end namespace llvm

llvm/test/CodeGen/AArch64/fptrunc.ll

Lines changed: 5 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -345,19 +345,11 @@ entry:
345345
}
346346

347347
define <2 x half> @fptrunc_v2f32_v2f16(<2 x float> %a) {
348-
; CHECK-SD-LABEL: fptrunc_v2f32_v2f16:
349-
; CHECK-SD: // %bb.0: // %entry
350-
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
351-
; CHECK-SD-NEXT: fcvtn v0.4h, v0.4s
352-
; CHECK-SD-NEXT: ret
353-
;
354-
; CHECK-GI-LABEL: fptrunc_v2f32_v2f16:
355-
; CHECK-GI: // %bb.0: // %entry
356-
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
357-
; CHECK-GI-NEXT: mov v1.s[0], v0.s[0]
358-
; CHECK-GI-NEXT: mov v1.s[1], v0.s[1]
359-
; CHECK-GI-NEXT: fcvtn v0.4h, v1.4s
360-
; CHECK-GI-NEXT: ret
348+
; CHECK-LABEL: fptrunc_v2f32_v2f16:
349+
; CHECK: // %bb.0: // %entry
350+
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
351+
; CHECK-NEXT: fcvtn v0.4h, v0.4s
352+
; CHECK-NEXT: ret
361353
entry:
362354
%c = fptrunc <2 x float> %a to <2 x half>
363355
ret <2 x half> %c

llvm/test/CodeGen/AArch64/itofp.ll

Lines changed: 12 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -5763,18 +5763,14 @@ define <2 x half> @stofp_v2i64_v2f16(<2 x i64> %a) {
57635763
; CHECK-NOFP16-GI: // %bb.0: // %entry
57645764
; CHECK-NOFP16-GI-NEXT: scvtf v0.2d, v0.2d
57655765
; CHECK-NOFP16-GI-NEXT: fcvtn v0.2s, v0.2d
5766-
; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0]
5767-
; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1]
5768-
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s
5766+
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
57695767
; CHECK-NOFP16-GI-NEXT: ret
57705768
;
57715769
; CHECK-FP16-GI-LABEL: stofp_v2i64_v2f16:
57725770
; CHECK-FP16-GI: // %bb.0: // %entry
57735771
; CHECK-FP16-GI-NEXT: scvtf v0.2d, v0.2d
57745772
; CHECK-FP16-GI-NEXT: fcvtn v0.2s, v0.2d
5775-
; CHECK-FP16-GI-NEXT: mov v1.s[0], v0.s[0]
5776-
; CHECK-FP16-GI-NEXT: mov v1.s[1], v0.s[1]
5777-
; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v1.4s
5773+
; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v0.4s
57785774
; CHECK-FP16-GI-NEXT: ret
57795775
entry:
57805776
%c = sitofp <2 x i64> %a to <2 x half>
@@ -5808,18 +5804,14 @@ define <2 x half> @utofp_v2i64_v2f16(<2 x i64> %a) {
58085804
; CHECK-NOFP16-GI: // %bb.0: // %entry
58095805
; CHECK-NOFP16-GI-NEXT: ucvtf v0.2d, v0.2d
58105806
; CHECK-NOFP16-GI-NEXT: fcvtn v0.2s, v0.2d
5811-
; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0]
5812-
; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1]
5813-
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s
5807+
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
58145808
; CHECK-NOFP16-GI-NEXT: ret
58155809
;
58165810
; CHECK-FP16-GI-LABEL: utofp_v2i64_v2f16:
58175811
; CHECK-FP16-GI: // %bb.0: // %entry
58185812
; CHECK-FP16-GI-NEXT: ucvtf v0.2d, v0.2d
58195813
; CHECK-FP16-GI-NEXT: fcvtn v0.2s, v0.2d
5820-
; CHECK-FP16-GI-NEXT: mov v1.s[0], v0.s[0]
5821-
; CHECK-FP16-GI-NEXT: mov v1.s[1], v0.s[1]
5822-
; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v1.4s
5814+
; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v0.4s
58235815
; CHECK-FP16-GI-NEXT: ret
58245816
entry:
58255817
%c = uitofp <2 x i64> %a to <2 x half>
@@ -6232,17 +6224,13 @@ define <2 x half> @stofp_v2i32_v2f16(<2 x i32> %a) {
62326224
; CHECK-NOFP16-GI-LABEL: stofp_v2i32_v2f16:
62336225
; CHECK-NOFP16-GI: // %bb.0: // %entry
62346226
; CHECK-NOFP16-GI-NEXT: scvtf v0.2s, v0.2s
6235-
; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0]
6236-
; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1]
6237-
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s
6227+
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
62386228
; CHECK-NOFP16-GI-NEXT: ret
62396229
;
62406230
; CHECK-FP16-GI-LABEL: stofp_v2i32_v2f16:
62416231
; CHECK-FP16-GI: // %bb.0: // %entry
62426232
; CHECK-FP16-GI-NEXT: scvtf v0.2s, v0.2s
6243-
; CHECK-FP16-GI-NEXT: mov v1.s[0], v0.s[0]
6244-
; CHECK-FP16-GI-NEXT: mov v1.s[1], v0.s[1]
6245-
; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v1.4s
6233+
; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v0.4s
62466234
; CHECK-FP16-GI-NEXT: ret
62476235
entry:
62486236
%c = sitofp <2 x i32> %a to <2 x half>
@@ -6267,17 +6255,13 @@ define <2 x half> @utofp_v2i32_v2f16(<2 x i32> %a) {
62676255
; CHECK-NOFP16-GI-LABEL: utofp_v2i32_v2f16:
62686256
; CHECK-NOFP16-GI: // %bb.0: // %entry
62696257
; CHECK-NOFP16-GI-NEXT: ucvtf v0.2s, v0.2s
6270-
; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0]
6271-
; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1]
6272-
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s
6258+
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
62736259
; CHECK-NOFP16-GI-NEXT: ret
62746260
;
62756261
; CHECK-FP16-GI-LABEL: utofp_v2i32_v2f16:
62766262
; CHECK-FP16-GI: // %bb.0: // %entry
62776263
; CHECK-FP16-GI-NEXT: ucvtf v0.2s, v0.2s
6278-
; CHECK-FP16-GI-NEXT: mov v1.s[0], v0.s[0]
6279-
; CHECK-FP16-GI-NEXT: mov v1.s[1], v0.s[1]
6280-
; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v1.4s
6264+
; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v0.4s
62816265
; CHECK-FP16-GI-NEXT: ret
62826266
entry:
62836267
%c = uitofp <2 x i32> %a to <2 x half>
@@ -6480,9 +6464,7 @@ define <2 x half> @stofp_v2i16_v2f16(<2 x i16> %a) {
64806464
; CHECK-NOFP16-GI-NEXT: shl v0.2s, v0.2s, #16
64816465
; CHECK-NOFP16-GI-NEXT: sshr v0.2s, v0.2s, #16
64826466
; CHECK-NOFP16-GI-NEXT: scvtf v0.2s, v0.2s
6483-
; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0]
6484-
; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1]
6485-
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s
6467+
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
64866468
; CHECK-NOFP16-GI-NEXT: ret
64876469
entry:
64886470
%c = sitofp <2 x i16> %a to <2 x half>
@@ -6509,9 +6491,7 @@ define <2 x half> @utofp_v2i16_v2f16(<2 x i16> %a) {
65096491
; CHECK-NOFP16-GI-NEXT: movi d1, #0x00ffff0000ffff
65106492
; CHECK-NOFP16-GI-NEXT: and v0.8b, v0.8b, v1.8b
65116493
; CHECK-NOFP16-GI-NEXT: ucvtf v0.2s, v0.2s
6512-
; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0]
6513-
; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1]
6514-
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s
6494+
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
65156495
; CHECK-NOFP16-GI-NEXT: ret
65166496
entry:
65176497
%c = uitofp <2 x i16> %a to <2 x half>
@@ -6766,9 +6746,7 @@ define <2 x half> @stofp_v2i8_v2f16(<2 x i8> %a) {
67666746
; CHECK-NOFP16-GI-NEXT: shl v0.2s, v0.2s, #24
67676747
; CHECK-NOFP16-GI-NEXT: sshr v0.2s, v0.2s, #24
67686748
; CHECK-NOFP16-GI-NEXT: scvtf v0.2s, v0.2s
6769-
; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0]
6770-
; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1]
6771-
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s
6749+
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
67726750
; CHECK-NOFP16-GI-NEXT: ret
67736751
;
67746752
; CHECK-FP16-GI-LABEL: stofp_v2i8_v2f16:
@@ -6817,9 +6795,7 @@ define <2 x half> @utofp_v2i8_v2f16(<2 x i8> %a) {
68176795
; CHECK-NOFP16-GI-NEXT: movi d1, #0x0000ff000000ff
68186796
; CHECK-NOFP16-GI-NEXT: and v0.8b, v0.8b, v1.8b
68196797
; CHECK-NOFP16-GI-NEXT: ucvtf v0.2s, v0.2s
6820-
; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0]
6821-
; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1]
6822-
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s
6798+
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
68236799
; CHECK-NOFP16-GI-NEXT: ret
68246800
;
68256801
; CHECK-FP16-GI-LABEL: utofp_v2i8_v2f16:

0 commit comments

Comments
 (0)