Skip to content

Commit e6c5082

Browse files
Working
1 parent 3183138 commit e6c5082

File tree

5 files changed

+128
-73
lines changed

5 files changed

+128
-73
lines changed

llvm/lib/Target/AArch64/AArch64Combine.td

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -332,6 +332,13 @@ def combine_mul_cmlt : GICombineRule<
332332
(apply [{ applyCombineMulCMLT(*${root}, MRI, B, ${matchinfo}); }])
333333
>;
334334

335+
// Post-legalizer combine rooted at a G_BUILD_VECTOR instruction.
// The C++ predicate matchCombineBuildUnmerge decides whether the rule fires
// and fills the two register-vector match-info values ($unmergedValues and
// $undefinedValues); applyCombineBuildUnmerge then receives the same two
// values, together with the builder B, to perform the rewrite.
def combine_build_unmerge : GICombineRule<
336+
(defs root:$root, register_vector_matchinfo:$unmergedValues, register_vector_matchinfo:$undefinedValues),
337+
(match (wip_match_opcode G_BUILD_VECTOR):$root,
338+
[{ return matchCombineBuildUnmerge(*${root}, MRI, ${unmergedValues}, ${undefinedValues}); }]),
339+
(apply [{ applyCombineBuildUnmerge(*${root}, MRI, B, ${unmergedValues}, ${undefinedValues}); }])
340+
>;
341+
335342
// Post-legalization combines which should happen at all optimization levels.
336343
// (E.g. ones that facilitate matching for the selector) For example, matching
337344
// pseudos.
@@ -366,6 +373,6 @@ def AArch64PostLegalizerCombiner
366373
select_to_minmax, or_to_bsp, combine_concat_vector,
367374
commute_constant_to_rhs, extract_vec_elt_combines,
368375
push_freeze_to_prevent_poison_from_propagating,
369-
combine_mul_cmlt, combine_use_vector_truncate,
376+
combine_mul_cmlt, combine_use_vector_truncate, combine_build_unmerge,
370377
extmultomull, truncsat_combines, lshr_of_trunc_of_lshr]> {
371378
}

llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
//===----------------------------------------------------------------------===//
2121

2222
#include "AArch64TargetMachine.h"
23+
#include "llvm/ADT/ArrayRef.h"
2324
#include "llvm/ADT/STLExtras.h"
2425
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
2526
#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
@@ -36,9 +37,13 @@
3637
#include "llvm/CodeGen/MachineDominators.h"
3738
#include "llvm/CodeGen/MachineFunctionPass.h"
3839
#include "llvm/CodeGen/MachineRegisterInfo.h"
40+
#include "llvm/CodeGen/Register.h"
3941
#include "llvm/CodeGen/TargetOpcodes.h"
4042
#include "llvm/CodeGen/TargetPassConfig.h"
43+
#include "llvm/CodeGenTypes/MachineValueType.h"
4144
#include "llvm/Support/Debug.h"
45+
#include <cassert>
46+
#include <iostream>
4247

4348
#define GET_GICOMBINER_DEPS
4449
#include "AArch64GenPostLegalizeGICombiner.inc"
@@ -133,6 +138,77 @@ bool isZeroExtended(Register R, MachineRegisterInfo &MRI) {
133138
return MRI.getVRegDef(R)->getOpcode() == TargetOpcode::G_ZEXT;
134139
}
135140

141+
/// Match a four-lane G_BUILD_VECTOR that can be rewritten as a concatenation
/// of two-element vectors.
///
/// The accepted shapes are, lane by lane:
///   (A.0, A.1, undef, undef)  and  (A.0, A.1, B.0, B.1)
/// where X.0 / X.1 are result 0 / result 1 of a two-result G_UNMERGE_VALUES
/// whose source X is a vector, and undef is a G_IMPLICIT_DEF. Definitions are
/// found by looking through copies.
///
/// Checking the lane position and the unmerge result index is required for
/// correctness: merely counting unmerge-fed lanes would also accept e.g.
/// (A.0, A.0, A.0, A.0) or (undef, undef, A.0, A.1), which are not equal to
/// the concatenation produced by the apply step.
///
/// \param MI              the G_BUILD_VECTOR being inspected.
/// \param MRI             register info used to look up definitions and types.
/// \param unmergedValues  on success, one entry per unmerge-fed lane holding
///                        the source register of the unmerge feeding that
///                        lane (entry 0 is the low half; in the four-entry
///                        case entry 3 is the high half).
/// \param undefinedValues on success, the build-vector operands defined by
///                        G_IMPLICIT_DEF (either zero or two entries).
/// \returns true if the pattern matched; on failure both output vectors are
///          left empty.
bool matchCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
                              SmallVectorImpl<Register> &unmergedValues,
                              SmallVectorImpl<Register> &undefinedValues) {
  assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);

  undefinedValues.clear();
  unmergedValues.clear();

  // Every rejection path goes through here so that no partially filled
  // match-info is ever left behind.
  auto Fail = [&unmergedValues, &undefinedValues]() {
    undefinedValues.clear();
    unmergedValues.clear();
    return false;
  };

  unsigned NextLane = 0;
  // Source of the unmerge that fed the previous lane; invalid if the previous
  // lane was undefined (or there was no previous lane).
  Register PrevSrc;

  for (const MachineOperand &Use : MI.all_uses()) {
    const unsigned Lane = NextLane++;
    // Only four-lane build vectors are handled.
    if (Lane >= 4)
      return Fail();

    const Register UseReg = Use.getReg();
    const auto DefAndReg = getDefSrcRegIgnoringCopies(UseReg, MRI);
    if (!DefAndReg || !DefAndReg->MI)
      return Fail();
    const MachineInstr *Def = DefAndReg->MI;

    switch (Def->getOpcode()) {
    default:
      return Fail();
    case TargetOpcode::G_IMPLICIT_DEF:
      // The apply step always places the unmerged vector in the low half, so
      // undefined lanes are only acceptable in the high half.
      if (Lane < 2)
        return Fail();
      undefinedValues.push_back(UseReg);
      PrevSrc = Register();
      break;
    case TargetOpcode::G_UNMERGE_VALUES: {
      if (Def->getNumDefs() != 2 || MRI.getType(UseReg) == LLT::scalar(16))
        return Fail();

      // With two results, operand 2 is the value being unmerged. It must be a
      // vector to be usable as a G_CONCAT_VECTORS source.
      const Register SrcReg = Def->getOperand(2).getReg();
      if (!MRI.getType(SrcReg).isVector())
        return Fail();

      // Even lanes must be result 0 and odd lanes result 1 of the unmerge.
      const unsigned ResultIdx = Lane % 2;
      if (Def->getOperand(ResultIdx).getReg() != DefAndReg->Reg)
        return Fail();

      // The odd lane of a pair must come from the same source as the even
      // lane right before it.
      if (ResultIdx == 1 && PrevSrc != SrcReg)
        return Fail();

      unmergedValues.push_back(SrcReg);
      PrevSrc = SrcReg;
      break;
    }
    }
  }

  const bool HalfUndef =
      undefinedValues.size() == 2 && unmergedValues.size() == 2;
  const bool FullyUnmerged =
      undefinedValues.empty() && unmergedValues.size() == 4;
  if (!HalfUndef && !FullyUnmerged)
    return Fail();

  return true;
}
189+
190+
/// Replace the G_BUILD_VECTOR \p MI with a G_CONCAT_VECTORS of two halves.
///
/// The low half is always \p unmergedValues[0]. When \p unmergedValues has
/// exactly two entries, the high half is a freshly created vector register
/// populated by a G_BUILD_VECTOR of \p undefinedValues; otherwise the high
/// half is \p unmergedValues[3]. \p MI is erased afterwards.
///
/// NOTE(review): this relies on the match step having filled the two vectors
/// with the expected number of entries; nothing is re-validated here.
void applyCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
                              MachineIRBuilder &B,
                              SmallVectorImpl<Register> &unmergedValues,
                              SmallVectorImpl<Register> &undefinedValues) {
  B.setInstrAndDebugLoc(MI);

  Register LowHalf = unmergedValues[0];
  Register HighHalf;

  if (unmergedValues.size() != 2) {
    // All lanes came from unmerges: the last entry names the upper source.
    HighHalf = unmergedValues[3];
  } else {
    // Upper lanes are undefined: assemble them into a vector with the same
    // element width as the low half.
    const unsigned EltBits = MRI.getType(LowHalf).getScalarSizeInBits();
    const LLT HighTy =
        LLT::fixed_vector(undefinedValues.size(), LLT::scalar(EltBits));
    HighHalf = MRI.createGenericVirtualRegister(HighTy);
    B.buildBuildVector(HighHalf, undefinedValues);
  }

  B.buildConcatVectors(MI.getOperand(0), {LowHalf, HighHalf});
  MI.eraseFromParent();
}
211+
136212
bool matchAArch64MulConstCombine(
137213
MachineInstr &MI, MachineRegisterInfo &MRI,
138214
std::function<void(MachineIRBuilder &B, Register DstReg)> &ApplyFn) {

llvm/test/CodeGen/AArch64/arm64-neon-copy.ll

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1268,11 +1268,17 @@ define <8 x i16> @testDUP.v1i16(<1 x i16> %a) {
12681268
}
12691269

12701270
define <4 x i32> @testDUP.v1i32(<1 x i32> %a) {
1271-
; CHECK-LABEL: testDUP.v1i32:
1272-
; CHECK: // %bb.0:
1273-
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1274-
; CHECK-NEXT: dup v0.4s, v0.s[0]
1275-
; CHECK-NEXT: ret
1271+
; CHECK-SD-LABEL: testDUP.v1i32:
1272+
; CHECK-SD: // %bb.0:
1273+
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
1274+
; CHECK-SD-NEXT: dup v0.4s, v0.s[0]
1275+
; CHECK-SD-NEXT: ret
1276+
;
1277+
; CHECK-GI-LABEL: testDUP.v1i32:
1278+
; CHECK-GI: // %bb.0:
1279+
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
1280+
; CHECK-GI-NEXT: mov v0.d[1], v0.d[0]
1281+
; CHECK-GI-NEXT: ret
12761282
%b = extractelement <1 x i32> %a, i32 0
12771283
%c = insertelement <4 x i32> undef, i32 %b, i32 0
12781284
%d = insertelement <4 x i32> %c, i32 %b, i32 1
@@ -2243,11 +2249,17 @@ define <4 x i16> @concat_vector_v4i16(<1 x i16> %a) {
22432249
}
22442250

22452251
define <4 x i32> @concat_vector_v4i32(<1 x i32> %a) {
2246-
; CHECK-LABEL: concat_vector_v4i32:
2247-
; CHECK: // %bb.0:
2248-
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
2249-
; CHECK-NEXT: dup v0.4s, v0.s[0]
2250-
; CHECK-NEXT: ret
2252+
; CHECK-SD-LABEL: concat_vector_v4i32:
2253+
; CHECK-SD: // %bb.0:
2254+
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
2255+
; CHECK-SD-NEXT: dup v0.4s, v0.s[0]
2256+
; CHECK-SD-NEXT: ret
2257+
;
2258+
; CHECK-GI-LABEL: concat_vector_v4i32:
2259+
; CHECK-GI: // %bb.0:
2260+
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
2261+
; CHECK-GI-NEXT: mov v0.d[1], v0.d[0]
2262+
; CHECK-GI-NEXT: ret
22512263
%r = shufflevector <1 x i32> %a, <1 x i32> undef, <4 x i32> zeroinitializer
22522264
ret <4 x i32> %r
22532265
}

llvm/test/CodeGen/AArch64/fptrunc.ll

Lines changed: 10 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -277,19 +277,11 @@ entry:
277277
}
278278

279279
define <2 x half> @fptrunc_v2f64_v2f16(<2 x double> %a) {
280-
; CHECK-SD-LABEL: fptrunc_v2f64_v2f16:
281-
; CHECK-SD: // %bb.0: // %entry
282-
; CHECK-SD-NEXT: fcvtxn v0.2s, v0.2d
283-
; CHECK-SD-NEXT: fcvtn v0.4h, v0.4s
284-
; CHECK-SD-NEXT: ret
285-
;
286-
; CHECK-GI-LABEL: fptrunc_v2f64_v2f16:
287-
; CHECK-GI: // %bb.0: // %entry
288-
; CHECK-GI-NEXT: fcvtxn v0.2s, v0.2d
289-
; CHECK-GI-NEXT: mov v1.s[0], v0.s[0]
290-
; CHECK-GI-NEXT: mov v1.s[1], v0.s[1]
291-
; CHECK-GI-NEXT: fcvtn v0.4h, v1.4s
292-
; CHECK-GI-NEXT: ret
280+
; CHECK-LABEL: fptrunc_v2f64_v2f16:
281+
; CHECK: // %bb.0: // %entry
282+
; CHECK-NEXT: fcvtxn v0.2s, v0.2d
283+
; CHECK-NEXT: fcvtn v0.4h, v0.4s
284+
; CHECK-NEXT: ret
293285
entry:
294286
%c = fptrunc <2 x double> %a to <2 x half>
295287
ret <2 x half> %c
@@ -334,19 +326,11 @@ entry:
334326
}
335327

336328
define <2 x half> @fptrunc_v2f32_v2f16(<2 x float> %a) {
337-
; CHECK-SD-LABEL: fptrunc_v2f32_v2f16:
338-
; CHECK-SD: // %bb.0: // %entry
339-
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
340-
; CHECK-SD-NEXT: fcvtn v0.4h, v0.4s
341-
; CHECK-SD-NEXT: ret
342-
;
343-
; CHECK-GI-LABEL: fptrunc_v2f32_v2f16:
344-
; CHECK-GI: // %bb.0: // %entry
345-
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
346-
; CHECK-GI-NEXT: mov v1.s[0], v0.s[0]
347-
; CHECK-GI-NEXT: mov v1.s[1], v0.s[1]
348-
; CHECK-GI-NEXT: fcvtn v0.4h, v1.4s
349-
; CHECK-GI-NEXT: ret
329+
; CHECK-LABEL: fptrunc_v2f32_v2f16:
330+
; CHECK: // %bb.0: // %entry
331+
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
332+
; CHECK-NEXT: fcvtn v0.4h, v0.4s
333+
; CHECK-NEXT: ret
350334
entry:
351335
%c = fptrunc <2 x float> %a to <2 x half>
352336
ret <2 x half> %c

llvm/test/CodeGen/AArch64/itofp.ll

Lines changed: 12 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -5763,18 +5763,14 @@ define <2 x half> @stofp_v2i64_v2f16(<2 x i64> %a) {
57635763
; CHECK-NOFP16-GI: // %bb.0: // %entry
57645764
; CHECK-NOFP16-GI-NEXT: scvtf v0.2d, v0.2d
57655765
; CHECK-NOFP16-GI-NEXT: fcvtn v0.2s, v0.2d
5766-
; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0]
5767-
; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1]
5768-
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s
5766+
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
57695767
; CHECK-NOFP16-GI-NEXT: ret
57705768
;
57715769
; CHECK-FP16-GI-LABEL: stofp_v2i64_v2f16:
57725770
; CHECK-FP16-GI: // %bb.0: // %entry
57735771
; CHECK-FP16-GI-NEXT: scvtf v0.2d, v0.2d
57745772
; CHECK-FP16-GI-NEXT: fcvtn v0.2s, v0.2d
5775-
; CHECK-FP16-GI-NEXT: mov v1.s[0], v0.s[0]
5776-
; CHECK-FP16-GI-NEXT: mov v1.s[1], v0.s[1]
5777-
; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v1.4s
5773+
; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v0.4s
57785774
; CHECK-FP16-GI-NEXT: ret
57795775
entry:
57805776
%c = sitofp <2 x i64> %a to <2 x half>
@@ -5808,18 +5804,14 @@ define <2 x half> @utofp_v2i64_v2f16(<2 x i64> %a) {
58085804
; CHECK-NOFP16-GI: // %bb.0: // %entry
58095805
; CHECK-NOFP16-GI-NEXT: ucvtf v0.2d, v0.2d
58105806
; CHECK-NOFP16-GI-NEXT: fcvtn v0.2s, v0.2d
5811-
; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0]
5812-
; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1]
5813-
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s
5807+
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
58145808
; CHECK-NOFP16-GI-NEXT: ret
58155809
;
58165810
; CHECK-FP16-GI-LABEL: utofp_v2i64_v2f16:
58175811
; CHECK-FP16-GI: // %bb.0: // %entry
58185812
; CHECK-FP16-GI-NEXT: ucvtf v0.2d, v0.2d
58195813
; CHECK-FP16-GI-NEXT: fcvtn v0.2s, v0.2d
5820-
; CHECK-FP16-GI-NEXT: mov v1.s[0], v0.s[0]
5821-
; CHECK-FP16-GI-NEXT: mov v1.s[1], v0.s[1]
5822-
; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v1.4s
5814+
; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v0.4s
58235815
; CHECK-FP16-GI-NEXT: ret
58245816
entry:
58255817
%c = uitofp <2 x i64> %a to <2 x half>
@@ -6232,17 +6224,13 @@ define <2 x half> @stofp_v2i32_v2f16(<2 x i32> %a) {
62326224
; CHECK-NOFP16-GI-LABEL: stofp_v2i32_v2f16:
62336225
; CHECK-NOFP16-GI: // %bb.0: // %entry
62346226
; CHECK-NOFP16-GI-NEXT: scvtf v0.2s, v0.2s
6235-
; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0]
6236-
; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1]
6237-
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s
6227+
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
62386228
; CHECK-NOFP16-GI-NEXT: ret
62396229
;
62406230
; CHECK-FP16-GI-LABEL: stofp_v2i32_v2f16:
62416231
; CHECK-FP16-GI: // %bb.0: // %entry
62426232
; CHECK-FP16-GI-NEXT: scvtf v0.2s, v0.2s
6243-
; CHECK-FP16-GI-NEXT: mov v1.s[0], v0.s[0]
6244-
; CHECK-FP16-GI-NEXT: mov v1.s[1], v0.s[1]
6245-
; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v1.4s
6233+
; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v0.4s
62466234
; CHECK-FP16-GI-NEXT: ret
62476235
entry:
62486236
%c = sitofp <2 x i32> %a to <2 x half>
@@ -6267,17 +6255,13 @@ define <2 x half> @utofp_v2i32_v2f16(<2 x i32> %a) {
62676255
; CHECK-NOFP16-GI-LABEL: utofp_v2i32_v2f16:
62686256
; CHECK-NOFP16-GI: // %bb.0: // %entry
62696257
; CHECK-NOFP16-GI-NEXT: ucvtf v0.2s, v0.2s
6270-
; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0]
6271-
; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1]
6272-
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s
6258+
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
62736259
; CHECK-NOFP16-GI-NEXT: ret
62746260
;
62756261
; CHECK-FP16-GI-LABEL: utofp_v2i32_v2f16:
62766262
; CHECK-FP16-GI: // %bb.0: // %entry
62776263
; CHECK-FP16-GI-NEXT: ucvtf v0.2s, v0.2s
6278-
; CHECK-FP16-GI-NEXT: mov v1.s[0], v0.s[0]
6279-
; CHECK-FP16-GI-NEXT: mov v1.s[1], v0.s[1]
6280-
; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v1.4s
6264+
; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v0.4s
62816265
; CHECK-FP16-GI-NEXT: ret
62826266
entry:
62836267
%c = uitofp <2 x i32> %a to <2 x half>
@@ -6480,9 +6464,7 @@ define <2 x half> @stofp_v2i16_v2f16(<2 x i16> %a) {
64806464
; CHECK-NOFP16-GI-NEXT: shl v0.2s, v0.2s, #16
64816465
; CHECK-NOFP16-GI-NEXT: sshr v0.2s, v0.2s, #16
64826466
; CHECK-NOFP16-GI-NEXT: scvtf v0.2s, v0.2s
6483-
; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0]
6484-
; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1]
6485-
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s
6467+
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
64866468
; CHECK-NOFP16-GI-NEXT: ret
64876469
entry:
64886470
%c = sitofp <2 x i16> %a to <2 x half>
@@ -6509,9 +6491,7 @@ define <2 x half> @utofp_v2i16_v2f16(<2 x i16> %a) {
65096491
; CHECK-NOFP16-GI-NEXT: movi d1, #0x00ffff0000ffff
65106492
; CHECK-NOFP16-GI-NEXT: and v0.8b, v0.8b, v1.8b
65116493
; CHECK-NOFP16-GI-NEXT: ucvtf v0.2s, v0.2s
6512-
; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0]
6513-
; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1]
6514-
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s
6494+
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
65156495
; CHECK-NOFP16-GI-NEXT: ret
65166496
entry:
65176497
%c = uitofp <2 x i16> %a to <2 x half>
@@ -6766,9 +6746,7 @@ define <2 x half> @stofp_v2i8_v2f16(<2 x i8> %a) {
67666746
; CHECK-NOFP16-GI-NEXT: shl v0.2s, v0.2s, #24
67676747
; CHECK-NOFP16-GI-NEXT: sshr v0.2s, v0.2s, #24
67686748
; CHECK-NOFP16-GI-NEXT: scvtf v0.2s, v0.2s
6769-
; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0]
6770-
; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1]
6771-
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s
6749+
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
67726750
; CHECK-NOFP16-GI-NEXT: ret
67736751
;
67746752
; CHECK-FP16-GI-LABEL: stofp_v2i8_v2f16:
@@ -6817,9 +6795,7 @@ define <2 x half> @utofp_v2i8_v2f16(<2 x i8> %a) {
68176795
; CHECK-NOFP16-GI-NEXT: movi d1, #0x0000ff000000ff
68186796
; CHECK-NOFP16-GI-NEXT: and v0.8b, v0.8b, v1.8b
68196797
; CHECK-NOFP16-GI-NEXT: ucvtf v0.2s, v0.2s
6820-
; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0]
6821-
; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1]
6822-
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s
6798+
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
68236799
; CHECK-NOFP16-GI-NEXT: ret
68246800
;
68256801
; CHECK-FP16-GI-LABEL: utofp_v2i8_v2f16:

0 commit comments

Comments
 (0)