Skip to content

Commit 75b18ba

Browse files
committed
Revert "[AArch64][GlobalISel] Fold away lowered vector sign-extend of vector compares."
This reverts commit dcd02a5. We should instead use the generic combine.
1 parent 6e504d6 commit 75b18ba

File tree

7 files changed

+774
-257
lines changed

7 files changed

+774
-257
lines changed

llvm/lib/Target/AArch64/AArch64Combine.td

Lines changed: 1 addition & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -203,13 +203,6 @@ def split_store_zero_128 : GICombineRule<
203203
(apply [{ applySplitStoreZero128(*${d}, MRI, B, Observer); }])
204204
>;
205205

// NOTE(review): combine rule on the deleted side of this diff (every line has
// a `-` gutter marker). It is rooted at a G_ASHR instruction ($d); the match
// step calls matchSextViaShiftsOfVCmp, which fills the register match-info
// ($info), and the apply step hands $d and $info to
// Helper.replaceSingleDefInstWithReg.
206-
def sext_via_shifts_of_vcmp : GICombineRule<
207-
(defs root:$d, register_matchinfo:$info),
208-
(match (wip_match_opcode G_ASHR):$d,
209-
[{ return matchSextViaShiftsOfVCmp(*${d}, MRI, ${info}); }]),
210-
(apply [{ Helper.replaceSingleDefInstWithReg(*${d}, ${info}); }])
211-
>;
212-
213206
// Post-legalization combines which should happen at all optimization levels.
214207
// (E.g. ones that facilitate matching for the selector) For example, matching
215208
// pseudos.
@@ -236,6 +229,6 @@ def AArch64PostLegalizerCombinerHelper
236229
constant_fold, identity_combines,
237230
ptr_add_immed_chain, overlapping_and,
238231
split_store_zero_128, undef_combines,
239-
select_to_minmax, sext_via_shifts_of_vcmp]> {
232+
select_to_minmax]> {
240233
let DisableRuleOption = "aarch64postlegalizercombiner-disable-rule";
241234
}

llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp

Lines changed: 0 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -32,9 +32,7 @@
3232
#include "llvm/CodeGen/GlobalISel/Utils.h"
3333
#include "llvm/CodeGen/MachineDominators.h"
3434
#include "llvm/CodeGen/MachineFunctionPass.h"
35-
#include "llvm/CodeGen/MachineInstr.h"
3635
#include "llvm/CodeGen/MachineRegisterInfo.h"
37-
#include "llvm/CodeGen/Register.h"
3836
#include "llvm/CodeGen/TargetOpcodes.h"
3937
#include "llvm/CodeGen/TargetPassConfig.h"
4038
#include "llvm/Support/Debug.h"
@@ -331,40 +329,6 @@ static void applySplitStoreZero128(MachineInstr &MI, MachineRegisterInfo &MRI,
331329
Store.eraseFromParent();
332330
}
333331

334-
// Match a legalized vector sext of a vector compare. Vector compares always
335-
// sign-extend the low bit anyway. Unfortunately we have to match the G_SEXT
336-
// after it's been legalized to shifts since this is after legalization.
// NOTE(review): this function is on the deleted side of the diff (every line
// has a `-` gutter marker). Shape matched, as written below:
//   MI (G_ASHR) operand 1 is defined by a G_SHL of some register ShlLHS,
//   both shifts use the same amount, equal to the scalar bit width minus one,
//   and ShlLHS is defined by a GFCmp or GICmp.
// Returns true and sets MatchInfo to the compare's register 0 on success;
// returns false, leaving MatchInfo unwritten, otherwise.
337-
static bool matchSextViaShiftsOfVCmp(MachineInstr &MI, MachineRegisterInfo &MRI,
338-
Register &MatchInfo) {
339-
assert(MI.getOpcode() == TargetOpcode::G_ASHR && "Expected G_ASHR");
340-
LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
// Only vector destinations are considered.
341-
if (!DstTy.isVector())
342-
return false;
343-
344-
Register ShlLHS;
345-
int64_t ShlRHS;
// The shifted value must come from a G_SHL; its source register and its
// shift amount (matched through m_ICstOrSplat) are captured here.
346-
if (!mi_match(MI.getOperand(1).getReg(), MRI,
347-
m_GShl(m_Reg(ShlLHS), m_ICstOrSplat(ShlRHS))))
348-
return false;
349-
350-
// Check the shift amount is correct for a sext.
351-
if (ShlRHS != DstTy.getScalarSizeInBits() - 1)
352-
return false;
// The G_ASHR amount has to match the same value as the G_SHL amount.
353-
if (!mi_match(MI.getOperand(2).getReg(), MRI, m_SpecificICstSplat(ShlRHS)))
354-
return false;
355-
356-
// Check we're trying to extend a vector compare.
// GFCmp is tried first, then GICmp; either one reports its register 0.
357-
if (auto *Cmp = getOpcodeDef<GFCmp>(ShlLHS, MRI)) {
358-
MatchInfo = Cmp->getReg(0);
359-
return true;
360-
}
361-
if (auto *Cmp = getOpcodeDef<GICmp>(ShlLHS, MRI)) {
362-
MatchInfo = Cmp->getReg(0);
363-
return true;
364-
}
365-
return false;
366-
}
367-
368332
#define AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
369333
#include "AArch64GenPostLegalizeGICombiner.inc"
370334
#undef AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS

llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll

Lines changed: 55 additions & 45 deletions
Original file line number | Diff line number | Diff line change
@@ -57,30 +57,32 @@ define <8 x i16> @combine_vec_udiv_nonuniform(<8 x i16> %x) {
5757
; GISEL-LABEL: combine_vec_udiv_nonuniform:
5858
; GISEL: // %bb.0:
5959
; GISEL-NEXT: adrp x8, .LCPI1_4
60-
; GISEL-NEXT: adrp x9, .LCPI1_5
60+
; GISEL-NEXT: adrp x9, .LCPI1_0
6161
; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI1_4]
6262
; GISEL-NEXT: adrp x8, .LCPI1_3
63+
; GISEL-NEXT: ldr q5, [x9, :lo12:.LCPI1_0]
6364
; GISEL-NEXT: neg v1.8h, v1.8h
6465
; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI1_3]
6566
; GISEL-NEXT: adrp x8, .LCPI1_2
6667
; GISEL-NEXT: ushl v1.8h, v0.8h, v1.8h
6768
; GISEL-NEXT: umull2 v3.4s, v1.8h, v2.8h
6869
; GISEL-NEXT: umull v1.4s, v1.4h, v2.4h
6970
; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI1_2]
70-
; GISEL-NEXT: adrp x8, .LCPI1_1
71+
; GISEL-NEXT: adrp x8, .LCPI1_5
7172
; GISEL-NEXT: uzp2 v1.8h, v1.8h, v3.8h
7273
; GISEL-NEXT: sub v3.8h, v0.8h, v1.8h
7374
; GISEL-NEXT: umull2 v4.4s, v3.8h, v2.8h
7475
; GISEL-NEXT: umull v2.4s, v3.4h, v2.4h
75-
; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI1_1]
76-
; GISEL-NEXT: adrp x8, .LCPI1_0
77-
; GISEL-NEXT: neg v3.8h, v3.8h
76+
; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI1_5]
77+
; GISEL-NEXT: adrp x8, .LCPI1_1
78+
; GISEL-NEXT: cmeq v3.8h, v3.8h, v5.8h
7879
; GISEL-NEXT: uzp2 v2.8h, v2.8h, v4.8h
79-
; GISEL-NEXT: ldr q4, [x9, :lo12:.LCPI1_5]
80-
; GISEL-NEXT: ldr q5, [x8, :lo12:.LCPI1_0]
80+
; GISEL-NEXT: ldr q4, [x8, :lo12:.LCPI1_1]
81+
; GISEL-NEXT: shl v3.8h, v3.8h, #15
8182
; GISEL-NEXT: add v1.8h, v2.8h, v1.8h
82-
; GISEL-NEXT: cmeq v2.8h, v4.8h, v5.8h
83-
; GISEL-NEXT: ushl v1.8h, v1.8h, v3.8h
83+
; GISEL-NEXT: neg v2.8h, v4.8h
84+
; GISEL-NEXT: ushl v1.8h, v1.8h, v2.8h
85+
; GISEL-NEXT: sshr v2.8h, v3.8h, #15
8486
; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b
8587
; GISEL-NEXT: ret
8688
%1 = udiv <8 x i16> %x, <i16 23, i16 34, i16 -23, i16 56, i16 128, i16 -1, i16 -256, i16 -32768>
@@ -106,23 +108,25 @@ define <8 x i16> @combine_vec_udiv_nonuniform2(<8 x i16> %x) {
106108
; GISEL-LABEL: combine_vec_udiv_nonuniform2:
107109
; GISEL: // %bb.0:
108110
; GISEL-NEXT: adrp x8, .LCPI2_3
109-
; GISEL-NEXT: adrp x9, .LCPI2_1
111+
; GISEL-NEXT: adrp x9, .LCPI2_4
112+
; GISEL-NEXT: adrp x10, .LCPI2_0
110113
; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI2_3]
111114
; GISEL-NEXT: adrp x8, .LCPI2_2
112-
; GISEL-NEXT: ldr q4, [x9, :lo12:.LCPI2_1]
115+
; GISEL-NEXT: ldr q4, [x10, :lo12:.LCPI2_0]
113116
; GISEL-NEXT: neg v1.8h, v1.8h
114117
; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI2_2]
115-
; GISEL-NEXT: adrp x8, .LCPI2_4
118+
; GISEL-NEXT: adrp x8, .LCPI2_1
116119
; GISEL-NEXT: ushl v1.8h, v0.8h, v1.8h
117-
; GISEL-NEXT: neg v4.8h, v4.8h
118120
; GISEL-NEXT: umull2 v3.4s, v1.8h, v2.8h
121+
; GISEL-NEXT: ldr q5, [x8, :lo12:.LCPI2_1]
119122
; GISEL-NEXT: umull v1.4s, v1.4h, v2.4h
120-
; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI2_4]
121-
; GISEL-NEXT: adrp x8, .LCPI2_0
123+
; GISEL-NEXT: ldr q2, [x9, :lo12:.LCPI2_4]
124+
; GISEL-NEXT: cmeq v2.8h, v2.8h, v4.8h
122125
; GISEL-NEXT: uzp2 v1.8h, v1.8h, v3.8h
123-
; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI2_0]
124-
; GISEL-NEXT: cmeq v2.8h, v2.8h, v3.8h
125-
; GISEL-NEXT: ushl v1.8h, v1.8h, v4.8h
126+
; GISEL-NEXT: neg v3.8h, v5.8h
127+
; GISEL-NEXT: shl v2.8h, v2.8h, #15
128+
; GISEL-NEXT: ushl v1.8h, v1.8h, v3.8h
129+
; GISEL-NEXT: sshr v2.8h, v2.8h, #15
126130
; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b
127131
; GISEL-NEXT: ret
128132
%1 = udiv <8 x i16> %x, <i16 -34, i16 35, i16 36, i16 -37, i16 38, i16 -39, i16 40, i16 -41>
@@ -147,21 +151,23 @@ define <8 x i16> @combine_vec_udiv_nonuniform3(<8 x i16> %x) {
147151
; GISEL-LABEL: combine_vec_udiv_nonuniform3:
148152
; GISEL: // %bb.0:
149153
; GISEL-NEXT: adrp x8, .LCPI3_2
150-
; GISEL-NEXT: adrp x9, .LCPI3_3
154+
; GISEL-NEXT: adrp x9, .LCPI3_0
151155
; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI3_2]
152-
; GISEL-NEXT: adrp x8, .LCPI3_1
153-
; GISEL-NEXT: ldr q4, [x9, :lo12:.LCPI3_3]
156+
; GISEL-NEXT: adrp x8, .LCPI3_3
157+
; GISEL-NEXT: ldr q3, [x9, :lo12:.LCPI3_0]
154158
; GISEL-NEXT: umull2 v2.4s, v0.8h, v1.8h
155159
; GISEL-NEXT: umull v1.4s, v0.4h, v1.4h
156160
; GISEL-NEXT: uzp2 v1.8h, v1.8h, v2.8h
157-
; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI3_1]
158-
; GISEL-NEXT: adrp x8, .LCPI3_0
159-
; GISEL-NEXT: neg v2.8h, v2.8h
160-
; GISEL-NEXT: sub v3.8h, v0.8h, v1.8h
161-
; GISEL-NEXT: usra v1.8h, v3.8h, #1
162-
; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI3_0]
163-
; GISEL-NEXT: ushl v1.8h, v1.8h, v2.8h
164-
; GISEL-NEXT: cmeq v2.8h, v4.8h, v3.8h
161+
; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI3_3]
162+
; GISEL-NEXT: adrp x8, .LCPI3_1
163+
; GISEL-NEXT: cmeq v2.8h, v2.8h, v3.8h
164+
; GISEL-NEXT: sub v4.8h, v0.8h, v1.8h
165+
; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI3_1]
166+
; GISEL-NEXT: shl v2.8h, v2.8h, #15
167+
; GISEL-NEXT: usra v1.8h, v4.8h, #1
168+
; GISEL-NEXT: neg v3.8h, v3.8h
169+
; GISEL-NEXT: sshr v2.8h, v2.8h, #15
170+
; GISEL-NEXT: ushl v1.8h, v1.8h, v3.8h
165171
; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b
166172
; GISEL-NEXT: ret
167173
%1 = udiv <8 x i16> %x, <i16 7, i16 23, i16 25, i16 27, i16 31, i16 47, i16 63, i16 127>
@@ -191,19 +197,21 @@ define <16 x i8> @combine_vec_udiv_nonuniform4(<16 x i8> %x) {
191197
; GISEL-LABEL: combine_vec_udiv_nonuniform4:
192198
; GISEL: // %bb.0:
193199
; GISEL-NEXT: adrp x8, .LCPI4_2
194-
; GISEL-NEXT: adrp x9, .LCPI4_1
200+
; GISEL-NEXT: adrp x9, .LCPI4_0
195201
; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI4_2]
196202
; GISEL-NEXT: adrp x8, .LCPI4_3
197-
; GISEL-NEXT: ldr q4, [x9, :lo12:.LCPI4_1]
203+
; GISEL-NEXT: ldr q4, [x9, :lo12:.LCPI4_0]
198204
; GISEL-NEXT: umull2 v2.8h, v0.16b, v1.16b
199205
; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI4_3]
200206
; GISEL-NEXT: umull v1.8h, v0.8b, v1.8b
201-
; GISEL-NEXT: adrp x8, .LCPI4_0
202-
; GISEL-NEXT: neg v4.16b, v4.16b
207+
; GISEL-NEXT: adrp x8, .LCPI4_1
208+
; GISEL-NEXT: cmeq v3.16b, v3.16b, v4.16b
203209
; GISEL-NEXT: uzp2 v1.16b, v1.16b, v2.16b
204-
; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI4_0]
205-
; GISEL-NEXT: cmeq v2.16b, v3.16b, v2.16b
206-
; GISEL-NEXT: ushl v1.16b, v1.16b, v4.16b
210+
; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI4_1]
211+
; GISEL-NEXT: shl v3.16b, v3.16b, #7
212+
; GISEL-NEXT: neg v2.16b, v2.16b
213+
; GISEL-NEXT: ushl v1.16b, v1.16b, v2.16b
214+
; GISEL-NEXT: sshr v2.16b, v3.16b, #7
207215
; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b
208216
; GISEL-NEXT: ret
209217
%div = udiv <16 x i8> %x, <i8 -64, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
@@ -240,26 +248,28 @@ define <8 x i16> @pr38477(<8 x i16> %a0) {
240248
; GISEL-LABEL: pr38477:
241249
; GISEL: // %bb.0:
242250
; GISEL-NEXT: adrp x8, .LCPI5_3
243-
; GISEL-NEXT: adrp x9, .LCPI5_4
251+
; GISEL-NEXT: adrp x9, .LCPI5_0
244252
; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI5_3]
245253
; GISEL-NEXT: adrp x8, .LCPI5_2
254+
; GISEL-NEXT: ldr q5, [x9, :lo12:.LCPI5_0]
246255
; GISEL-NEXT: umull2 v2.4s, v0.8h, v1.8h
247256
; GISEL-NEXT: umull v1.4s, v0.4h, v1.4h
248257
; GISEL-NEXT: uzp2 v1.8h, v1.8h, v2.8h
249258
; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI5_2]
250-
; GISEL-NEXT: adrp x8, .LCPI5_1
259+
; GISEL-NEXT: adrp x8, .LCPI5_4
251260
; GISEL-NEXT: sub v3.8h, v0.8h, v1.8h
252261
; GISEL-NEXT: umull2 v4.4s, v3.8h, v2.8h
253262
; GISEL-NEXT: umull v2.4s, v3.4h, v2.4h
254-
; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI5_1]
255-
; GISEL-NEXT: adrp x8, .LCPI5_0
256-
; GISEL-NEXT: neg v3.8h, v3.8h
263+
; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI5_4]
264+
; GISEL-NEXT: adrp x8, .LCPI5_1
265+
; GISEL-NEXT: cmeq v3.8h, v3.8h, v5.8h
257266
; GISEL-NEXT: uzp2 v2.8h, v2.8h, v4.8h
258-
; GISEL-NEXT: ldr q4, [x9, :lo12:.LCPI5_4]
259-
; GISEL-NEXT: ldr q5, [x8, :lo12:.LCPI5_0]
267+
; GISEL-NEXT: ldr q4, [x8, :lo12:.LCPI5_1]
268+
; GISEL-NEXT: shl v3.8h, v3.8h, #15
260269
; GISEL-NEXT: add v1.8h, v2.8h, v1.8h
261-
; GISEL-NEXT: cmeq v2.8h, v4.8h, v5.8h
262-
; GISEL-NEXT: ushl v1.8h, v1.8h, v3.8h
270+
; GISEL-NEXT: neg v2.8h, v4.8h
271+
; GISEL-NEXT: ushl v1.8h, v1.8h, v2.8h
272+
; GISEL-NEXT: sshr v2.8h, v3.8h, #15
263273
; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b
264274
; GISEL-NEXT: ret
265275
%1 = udiv <8 x i16> %a0, <i16 1, i16 119, i16 73, i16 -111, i16 -3, i16 118, i16 32, i16 31>

llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-sext-of-vcmp.mir

Lines changed: 0 additions & 146 deletions
This file was deleted.

0 commit comments

Comments
 (0)