Skip to content

Commit 4621e17

Browse files
authored
[DAGCombiner] Relax condition for extract_vector_elt combine (#157658)
Checking `isOperationLegalOrCustom` instead of `isOperationLegal` exposes more optimization opportunities. In particular, if a target marks `extract_vector_elt` as `Custom` rather than `Legal` in order to optimize certain cases, this combine would otherwise be skipped and those improvements would be missed. Previously, `isOperationLegalOrCustom` was avoided because of the risk of getting stuck in infinite loops (as noted in 61ec738). In testing, that issue no longer reproduces, although coverage is limited to the regression/unit tests and the test-suite.
1 parent d685508 commit 4621e17

23 files changed

+707
-1008
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -23933,8 +23933,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
2393323933
// scalar_to_vector here as well.
2393423934

2393523935
if (!LegalOperations ||
23936-
// FIXME: Should really be just isOperationLegalOrCustom.
23937-
TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
23936+
TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
2393823937
TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) {
2393923938
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
2394023939
DAG.getVectorIdxConstant(OrigElt, DL));

llvm/test/CodeGen/AArch64/shufflevector.ll

Lines changed: 5 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -286,10 +286,11 @@ define i32 @shufflevector_v2i16(<2 x i16> %a, <2 x i16> %b){
286286
; CHECK-SD: // %bb.0:
287287
; CHECK-SD-NEXT: sub sp, sp, #16
288288
; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
289-
; CHECK-SD-NEXT: ext v0.8b, v0.8b, v1.8b, #4
290-
; CHECK-SD-NEXT: mov s1, v0.s[1]
291-
; CHECK-SD-NEXT: str h0, [sp, #12]
289+
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
290+
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
292291
; CHECK-SD-NEXT: str h1, [sp, #14]
292+
; CHECK-SD-NEXT: mov s0, v0.s[1]
293+
; CHECK-SD-NEXT: str h0, [sp, #12]
293294
; CHECK-SD-NEXT: ldr w0, [sp, #12]
294295
; CHECK-SD-NEXT: add sp, sp, #16
295296
; CHECK-SD-NEXT: ret
@@ -491,10 +492,8 @@ define i32 @shufflevector_v2i16_zeroes(<2 x i16> %a, <2 x i16> %b){
491492
; CHECK-SD-NEXT: sub sp, sp, #16
492493
; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
493494
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
494-
; CHECK-SD-NEXT: dup v1.2s, v0.s[0]
495+
; CHECK-SD-NEXT: str h0, [sp, #14]
495496
; CHECK-SD-NEXT: str h0, [sp, #12]
496-
; CHECK-SD-NEXT: mov s1, v1.s[1]
497-
; CHECK-SD-NEXT: str h1, [sp, #14]
498497
; CHECK-SD-NEXT: ldr w0, [sp, #12]
499498
; CHECK-SD-NEXT: add sp, sp, #16
500499
; CHECK-SD-NEXT: ret

llvm/test/CodeGen/Thumb2/active_lane_mask.ll

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -107,6 +107,7 @@ define <7 x i32> @v7i32(i32 %index, i32 %TC, <7 x i32> %V1, <7 x i32> %V2) {
107107
; CHECK-NEXT: vstrw.32 q0, [r0]
108108
; CHECK-NEXT: vldrw.u32 q0, [r2]
109109
; CHECK-NEXT: ldr r2, [sp, #48]
110+
; CHECK-NEXT: adds r0, #16
110111
; CHECK-NEXT: vqadd.u32 q0, q0, r1
111112
; CHECK-NEXT: ldr r1, [sp, #52]
112113
; CHECK-NEXT: vcmp.u32 hi, q3, q0
@@ -119,12 +120,9 @@ define <7 x i32> @v7i32(i32 %index, i32 %TC, <7 x i32> %V1, <7 x i32> %V2) {
119120
; CHECK-NEXT: ldr r1, [sp, #24]
120121
; CHECK-NEXT: vmov q1[2], q1[0], r2, r1
121122
; CHECK-NEXT: vpsel q0, q1, q0
122-
; CHECK-NEXT: vmov r1, s2
123-
; CHECK-NEXT: vmov.f32 s2, s1
124-
; CHECK-NEXT: vmov r3, s0
125-
; CHECK-NEXT: vmov r2, s2
126-
; CHECK-NEXT: strd r3, r2, [r0, #16]
127-
; CHECK-NEXT: str r1, [r0, #24]
123+
; CHECK-NEXT: vmov r1, r2, d0
124+
; CHECK-NEXT: vmov r3, s2
125+
; CHECK-NEXT: stm r0!, {r1, r2, r3}
128126
; CHECK-NEXT: bx lr
129127
; CHECK-NEXT: .p2align 4
130128
; CHECK-NEXT: @ %bb.1:

llvm/test/CodeGen/Thumb2/mve-complex-deinterleaving-i16-add.ll

Lines changed: 12 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -31,24 +31,19 @@ entry:
3131
define arm_aapcs_vfpcc <4 x i16> @complex_add_v4i16(<4 x i16> %a, <4 x i16> %b) {
3232
; CHECK-LABEL: complex_add_v4i16:
3333
; CHECK: @ %bb.0: @ %entry
34-
; CHECK-NEXT: vrev64.32 q2, q0
35-
; CHECK-NEXT: vmov r1, s6
36-
; CHECK-NEXT: vmov r0, s10
37-
; CHECK-NEXT: vrev64.32 q3, q1
38-
; CHECK-NEXT: vmov r2, s4
39-
; CHECK-NEXT: subs r0, r1, r0
40-
; CHECK-NEXT: vmov r1, s8
34+
; CHECK-NEXT: .save {r4, lr}
35+
; CHECK-NEXT: push {r4, lr}
36+
; CHECK-NEXT: vmov r12, r1, d1
37+
; CHECK-NEXT: vmov r2, lr, d3
38+
; CHECK-NEXT: vmov r3, r4, d2
4139
; CHECK-NEXT: subs r1, r2, r1
42-
; CHECK-NEXT: vmov r2, s0
43-
; CHECK-NEXT: vmov q2[2], q2[0], r1, r0
44-
; CHECK-NEXT: vmov r0, s14
45-
; CHECK-NEXT: vmov r1, s2
46-
; CHECK-NEXT: add r0, r1
47-
; CHECK-NEXT: vmov r1, s12
48-
; CHECK-NEXT: add r1, r2
49-
; CHECK-NEXT: vmov q2[3], q2[1], r1, r0
50-
; CHECK-NEXT: vmov q0, q2
51-
; CHECK-NEXT: bx lr
40+
; CHECK-NEXT: vmov r2, r0, d0
41+
; CHECK-NEXT: subs r0, r3, r0
42+
; CHECK-NEXT: vmov q0[2], q0[0], r0, r1
43+
; CHECK-NEXT: add.w r0, lr, r12
44+
; CHECK-NEXT: adds r1, r4, r2
45+
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
46+
; CHECK-NEXT: pop {r4, pc}
5247
entry:
5348
%a.real = shufflevector <4 x i16> %a, <4 x i16> zeroinitializer, <2 x i32> <i32 0, i32 2>
5449
%a.imag = shufflevector <4 x i16> %a, <4 x i16> zeroinitializer, <2 x i32> <i32 1, i32 3>

llvm/test/CodeGen/Thumb2/mve-complex-deinterleaving-i8-add.ll

Lines changed: 12 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -31,24 +31,19 @@ entry:
3131
define arm_aapcs_vfpcc <4 x i8> @complex_add_v4i8(<4 x i8> %a, <4 x i8> %b) {
3232
; CHECK-LABEL: complex_add_v4i8:
3333
; CHECK: @ %bb.0: @ %entry
34-
; CHECK-NEXT: vrev64.32 q2, q0
35-
; CHECK-NEXT: vmov r1, s6
36-
; CHECK-NEXT: vmov r0, s10
37-
; CHECK-NEXT: vrev64.32 q3, q1
38-
; CHECK-NEXT: vmov r2, s4
39-
; CHECK-NEXT: subs r0, r1, r0
40-
; CHECK-NEXT: vmov r1, s8
34+
; CHECK-NEXT: .save {r4, lr}
35+
; CHECK-NEXT: push {r4, lr}
36+
; CHECK-NEXT: vmov r12, r1, d1
37+
; CHECK-NEXT: vmov r2, lr, d3
38+
; CHECK-NEXT: vmov r3, r4, d2
4139
; CHECK-NEXT: subs r1, r2, r1
42-
; CHECK-NEXT: vmov r2, s0
43-
; CHECK-NEXT: vmov q2[2], q2[0], r1, r0
44-
; CHECK-NEXT: vmov r0, s14
45-
; CHECK-NEXT: vmov r1, s2
46-
; CHECK-NEXT: add r0, r1
47-
; CHECK-NEXT: vmov r1, s12
48-
; CHECK-NEXT: add r1, r2
49-
; CHECK-NEXT: vmov q2[3], q2[1], r1, r0
50-
; CHECK-NEXT: vmov q0, q2
51-
; CHECK-NEXT: bx lr
40+
; CHECK-NEXT: vmov r2, r0, d0
41+
; CHECK-NEXT: subs r0, r3, r0
42+
; CHECK-NEXT: vmov q0[2], q0[0], r0, r1
43+
; CHECK-NEXT: add.w r0, lr, r12
44+
; CHECK-NEXT: adds r1, r4, r2
45+
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
46+
; CHECK-NEXT: pop {r4, pc}
5247
entry:
5348
%a.real = shufflevector <4 x i8> %a, <4 x i8> zeroinitializer, <2 x i32> <i32 0, i32 2>
5449
%a.imag = shufflevector <4 x i8> %a, <4 x i8> zeroinitializer, <2 x i32> <i32 1, i32 3>

llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll

Lines changed: 6 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -185,11 +185,10 @@ define arm_aapcs_vfpcc <6 x i32> @test_signed_v6f32_v6i32(<6 x float> %f) {
185185
; CHECK-MVEFP: @ %bb.0:
186186
; CHECK-MVEFP-NEXT: vcvt.s32.f32 q1, q1
187187
; CHECK-MVEFP-NEXT: vcvt.s32.f32 q0, q0
188-
; CHECK-MVEFP-NEXT: vmov.f32 s6, s5
189-
; CHECK-MVEFP-NEXT: vmov r2, s4
190-
; CHECK-MVEFP-NEXT: vmov r1, s6
191-
; CHECK-MVEFP-NEXT: strd r2, r1, [r0, #16]
188+
; CHECK-MVEFP-NEXT: vmov r1, r2, d2
189+
; CHECK-MVEFP-NEXT: str r2, [r0, #20]
192190
; CHECK-MVEFP-NEXT: vstrw.32 q0, [r0]
191+
; CHECK-MVEFP-NEXT: str r1, [r0, #16]
193192
; CHECK-MVEFP-NEXT: bx lr
194193
%x = call <6 x i32> @llvm.fptosi.sat.v6f32.v6i32(<6 x float> %f)
195194
ret <6 x i32> %x
@@ -221,13 +220,11 @@ define arm_aapcs_vfpcc <7 x i32> @test_signed_v7f32_v7i32(<7 x float> %f) {
221220
; CHECK-MVEFP: @ %bb.0:
222221
; CHECK-MVEFP-NEXT: vcvt.s32.f32 q1, q1
223222
; CHECK-MVEFP-NEXT: vcvt.s32.f32 q0, q0
224-
; CHECK-MVEFP-NEXT: vmov.f32 s10, s5
225-
; CHECK-MVEFP-NEXT: vmov r2, s4
226223
; CHECK-MVEFP-NEXT: vmov r3, s6
227-
; CHECK-MVEFP-NEXT: vmov r1, s10
228-
; CHECK-MVEFP-NEXT: strd r2, r1, [r0, #16]
229-
; CHECK-MVEFP-NEXT: str r3, [r0, #24]
224+
; CHECK-MVEFP-NEXT: vmov r1, r2, d2
225+
; CHECK-MVEFP-NEXT: strd r2, r3, [r0, #20]
230226
; CHECK-MVEFP-NEXT: vstrw.32 q0, [r0]
227+
; CHECK-MVEFP-NEXT: str r1, [r0, #16]
231228
; CHECK-MVEFP-NEXT: bx lr
232229
%x = call <7 x i32> @llvm.fptosi.sat.v7f32.v7i32(<7 x float> %f)
233230
ret <7 x i32> %x

llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll

Lines changed: 6 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -172,11 +172,10 @@ define arm_aapcs_vfpcc <6 x i32> @test_unsigned_v6f32_v6i32(<6 x float> %f) {
172172
; CHECK-MVEFP: @ %bb.0:
173173
; CHECK-MVEFP-NEXT: vcvt.u32.f32 q1, q1
174174
; CHECK-MVEFP-NEXT: vcvt.u32.f32 q0, q0
175-
; CHECK-MVEFP-NEXT: vmov.f32 s6, s5
176-
; CHECK-MVEFP-NEXT: vmov r2, s4
177-
; CHECK-MVEFP-NEXT: vmov r1, s6
178-
; CHECK-MVEFP-NEXT: strd r2, r1, [r0, #16]
175+
; CHECK-MVEFP-NEXT: vmov r1, r2, d2
176+
; CHECK-MVEFP-NEXT: str r2, [r0, #20]
179177
; CHECK-MVEFP-NEXT: vstrw.32 q0, [r0]
178+
; CHECK-MVEFP-NEXT: str r1, [r0, #16]
180179
; CHECK-MVEFP-NEXT: bx lr
181180
%x = call <6 x i32> @llvm.fptoui.sat.v6f32.v6i32(<6 x float> %f)
182181
ret <6 x i32> %x
@@ -208,13 +207,11 @@ define arm_aapcs_vfpcc <7 x i32> @test_unsigned_v7f32_v7i32(<7 x float> %f) {
208207
; CHECK-MVEFP: @ %bb.0:
209208
; CHECK-MVEFP-NEXT: vcvt.u32.f32 q1, q1
210209
; CHECK-MVEFP-NEXT: vcvt.u32.f32 q0, q0
211-
; CHECK-MVEFP-NEXT: vmov.f32 s10, s5
212-
; CHECK-MVEFP-NEXT: vmov r2, s4
213210
; CHECK-MVEFP-NEXT: vmov r3, s6
214-
; CHECK-MVEFP-NEXT: vmov r1, s10
215-
; CHECK-MVEFP-NEXT: strd r2, r1, [r0, #16]
216-
; CHECK-MVEFP-NEXT: str r3, [r0, #24]
211+
; CHECK-MVEFP-NEXT: vmov r1, r2, d2
212+
; CHECK-MVEFP-NEXT: strd r2, r3, [r0, #20]
217213
; CHECK-MVEFP-NEXT: vstrw.32 q0, [r0]
214+
; CHECK-MVEFP-NEXT: str r1, [r0, #16]
218215
; CHECK-MVEFP-NEXT: bx lr
219216
%x = call <7 x i32> @llvm.fptoui.sat.v7f32.v7i32(<7 x float> %f)
220217
ret <7 x i32> %x

0 commit comments

Comments
 (0)