Skip to content

Commit 4621e17

Browse files
authored
[DAGCombiner] Relax condition for extract_vector_elt combine (#157658)
Checking `isOperationLegalOrCustom` instead of `isOperationLegal` allows more optimization opportunities. In particular, if a target marks `extract_vector_elt` as `Custom` rather than `Legal` in order to optimize certain cases, this combine would otherwise not fire and those improvements would be missed. Previously, `isOperationLegalOrCustom` was avoided because of the risk of getting stuck in an infinite loop (as noted in llvm/llvm-project@61ec738). In testing, that issue no longer reproduces, although coverage is limited to the regression/unit tests and the test-suite.
1 parent d685508 commit 4621e17

23 files changed: 707 additions (+707) and 1008 deletions (-1008).

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -23933,8 +23933,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
2393323933
// scalar_to_vector here as well.
2393423934

2393523935
if (!LegalOperations ||
23936-
// FIXME: Should really be just isOperationLegalOrCustom.
23937-
TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
23936+
TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
2393823937
TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) {
2393923938
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
2394023939
DAG.getVectorIdxConstant(OrigElt, DL));

llvm/test/CodeGen/AArch64/shufflevector.ll

Lines changed: 5 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -286,10 +286,11 @@ define i32 @shufflevector_v2i16(<2 x i16> %a, <2 x i16> %b){
286286
; CHECK-SD: // %bb.0:
287287
; CHECK-SD-NEXT: sub sp, sp, #16
288288
; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
289-
; CHECK-SD-NEXT: ext v0.8b, v0.8b, v1.8b, #4
290-
; CHECK-SD-NEXT: mov s1, v0.s[1]
291-
; CHECK-SD-NEXT: str h0, [sp, #12]
289+
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
290+
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
292291
; CHECK-SD-NEXT: str h1, [sp, #14]
292+
; CHECK-SD-NEXT: mov s0, v0.s[1]
293+
; CHECK-SD-NEXT: str h0, [sp, #12]
293294
; CHECK-SD-NEXT: ldr w0, [sp, #12]
294295
; CHECK-SD-NEXT: add sp, sp, #16
295296
; CHECK-SD-NEXT: ret
@@ -491,10 +492,8 @@ define i32 @shufflevector_v2i16_zeroes(<2 x i16> %a, <2 x i16> %b){
491492
; CHECK-SD-NEXT: sub sp, sp, #16
492493
; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
493494
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
494-
; CHECK-SD-NEXT: dup v1.2s, v0.s[0]
495+
; CHECK-SD-NEXT: str h0, [sp, #14]
495496
; CHECK-SD-NEXT: str h0, [sp, #12]
496-
; CHECK-SD-NEXT: mov s1, v1.s[1]
497-
; CHECK-SD-NEXT: str h1, [sp, #14]
498497
; CHECK-SD-NEXT: ldr w0, [sp, #12]
499498
; CHECK-SD-NEXT: add sp, sp, #16
500499
; CHECK-SD-NEXT: ret

llvm/test/CodeGen/Thumb2/active_lane_mask.ll

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -107,6 +107,7 @@ define <7 x i32> @v7i32(i32 %index, i32 %TC, <7 x i32> %V1, <7 x i32> %V2) {
107107
; CHECK-NEXT: vstrw.32 q0, [r0]
108108
; CHECK-NEXT: vldrw.u32 q0, [r2]
109109
; CHECK-NEXT: ldr r2, [sp, #48]
110+
; CHECK-NEXT: adds r0, #16
110111
; CHECK-NEXT: vqadd.u32 q0, q0, r1
111112
; CHECK-NEXT: ldr r1, [sp, #52]
112113
; CHECK-NEXT: vcmp.u32 hi, q3, q0
@@ -119,12 +120,9 @@ define <7 x i32> @v7i32(i32 %index, i32 %TC, <7 x i32> %V1, <7 x i32> %V2) {
119120
; CHECK-NEXT: ldr r1, [sp, #24]
120121
; CHECK-NEXT: vmov q1[2], q1[0], r2, r1
121122
; CHECK-NEXT: vpsel q0, q1, q0
122-
; CHECK-NEXT: vmov r1, s2
123-
; CHECK-NEXT: vmov.f32 s2, s1
124-
; CHECK-NEXT: vmov r3, s0
125-
; CHECK-NEXT: vmov r2, s2
126-
; CHECK-NEXT: strd r3, r2, [r0, #16]
127-
; CHECK-NEXT: str r1, [r0, #24]
123+
; CHECK-NEXT: vmov r1, r2, d0
124+
; CHECK-NEXT: vmov r3, s2
125+
; CHECK-NEXT: stm r0!, {r1, r2, r3}
128126
; CHECK-NEXT: bx lr
129127
; CHECK-NEXT: .p2align 4
130128
; CHECK-NEXT: @ %bb.1:

llvm/test/CodeGen/Thumb2/mve-complex-deinterleaving-i16-add.ll

Lines changed: 12 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -31,24 +31,19 @@ entry:
3131
define arm_aapcs_vfpcc <4 x i16> @complex_add_v4i16(<4 x i16> %a, <4 x i16> %b) {
3232
; CHECK-LABEL: complex_add_v4i16:
3333
; CHECK: @ %bb.0: @ %entry
34-
; CHECK-NEXT: vrev64.32 q2, q0
35-
; CHECK-NEXT: vmov r1, s6
36-
; CHECK-NEXT: vmov r0, s10
37-
; CHECK-NEXT: vrev64.32 q3, q1
38-
; CHECK-NEXT: vmov r2, s4
39-
; CHECK-NEXT: subs r0, r1, r0
40-
; CHECK-NEXT: vmov r1, s8
34+
; CHECK-NEXT: .save {r4, lr}
35+
; CHECK-NEXT: push {r4, lr}
36+
; CHECK-NEXT: vmov r12, r1, d1
37+
; CHECK-NEXT: vmov r2, lr, d3
38+
; CHECK-NEXT: vmov r3, r4, d2
4139
; CHECK-NEXT: subs r1, r2, r1
42-
; CHECK-NEXT: vmov r2, s0
43-
; CHECK-NEXT: vmov q2[2], q2[0], r1, r0
44-
; CHECK-NEXT: vmov r0, s14
45-
; CHECK-NEXT: vmov r1, s2
46-
; CHECK-NEXT: add r0, r1
47-
; CHECK-NEXT: vmov r1, s12
48-
; CHECK-NEXT: add r1, r2
49-
; CHECK-NEXT: vmov q2[3], q2[1], r1, r0
50-
; CHECK-NEXT: vmov q0, q2
51-
; CHECK-NEXT: bx lr
40+
; CHECK-NEXT: vmov r2, r0, d0
41+
; CHECK-NEXT: subs r0, r3, r0
42+
; CHECK-NEXT: vmov q0[2], q0[0], r0, r1
43+
; CHECK-NEXT: add.w r0, lr, r12
44+
; CHECK-NEXT: adds r1, r4, r2
45+
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
46+
; CHECK-NEXT: pop {r4, pc}
5247
entry:
5348
%a.real = shufflevector <4 x i16> %a, <4 x i16> zeroinitializer, <2 x i32> <i32 0, i32 2>
5449
%a.imag = shufflevector <4 x i16> %a, <4 x i16> zeroinitializer, <2 x i32> <i32 1, i32 3>

llvm/test/CodeGen/Thumb2/mve-complex-deinterleaving-i8-add.ll

Lines changed: 12 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -31,24 +31,19 @@ entry:
3131
define arm_aapcs_vfpcc <4 x i8> @complex_add_v4i8(<4 x i8> %a, <4 x i8> %b) {
3232
; CHECK-LABEL: complex_add_v4i8:
3333
; CHECK: @ %bb.0: @ %entry
34-
; CHECK-NEXT: vrev64.32 q2, q0
35-
; CHECK-NEXT: vmov r1, s6
36-
; CHECK-NEXT: vmov r0, s10
37-
; CHECK-NEXT: vrev64.32 q3, q1
38-
; CHECK-NEXT: vmov r2, s4
39-
; CHECK-NEXT: subs r0, r1, r0
40-
; CHECK-NEXT: vmov r1, s8
34+
; CHECK-NEXT: .save {r4, lr}
35+
; CHECK-NEXT: push {r4, lr}
36+
; CHECK-NEXT: vmov r12, r1, d1
37+
; CHECK-NEXT: vmov r2, lr, d3
38+
; CHECK-NEXT: vmov r3, r4, d2
4139
; CHECK-NEXT: subs r1, r2, r1
42-
; CHECK-NEXT: vmov r2, s0
43-
; CHECK-NEXT: vmov q2[2], q2[0], r1, r0
44-
; CHECK-NEXT: vmov r0, s14
45-
; CHECK-NEXT: vmov r1, s2
46-
; CHECK-NEXT: add r0, r1
47-
; CHECK-NEXT: vmov r1, s12
48-
; CHECK-NEXT: add r1, r2
49-
; CHECK-NEXT: vmov q2[3], q2[1], r1, r0
50-
; CHECK-NEXT: vmov q0, q2
51-
; CHECK-NEXT: bx lr
40+
; CHECK-NEXT: vmov r2, r0, d0
41+
; CHECK-NEXT: subs r0, r3, r0
42+
; CHECK-NEXT: vmov q0[2], q0[0], r0, r1
43+
; CHECK-NEXT: add.w r0, lr, r12
44+
; CHECK-NEXT: adds r1, r4, r2
45+
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
46+
; CHECK-NEXT: pop {r4, pc}
5247
entry:
5348
%a.real = shufflevector <4 x i8> %a, <4 x i8> zeroinitializer, <2 x i32> <i32 0, i32 2>
5449
%a.imag = shufflevector <4 x i8> %a, <4 x i8> zeroinitializer, <2 x i32> <i32 1, i32 3>

llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll

Lines changed: 6 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -185,11 +185,10 @@ define arm_aapcs_vfpcc <6 x i32> @test_signed_v6f32_v6i32(<6 x float> %f) {
185185
; CHECK-MVEFP: @ %bb.0:
186186
; CHECK-MVEFP-NEXT: vcvt.s32.f32 q1, q1
187187
; CHECK-MVEFP-NEXT: vcvt.s32.f32 q0, q0
188-
; CHECK-MVEFP-NEXT: vmov.f32 s6, s5
189-
; CHECK-MVEFP-NEXT: vmov r2, s4
190-
; CHECK-MVEFP-NEXT: vmov r1, s6
191-
; CHECK-MVEFP-NEXT: strd r2, r1, [r0, #16]
188+
; CHECK-MVEFP-NEXT: vmov r1, r2, d2
189+
; CHECK-MVEFP-NEXT: str r2, [r0, #20]
192190
; CHECK-MVEFP-NEXT: vstrw.32 q0, [r0]
191+
; CHECK-MVEFP-NEXT: str r1, [r0, #16]
193192
; CHECK-MVEFP-NEXT: bx lr
194193
%x = call <6 x i32> @llvm.fptosi.sat.v6f32.v6i32(<6 x float> %f)
195194
ret <6 x i32> %x
@@ -221,13 +220,11 @@ define arm_aapcs_vfpcc <7 x i32> @test_signed_v7f32_v7i32(<7 x float> %f) {
221220
; CHECK-MVEFP: @ %bb.0:
222221
; CHECK-MVEFP-NEXT: vcvt.s32.f32 q1, q1
223222
; CHECK-MVEFP-NEXT: vcvt.s32.f32 q0, q0
224-
; CHECK-MVEFP-NEXT: vmov.f32 s10, s5
225-
; CHECK-MVEFP-NEXT: vmov r2, s4
226223
; CHECK-MVEFP-NEXT: vmov r3, s6
227-
; CHECK-MVEFP-NEXT: vmov r1, s10
228-
; CHECK-MVEFP-NEXT: strd r2, r1, [r0, #16]
229-
; CHECK-MVEFP-NEXT: str r3, [r0, #24]
224+
; CHECK-MVEFP-NEXT: vmov r1, r2, d2
225+
; CHECK-MVEFP-NEXT: strd r2, r3, [r0, #20]
230226
; CHECK-MVEFP-NEXT: vstrw.32 q0, [r0]
227+
; CHECK-MVEFP-NEXT: str r1, [r0, #16]
231228
; CHECK-MVEFP-NEXT: bx lr
232229
%x = call <7 x i32> @llvm.fptosi.sat.v7f32.v7i32(<7 x float> %f)
233230
ret <7 x i32> %x

llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll

Lines changed: 6 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -172,11 +172,10 @@ define arm_aapcs_vfpcc <6 x i32> @test_unsigned_v6f32_v6i32(<6 x float> %f) {
172172
; CHECK-MVEFP: @ %bb.0:
173173
; CHECK-MVEFP-NEXT: vcvt.u32.f32 q1, q1
174174
; CHECK-MVEFP-NEXT: vcvt.u32.f32 q0, q0
175-
; CHECK-MVEFP-NEXT: vmov.f32 s6, s5
176-
; CHECK-MVEFP-NEXT: vmov r2, s4
177-
; CHECK-MVEFP-NEXT: vmov r1, s6
178-
; CHECK-MVEFP-NEXT: strd r2, r1, [r0, #16]
175+
; CHECK-MVEFP-NEXT: vmov r1, r2, d2
176+
; CHECK-MVEFP-NEXT: str r2, [r0, #20]
179177
; CHECK-MVEFP-NEXT: vstrw.32 q0, [r0]
178+
; CHECK-MVEFP-NEXT: str r1, [r0, #16]
180179
; CHECK-MVEFP-NEXT: bx lr
181180
%x = call <6 x i32> @llvm.fptoui.sat.v6f32.v6i32(<6 x float> %f)
182181
ret <6 x i32> %x
@@ -208,13 +207,11 @@ define arm_aapcs_vfpcc <7 x i32> @test_unsigned_v7f32_v7i32(<7 x float> %f) {
208207
; CHECK-MVEFP: @ %bb.0:
209208
; CHECK-MVEFP-NEXT: vcvt.u32.f32 q1, q1
210209
; CHECK-MVEFP-NEXT: vcvt.u32.f32 q0, q0
211-
; CHECK-MVEFP-NEXT: vmov.f32 s10, s5
212-
; CHECK-MVEFP-NEXT: vmov r2, s4
213210
; CHECK-MVEFP-NEXT: vmov r3, s6
214-
; CHECK-MVEFP-NEXT: vmov r1, s10
215-
; CHECK-MVEFP-NEXT: strd r2, r1, [r0, #16]
216-
; CHECK-MVEFP-NEXT: str r3, [r0, #24]
211+
; CHECK-MVEFP-NEXT: vmov r1, r2, d2
212+
; CHECK-MVEFP-NEXT: strd r2, r3, [r0, #20]
217213
; CHECK-MVEFP-NEXT: vstrw.32 q0, [r0]
214+
; CHECK-MVEFP-NEXT: str r1, [r0, #16]
218215
; CHECK-MVEFP-NEXT: bx lr
219216
%x = call <7 x i32> @llvm.fptoui.sat.v7f32.v7i32(<7 x float> %f)
220217
ret <7 x i32> %x

0 commit comments

Comments
 (0)