
Commit f5f0076

[DAGCombiner] Fix check for extending loads (#112182)
Fix a check for extending loads in DAGCombiner: if the result type has more bits than the loaded type, the load should count as an extending load. All backends apart from AArch64 ignore the ExtTy argument to shouldReduceLoadWidth, so this change currently only affects AArch64.
Parent: caa7301

2 files changed: +36 −1 lines

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 1 addition & 1 deletion
@@ -22568,7 +22568,7 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
     return SDValue();
 
   ISD::LoadExtType ExtTy =
-      ResultVT.bitsGT(VecEltVT) ? ISD::NON_EXTLOAD : ISD::EXTLOAD;
+      ResultVT.bitsGT(VecEltVT) ? ISD::EXTLOAD : ISD::NON_EXTLOAD;
   if (!TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT) ||
       !TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
     return SDValue();
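
The ExtTy passed to shouldReduceLoadWidth in this hunk is the argument that, per the commit message, only AArch64 inspects. Below is a minimal sketch of how a backend override could consult it; the hook name and parameter list mirror the call site above, while the class MyTargetLowering and its decision logic are hypothetical and purely illustrative (the real heuristic lives in AArch64TargetLowering::shouldReduceLoadWidth and is more involved).

// Hypothetical target override, not the actual AArch64 implementation.
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLowering.h"

using namespace llvm;

class MyTargetLowering : public TargetLowering {
public:
  using TargetLowering::TargetLowering;

  bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
                             EVT NewVT) const override {
    // With the fix above, ExtTy is ISD::EXTLOAD whenever the result type
    // is wider than the narrowed load type NewVT, so a target can veto
    // narrowing when it has no cheap extending load for NewVT.
    if (ExtTy == ISD::EXTLOAD && NewVT.getScalarSizeInBits() < 32)
      return false; // keep the original wide load
    return true;    // allow DAGCombiner to narrow the load
  }
};

Before this change the two enum values were swapped, so a target's heuristic saw ISD::NON_EXTLOAD for loads that were actually extending and could base its narrowing decision on the wrong information.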
Lines changed: 35 additions & 0 deletions (new file)
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
+
+; FIXME: Currently, we avoid narrowing this v4i32 load, in the
+; hopes of being able to fold the shift, despite it requiring stack
+; storage + loads. Ideally, we should narrow here and load the i32
+; directly from the variable offset e.g:
+;
+; add x8, x0, x1, lsl #4
+; and x9, x2, #0x3
+; ldr w0, [x8, x9, lsl #2]
+;
+; The AArch64TargetLowering::shouldReduceLoadWidth heuristic should
+; probably be updated to choose load-narrowing instead of folding the
+; lsl in larger vector cases.
+;
+define i32 @narrow_load_v4_i32_single_ele_variable_idx(ptr %ptr, i64 %off, i32 %ele) {
+; CHECK-LABEL: narrow_load_v4_i32_single_ele_variable_idx:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sub sp, sp, #16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    ldr q0, [x0, x1, lsl #4]
+; CHECK-NEXT:    mov x8, sp
+; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
+; CHECK-NEXT:    bfi x8, x2, #2, #2
+; CHECK-NEXT:    str q0, [sp]
+; CHECK-NEXT:    ldr w0, [x8]
+; CHECK-NEXT:    add sp, sp, #16
+; CHECK-NEXT:    ret
+entry:
+  %idx = getelementptr inbounds <4 x i32>, ptr %ptr, i64 %off
+  %x = load <4 x i32>, ptr %idx, align 8
+  %res = extractelement <4 x i32> %x, i32 %ele
+  ret i32 %res
+}
