Skip to content

Commit 27c6b10

Browse files
committed
[AArch64] Avoid applying S-form to frame index in peephole
1 parent acea1f5 commit 27c6b10

File tree

2 files changed

+101
-0
lines changed

2 files changed

+101
-0
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1920,6 +1920,11 @@ static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr,
19201920
CmpInstr.getOperand(2).getImm() == 0) &&
19211921
"Caller guarantees that CmpInstr compares with constant 0");
19221922

1923+
// The flag-setting (S-form) substitution is not supported on a frame index, because its stack offset may be scalable.
1924+
auto &ST = MI.getParent()->getParent()->getSubtarget<AArch64Subtarget>();
1925+
if ((ST.hasSVE() || ST.isStreaming()) && MI.getOperand(1).isFI())
1926+
return false;
1927+
19231928
std::optional<UsedNZCV> NZVCUsed = examineCFlagsUse(MI, CmpInstr, TRI);
19241929
if (!NZVCUsed || NZVCUsed->C)
19251930
return false;
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
3+
4+
define void @i(ptr %ad, ptr %0) #0 {
5+
; CHECK-LABEL: i:
6+
; CHECK: // %bb.0: // %entry
7+
; CHECK-NEXT: str d11, [sp, #-48]! // 8-byte Folded Spill
8+
; CHECK-NEXT: stp x29, x30, [sp, #16] // 16-byte Folded Spill
9+
; CHECK-NEXT: add x29, sp, #16
10+
; CHECK-NEXT: stp x28, x19, [sp, #32] // 16-byte Folded Spill
11+
; CHECK-NEXT: sub sp, sp, #16
12+
; CHECK-NEXT: addvl sp, sp, #-1
13+
; CHECK-NEXT: .cfi_def_cfa w29, 32
14+
; CHECK-NEXT: .cfi_offset w19, -8
15+
; CHECK-NEXT: .cfi_offset w28, -16
16+
; CHECK-NEXT: .cfi_offset w30, -24
17+
; CHECK-NEXT: .cfi_offset w29, -32
18+
; CHECK-NEXT: .cfi_offset b11, -48
19+
; CHECK-NEXT: //APP
20+
; CHECK-NEXT: //NO_APP
21+
; CHECK-NEXT: // %bb.1: // %asm.fallthrough
22+
; CHECK-NEXT: .LBB0_2: // Inline asm indirect target
23+
; CHECK-NEXT: // %ah.preheader.preheader
24+
; CHECK-NEXT: // Label of block must be emitted
25+
; CHECK-NEXT: mov x8, #-35417 // =0xffffffffffff75a7
26+
; CHECK-NEXT: mov x9, #35417 // =0x8a59
27+
; CHECK-NEXT: mov w19, #1 // =0x1
28+
; CHECK-NEXT: movk x8, #29436, lsl #16
29+
; CHECK-NEXT: movk x9, #36099, lsl #16
30+
; CHECK-NEXT: stp x1, x0, [sp] // 16-byte Folded Spill
31+
; CHECK-NEXT: movk x8, #64591, lsl #32
32+
; CHECK-NEXT: movk x9, #944, lsl #32
33+
; CHECK-NEXT: index z0.d, x9, x8
34+
; CHECK-NEXT: sub x8, x29, #16
35+
; CHECK-NEXT: str z0, [x8, #-1, mul vl] // 16-byte Folded Spill
36+
; CHECK-NEXT: .LBB0_3: // Inline asm indirect target
37+
; CHECK-NEXT: // %ah
38+
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
39+
; CHECK-NEXT: // Label of block must be emitted
40+
; CHECK-NEXT: sub x9, x29, #16
41+
; CHECK-NEXT: ldr x8, [sp, #8] // 8-byte Folded Reload
42+
; CHECK-NEXT: ldr z0, [x9, #-1, mul vl] // 16-byte Folded Reload
43+
; CHECK-NEXT: str d0, [x8]
44+
; CHECK-NEXT: movi v0.2d, #0000000000000000
45+
; CHECK-NEXT: sub x8, x29, #16
46+
; CHECK-NEXT: str z0, [x8, #-1, mul vl] // 16-byte Folded Spill
47+
; CHECK-NEXT: //APP
48+
; CHECK-NEXT: //NO_APP
49+
; CHECK-NEXT: //APP
50+
; CHECK-NEXT: //NO_APP
51+
; CHECK-NEXT: // %bb.4: // %asm.fallthrough2
52+
; CHECK-NEXT: // in Loop: Header=BB0_3 Depth=1
53+
; CHECK-NEXT: bl g
54+
; CHECK-NEXT: add x8, sp, #28
55+
; CHECK-NEXT: addvl x8, x8, #1
56+
; CHECK-NEXT: cmp x8, #0
57+
; CHECK-NEXT: ldp x10, x8, [sp] // 16-byte Folded Reload
58+
; CHECK-NEXT: cset w9, ne
59+
; CHECK-NEXT: strb w19, [x10]
60+
; CHECK-NEXT: str w9, [x8]
61+
; CHECK-NEXT: //APP
62+
; CHECK-NEXT: //NO_APP
63+
; CHECK-NEXT: b .LBB0_3
64+
entry:
65+
%aj = alloca i32, align 4
66+
callbr void asm sideeffect "", "!i,!i"()
67+
to label %asm.fallthrough [label %ah.preheader.preheader, label %ah.preheader.preheader]
68+
69+
ah.preheader.preheader: ; preds = %entry, %entry
70+
%conv = xor i8 0, 1
71+
br label %ah
72+
73+
asm.fallthrough: ; preds = %entry
74+
unreachable
75+
76+
ah: ; preds = %asm.fallthrough2, %asm.fallthrough2, %ah, %ah.preheader.preheader
77+
%af.2 = phi <8 x i64> [ zeroinitializer, %asm.fallthrough2 ], [ <i64 4056814946905, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>, %ah.preheader.preheader ], [ zeroinitializer, %asm.fallthrough2 ], [ zeroinitializer, %ah ]
78+
%vecext = extractelement <8 x i64> %af.2, i64 0
79+
store i64 %vecext, ptr %ad, align 8
80+
call void asm sideeffect "", "~{v11}"()
81+
callbr void asm sideeffect "", "!i"()
82+
to label %asm.fallthrough2 [label %ah]
83+
84+
asm.fallthrough2: ; preds = %ah
85+
%call = call i32 @g()
86+
store i8 %conv, ptr %0, align 1
87+
%cmp = icmp ne ptr %aj, null
88+
%conv3 = zext i1 %cmp to i32
89+
store i32 %conv3, ptr %ad, align 4
90+
callbr void asm sideeffect "", "!i"()
91+
to label %ah [label %ah]
92+
}
93+
94+
declare i32 @g(...)
95+
96+
attributes #0 = { "frame-pointer"="non-leaf" "target-features"="+sve" }

0 commit comments

Comments
 (0)