Skip to content

Commit 0c0505d

Browse files
committed
[AArch64] Fix post-inc stores of floating-point conversions
The commit at #147707 introduced a bug because of missing patterns for post-inc stores where the input is a vector_extract with i64 types. Additionally, remove the pre-legalization early-exit, as it can cause the combine to miss its opportunity to apply the optimization.
1 parent a194d51 commit 0c0505d

File tree

4 files changed

+140
-5
lines changed

4 files changed

+140
-5
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24135,9 +24135,6 @@ static SDValue combineStoreValueFPToInt(StoreSDNode *ST,
2413524135
TargetLowering::DAGCombinerInfo &DCI,
2413624136
SelectionDAG &DAG,
2413724137
const AArch64Subtarget *Subtarget) {
24138-
// Limit to post-legalization in order to avoid peeling truncating stores.
24139-
if (DCI.isBeforeLegalize())
24140-
return SDValue();
2414124138
if (!Subtarget->isNeonAvailable())
2414224139
return SDValue();
2414324140
// Source operand is already a vector.
@@ -24174,6 +24171,13 @@ static SDValue combineStoreValueFPToInt(StoreSDNode *ST,
2417424171
SDValue VecFP = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecSrcVT, FPSrc);
2417524172
SDValue VecConv = DAG.getNode(Value.getOpcode(), DL, VecDstVT, VecFP);
2417624173

24174+
if (ST->isTruncatingStore()) {
24175+
EVT NewVecDstVT = EVT::getVectorVT(
24176+
*DAG.getContext(), ST->getMemoryVT(),
24177+
VecDstVT.getFixedSizeInBits() / ST->getMemoryVT().getFixedSizeInBits());
24178+
VecConv = DAG.getNode(AArch64ISD::NVCAST, DL, NewVecDstVT, VecConv);
24179+
}
24180+
2417724181
SDValue Zero = DAG.getVectorIdxConstant(0, DL);
2417824182
SDValue Extracted =
2417924183
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, VecConv, Zero);

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9273,8 +9273,12 @@ multiclass St1LanePost128Pat<SDPatternOperator scalar_store, Operand VecIndex,
92739273

92749274
defm : St1LanePost128Pat<post_truncsti8, VectorIndexB, v16i8, i32, ST1i8_POST,
92759275
1>;
9276+
defm : St1LanePost128Pat<post_truncsti8, VectorIndexB, v16i8, i64, ST1i8_POST,
9277+
1>;
92769278
defm : St1LanePost128Pat<post_truncsti16, VectorIndexH, v8i16, i32, ST1i16_POST,
92779279
2>;
9280+
defm : St1LanePost128Pat<post_truncsti16, VectorIndexH, v8i16, i64, ST1i16_POST,
9281+
2>;
92789282
defm : St1LanePost128Pat<post_store, VectorIndexS, v4i32, i32, ST1i32_POST, 4>;
92799283
defm : St1LanePost128Pat<post_store, VectorIndexS, v4f32, f32, ST1i32_POST, 4>;
92809284
defm : St1LanePost128Pat<post_store, VectorIndexD, v2i64, i64, ST1i64_POST, 8>;

llvm/test/CodeGen/AArch64/store-float-conversion.ll

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,20 @@ entry:
2727
ret void
2828
}
2929

30+
define ptr @f32_to_s8_inc(float %f, ptr %dst) {
31+
; CHECK-LABEL: f32_to_s8_inc:
32+
; CHECK: // %bb.0: // %entry
33+
; CHECK-NEXT: fcvtzs s0, s0
34+
; CHECK-NEXT: st1 { v0.b }[0], [x0], #1
35+
; CHECK-NEXT: ret
36+
entry:
37+
%conv = fptosi float %f to i32
38+
%trunc = trunc i32 %conv to i8
39+
%next = getelementptr i8, ptr %dst, i64 1
40+
store i8 %trunc, ptr %dst
41+
ret ptr %next
42+
}
43+
3044
define void @f32_to_u16(float %f, ptr %dst) {
3145
; CHECK-LABEL: f32_to_u16:
3246
; CHECK: // %bb.0: // %entry
@@ -53,6 +67,20 @@ entry:
5367
ret void
5468
}
5569

70+
define ptr @f32_to_s16_inc(float %f, ptr %dst) {
71+
; CHECK-LABEL: f32_to_s16_inc:
72+
; CHECK: // %bb.0: // %entry
73+
; CHECK-NEXT: fcvtzs s0, s0
74+
; CHECK-NEXT: st1 { v0.h }[0], [x0], #2
75+
; CHECK-NEXT: ret
76+
entry:
77+
%conv = fptosi float %f to i32
78+
%trunc = trunc i32 %conv to i16
79+
%next = getelementptr i16, ptr %dst, i64 1
80+
store i16 %trunc, ptr %dst
81+
ret ptr %next
82+
}
83+
5684
define void @f32_to_u32(float %f, ptr %dst) {
5785
; CHECK-LABEL: f32_to_u32:
5886
; CHECK: // %bb.0: // %entry
@@ -77,6 +105,19 @@ entry:
77105
ret void
78106
}
79107

108+
define ptr @f32_to_s32_inc(float %f, ptr %dst) {
109+
; CHECK-LABEL: f32_to_s32_inc:
110+
; CHECK: // %bb.0: // %entry
111+
; CHECK-NEXT: fcvtzs s0, s0
112+
; CHECK-NEXT: st1 { v0.s }[0], [x0], #4
113+
; CHECK-NEXT: ret
114+
entry:
115+
%conv = fptosi float %f to i32
116+
%next = getelementptr i32, ptr %dst, i64 1
117+
store i32 %conv, ptr %dst
118+
ret ptr %next
119+
}
120+
80121
define void @f32_to_s64(float %f, ptr %dst) {
81122
; CHECK-LABEL: f32_to_s64:
82123
; CHECK: // %bb.0: // %entry
@@ -115,6 +156,93 @@ entry:
115156
ret void
116157
}
117158

159+
define ptr @f64_to_s64_inc(double %d, ptr %dst) {
160+
; CHECK-LABEL: f64_to_s64_inc:
161+
; CHECK: // %bb.0: // %entry
162+
; CHECK-NEXT: fcvtzs d0, d0
163+
; CHECK-NEXT: st1 { v0.d }[0], [x0], #8
164+
; CHECK-NEXT: ret
165+
entry:
166+
%conv = fptosi double %d to i64
167+
%next = getelementptr i64, ptr %dst, i64 1
168+
store i64 %conv, ptr %dst
169+
ret ptr %next
170+
}
171+
172+
define void @f64_to_u8(double %d, ptr %dst) {
173+
; CHECK-LABEL: f64_to_u8:
174+
; CHECK: // %bb.0:
175+
; CHECK-NEXT: fcvtzu d0, d0
176+
; CHECK-NEXT: str b0, [x0]
177+
; CHECK-NEXT: ret
178+
%conv = fptoui double %d to i64
179+
%trunc = trunc i64 %conv to i8
180+
store i8 %trunc, ptr %dst
181+
ret void
182+
}
183+
184+
define void @f64_to_s8(double %d, ptr %dst) {
185+
; CHECK-LABEL: f64_to_s8:
186+
; CHECK: // %bb.0:
187+
; CHECK-NEXT: fcvtzs d0, d0
188+
; CHECK-NEXT: str b0, [x0]
189+
; CHECK-NEXT: ret
190+
%conv = fptosi double %d to i64
191+
%trunc = trunc i64 %conv to i8
192+
store i8 %trunc, ptr %dst
193+
ret void
194+
}
195+
196+
define ptr @f64_to_s8_inc(double %d, ptr %dst) {
197+
; CHECK-LABEL: f64_to_s8_inc:
198+
; CHECK: // %bb.0:
199+
; CHECK-NEXT: fcvtzs d0, d0
200+
; CHECK-NEXT: st1 { v0.b }[0], [x0], #1
201+
; CHECK-NEXT: ret
202+
%conv = fptosi double %d to i64
203+
%trunc = trunc i64 %conv to i8
204+
store i8 %trunc, ptr %dst
205+
%next = getelementptr i8, ptr %dst, i64 1
206+
ret ptr %next
207+
}
208+
209+
define void @f64_to_u16(double %d, ptr %dst) {
210+
; CHECK-LABEL: f64_to_u16:
211+
; CHECK: // %bb.0:
212+
; CHECK-NEXT: fcvtzu d0, d0
213+
; CHECK-NEXT: str h0, [x0]
214+
; CHECK-NEXT: ret
215+
%conv = fptoui double %d to i64
216+
%trunc = trunc i64 %conv to i16
217+
store i16 %trunc, ptr %dst
218+
ret void
219+
}
220+
221+
define void @f64_to_s16(double %d, ptr %dst) {
222+
; CHECK-LABEL: f64_to_s16:
223+
; CHECK: // %bb.0:
224+
; CHECK-NEXT: fcvtzs d0, d0
225+
; CHECK-NEXT: str h0, [x0]
226+
; CHECK-NEXT: ret
227+
%conv = fptosi double %d to i64
228+
%trunc = trunc i64 %conv to i16
229+
store i16 %trunc, ptr %dst
230+
ret void
231+
}
232+
233+
define ptr @f64_to_s16_inc(double %d, ptr %dst) {
234+
; CHECK-LABEL: f64_to_s16_inc:
235+
; CHECK: // %bb.0:
236+
; CHECK-NEXT: fcvtzs d0, d0
237+
; CHECK-NEXT: st1 { v0.h }[0], [x0], #2
238+
; CHECK-NEXT: ret
239+
%conv = fptosi double %d to i64
240+
%trunc = trunc i64 %conv to i16
241+
%next = getelementptr i16, ptr %dst, i64 1
242+
store i16 %trunc, ptr %dst
243+
ret ptr %next
244+
}
245+
118246
define i32 @f32_to_i32_multiple_uses(float %f, ptr %dst) {
119247
; CHECK-LABEL: f32_to_i32_multiple_uses:
120248
; CHECK: // %bb.0: // %entry

llvm/test/CodeGen/AArch64/tbl-loops.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,8 +64,7 @@ define void @loop1(ptr noalias nocapture noundef writeonly %dst, ptr nocapture n
6464
; CHECK-NEXT: fcsel s2, s0, s3, mi
6565
; CHECK-NEXT: subs w10, w10, #1
6666
; CHECK-NEXT: fcvtzs s2, s2
67-
; CHECK-NEXT: fmov w11, s2
68-
; CHECK-NEXT: strb w11, [x9], #1
67+
; CHECK-NEXT: st1 { v2.b }[0], [x9], #1
6968
; CHECK-NEXT: b.ne .LBB0_7
7069
; CHECK-NEXT: .LBB0_8: // %for.cond.cleanup
7170
; CHECK-NEXT: ret

0 commit comments

Comments
 (0)