Skip to content

Commit 4bb5e48

Browse files
authored
[LoongArch] Add codegen support for ILP32D calling convention (#141539)
This patch adds codegen support for the calling convention defined by the ILP32D ABI, which passes `f64` values using a soft-float mechanism. Similar to RISC-V, it introduces pseudo-instructions to construct an `f64` value from a pair of `i32`s, and to split an `f64` into two `i32` values.
1 parent d7b936b commit 4bb5e48

File tree

8 files changed

+518
-84
lines changed

8 files changed

+518
-84
lines changed

llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -319,3 +319,19 @@ def : Pat<(bitconvert FPR64:$src), (MOVFR2GR_D FPR64:$src)>;
319319
let Predicates = [HasBasicD, IsLA64] in {
320320
def : PatFpr<frint, FRINT_D, FPR64>;
321321
} // Predicates = [HasBasicD, IsLA64]
322+
323+
/// Pseudo-instructions needed for the soft-float ABI with LA32D
324+
325+
let Predicates = [HasBasicD, IsLA32] in {
326+
// Moves two GPRs to an FPR.
327+
let usesCustomInserter = 1 in
328+
def BuildPairF64Pseudo
329+
: Pseudo<(outs FPR64:$dst), (ins GPR:$src1, GPR:$src2),
330+
[(set FPR64:$dst, (loongarch_build_pair_f64 GPR:$src1, GPR:$src2))]>;
331+
332+
// Moves an FPR to two GPRs.
333+
let usesCustomInserter = 1 in
334+
def SplitPairF64Pseudo
335+
: Pseudo<(outs GPR:$dst1, GPR:$dst2), (ins FPR64:$src),
336+
[(set GPR:$dst1, GPR:$dst2, (loongarch_split_pair_f64 FPR64:$src))]>;
337+
} // Predicates = [HasBasicD, IsLA32]

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

Lines changed: 268 additions & 41 deletions
Large diffs are not rendered by default.

llvm/lib/Target/LoongArch/LoongArchISelLowering.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,10 @@ enum NodeType : unsigned {
6060

6161
FTINT,
6262

63+
// Build and split F64 pair
64+
BUILD_PAIR_F64,
65+
SPLIT_PAIR_F64,
66+
6367
// Bit counting operations
6468
CLZ_W,
6569
CTZ_W,

llvm/lib/Target/LoongArch/LoongArchInstrInfo.td

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,13 @@ def SDT_LoongArchMovgr2fcsr : SDTypeProfile<0, 2, [SDTCisVT<0, GRLenVT>,
5858
def SDT_LoongArchMovfcsr2gr : SDTypeProfile<1, 1, [SDTCisVT<0, GRLenVT>,
5959
SDTCisSameAs<0, 1>]>;
6060

61+
def SDT_LoongArchBuildPairF64 : SDTypeProfile<1, 2, [SDTCisVT<0, f64>,
62+
SDTCisVT<1, i32>,
63+
SDTCisSameAs<1, 2>]>;
64+
def SDT_LoongArchSplitPairF64 : SDTypeProfile<2, 1, [SDTCisVT<0, i32>,
65+
SDTCisVT<1, i32>,
66+
SDTCisVT<2, f64>]>;
67+
6168
// TODO: Add LoongArch specific DAG Nodes
6269
// Target-independent nodes, but with target-specific formats.
6370
def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_CallSeqStart,
@@ -165,6 +172,11 @@ def loongarch_iocsrwr_d : SDNode<"LoongArchISD::IOCSRWR_D",
165172
def loongarch_cpucfg : SDNode<"LoongArchISD::CPUCFG", SDTUnaryOp,
166173
[SDNPHasChain]>;
167174

175+
def loongarch_build_pair_f64 : SDNode<"LoongArchISD::BUILD_PAIR_F64",
176+
SDT_LoongArchBuildPairF64>;
177+
def loongarch_split_pair_f64 : SDNode<"LoongArchISD::SPLIT_PAIR_F64",
178+
SDT_LoongArchSplitPairF64>;
179+
168180
def to_fclass_mask: SDNodeXForm<timm, [{
169181
uint64_t Check = N->getZExtValue();
170182
unsigned Mask = 0;
Lines changed: 193 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,193 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc --mtriple=loongarch32 --mattr=+d --target-abi=ilp32d < %s \
3+
; RUN: | FileCheck %s
4+
5+
;; This file contains specific tests for the ilp32d ABI.
6+
7+
;; Check passing floating-point arguments with FPRs.
8+
9+
define i32 @callee_float_in_fpr(i32 %a, float %b, double %c) nounwind {
10+
; CHECK-LABEL: callee_float_in_fpr:
11+
; CHECK: # %bb.0:
12+
; CHECK-NEXT: ftintrz.w.s $fa0, $fa0
13+
; CHECK-NEXT: movfr2gr.s $a1, $fa0
14+
; CHECK-NEXT: ftintrz.w.d $fa0, $fa1
15+
; CHECK-NEXT: movfr2gr.s $a2, $fa0
16+
; CHECK-NEXT: add.w $a0, $a0, $a1
17+
; CHECK-NEXT: add.w $a0, $a0, $a2
18+
; CHECK-NEXT: ret
19+
%b_fptosi = fptosi float %b to i32
20+
%c_fptosi = fptosi double %c to i32
21+
%1 = add i32 %a, %b_fptosi
22+
%2 = add i32 %1, %c_fptosi
23+
ret i32 %2
24+
}
25+
26+
define i32 @caller_float_in_fpr() nounwind {
27+
; CHECK-LABEL: caller_float_in_fpr:
28+
; CHECK: # %bb.0:
29+
; CHECK-NEXT: addi.w $sp, $sp, -16
30+
; CHECK-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
31+
; CHECK-NEXT: movgr2fr.w $fa1, $zero
32+
; CHECK-NEXT: movgr2frh.w $fa1, $zero
33+
; CHECK-NEXT: movgr2fr.w $fa0, $zero
34+
; CHECK-NEXT: ori $a0, $zero, 1
35+
; CHECK-NEXT: bl callee_float_in_fpr
36+
; CHECK-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
37+
; CHECK-NEXT: addi.w $sp, $sp, 16
38+
; CHECK-NEXT: ret
39+
%1 = call i32 @callee_float_in_fpr(i32 1, float 0.0, double 0.0)
40+
ret i32 %1
41+
}
42+
43+
;; Check that the GPR is used once the FPRs are exhausted.
44+
45+
;; Must keep define on a single line due to an update_llc_test_checks.py limitation.
46+
define i32 @callee_double_in_gpr_exhausted_fprs(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i) nounwind {
47+
; CHECK-LABEL: callee_double_in_gpr_exhausted_fprs:
48+
; CHECK: # %bb.0:
49+
; CHECK-NEXT: movgr2fr.w $fa0, $a0
50+
; CHECK-NEXT: movgr2frh.w $fa0, $a1
51+
; CHECK-NEXT: ftintrz.w.d $fa1, $fa7
52+
; CHECK-NEXT: movfr2gr.s $a0, $fa1
53+
; CHECK-NEXT: ftintrz.w.d $fa0, $fa0
54+
; CHECK-NEXT: movfr2gr.s $a1, $fa0
55+
; CHECK-NEXT: add.w $a0, $a0, $a1
56+
; CHECK-NEXT: ret
57+
%h_fptosi = fptosi double %h to i32
58+
%i_fptosi = fptosi double %i to i32
59+
%1 = add i32 %h_fptosi, %i_fptosi
60+
ret i32 %1
61+
}
62+
63+
define i32 @caller_double_in_gpr_exhausted_fprs() nounwind {
64+
; CHECK-LABEL: caller_double_in_gpr_exhausted_fprs:
65+
; CHECK: # %bb.0:
66+
; CHECK-NEXT: addi.w $sp, $sp, -16
67+
; CHECK-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
68+
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
69+
; CHECK-NEXT: fld.d $fa1, $a0, %pc_lo12(.LCPI3_0)
70+
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_1)
71+
; CHECK-NEXT: fld.d $fa2, $a0, %pc_lo12(.LCPI3_1)
72+
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_2)
73+
; CHECK-NEXT: fld.d $fa3, $a0, %pc_lo12(.LCPI3_2)
74+
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_3)
75+
; CHECK-NEXT: fld.d $fa4, $a0, %pc_lo12(.LCPI3_3)
76+
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_4)
77+
; CHECK-NEXT: fld.d $fa5, $a0, %pc_lo12(.LCPI3_4)
78+
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_5)
79+
; CHECK-NEXT: fld.d $fa6, $a0, %pc_lo12(.LCPI3_5)
80+
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_6)
81+
; CHECK-NEXT: fld.d $fa7, $a0, %pc_lo12(.LCPI3_6)
82+
; CHECK-NEXT: addi.w $a0, $zero, 1
83+
; CHECK-NEXT: movgr2fr.w $fa0, $a0
84+
; CHECK-NEXT: ffint.s.w $fa0, $fa0
85+
; CHECK-NEXT: fcvt.d.s $fa0, $fa0
86+
; CHECK-NEXT: lu12i.w $a1, 262688
87+
; CHECK-NEXT: move $a0, $zero
88+
; CHECK-NEXT: bl callee_double_in_gpr_exhausted_fprs
89+
; CHECK-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
90+
; CHECK-NEXT: addi.w $sp, $sp, 16
91+
; CHECK-NEXT: ret
92+
%1 = call i32 @callee_double_in_gpr_exhausted_fprs(
93+
double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0,
94+
double 7.0, double 8.0, double 9.0)
95+
ret i32 %1
96+
}
97+
98+
;; Check that the stack is used once the FPRs and GPRs are both exhausted.
99+
100+
;; Must keep define on a single line due to an update_llc_test_checks.py limitation.
101+
define i32 @callee_double_on_stack_exhausted_fprs_gprs(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, double %j, double %k, double %l, double %m, double %n) nounwind {
102+
; CHECK-LABEL: callee_double_on_stack_exhausted_fprs_gprs:
103+
; CHECK: # %bb.0:
104+
; CHECK-NEXT: fld.d $fa0, $sp, 0
105+
; CHECK-NEXT: fld.d $fa1, $sp, 8
106+
; CHECK-NEXT: ftintrz.w.d $fa0, $fa0
107+
; CHECK-NEXT: movfr2gr.s $a0, $fa0
108+
; CHECK-NEXT: ftintrz.w.d $fa0, $fa1
109+
; CHECK-NEXT: movfr2gr.s $a1, $fa0
110+
; CHECK-NEXT: add.w $a0, $a0, $a1
111+
; CHECK-NEXT: ret
112+
%m_fptosi = fptosi double %m to i32
113+
%n_fptosi = fptosi double %n to i32
114+
%1 = add i32 %m_fptosi, %n_fptosi
115+
ret i32 %1
116+
}
117+
118+
define i32 @caller_double_on_stack_exhausted_fprs_gprs() nounwind {
119+
; CHECK-LABEL: caller_double_on_stack_exhausted_fprs_gprs:
120+
; CHECK: # %bb.0:
121+
; CHECK-NEXT: addi.w $sp, $sp, -32
122+
; CHECK-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill
123+
; CHECK-NEXT: lu12i.w $a0, 262816
124+
; CHECK-NEXT: st.w $a0, $sp, 4
125+
; CHECK-NEXT: st.w $zero, $sp, 0
126+
; CHECK-NEXT: lu12i.w $a0, 262848
127+
; CHECK-NEXT: st.w $a0, $sp, 12
128+
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0)
129+
; CHECK-NEXT: fld.d $fa1, $a0, %pc_lo12(.LCPI5_0)
130+
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_1)
131+
; CHECK-NEXT: fld.d $fa2, $a0, %pc_lo12(.LCPI5_1)
132+
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_2)
133+
; CHECK-NEXT: fld.d $fa3, $a0, %pc_lo12(.LCPI5_2)
134+
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_3)
135+
; CHECK-NEXT: fld.d $fa4, $a0, %pc_lo12(.LCPI5_3)
136+
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_4)
137+
; CHECK-NEXT: fld.d $fa5, $a0, %pc_lo12(.LCPI5_4)
138+
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_5)
139+
; CHECK-NEXT: fld.d $fa6, $a0, %pc_lo12(.LCPI5_5)
140+
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_6)
141+
; CHECK-NEXT: fld.d $fa7, $a0, %pc_lo12(.LCPI5_6)
142+
; CHECK-NEXT: addi.w $a0, $zero, 1
143+
; CHECK-NEXT: movgr2fr.w $fa0, $a0
144+
; CHECK-NEXT: ffint.s.w $fa0, $fa0
145+
; CHECK-NEXT: fcvt.d.s $fa0, $fa0
146+
; CHECK-NEXT: lu12i.w $a1, 262688
147+
; CHECK-NEXT: lu12i.w $a3, 262720
148+
; CHECK-NEXT: lu12i.w $a5, 262752
149+
; CHECK-NEXT: lu12i.w $a7, 262784
150+
; CHECK-NEXT: st.w $zero, $sp, 8
151+
; CHECK-NEXT: move $a0, $zero
152+
; CHECK-NEXT: move $a2, $zero
153+
; CHECK-NEXT: move $a4, $zero
154+
; CHECK-NEXT: move $a6, $zero
155+
; CHECK-NEXT: bl callee_double_on_stack_exhausted_fprs_gprs
156+
; CHECK-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload
157+
; CHECK-NEXT: addi.w $sp, $sp, 32
158+
; CHECK-NEXT: ret
159+
%1 = call i32 @callee_double_on_stack_exhausted_fprs_gprs(
160+
double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0,
161+
double 7.0, double 8.0, double 9.0, double 10.0, double 11.0, double 12.0,
162+
double 13.0, double 14.0)
163+
ret i32 %1
164+
}
165+
166+
;; Check returning doubles.
167+
168+
define double @callee_double_ret() nounwind {
169+
; CHECK-LABEL: callee_double_ret:
170+
; CHECK: # %bb.0:
171+
; CHECK-NEXT: addi.w $a0, $zero, 1
172+
; CHECK-NEXT: movgr2fr.w $fa0, $a0
173+
; CHECK-NEXT: ffint.s.w $fa0, $fa0
174+
; CHECK-NEXT: fcvt.d.s $fa0, $fa0
175+
; CHECK-NEXT: ret
176+
ret double 1.0
177+
}
178+
179+
define i64 @caller_double_ret() nounwind {
180+
; CHECK-LABEL: caller_double_ret:
181+
; CHECK: # %bb.0:
182+
; CHECK-NEXT: addi.w $sp, $sp, -16
183+
; CHECK-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
184+
; CHECK-NEXT: bl callee_double_ret
185+
; CHECK-NEXT: movfr2gr.s $a0, $fa0
186+
; CHECK-NEXT: movfrh2gr.s $a1, $fa0
187+
; CHECK-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
188+
; CHECK-NEXT: addi.w $sp, $sp, 16
189+
; CHECK-NEXT: ret
190+
%1 = call double @callee_double_ret()
191+
%2 = bitcast double %1 to i64
192+
ret i64 %2
193+
}

llvm/test/CodeGen/LoongArch/inline-asm-constraint-f.ll

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -32,18 +32,14 @@ define double @constraint_f_double(double %a) nounwind {
3232
define double @constraint_gpr(double %a) {
3333
; LA32-LABEL: constraint_gpr:
3434
; LA32: # %bb.0:
35-
; LA32-NEXT: addi.w $sp, $sp, -16
36-
; LA32-NEXT: .cfi_def_cfa_offset 16
37-
; LA32-NEXT: fst.d $fa0, $sp, 8
38-
; LA32-NEXT: ld.w $a7, $sp, 8
39-
; LA32-NEXT: ld.w $t0, $sp, 12
35+
; LA32-NEXT: .cfi_def_cfa_offset 0
36+
; LA32-NEXT: movfr2gr.s $a7, $fa0
37+
; LA32-NEXT: movfrh2gr.s $t0, $fa0
4038
; LA32-NEXT: #APP
4139
; LA32-NEXT: move $a6, $a7
4240
; LA32-NEXT: #NO_APP
43-
; LA32-NEXT: st.w $a7, $sp, 4
44-
; LA32-NEXT: st.w $a6, $sp, 0
45-
; LA32-NEXT: fld.d $fa0, $sp, 0
46-
; LA32-NEXT: addi.w $sp, $sp, 16
41+
; LA32-NEXT: movgr2fr.w $fa0, $a6
42+
; LA32-NEXT: movgr2frh.w $fa0, $a7
4743
; LA32-NEXT: ret
4844
;
4945
; LA64-LABEL: constraint_gpr:

llvm/test/CodeGen/LoongArch/ir-instruction/double-convert.ll

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -279,11 +279,8 @@ define double @convert_u64_to_double(i64 %a) nounwind {
279279
define double @bitcast_i64_to_double(i64 %a, i64 %b) nounwind {
280280
; LA32-LABEL: bitcast_i64_to_double:
281281
; LA32: # %bb.0:
282-
; LA32-NEXT: addi.w $sp, $sp, -16
283-
; LA32-NEXT: st.w $a1, $sp, 12
284-
; LA32-NEXT: st.w $a0, $sp, 8
285-
; LA32-NEXT: fld.d $fa0, $sp, 8
286-
; LA32-NEXT: addi.w $sp, $sp, 16
282+
; LA32-NEXT: movgr2fr.w $fa0, $a0
283+
; LA32-NEXT: movgr2frh.w $fa0, $a1
287284
; LA32-NEXT: ret
288285
;
289286
; LA64-LABEL: bitcast_i64_to_double:
@@ -297,11 +294,8 @@ define double @bitcast_i64_to_double(i64 %a, i64 %b) nounwind {
297294
define i64 @bitcast_double_to_i64(double %a) nounwind {
298295
; LA32-LABEL: bitcast_double_to_i64:
299296
; LA32: # %bb.0:
300-
; LA32-NEXT: addi.w $sp, $sp, -16
301-
; LA32-NEXT: fst.d $fa0, $sp, 8
302-
; LA32-NEXT: ld.w $a0, $sp, 8
303-
; LA32-NEXT: ld.w $a1, $sp, 12
304-
; LA32-NEXT: addi.w $sp, $sp, 16
297+
; LA32-NEXT: movfr2gr.s $a0, $fa0
298+
; LA32-NEXT: movfrh2gr.s $a1, $fa0
305299
; LA32-NEXT: ret
306300
;
307301
; LA64-LABEL: bitcast_double_to_i64:

llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll

Lines changed: 16 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -115,9 +115,8 @@ define double @load_acquire_double(ptr %ptr) {
115115
; LA32-NEXT: .cfi_offset 1, -4
116116
; LA32-NEXT: ori $a1, $zero, 2
117117
; LA32-NEXT: bl __atomic_load_8
118-
; LA32-NEXT: st.w $a1, $sp, 4
119-
; LA32-NEXT: st.w $a0, $sp, 0
120-
; LA32-NEXT: fld.d $fa0, $sp, 0
118+
; LA32-NEXT: movgr2fr.w $fa0, $a0
119+
; LA32-NEXT: movgr2frh.w $fa0, $a1
121120
; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
122121
; LA32-NEXT: addi.w $sp, $sp, 16
123122
; LA32-NEXT: ret
@@ -234,9 +233,8 @@ define double @load_unordered_double(ptr %ptr) {
234233
; LA32-NEXT: .cfi_offset 1, -4
235234
; LA32-NEXT: move $a1, $zero
236235
; LA32-NEXT: bl __atomic_load_8
237-
; LA32-NEXT: st.w $a1, $sp, 4
238-
; LA32-NEXT: st.w $a0, $sp, 0
239-
; LA32-NEXT: fld.d $fa0, $sp, 0
236+
; LA32-NEXT: movgr2fr.w $fa0, $a0
237+
; LA32-NEXT: movgr2frh.w $fa0, $a1
240238
; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
241239
; LA32-NEXT: addi.w $sp, $sp, 16
242240
; LA32-NEXT: ret
@@ -352,9 +350,8 @@ define double @load_monotonic_double(ptr %ptr) {
352350
; LA32-NEXT: .cfi_offset 1, -4
353351
; LA32-NEXT: move $a1, $zero
354352
; LA32-NEXT: bl __atomic_load_8
355-
; LA32-NEXT: st.w $a1, $sp, 4
356-
; LA32-NEXT: st.w $a0, $sp, 0
357-
; LA32-NEXT: fld.d $fa0, $sp, 0
353+
; LA32-NEXT: movgr2fr.w $fa0, $a0
354+
; LA32-NEXT: movgr2frh.w $fa0, $a1
358355
; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
359356
; LA32-NEXT: addi.w $sp, $sp, 16
360357
; LA32-NEXT: ret
@@ -481,9 +478,8 @@ define double @load_seq_cst_double(ptr %ptr) {
481478
; LA32-NEXT: .cfi_offset 1, -4
482479
; LA32-NEXT: ori $a1, $zero, 5
483480
; LA32-NEXT: bl __atomic_load_8
484-
; LA32-NEXT: st.w $a1, $sp, 4
485-
; LA32-NEXT: st.w $a0, $sp, 0
486-
; LA32-NEXT: fld.d $fa0, $sp, 0
481+
; LA32-NEXT: movgr2fr.w $fa0, $a0
482+
; LA32-NEXT: movgr2frh.w $fa0, $a1
487483
; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
488484
; LA32-NEXT: addi.w $sp, $sp, 16
489485
; LA32-NEXT: ret
@@ -605,9 +601,8 @@ define void @store_release_double(ptr %ptr, double %v) {
605601
; LA32-NEXT: .cfi_def_cfa_offset 16
606602
; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
607603
; LA32-NEXT: .cfi_offset 1, -4
608-
; LA32-NEXT: fst.d $fa0, $sp, 0
609-
; LA32-NEXT: ld.w $a1, $sp, 0
610-
; LA32-NEXT: ld.w $a2, $sp, 4
604+
; LA32-NEXT: movfr2gr.s $a1, $fa0
605+
; LA32-NEXT: movfrh2gr.s $a2, $fa0
611606
; LA32-NEXT: ori $a3, $zero, 3
612607
; LA32-NEXT: bl __atomic_store_8
613608
; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
@@ -723,9 +718,8 @@ define void @store_unordered_double(ptr %ptr, double %v) {
723718
; LA32-NEXT: .cfi_def_cfa_offset 16
724719
; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
725720
; LA32-NEXT: .cfi_offset 1, -4
726-
; LA32-NEXT: fst.d $fa0, $sp, 0
727-
; LA32-NEXT: ld.w $a1, $sp, 0
728-
; LA32-NEXT: ld.w $a2, $sp, 4
721+
; LA32-NEXT: movfr2gr.s $a1, $fa0
722+
; LA32-NEXT: movfrh2gr.s $a2, $fa0
729723
; LA32-NEXT: move $a3, $zero
730724
; LA32-NEXT: bl __atomic_store_8
731725
; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
@@ -841,9 +835,8 @@ define void @store_monotonic_double(ptr %ptr, double %v) {
841835
; LA32-NEXT: .cfi_def_cfa_offset 16
842836
; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
843837
; LA32-NEXT: .cfi_offset 1, -4
844-
; LA32-NEXT: fst.d $fa0, $sp, 0
845-
; LA32-NEXT: ld.w $a1, $sp, 0
846-
; LA32-NEXT: ld.w $a2, $sp, 4
838+
; LA32-NEXT: movfr2gr.s $a1, $fa0
839+
; LA32-NEXT: movfrh2gr.s $a2, $fa0
847840
; LA32-NEXT: move $a3, $zero
848841
; LA32-NEXT: bl __atomic_store_8
849842
; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
@@ -973,9 +966,8 @@ define void @store_seq_cst_double(ptr %ptr, double %v) {
973966
; LA32-NEXT: .cfi_def_cfa_offset 16
974967
; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
975968
; LA32-NEXT: .cfi_offset 1, -4
976-
; LA32-NEXT: fst.d $fa0, $sp, 0
977-
; LA32-NEXT: ld.w $a1, $sp, 0
978-
; LA32-NEXT: ld.w $a2, $sp, 4
969+
; LA32-NEXT: movfr2gr.s $a1, $fa0
970+
; LA32-NEXT: movfrh2gr.s $a2, $fa0
979971
; LA32-NEXT: ori $a3, $zero, 5
980972
; LA32-NEXT: bl __atomic_store_8
981973
; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload

0 commit comments

Comments
 (0)