Skip to content

Commit 7bc55e9

Browse files
committed
Use 2-byte loads for FP16 arguments
1 parent c3f8eb0 commit 7bc55e9

File tree

2 files changed

+12
-15
lines changed

2 files changed

+12
-15
lines changed

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4765,17 +4765,18 @@ SDValue ARMTargetLowering::LowerFormalArguments(
47654765
// a 4 byte stack slot. This is done as-if the extension was done
47664766
// in a 32-bit register, so the actual bytes used for the value
47674767
// differ between little and big endian.
4768+
assert(VA.getLocVT().getSizeInBits() == 32);
47684769
unsigned FIOffset = VA.getLocMemOffset();
47694770
int FI = MFI.CreateFixedObject(VA.getLocVT().getSizeInBits() / 8,
47704771
FIOffset, true);
47714772

4772-
// Create load nodes to retrieve arguments from the stack.
4773-
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4774-
SDValue Load = DAG.getLoad(VA.getLocVT(), dl, Chain, FIN,
4775-
MachinePointerInfo::getFixedStack(
4776-
DAG.getMachineFunction(), FI));
4777-
InVals.push_back(
4778-
MoveToHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Load));
4773+
SDValue Addr = DAG.getFrameIndex(FI, PtrVT);
4774+
if (DAG.getDataLayout().isBigEndian())
4775+
Addr = DAG.getObjectPtrOffset(dl, Addr, TypeSize::getFixed(2));
4776+
4777+
InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, Addr,
4778+
MachinePointerInfo::getFixedStack(
4779+
DAG.getMachineFunction(), FI)));
47794780

47804781
} else {
47814782
unsigned FIOffset = VA.getLocMemOffset();

llvm/test/CodeGen/Thumb2/fp16-pcs.ll

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -78,15 +78,13 @@ define arm_aapcscc half @callee_soft_half_on_stack(float %r0, float %r1, float %
7878
;
7979
; LE-FP16-LABEL: callee_soft_half_on_stack:
8080
; LE-FP16: @ %bb.0: @ %entry
81-
; LE-FP16-NEXT: ldr r0, [sp]
82-
; LE-FP16-NEXT: vmov.f16 s0, r0
81+
; LE-FP16-NEXT: vldr.16 s0, [sp]
8382
; LE-FP16-NEXT: vmov r0, s0
8483
; LE-FP16-NEXT: bx lr
8584
;
8685
; BE-FP16-LABEL: callee_soft_half_on_stack:
8786
; BE-FP16: @ %bb.0: @ %entry
88-
; BE-FP16-NEXT: ldr r0, [sp]
89-
; BE-FP16-NEXT: vmov.f16 s0, r0
87+
; BE-FP16-NEXT: vldr.16 s0, [sp, #2]
9088
; BE-FP16-NEXT: vmov r0, s0
9189
; BE-FP16-NEXT: bx lr
9290
entry:
@@ -224,14 +222,12 @@ define arm_aapcs_vfpcc half @callee_hard_half_on_stack(float %s0, float %s1, flo
224222
;
225223
; LE-FP16-LABEL: callee_hard_half_on_stack:
226224
; LE-FP16: @ %bb.0: @ %entry
227-
; LE-FP16-NEXT: ldr r0, [sp]
228-
; LE-FP16-NEXT: vmov.f16 s0, r0
225+
; LE-FP16-NEXT: vldr.16 s0, [sp]
229226
; LE-FP16-NEXT: bx lr
230227
;
231228
; BE-FP16-LABEL: callee_hard_half_on_stack:
232229
; BE-FP16: @ %bb.0: @ %entry
233-
; BE-FP16-NEXT: ldr r0, [sp]
234-
; BE-FP16-NEXT: vmov.f16 s0, r0
230+
; BE-FP16-NEXT: vldr.16 s0, [sp, #2]
235231
; BE-FP16-NEXT: bx lr
236232
entry:
237233
ret half %f

0 commit comments

Comments
 (0)