Skip to content

Commit a280a19

Browse files
committed
[MIPS][float] Fixed SingleFloat codegen on N32/N64 targets
1 parent a45f20b commit a280a19

12 files changed

+766
-41
lines changed

llvm/lib/Target/Mips/MipsCallingConv.td

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,8 @@ def RetCC_MipsN : CallingConv<[
186186
//
187187
// f128 should only occur for the N64 ABI where long double is 128-bit. On
188188
// N32, long double is equivalent to double.
189-
CCIfType<[i64], CCIfOrigArgWasF128<CCDelegateTo<RetCC_F128>>>,
189+
CCIfSubtargetNot<"isSingleFloat()",
190+
CCIfType<[i64], CCIfOrigArgWasF128<CCDelegateTo<RetCC_F128>>>>,
190191

191192
// Aggregate returns are positioned at the lowest address in the slot for
192193
// both little and big-endian targets. When passing in registers, this
@@ -316,9 +317,10 @@ def CC_Mips_FixedArg : CallingConv<[
316317
//
317318
// f128 should only occur for the N64 ABI where long double is 128-bit. On
318319
// N32, long double is equivalent to double.
319-
CCIfType<[i64],
320-
CCIfSubtargetNot<"useSoftFloat()",
321-
CCIfOrigArgWasF128<CCBitConvertToType<f64>>>>,
320+
CCIfType<[i64],
321+
CCIfSubtargetNot<"isSingleFloat()",
322+
CCIfSubtargetNot<"useSoftFloat()",
323+
CCIfOrigArgWasF128<CCBitConvertToType<f64>>>>>,
322324

323325
CCIfCC<"CallingConv::Fast", CCDelegateTo<CC_Mips_FastCC>>,
324326

@@ -342,8 +344,8 @@ def CC_Mips : CallingConv<[
342344
// Callee-saved register lists.
343345
//===----------------------------------------------------------------------===//
344346

345-
def CSR_SingleFloatOnly : CalleeSavedRegs<(add (sequence "F%u", 31, 20), RA, FP,
346-
(sequence "S%u", 7, 0))>;
347+
def CSR_O32_SingleFloat : CalleeSavedRegs<(add(sequence "F%u", 31, 20), RA, FP,
348+
(sequence "S%u", 7, 0))>;
347349

348350
def CSR_O32_FPXX : CalleeSavedRegs<(add (sequence "D%u", 15, 10), RA, FP,
349351
(sequence "S%u", 7, 0))> {
@@ -357,13 +359,19 @@ def CSR_O32_FP64 :
357359
CalleeSavedRegs<(add (decimate (sequence "D%u_64", 30, 20), 2), RA, FP,
358360
(sequence "S%u", 7, 0))>;
359361

360-
def CSR_N32 : CalleeSavedRegs<(add D20_64, D22_64, D24_64, D26_64, D28_64,
361-
D30_64, RA_64, FP_64, GP_64,
362-
(sequence "S%u_64", 7, 0))>;
362+
def CSR_N32 : CalleeSavedRegs<(add(decimate(sequence "D%u_64", 30, 20), 2),
363+
RA_64, FP_64, GP_64, (sequence "S%u_64", 7, 0))>;
364+
365+
def CSR_N32_SingleFloat
366+
: CalleeSavedRegs<(add(decimate(sequence "F%u", 30, 20), 2), RA_64, FP_64,
367+
GP_64, (sequence "S%u_64", 7, 0))>;
363368

364369
def CSR_N64 : CalleeSavedRegs<(add (sequence "D%u_64", 31, 24), RA_64, FP_64,
365370
GP_64, (sequence "S%u_64", 7, 0))>;
366371

372+
def CSR_N64_SingleFloat : CalleeSavedRegs<(add(sequence "F%u", 31, 24), RA_64,
373+
FP_64, GP_64, (sequence "S%u_64", 7, 0))>;
374+
367375
def CSR_Mips16RetHelper :
368376
CalleeSavedRegs<(add V0, V1, FP,
369377
(sequence "A%u", 3, 0), (sequence "S%u", 7, 0),

llvm/lib/Target/Mips/MipsISelLowering.cpp

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4265,10 +4265,16 @@ parseRegForInlineAsmConstraint(StringRef C, MVT VT) const {
42654265
return std::make_pair(0U, nullptr);
42664266

42674267
if (Prefix == "$f") { // Parse $f0-$f31.
4268-
// If the size of FP registers is 64-bit or Reg is an even number, select
4269-
// the 64-bit register class. Otherwise, select the 32-bit register class.
4270-
if (VT == MVT::Other)
4271-
VT = (Subtarget.isFP64bit() || !(Reg % 2)) ? MVT::f64 : MVT::f32;
4268+
// If the target is single-float only, always select 32-bit registers,
4269+
// otherwise if the size of FP registers is 64-bit or Reg is an even number,
4270+
// select the 64-bit register class. Otherwise, select the 32-bit register
4271+
// class.
4272+
if (VT == MVT::Other) {
4273+
if (Subtarget.isSingleFloat())
4274+
VT = MVT::f32;
4275+
else
4276+
VT = (Subtarget.isFP64bit() || !(Reg % 2)) ? MVT::f64 : MVT::f32;
4277+
}
42724278

42734279
RC = getRegClassFor(VT);
42744280

@@ -4308,10 +4314,12 @@ MipsTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
43084314
return std::make_pair(0U, &Mips::CPU16RegsRegClass);
43094315
return std::make_pair(0U, &Mips::GPR32RegClass);
43104316
}
4311-
if ((VT == MVT::i64 || (VT == MVT::f64 && Subtarget.useSoftFloat())) &&
4317+
if ((VT == MVT::i64 || (VT == MVT::f64 && Subtarget.useSoftFloat()) ||
4318+
(VT == MVT::f64 && Subtarget.isSingleFloat())) &&
43124319
!Subtarget.isGP64bit())
43134320
return std::make_pair(0U, &Mips::GPR32RegClass);
4314-
if ((VT == MVT::i64 || (VT == MVT::f64 && Subtarget.useSoftFloat())) &&
4321+
if ((VT == MVT::i64 || (VT == MVT::f64 && Subtarget.useSoftFloat()) ||
4322+
(VT == MVT::f64 && Subtarget.isSingleFloat())) &&
43154323
Subtarget.isGP64bit())
43164324
return std::make_pair(0U, &Mips::GPR64RegClass);
43174325
// This will generate an error message

llvm/lib/Target/Mips/MipsRegisterInfo.cpp

Lines changed: 30 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -89,14 +89,25 @@ MipsRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
8989
: CSR_Interrupt_32_SaveList;
9090
}
9191

92-
if (Subtarget.isSingleFloat())
93-
return CSR_SingleFloatOnly_SaveList;
92+
// N64 ABI
93+
if (Subtarget.isABI_N64()) {
94+
if (Subtarget.isSingleFloat())
95+
return CSR_N64_SingleFloat_SaveList;
9496

95-
if (Subtarget.isABI_N64())
9697
return CSR_N64_SaveList;
98+
}
99+
100+
// N32 ABI
101+
if (Subtarget.isABI_N32()) {
102+
if (Subtarget.isSingleFloat())
103+
return CSR_N32_SingleFloat_SaveList;
97104

98-
if (Subtarget.isABI_N32())
99105
return CSR_N32_SaveList;
106+
}
107+
108+
// O32 ABI
109+
if (Subtarget.isSingleFloat())
110+
return CSR_O32_SingleFloat_SaveList;
100111

101112
if (Subtarget.isFP64bit())
102113
return CSR_O32_FP64_SaveList;
@@ -111,14 +122,25 @@ const uint32_t *
111122
MipsRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
112123
CallingConv::ID) const {
113124
const MipsSubtarget &Subtarget = MF.getSubtarget<MipsSubtarget>();
114-
if (Subtarget.isSingleFloat())
115-
return CSR_SingleFloatOnly_RegMask;
125+
// N64 ABI
126+
if (Subtarget.isABI_N64()) {
127+
if (Subtarget.isSingleFloat())
128+
return CSR_N64_SingleFloat_RegMask;
116129

117-
if (Subtarget.isABI_N64())
118130
return CSR_N64_RegMask;
131+
}
132+
133+
// N32 ABI
134+
if (Subtarget.isABI_N32()) {
135+
if (Subtarget.isSingleFloat())
136+
return CSR_N32_SingleFloat_RegMask;
119137

120-
if (Subtarget.isABI_N32())
121138
return CSR_N32_RegMask;
139+
}
140+
141+
// O32 ABI
142+
if (Subtarget.isSingleFloat())
143+
return CSR_O32_SingleFloat_RegMask;
122144

123145
if (Subtarget.isFP64bit())
124146
return CSR_O32_FP64_RegMask;

llvm/lib/Target/Mips/MipsSEISelLowering.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#include "llvm/CodeGen/SelectionDAG.h"
2929
#include "llvm/CodeGen/SelectionDAGNodes.h"
3030
#include "llvm/CodeGen/TargetInstrInfo.h"
31+
#include "llvm/CodeGen/TargetLowering.h"
3132
#include "llvm/CodeGen/TargetSubtargetInfo.h"
3233
#include "llvm/CodeGen/ValueTypes.h"
3334
#include "llvm/CodeGenTypes/MachineValueType.h"
@@ -211,6 +212,16 @@ MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM,
211212
}
212213
}
213214

215+
// Targets with 64-bit integer registers but no 64-bit floating-point registers
216+
// do not support conversion between them.
217+
if (Subtarget.isGP64bit() && Subtarget.isSingleFloat() &&
218+
!Subtarget.useSoftFloat()) {
219+
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand);
220+
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
221+
setOperationAction(ISD::SINT_TO_FP, MVT::i64, Expand);
222+
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
223+
}
224+
214225
setOperationAction(ISD::SMUL_LOHI, MVT::i32, Custom);
215226
setOperationAction(ISD::UMUL_LOHI, MVT::i32, Custom);
216227
setOperationAction(ISD::MULHS, MVT::i32, Custom);
Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
; RUN: llc -mtriple=mips -relocation-model=static -mattr=single-float < %s \
2+
; RUN: | FileCheck --check-prefixes=ALL,SYM32,O32 %s
3+
; RUN: llc -mtriple=mipsel -relocation-model=static -mattr=single-float < %s \
4+
; RUN: | FileCheck --check-prefixes=ALL,SYM32,O32 %s
5+
6+
; RUN: llc -mtriple=mips64 -relocation-model=static -target-abi n32 -mattr=single-float < %s \
7+
; RUN: | FileCheck --check-prefixes=ALL,SYM32,N32,NEW,NEWBE %s
8+
; RUN: llc -mtriple=mips64el -relocation-model=static -target-abi n32 -mattr=single-float < %s \
9+
; RUN: | FileCheck --check-prefixes=ALL,SYM32,N32,NEW,NEWLE %s
10+
11+
; RUN: llc -mtriple=mips64 -relocation-model=static -target-abi n64 -mattr=single-float < %s \
12+
; RUN: | FileCheck --check-prefixes=ALL,SYM64,N64,NEW,NEWBE %s
13+
; RUN: llc -mtriple=mips64el -relocation-model=static -target-abi n64 -mattr=single-float < %s \
14+
; RUN: | FileCheck --check-prefixes=ALL,SYM64,N64,NEW,NEWLE %s
15+
16+
@floats = global [11 x float] zeroinitializer
17+
@doubles = global [11 x double] zeroinitializer
18+
19+
define void @double_args(double %a, ...)
20+
nounwind {
21+
entry:
22+
%0 = getelementptr [11 x double], ptr @doubles, i32 0, i32 1
23+
store volatile double %a, ptr %0
24+
25+
%ap = alloca ptr
26+
call void @llvm.va_start(ptr %ap)
27+
%b = va_arg ptr %ap, double
28+
%1 = getelementptr [11 x double], ptr @doubles, i32 0, i32 2
29+
store volatile double %b, ptr %1
30+
call void @llvm.va_end(ptr %ap)
31+
ret void
32+
}
33+
34+
; ALL-LABEL: double_args:
35+
; We won't test the way the global address is calculated in this test. This is
36+
; just to get the register number for the other checks.
37+
; SYM32-DAG: addiu [[R2:\$[0-9]+]], ${{[0-9]+}}, %lo(doubles)
38+
; SYM64-DAG: daddiu [[R2:\$[0-9]+]], ${{[0-9]+}}, %lo(doubles)
39+
40+
; O32 forbids using floating point registers for the non-variable portion.
41+
; N32/N64 allow it, but the single-float checks below expect the double in $4.
42+
; O32-DAG: sw $4, 8([[R2]])
43+
; O32-DAG: sw $5, 12([[R2]])
44+
; NEW-DAG: sd $4, 8([[R2]])
45+
46+
; The varargs portion is dumped to stack
47+
; O32-DAG: sw $6, 16($sp)
48+
; O32-DAG: sw $7, 20($sp)
49+
; NEW-DAG: sd $5, 8($sp)
50+
; NEW-DAG: sd $6, 16($sp)
51+
; NEW-DAG: sd $7, 24($sp)
52+
; NEW-DAG: sd $8, 32($sp)
53+
; NEW-DAG: sd $9, 40($sp)
54+
; NEW-DAG: sd $10, 48($sp)
55+
; NEW-DAG: sd $11, 56($sp)
56+
57+
; Get the varargs pointer
58+
; O32 has 4 bytes padding, 4 bytes for the varargs pointer, and 8 bytes reserved
59+
; for arguments 1 and 2.
60+
; N32/N64 has 8 bytes for the varargs pointer, and no reserved area.
61+
; O32-DAG: addiu [[VAPTR:\$[0-9]+]], $sp, 16
62+
; O32-DAG: sw [[VAPTR]], 4($sp)
63+
; N32-DAG: addiu [[VAPTR:\$[0-9]+]], $sp, 8
64+
; N32-DAG: sw [[VAPTR]], 4($sp)
65+
; N64-DAG: daddiu [[VAPTR:\$[0-9]+]], $sp, 8
66+
; N64-DAG: sd [[VAPTR]], 0($sp)
67+
68+
; Increment the pointer then get the varargs arg
69+
; LLVM will rebind the load to the stack pointer instead of the varargs pointer
70+
; during lowering. This is fine and doesn't change the behaviour.
71+
; O32-DAG: addiu [[VAPTR]], [[VAPTR]], 8
72+
; N32-DAG: addiu [[VAPTR]], [[VAPTR]], 8
73+
; N64-DAG: daddiu [[VAPTR]], [[VAPTR]], 8
74+
; O32-DAG: lw [[R3:\$[0-9]+]], 16($sp)
75+
; O32-DAG: lw [[R4:\$[0-9]+]], 20($sp)
76+
; O32-DAG: sw [[R3]], 16([[R2]])
77+
; O32-DAG: sw [[R4]], 20([[R2]])
78+
; NEW-DAG: ld [[R3:\$[0-9]+]], 8($sp)
79+
; NEW-DAG: sd [[R3]], 16([[R2]])
80+
81+
define void @float_args(float %a, ...) nounwind {
82+
entry:
83+
%0 = getelementptr [11 x float], ptr @floats, i32 0, i32 1
84+
store volatile float %a, ptr %0
85+
86+
%ap = alloca ptr
87+
call void @llvm.va_start(ptr %ap)
88+
%b = va_arg ptr %ap, float
89+
%1 = getelementptr [11 x float], ptr @floats, i32 0, i32 2
90+
store volatile float %b, ptr %1
91+
call void @llvm.va_end(ptr %ap)
92+
ret void
93+
}
94+
95+
; ALL-LABEL: float_args:
96+
; We won't test the way the global address is calculated in this test. This is
97+
; just to get the register number for the other checks.
98+
; SYM32-DAG: addiu [[R2:\$[0-9]+]], ${{[0-9]+}}, %lo(floats)
99+
; SYM64-DAG: daddiu [[R2:\$[0-9]+]], ${{[0-9]+}}, %lo(floats)
100+
101+
; The first four arguments are the same in O32/N32/N64.
102+
; The non-variable portion should be unaffected.
103+
; O32-DAG: mtc1 $4, $f0
104+
; O32-DAG: swc1 $f0, 4([[R2]])
105+
; NEW-DAG: swc1 $f12, 4([[R2]])
106+
107+
; The varargs portion is dumped to stack
108+
; O32-DAG: sw $5, 12($sp)
109+
; O32-DAG: sw $6, 16($sp)
110+
; O32-DAG: sw $7, 20($sp)
111+
; NEW-DAG: sd $5, 8($sp)
112+
; NEW-DAG: sd $6, 16($sp)
113+
; NEW-DAG: sd $7, 24($sp)
114+
; NEW-DAG: sd $8, 32($sp)
115+
; NEW-DAG: sd $9, 40($sp)
116+
; NEW-DAG: sd $10, 48($sp)
117+
; NEW-DAG: sd $11, 56($sp)
118+
119+
; Get the varargs pointer
120+
; O32 has 4 bytes padding, 4 bytes for the varargs pointer, and should have 8
121+
; bytes reserved for arguments 1 and 2 (the first float arg) but as discussed in
122+
; arguments-float.ll, GCC doesn't agree with MD00305 and treats floats as 4
123+
; bytes so we only have 12 bytes total.
124+
; N32/N64 has 8 bytes for the varargs pointer, and no reserved area.
125+
; O32-DAG: addiu [[VAPTR:\$[0-9]+]], $sp, 12
126+
; O32-DAG: sw [[VAPTR]], 4($sp)
127+
; N32-DAG: addiu [[VAPTR:\$[0-9]+]], $sp, 8
128+
; N32-DAG: sw [[VAPTR]], 4($sp)
129+
; N64-DAG: daddiu [[VAPTR:\$[0-9]+]], $sp, 8
130+
; N64-DAG: sd [[VAPTR]], 0($sp)
131+
132+
; Increment the pointer then get the varargs arg
133+
; LLVM will rebind the load to the stack pointer instead of the varargs pointer
134+
; during lowering. This is fine and doesn't change the behaviour.
135+
; Also, in big-endian mode the offset must be increased by 4 to retrieve the
136+
; correct half of the argument slot.
137+
;
138+
; O32-DAG: addiu [[VAPTR]], [[VAPTR]], 4
139+
; N32-DAG: addiu [[VAPTR]], [[VAPTR]], 8
140+
; N64-DAG: daddiu [[VAPTR]], [[VAPTR]], 8
141+
; O32-DAG: lwc1 [[FTMP1:\$f[0-9]+]], 12($sp)
142+
; NEWLE-DAG: lwc1 [[FTMP1:\$f[0-9]+]], 8($sp)
143+
; NEWBE-DAG: lwc1 [[FTMP1:\$f[0-9]+]], 12($sp)
144+
; ALL-DAG: swc1 [[FTMP1]], 8([[R2]])
145+
146+
declare void @llvm.va_start(ptr)
147+
declare void @llvm.va_copy(ptr, ptr)
148+
declare void @llvm.va_end(ptr)

0 commit comments

Comments
 (0)