Skip to content

Commit b04b10b

Browse files
Tazdevil971 authored and mahesh-attarde committed
[MIPS][float] Fixed SingleFloat codegen on N32/N64 targets (llvm#140575)
This patch aims to make the combination of single-float and the N32/N64 ABIs work properly. Right now, when both options are enabled, the compiler chooses an incorrect ABI and in some cases even generates wrong instructions. The floating point behavior on MIPS is controlled through 3 flags: soft-float, single-float, fp64. This makes things complicated because fp64 indicates the presence of 64-bit floating point registers, but cannot be easily disabled (the mips3 feature requires it, but mips3 CPUs with only 32-bit floating point exist). Also, if fp64 is missing it doesn't actually disable 64-bit floating point operations, because certain MIPS1/2 CPUs support 64-bit floating point with 32-bit registers, hence the single-float option. I'm guessing that originally single-float was only intended for the latter case, and that's the reason why it doesn't properly work on 64-bit targets. So this patch does the following: - Make single-float a "master disable": even if fp64 is enabled, this should completely disable generation of 64-bit floating point operations, making it available on targets which hard-require fp64. - Add proper calling conventions for N32/N64 single-float combinations. - Fix up codegen to not generate certain 64-bit floating point operations; apparently not assigning a register class to f64 values is not enough to prevent them from showing up. - Add tests for the new calling conventions and codegen.
1 parent 880e944 commit b04b10b

12 files changed

+766
-41
lines changed

llvm/lib/Target/Mips/MipsCallingConv.td

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,8 @@ def RetCC_MipsN : CallingConv<[
186186
//
187187
// f128 should only occur for the N64 ABI where long double is 128-bit. On
188188
// N32, long double is equivalent to double.
189-
CCIfType<[i64], CCIfOrigArgWasF128<CCDelegateTo<RetCC_F128>>>,
189+
CCIfSubtargetNot<"isSingleFloat()",
190+
CCIfType<[i64], CCIfOrigArgWasF128<CCDelegateTo<RetCC_F128>>>>,
190191

191192
// Aggregate returns are positioned at the lowest address in the slot for
192193
// both little and big-endian targets. When passing in registers, this
@@ -316,9 +317,10 @@ def CC_Mips_FixedArg : CallingConv<[
316317
//
317318
// f128 should only occur for the N64 ABI where long double is 128-bit. On
318319
// N32, long double is equivalent to double.
319-
CCIfType<[i64],
320-
CCIfSubtargetNot<"useSoftFloat()",
321-
CCIfOrigArgWasF128<CCBitConvertToType<f64>>>>,
320+
CCIfType<[i64],
321+
CCIfSubtargetNot<"isSingleFloat()",
322+
CCIfSubtargetNot<"useSoftFloat()",
323+
CCIfOrigArgWasF128<CCBitConvertToType<f64>>>>>,
322324

323325
CCIfCC<"CallingConv::Fast", CCDelegateTo<CC_Mips_FastCC>>,
324326

@@ -342,8 +344,8 @@ def CC_Mips : CallingConv<[
342344
// Callee-saved register lists.
343345
//===----------------------------------------------------------------------===//
344346

345-
def CSR_SingleFloatOnly : CalleeSavedRegs<(add (sequence "F%u", 31, 20), RA, FP,
346-
(sequence "S%u", 7, 0))>;
347+
def CSR_O32_SingleFloat : CalleeSavedRegs<(add(sequence "F%u", 31, 20), RA, FP,
348+
(sequence "S%u", 7, 0))>;
347349

348350
def CSR_O32_FPXX : CalleeSavedRegs<(add (sequence "D%u", 15, 10), RA, FP,
349351
(sequence "S%u", 7, 0))> {
@@ -357,13 +359,19 @@ def CSR_O32_FP64 :
357359
CalleeSavedRegs<(add (decimate (sequence "D%u_64", 30, 20), 2), RA, FP,
358360
(sequence "S%u", 7, 0))>;
359361

360-
def CSR_N32 : CalleeSavedRegs<(add D20_64, D22_64, D24_64, D26_64, D28_64,
361-
D30_64, RA_64, FP_64, GP_64,
362-
(sequence "S%u_64", 7, 0))>;
362+
def CSR_N32 : CalleeSavedRegs<(add(decimate(sequence "D%u_64", 30, 20), 2),
363+
RA_64, FP_64, GP_64, (sequence "S%u_64", 7, 0))>;
364+
365+
def CSR_N32_SingleFloat
366+
: CalleeSavedRegs<(add(decimate(sequence "F%u", 30, 20), 2), RA_64, FP_64,
367+
GP_64, (sequence "S%u_64", 7, 0))>;
363368

364369
def CSR_N64 : CalleeSavedRegs<(add (sequence "D%u_64", 31, 24), RA_64, FP_64,
365370
GP_64, (sequence "S%u_64", 7, 0))>;
366371

372+
def CSR_N64_SingleFloat : CalleeSavedRegs<(add(sequence "F%u", 31, 24), RA_64,
373+
FP_64, GP_64, (sequence "S%u_64", 7, 0))>;
374+
367375
def CSR_Mips16RetHelper :
368376
CalleeSavedRegs<(add V0, V1, FP,
369377
(sequence "A%u", 3, 0), (sequence "S%u", 7, 0),

llvm/lib/Target/Mips/MipsISelLowering.cpp

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4265,10 +4265,16 @@ parseRegForInlineAsmConstraint(StringRef C, MVT VT) const {
42654265
return std::make_pair(0U, nullptr);
42664266

42674267
if (Prefix == "$f") { // Parse $f0-$f31.
4268-
// If the size of FP registers is 64-bit or Reg is an even number, select
4269-
// the 64-bit register class. Otherwise, select the 32-bit register class.
4270-
if (VT == MVT::Other)
4271-
VT = (Subtarget.isFP64bit() || !(Reg % 2)) ? MVT::f64 : MVT::f32;
4268+
// If the target is single-float only, always select 32-bit registers,
4269+
// otherwise if the size of FP registers is 64-bit or Reg is an even number,
4270+
// select the 64-bit register class. Otherwise, select the 32-bit register
4271+
// class.
4272+
if (VT == MVT::Other) {
4273+
if (Subtarget.isSingleFloat())
4274+
VT = MVT::f32;
4275+
else
4276+
VT = (Subtarget.isFP64bit() || !(Reg % 2)) ? MVT::f64 : MVT::f32;
4277+
}
42724278

42734279
RC = getRegClassFor(VT);
42744280

@@ -4308,10 +4314,12 @@ MipsTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
43084314
return std::make_pair(0U, &Mips::CPU16RegsRegClass);
43094315
return std::make_pair(0U, &Mips::GPR32RegClass);
43104316
}
4311-
if ((VT == MVT::i64 || (VT == MVT::f64 && Subtarget.useSoftFloat())) &&
4317+
if ((VT == MVT::i64 || (VT == MVT::f64 && Subtarget.useSoftFloat()) ||
4318+
(VT == MVT::f64 && Subtarget.isSingleFloat())) &&
43124319
!Subtarget.isGP64bit())
43134320
return std::make_pair(0U, &Mips::GPR32RegClass);
4314-
if ((VT == MVT::i64 || (VT == MVT::f64 && Subtarget.useSoftFloat())) &&
4321+
if ((VT == MVT::i64 || (VT == MVT::f64 && Subtarget.useSoftFloat()) ||
4322+
(VT == MVT::f64 && Subtarget.isSingleFloat())) &&
43154323
Subtarget.isGP64bit())
43164324
return std::make_pair(0U, &Mips::GPR64RegClass);
43174325
// This will generate an error message

llvm/lib/Target/Mips/MipsRegisterInfo.cpp

Lines changed: 30 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -89,14 +89,25 @@ MipsRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
8989
: CSR_Interrupt_32_SaveList;
9090
}
9191

92-
if (Subtarget.isSingleFloat())
93-
return CSR_SingleFloatOnly_SaveList;
92+
// N64 ABI
93+
if (Subtarget.isABI_N64()) {
94+
if (Subtarget.isSingleFloat())
95+
return CSR_N64_SingleFloat_SaveList;
9496

95-
if (Subtarget.isABI_N64())
9697
return CSR_N64_SaveList;
98+
}
99+
100+
// N32 ABI
101+
if (Subtarget.isABI_N32()) {
102+
if (Subtarget.isSingleFloat())
103+
return CSR_N32_SingleFloat_SaveList;
97104

98-
if (Subtarget.isABI_N32())
99105
return CSR_N32_SaveList;
106+
}
107+
108+
// O32 ABI
109+
if (Subtarget.isSingleFloat())
110+
return CSR_O32_SingleFloat_SaveList;
100111

101112
if (Subtarget.isFP64bit())
102113
return CSR_O32_FP64_SaveList;
@@ -111,14 +122,25 @@ const uint32_t *
111122
MipsRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
112123
CallingConv::ID) const {
113124
const MipsSubtarget &Subtarget = MF.getSubtarget<MipsSubtarget>();
114-
if (Subtarget.isSingleFloat())
115-
return CSR_SingleFloatOnly_RegMask;
125+
// N64 ABI
126+
if (Subtarget.isABI_N64()) {
127+
if (Subtarget.isSingleFloat())
128+
return CSR_N64_SingleFloat_RegMask;
116129

117-
if (Subtarget.isABI_N64())
118130
return CSR_N64_RegMask;
131+
}
132+
133+
// N32 ABI
134+
if (Subtarget.isABI_N32()) {
135+
if (Subtarget.isSingleFloat())
136+
return CSR_N32_SingleFloat_RegMask;
119137

120-
if (Subtarget.isABI_N32())
121138
return CSR_N32_RegMask;
139+
}
140+
141+
// O32 ABI
142+
if (Subtarget.isSingleFloat())
143+
return CSR_O32_SingleFloat_RegMask;
122144

123145
if (Subtarget.isFP64bit())
124146
return CSR_O32_FP64_RegMask;

llvm/lib/Target/Mips/MipsSEISelLowering.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#include "llvm/CodeGen/SelectionDAG.h"
2929
#include "llvm/CodeGen/SelectionDAGNodes.h"
3030
#include "llvm/CodeGen/TargetInstrInfo.h"
31+
#include "llvm/CodeGen/TargetLowering.h"
3132
#include "llvm/CodeGen/TargetSubtargetInfo.h"
3233
#include "llvm/CodeGen/ValueTypes.h"
3334
#include "llvm/CodeGenTypes/MachineValueType.h"
@@ -211,6 +212,16 @@ MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM,
211212
}
212213
}
213214

215+
// Targets with 64-bit integer registers but no 64-bit floating point registers
216+
// do not support conversion between them
217+
if (Subtarget.isGP64bit() && Subtarget.isSingleFloat() &&
218+
!Subtarget.useSoftFloat()) {
219+
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand);
220+
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
221+
setOperationAction(ISD::SINT_TO_FP, MVT::i64, Expand);
222+
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
223+
}
224+
214225
setOperationAction(ISD::SMUL_LOHI, MVT::i32, Custom);
215226
setOperationAction(ISD::UMUL_LOHI, MVT::i32, Custom);
216227
setOperationAction(ISD::MULHS, MVT::i32, Custom);
Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
; RUN: llc -mtriple=mips -relocation-model=static -mattr=single-float < %s \
2+
; RUN: | FileCheck --check-prefixes=ALL,SYM32,O32 %s
3+
; RUN: llc -mtriple=mipsel -relocation-model=static -mattr=single-float < %s \
4+
; RUN: | FileCheck --check-prefixes=ALL,SYM32,O32 %s
5+
6+
; RUN: llc -mtriple=mips64 -relocation-model=static -target-abi n32 -mattr=single-float < %s \
7+
; RUN: | FileCheck --check-prefixes=ALL,SYM32,N32,NEW,NEWBE %s
8+
; RUN: llc -mtriple=mips64el -relocation-model=static -target-abi n32 -mattr=single-float < %s \
9+
; RUN: | FileCheck --check-prefixes=ALL,SYM32,N32,NEW,NEWLE %s
10+
11+
; RUN: llc -mtriple=mips64 -relocation-model=static -target-abi n64 -mattr=single-float < %s \
12+
; RUN: | FileCheck --check-prefixes=ALL,SYM64,N64,NEW,NEWBE %s
13+
; RUN: llc -mtriple=mips64el -relocation-model=static -target-abi n64 -mattr=single-float < %s \
14+
; RUN: | FileCheck --check-prefixes=ALL,SYM64,N64,NEW,NEWLE %s
15+
16+
@floats = global [11 x float] zeroinitializer
17+
@doubles = global [11 x double] zeroinitializer
18+
19+
define void @double_args(double %a, ...)
20+
nounwind {
21+
entry:
22+
%0 = getelementptr [11 x double], ptr @doubles, i32 0, i32 1
23+
store volatile double %a, ptr %0
24+
25+
%ap = alloca ptr
26+
call void @llvm.va_start(ptr %ap)
27+
%b = va_arg ptr %ap, double
28+
%1 = getelementptr [11 x double], ptr @doubles, i32 0, i32 2
29+
store volatile double %b, ptr %1
30+
call void @llvm.va_end(ptr %ap)
31+
ret void
32+
}
33+
34+
; ALL-LABEL: double_args:
35+
; We won't test the way the global address is calculated in this test. This is
36+
; just to get the register number for the other checks.
37+
; SYM32-DAG: addiu [[R2:\$[0-9]+]], ${{[0-9]+}}, %lo(doubles)
38+
; SYM64-DAG: daddiu [[R2:\$[0-9]+]], ${{[0-9]+}}, %lo(doubles)
39+
40+
; O32 forbids using floating point registers for the non-variable portion.
41+
; N32/N64 allow it.
42+
; O32-DAG: sw $4, 8([[R2]])
43+
; O32-DAG: sw $5, 12([[R2]])
44+
; NEW-DAG: sd $4, 8([[R2]])
45+
46+
; The varargs portion is dumped to stack
47+
; O32-DAG: sw $6, 16($sp)
48+
; O32-DAG: sw $7, 20($sp)
49+
; NEW-DAG: sd $5, 8($sp)
50+
; NEW-DAG: sd $6, 16($sp)
51+
; NEW-DAG: sd $7, 24($sp)
52+
; NEW-DAG: sd $8, 32($sp)
53+
; NEW-DAG: sd $9, 40($sp)
54+
; NEW-DAG: sd $10, 48($sp)
55+
; NEW-DAG: sd $11, 56($sp)
56+
57+
; Get the varargs pointer
58+
; O32 has 4 bytes padding, 4 bytes for the varargs pointer, and 8 bytes reserved
59+
; for arguments 1 and 2.
60+
; N32/N64 has 8 bytes for the varargs pointer, and no reserved area.
61+
; O32-DAG: addiu [[VAPTR:\$[0-9]+]], $sp, 16
62+
; O32-DAG: sw [[VAPTR]], 4($sp)
63+
; N32-DAG: addiu [[VAPTR:\$[0-9]+]], $sp, 8
64+
; N32-DAG: sw [[VAPTR]], 4($sp)
65+
; N64-DAG: daddiu [[VAPTR:\$[0-9]+]], $sp, 8
66+
; N64-DAG: sd [[VAPTR]], 0($sp)
67+
68+
; Increment the pointer then get the varargs arg
69+
; LLVM will rebind the load to the stack pointer instead of the varargs pointer
70+
; during lowering. This is fine and doesn't change the behaviour.
71+
; O32-DAG: addiu [[VAPTR]], [[VAPTR]], 8
72+
; N32-DAG: addiu [[VAPTR]], [[VAPTR]], 8
73+
; N64-DAG: daddiu [[VAPTR]], [[VAPTR]], 8
74+
; O32-DAG: lw [[R3:\$[0-9]+]], 16($sp)
75+
; O32-DAG: lw [[R4:\$[0-9]+]], 20($sp)
76+
; O32-DAG: sw [[R3]], 16([[R2]])
77+
; O32-DAG: sw [[R4]], 20([[R2]])
78+
; NEW-DAG: ld [[R3:\$[0-9]+]], 8($sp)
79+
; NEW-DAG: sd [[R3]], 16([[R2]])
80+
81+
define void @float_args(float %a, ...) nounwind {
82+
entry:
83+
%0 = getelementptr [11 x float], ptr @floats, i32 0, i32 1
84+
store volatile float %a, ptr %0
85+
86+
%ap = alloca ptr
87+
call void @llvm.va_start(ptr %ap)
88+
%b = va_arg ptr %ap, float
89+
%1 = getelementptr [11 x float], ptr @floats, i32 0, i32 2
90+
store volatile float %b, ptr %1
91+
call void @llvm.va_end(ptr %ap)
92+
ret void
93+
}
94+
95+
; ALL-LABEL: float_args:
96+
; We won't test the way the global address is calculated in this test. This is
97+
; just to get the register number for the other checks.
98+
; SYM32-DAG: addiu [[R2:\$[0-9]+]], ${{[0-9]+}}, %lo(floats)
99+
; SYM64-DAG: daddiu [[R2:\$[0-9]+]], ${{[0-9]+}}, %lo(floats)
100+
101+
; The first four arguments are the same in O32/N32/N64.
102+
; The non-variable portion should be unaffected.
103+
; O32-DAG: mtc1 $4, $f0
104+
; O32-DAG: swc1 $f0, 4([[R2]])
105+
; NEW-DAG: swc1 $f12, 4([[R2]])
106+
107+
; The varargs portion is dumped to stack
108+
; O32-DAG: sw $5, 12($sp)
109+
; O32-DAG: sw $6, 16($sp)
110+
; O32-DAG: sw $7, 20($sp)
111+
; NEW-DAG: sd $5, 8($sp)
112+
; NEW-DAG: sd $6, 16($sp)
113+
; NEW-DAG: sd $7, 24($sp)
114+
; NEW-DAG: sd $8, 32($sp)
115+
; NEW-DAG: sd $9, 40($sp)
116+
; NEW-DAG: sd $10, 48($sp)
117+
; NEW-DAG: sd $11, 56($sp)
118+
119+
; Get the varargs pointer
120+
; O32 has 4 bytes padding, 4 bytes for the varargs pointer, and should have 8
121+
; bytes reserved for arguments 1 and 2 (the first float arg) but as discussed in
122+
; arguments-float.ll, GCC doesn't agree with MD00305 and treats floats as 4
123+
; bytes so we only have 12 bytes total.
124+
; N32/N64 has 8 bytes for the varargs pointer, and no reserved area.
125+
; O32-DAG: addiu [[VAPTR:\$[0-9]+]], $sp, 12
126+
; O32-DAG: sw [[VAPTR]], 4($sp)
127+
; N32-DAG: addiu [[VAPTR:\$[0-9]+]], $sp, 8
128+
; N32-DAG: sw [[VAPTR]], 4($sp)
129+
; N64-DAG: daddiu [[VAPTR:\$[0-9]+]], $sp, 8
130+
; N64-DAG: sd [[VAPTR]], 0($sp)
131+
132+
; Increment the pointer then get the varargs arg
133+
; LLVM will rebind the load to the stack pointer instead of the varargs pointer
134+
; during lowering. This is fine and doesn't change the behaviour.
135+
; Also, in big-endian mode the offset must be increased by 4 to retrieve the
136+
; correct half of the argument slot.
137+
;
138+
; O32-DAG: addiu [[VAPTR]], [[VAPTR]], 4
139+
; N32-DAG: addiu [[VAPTR]], [[VAPTR]], 8
140+
; N64-DAG: daddiu [[VAPTR]], [[VAPTR]], 8
141+
; O32-DAG: lwc1 [[FTMP1:\$f[0-9]+]], 12($sp)
142+
; NEWLE-DAG: lwc1 [[FTMP1:\$f[0-9]+]], 8($sp)
143+
; NEWBE-DAG: lwc1 [[FTMP1:\$f[0-9]+]], 12($sp)
144+
; ALL-DAG: swc1 [[FTMP1]], 8([[R2]])
145+
146+
declare void @llvm.va_start(ptr)
147+
declare void @llvm.va_copy(ptr, ptr)
148+
declare void @llvm.va_end(ptr)

0 commit comments

Comments
 (0)