Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
Original file line number Diff line number Diff line change
Expand Up @@ -483,6 +483,7 @@ class LegalizerHelper {
LLVM_ABI LegalizeResult lowerFPTOSI(MachineInstr &MI);
LLVM_ABI LegalizeResult lowerFPTOINT_SAT(MachineInstr &MI);

LLVM_ABI LegalizeResult lowerFPExtAndTruncMem(MachineInstr &MI);
LLVM_ABI LegalizeResult lowerFPTRUNC_F64_TO_F16(MachineInstr &MI);
LLVM_ABI LegalizeResult lowerFPTRUNC(MachineInstr &MI);
LLVM_ABI LegalizeResult lowerFPOWI(MachineInstr &MI);
Expand Down
6 changes: 6 additions & 0 deletions llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -742,6 +742,12 @@ class LegalizeRuleSet {
LegalizeRuleSet &lowerFor(std::initializer_list<std::pair<LLT, LLT>> Types) {
return actionFor(LegalizeAction::Lower, Types);
}
/// Predicated form of lowerFor: the Lower action is registered for the type
/// pairs in \p Types only when \p Pred is true. When \p Pred is false the rule
/// set is returned untouched, so the call can stay in a builder chain.
LegalizeRuleSet &lowerFor(bool Pred,
                          std::initializer_list<std::pair<LLT, LLT>> Types) {
  if (Pred)
    return actionFor(LegalizeAction::Lower, Types);
  // Predicate not satisfied: add nothing.
  return *this;
}
/// The instruction is lowered when the types at type indexes 0 and 1 match any
/// type pair in the given list.
LegalizeRuleSet &lowerFor(std::initializer_list<std::pair<LLT, LLT>> Types,
Expand Down
31 changes: 30 additions & 1 deletion llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4669,6 +4669,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
case G_FPTOUI_SAT:
case G_FPTOSI_SAT:
return lowerFPTOINT_SAT(MI);
case G_FPEXT:
return lowerFPExtAndTruncMem(MI);
case G_FPTRUNC:
return lowerFPTRUNC(MI);
case G_FPOWI:
Expand Down Expand Up @@ -8410,6 +8412,33 @@ LegalizerHelper::lowerFPTOINT_SAT(MachineInstr &MI) {
return Legalized;
}

// Floating-point conversions using truncating and extending loads and stores.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// fp conversions using truncating and extending loads and stores.
// Floating-point conversions using truncating and extending loads and stores.

// Lowers a G_FPEXT or G_FPTRUNC by writing the source register to a stack
// temporary and reading the destination register back from the same slot.
// Both memory operands are described with StackTy, so the conversion is
// expressed purely through the store/load pair. On completion MI is erased
// and Legalized is returned.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFPExtAndTruncMem(MachineInstr &MI) {
assert((MI.getOpcode() == TargetOpcode::G_FPEXT ||
MI.getOpcode() == TargetOpcode::G_FPTRUNC) &&
"Only G_FPEXT and G_FPTRUNC are expected");

auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
MachinePointerInfo PtrInfo;
// The slot is typed with the source type for G_FPEXT and the destination
// type for G_FPTRUNC -- presumably the narrower of the two; TODO confirm.
LLT StackTy = MI.getOpcode() == TargetOpcode::G_FPEXT ? SrcTy : DstTy;
Align StackTyAlign = getStackTemporaryAlignment(StackTy);
// createStackTemporary also receives PtrInfo, which is then reused for both
// memory operands below.
auto StackTemp =
createStackTemporary(StackTy.getSizeInBytes(), StackTyAlign, PtrInfo);

MachineFunction &MF = MIRBuilder.getMF();
// Store SrcReg with a StackTy memory operand. For G_FPTRUNC, StackTy is
// DstTy, so the stored register's type differs from the memory type.
auto *StoreMMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
StackTy, StackTyAlign);
MIRBuilder.buildStore(SrcReg, StackTemp, *StoreMMO);

// Load DstReg with a StackTy memory operand. For G_FPEXT, StackTy is SrcTy,
// so the loaded register's type differs from the memory type.
// NOTE(review): the review thread on this change questions whether a plain
// load/store may carry mismatched register and memory types, and proposes
// dedicated FP extending-load / truncating-store opcodes -- unresolved here.
auto *LoadMMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
StackTy, StackTyAlign);
MIRBuilder.buildLoad(DstReg, StackTemp, *LoadMMO);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This can't just use a regular load and store. We should have a proper FP extending load opcode (like G_ZEXTLOAD), and also need the truncate case

Copy link
Contributor Author

@e-kud e-kud Oct 10, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@arsenm I got the idea. But it looks more like a G_LOAD/G_STORE design problem in GlobalISel. We don't enforce matching types between MemDesc and opcode type but we must use a specialized load/store if there is type mismatch (a lot of targets ignore it, if you check, e.g. RISCV)


MI.eraseFromParent();
return Legalized;
}

// f64 -> f16 conversion using round-to-nearest-even rounding mode.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
Expand Down Expand Up @@ -8535,7 +8564,7 @@ LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) {
if (DstTy.getScalarType() == S16 && SrcTy.getScalarType() == S64)
return lowerFPTRUNC_F64_TO_F16(MI);

return UnableToLegalize;
return lowerFPExtAndTruncMem(MI);
}

LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPOWI(MachineInstr &MI) {
Expand Down
145 changes: 121 additions & 24 deletions llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -449,13 +449,13 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
.legalFor(HasSSE2, {{s64, s32}})
.legalFor(HasAVX, {{v4s64, v4s32}})
.legalFor(HasAVX512, {{v8s64, v8s32}})
.customFor(UseX87, {{s64, s32}, {s80, s32}, {s80, s64}});
.lowerFor(UseX87, {{s64, s32}, {s80, s32}, {s80, s64}});

getActionDefinitionsBuilder(G_FPTRUNC)
.legalFor(HasSSE2, {{s32, s64}})
.legalFor(HasAVX, {{v4s32, v4s64}})
.legalFor(HasAVX512, {{v8s32, v8s64}})
.customFor(UseX87, {{s32, s64}, {s32, s80}, {s64, s80}});
.lowerFor(UseX87, {{s32, s64}, {s32, s80}, {s64, s80}});

getActionDefinitionsBuilder(G_SITOFP)
.legalFor(HasSSE1, {{s32, s32}})
Expand Down Expand Up @@ -625,9 +625,6 @@ bool X86LegalizerInfo::legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI,
return legalizeSITOFP(MI, MRI, Helper);
case TargetOpcode::G_FPTOSI:
return legalizeFPTOSI(MI, MRI, Helper);
case TargetOpcode::G_FPEXT:
case TargetOpcode::G_FPTRUNC:
return legalizeFPExtAndTrunc(MI, MRI, Helper);
case TargetOpcode::G_GET_ROUNDING:
return legalizeGETROUNDING(MI, MRI, Helper);
case TargetOpcode::G_SET_ROUNDING:
Expand Down Expand Up @@ -874,28 +871,128 @@ bool X86LegalizerInfo::legalizeGETROUNDING(MachineInstr &MI,
return true;
}

bool X86LegalizerInfo::legalizeFPExtAndTrunc(MachineInstr &MI,
MachineRegisterInfo &MRI,
LegalizerHelper &Helper) const {
assert((MI.getOpcode() == TargetOpcode::G_FPEXT ||
MI.getOpcode() == TargetOpcode::G_FPTRUNC) &&
"Only G_FPEXT and G_FPTRUNC are expected");
auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
MachinePointerInfo PtrInfo;
LLT StackTy = MI.getOpcode() == TargetOpcode::G_FPEXT ? SrcTy : DstTy;
Align StackTyAlign = Helper.getStackTemporaryAlignment(StackTy);
auto StackTemp = Helper.createStackTemporary(StackTy.getSizeInBytes(),
StackTyAlign, PtrInfo);

bool X86LegalizerInfo::legalizeSETROUNDING(MachineInstr &MI,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you rebase to drop this unrelated stuff that already was submitted

MachineRegisterInfo &MRI,
LegalizerHelper &Helper) const {
MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
MachineFunction &MF = MIRBuilder.getMF();
auto *StoreMMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
StackTy, StackTyAlign);
MIRBuilder.buildStore(SrcReg, StackTemp, *StoreMMO);
Register Src = MI.getOperand(0).getReg();
const LLT s8 = LLT::scalar(8);
const LLT s16 = LLT::scalar(16);
const LLT s32 = LLT::scalar(32);

auto *LoadMMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
StackTy, StackTyAlign);
MIRBuilder.buildLoad(DstReg, StackTemp, *LoadMMO);
// Allocate stack slot for control word and MXCSR (4 bytes).
int MemSize = 4;
Align Alignment = Align(4);
MachinePointerInfo PtrInfo;
auto StackTemp = Helper.createStackTemporary(TypeSize::getFixed(MemSize),
Alignment, PtrInfo);
Register StackPtr = StackTemp.getReg(0);

auto StoreMMO =
MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, 2, Align(2));
MIRBuilder.buildInstr(X86::G_FNSTCW16)
.addUse(StackPtr)
.addMemOperand(StoreMMO);

auto LoadMMO =
MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad, 2, Align(2));
auto CWD16 = MIRBuilder.buildLoad(s16, StackPtr, *LoadMMO);

// Clear RM field (bits 11:10)
auto ClearedCWD =
MIRBuilder.buildAnd(s16, CWD16, MIRBuilder.buildConstant(s16, 0xf3ff));

// Check if Src is a constant
auto *SrcDef = MRI.getVRegDef(Src);
Register RMBits;
Register MXCSRRMBits;

if (SrcDef && SrcDef->getOpcode() == TargetOpcode::G_CONSTANT) {
uint64_t RM = getIConstantFromReg(Src, MRI).getZExtValue();
int FieldVal = X86::getRoundingModeX86(RM);

if (FieldVal == X86::rmInvalid) {
LLVMContext &C = MF.getFunction().getContext();
C.diagnose(DiagnosticInfoUnsupported(
MF.getFunction(), "rounding mode is not supported by X86 hardware",
DiagnosticLocation(MI.getDebugLoc()), DS_Error));
return false;
}

FieldVal = FieldVal << 3;
RMBits = MIRBuilder.buildConstant(s16, FieldVal).getReg(0);
MXCSRRMBits = MIRBuilder.buildConstant(s32, FieldVal).getReg(0);
} else {
// Convert Src (rounding mode) to bits for control word
// (0xc9 << (2 * Src + 4)) & 0xc00
auto Src32 = MIRBuilder.buildZExtOrTrunc(s32, Src);
auto ShiftAmt = MIRBuilder.buildAdd(
s32, MIRBuilder.buildShl(s32, Src32, MIRBuilder.buildConstant(s32, 1)),
MIRBuilder.buildConstant(s32, 4));
auto ShiftAmt8 = MIRBuilder.buildTrunc(s8, ShiftAmt);
auto Shifted = MIRBuilder.buildShl(s16, MIRBuilder.buildConstant(s16, 0xc9),
ShiftAmt8);
RMBits =
MIRBuilder.buildAnd(s16, Shifted, MIRBuilder.buildConstant(s16, 0xc00))
.getReg(0);

// For non-constant case, we still need to compute MXCSR bits dynamically
auto RMBits32 = MIRBuilder.buildZExt(s32, RMBits);
MXCSRRMBits =
MIRBuilder.buildShl(s32, RMBits32, MIRBuilder.buildConstant(s32, 3))
.getReg(0);
}
// Update rounding mode bits
auto NewCWD =
MIRBuilder.buildOr(s16, ClearedCWD, RMBits, MachineInstr::Disjoint);

// Store new FP Control Word to stack
auto StoreNewMMO =
MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, 2, Align(2));
MIRBuilder.buildStore(NewCWD, StackPtr, *StoreNewMMO);

// Load FP control word from the slot using G_FLDCW16
auto LoadNewMMO =
MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad, 2, Align(2));
MIRBuilder.buildInstr(X86::G_FLDCW16)
.addUse(StackPtr)
.addMemOperand(LoadNewMMO);

if (Subtarget.hasSSE1()) {
// Store MXCSR to stack (use STMXCSR)
auto StoreMXCSRMMO = MF.getMachineMemOperand(
PtrInfo, MachineMemOperand::MOStore, 4, Align(4));
MIRBuilder.buildInstr(TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)
.addIntrinsicID(Intrinsic::x86_sse_stmxcsr)
.addUse(StackPtr)
.addMemOperand(StoreMXCSRMMO);

// Load MXCSR from stack
auto LoadMXCSRMMO = MF.getMachineMemOperand(
PtrInfo, MachineMemOperand::MOLoad, 4, Align(4));
auto MXCSR = MIRBuilder.buildLoad(s32, StackPtr, *LoadMXCSRMMO);

// Clear RM field (bits 14:13)
auto ClearedMXCSR = MIRBuilder.buildAnd(
s32, MXCSR, MIRBuilder.buildConstant(s32, 0xffff9fff));

// Update rounding mode bits
auto NewMXCSR = MIRBuilder.buildOr(s32, ClearedMXCSR, MXCSRRMBits);

// Store new MXCSR to stack
auto StoreNewMXCSRMMO = MF.getMachineMemOperand(
PtrInfo, MachineMemOperand::MOStore, 4, Align(4));
MIRBuilder.buildStore(NewMXCSR, StackPtr, *StoreNewMXCSRMMO);

// Load MXCSR from stack (use LDMXCSR)
auto LoadNewMXCSRMMO = MF.getMachineMemOperand(
PtrInfo, MachineMemOperand::MOLoad, 4, Align(4));
MIRBuilder.buildInstr(TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)
.addIntrinsicID(Intrinsic::x86_sse_ldmxcsr)
.addUse(StackPtr)
.addMemOperand(LoadNewMXCSRMMO);
}

MI.eraseFromParent();
return true;
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/X86/GISel/X86LegalizerInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,8 @@ class X86LegalizerInfo : public LegalizerInfo {
bool legalizeGETROUNDING(MachineInstr &MI, MachineRegisterInfo &MRI,
LegalizerHelper &Helper) const;

bool legalizeFPExtAndTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
LegalizerHelper &Helper) const;
bool legalizeSETROUNDING(MachineInstr &MI, MachineRegisterInfo &MRI,
LegalizerHelper &Helper) const;
};
} // namespace llvm
#endif