|
21 | 21 | #include "llvm/CodeGen/TargetOpcodes.h"
|
22 | 22 | #include "llvm/CodeGen/ValueTypes.h"
|
23 | 23 | #include "llvm/IR/DerivedTypes.h"
|
| 24 | +#include "llvm/IR/IntrinsicsX86.h" |
24 | 25 | #include "llvm/IR/Type.h"
|
25 | 26 |
|
26 | 27 | using namespace llvm;
|
@@ -110,7 +111,8 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
|
110 | 111 | .legalFor(HasSSE2 || UseX87, {s64})
|
111 | 112 | .legalFor(UseX87, {s80});
|
112 | 113 |
|
113 |
| - getActionDefinitionsBuilder(G_GET_ROUNDING).customFor({s32}); |
| 114 | + getActionDefinitionsBuilder({G_GET_ROUNDING, G_SET_ROUNDING}) |
| 115 | + .customFor({s32}); |
114 | 116 |
|
115 | 117 | // merge/unmerge
|
116 | 118 | for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
|
@@ -617,6 +619,8 @@ bool X86LegalizerInfo::legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI,
|
617 | 619 | return legalizeFPTOSI(MI, MRI, Helper);
|
618 | 620 | case TargetOpcode::G_GET_ROUNDING:
|
619 | 621 | return legalizeGETROUNDING(MI, MRI, Helper);
|
| 622 | + case TargetOpcode::G_SET_ROUNDING: |
| 623 | + return legalizeSETROUNDING(MI, MRI, Helper); |
620 | 624 | }
|
621 | 625 | llvm_unreachable("expected switch to return");
|
622 | 626 | }
|
@@ -859,6 +863,133 @@ bool X86LegalizerInfo::legalizeGETROUNDING(MachineInstr &MI,
|
859 | 863 | return true;
|
860 | 864 | }
|
861 | 865 |
|
| 866 | +bool X86LegalizerInfo::legalizeSETROUNDING(MachineInstr &MI, |
| 867 | + MachineRegisterInfo &MRI, |
| 868 | + LegalizerHelper &Helper) const { |
| 869 | + MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; |
| 870 | + MachineFunction &MF = MIRBuilder.getMF(); |
| 871 | + Register Src = MI.getOperand(0).getReg(); |
| 872 | + const LLT s8 = LLT::scalar(8); |
| 873 | + const LLT s16 = LLT::scalar(16); |
| 874 | + const LLT s32 = LLT::scalar(32); |
| 875 | + |
| 876 | + // Allocate stack slot for control word and MXCSR (4 bytes). |
| 877 | + int MemSize = 4; |
| 878 | + Align Alignment = Align(4); |
| 879 | + MachinePointerInfo PtrInfo; |
| 880 | + auto StackTemp = Helper.createStackTemporary(TypeSize::getFixed(MemSize), |
| 881 | + Alignment, PtrInfo); |
| 882 | + Register StackPtr = StackTemp.getReg(0); |
| 883 | + |
| 884 | + auto StoreMMO = |
| 885 | + MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, 2, Align(2)); |
| 886 | + MIRBuilder.buildInstr(X86::G_FNSTCW16) |
| 887 | + .addUse(StackPtr) |
| 888 | + .addMemOperand(StoreMMO); |
| 889 | + |
| 890 | + auto LoadMMO = |
| 891 | + MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad, 2, Align(2)); |
| 892 | + auto CWD16 = MIRBuilder.buildLoad(s16, StackPtr, *LoadMMO); |
| 893 | + |
| 894 | + // Clear RM field (bits 11:10) |
| 895 | + auto ClearedCWD = |
| 896 | + MIRBuilder.buildAnd(s16, CWD16, MIRBuilder.buildConstant(s16, 0xf3ff)); |
| 897 | + |
| 898 | + // Check if Src is a constant |
| 899 | + auto *SrcDef = MRI.getVRegDef(Src); |
| 900 | + Register RMBits; |
| 901 | + Register MXCSRRMBits; |
| 902 | + |
| 903 | + if (SrcDef && SrcDef->getOpcode() == TargetOpcode::G_CONSTANT) { |
| 904 | + uint64_t RM = getIConstantFromReg(Src, MRI).getZExtValue(); |
| 905 | + int FieldVal = X86::getRoundingModeX86(RM); |
| 906 | + |
| 907 | + if (FieldVal == X86::rmInvalid) { |
| 908 | + LLVMContext &C = MF.getFunction().getContext(); |
| 909 | + C.diagnose(DiagnosticInfoUnsupported( |
| 910 | + MF.getFunction(), "rounding mode is not supported by X86 hardware", |
| 911 | + DiagnosticLocation(MI.getDebugLoc()), DS_Error)); |
| 912 | + return false; |
| 913 | + } |
| 914 | + |
| 915 | + FieldVal = FieldVal << 3; |
| 916 | + RMBits = MIRBuilder.buildConstant(s16, FieldVal).getReg(0); |
| 917 | + MXCSRRMBits = MIRBuilder.buildConstant(s32, FieldVal).getReg(0); |
| 918 | + } else { |
| 919 | + // Convert Src (rounding mode) to bits for control word |
| 920 | + // (0xc9 << (2 * Src + 4)) & 0xc00 |
| 921 | + auto Src32 = MIRBuilder.buildZExtOrTrunc(s32, Src); |
| 922 | + auto ShiftAmt = MIRBuilder.buildAdd( |
| 923 | + s32, MIRBuilder.buildShl(s32, Src32, MIRBuilder.buildConstant(s32, 1)), |
| 924 | + MIRBuilder.buildConstant(s32, 4)); |
| 925 | + auto ShiftAmt8 = MIRBuilder.buildTrunc(s8, ShiftAmt); |
| 926 | + auto Shifted = MIRBuilder.buildShl(s16, MIRBuilder.buildConstant(s16, 0xc9), |
| 927 | + ShiftAmt8); |
| 928 | + RMBits = |
| 929 | + MIRBuilder.buildAnd(s16, Shifted, MIRBuilder.buildConstant(s16, 0xc00)) |
| 930 | + .getReg(0); |
| 931 | + |
| 932 | + // For non-constant case, we still need to compute MXCSR bits dynamically |
| 933 | + auto RMBits32 = MIRBuilder.buildZExt(s32, RMBits); |
| 934 | + MXCSRRMBits = |
| 935 | + MIRBuilder.buildShl(s32, RMBits32, MIRBuilder.buildConstant(s32, 3)) |
| 936 | + .getReg(0); |
| 937 | + } |
| 938 | + // Update rounding mode bits |
| 939 | + auto NewCWD = |
| 940 | + MIRBuilder.buildOr(s16, ClearedCWD, RMBits, MachineInstr::Disjoint); |
| 941 | + |
| 942 | + // Store new FP Control Word to stack |
| 943 | + auto StoreNewMMO = |
| 944 | + MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, 2, Align(2)); |
| 945 | + MIRBuilder.buildStore(NewCWD, StackPtr, *StoreNewMMO); |
| 946 | + |
| 947 | + // Load FP control word from the slot using G_FLDCW16 |
| 948 | + auto LoadNewMMO = |
| 949 | + MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad, 2, Align(2)); |
| 950 | + MIRBuilder.buildInstr(X86::G_FLDCW16) |
| 951 | + .addUse(StackPtr) |
| 952 | + .addMemOperand(LoadNewMMO); |
| 953 | + |
| 954 | + if (Subtarget.hasSSE1()) { |
| 955 | + // Store MXCSR to stack (use STMXCSR) |
| 956 | + auto StoreMXCSRMMO = MF.getMachineMemOperand( |
| 957 | + PtrInfo, MachineMemOperand::MOStore, 4, Align(4)); |
| 958 | + MIRBuilder.buildInstr(TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS) |
| 959 | + .addIntrinsicID(Intrinsic::x86_sse_stmxcsr) |
| 960 | + .addUse(StackPtr) |
| 961 | + .addMemOperand(StoreMXCSRMMO); |
| 962 | + |
| 963 | + // Load MXCSR from stack |
| 964 | + auto LoadMXCSRMMO = MF.getMachineMemOperand( |
| 965 | + PtrInfo, MachineMemOperand::MOLoad, 4, Align(4)); |
| 966 | + auto MXCSR = MIRBuilder.buildLoad(s32, StackPtr, *LoadMXCSRMMO); |
| 967 | + |
| 968 | + // Clear RM field (bits 14:13) |
| 969 | + auto ClearedMXCSR = MIRBuilder.buildAnd( |
| 970 | + s32, MXCSR, MIRBuilder.buildConstant(s32, 0xffff9fff)); |
| 971 | + |
| 972 | + // Update rounding mode bits |
| 973 | + auto NewMXCSR = MIRBuilder.buildOr(s32, ClearedMXCSR, MXCSRRMBits); |
| 974 | + |
| 975 | + // Store new MXCSR to stack |
| 976 | + auto StoreNewMXCSRMMO = MF.getMachineMemOperand( |
| 977 | + PtrInfo, MachineMemOperand::MOStore, 4, Align(4)); |
| 978 | + MIRBuilder.buildStore(NewMXCSR, StackPtr, *StoreNewMXCSRMMO); |
| 979 | + |
| 980 | + // Load MXCSR from stack (use LDMXCSR) |
| 981 | + auto LoadNewMXCSRMMO = MF.getMachineMemOperand( |
| 982 | + PtrInfo, MachineMemOperand::MOLoad, 4, Align(4)); |
| 983 | + MIRBuilder.buildInstr(TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS) |
| 984 | + .addIntrinsicID(Intrinsic::x86_sse_ldmxcsr) |
| 985 | + .addUse(StackPtr) |
| 986 | + .addMemOperand(LoadNewMXCSRMMO); |
| 987 | + } |
| 988 | + |
| 989 | + MI.eraseFromParent(); |
| 990 | + return true; |
| 991 | +} |
| 992 | + |
862 | 993 | bool X86LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
|
863 | 994 | MachineInstr &MI) const {
|
864 | 995 | return true;
|
|
0 commit comments