|
17 | 17 | #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" |
18 | 18 | #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" |
19 | 19 | #include "llvm/CodeGen/MachineConstantPool.h" |
| 20 | +#include "llvm/CodeGen/MachineFrameInfo.h" |
20 | 21 | #include "llvm/CodeGen/TargetOpcodes.h" |
21 | 22 | #include "llvm/CodeGen/ValueTypes.h" |
22 | 23 | #include "llvm/IR/DerivedTypes.h" |
@@ -108,6 +109,8 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI, |
108 | 109 | .legalFor(HasSSE2 || UseX87, {s64}) |
109 | 110 | .legalFor(UseX87, {s80}); |
110 | 111 |
|
| 112 | + getActionDefinitionsBuilder(G_GET_ROUNDING).customFor({s32}); |
| 113 | + |
111 | 114 | // merge/unmerge |
112 | 115 | for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) { |
113 | 116 | unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1; |
@@ -611,6 +614,8 @@ bool X86LegalizerInfo::legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, |
611 | 614 | return legalizeSITOFP(MI, MRI, Helper); |
612 | 615 | case TargetOpcode::G_FPTOSI: |
613 | 616 | return legalizeFPTOSI(MI, MRI, Helper); |
| 617 | + case TargetOpcode::G_GET_ROUNDING: |
| 618 | + return legalizeGETROUNDING(MI, MRI, Helper); |
614 | 619 | } |
615 | 620 | llvm_unreachable("expected switch to return"); |
616 | 621 | } |
@@ -777,6 +782,82 @@ bool X86LegalizerInfo::legalizeNarrowingStore(MachineInstr &MI, |
777 | 782 | return true; |
778 | 783 | } |
779 | 784 |
|
| 785 | +bool X86LegalizerInfo::legalizeGETROUNDING(MachineInstr &MI, |
| 786 | + MachineRegisterInfo &MRI, |
| 787 | + LegalizerHelper &Helper) const { |
| 788 | + /* |
| 789 | + The rounding mode is in bits 11:10 of FPSR, and has the following |
| 790 | + settings: |
| 791 | + 00 Round to nearest |
| 792 | + 01 Round to -inf |
| 793 | + 10 Round to +inf |
| 794 | + 11 Round to 0 |
| 795 | +
|
| 796 | + GET_ROUNDING, on the other hand, expects the following: |
| 797 | + -1 Undefined |
| 798 | + 0 Round to 0 |
| 799 | + 1 Round to nearest |
| 800 | + 2 Round to +inf |
| 801 | + 3 Round to -inf |
| 802 | +
|
| 803 | + To perform the conversion, we use a packed lookup table of the four 2-bit |
| 804 | + values that we can index by FPSP[11:10] |
| 805 | + 0x2d --> (0b00,10,11,01) --> (0,2,3,1) >> FPSR[11:10] |
| 806 | +
|
| 807 | + (0x2d >> ((FPSR >> 9) & 6)) & 3 |
| 808 | + */ |
| 809 | + |
| 810 | + MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; |
| 811 | + MachineFunction &MF = MIRBuilder.getMF(); |
| 812 | + Register Dst = MI.getOperand(0).getReg(); |
| 813 | + LLT DstTy = MRI.getType(Dst); |
| 814 | + const LLT s8 = LLT::scalar(8); |
| 815 | + const LLT s16 = LLT::scalar(16); |
| 816 | + const LLT s32 = LLT::scalar(32); |
| 817 | + |
| 818 | + // Save FP Control Word to stack slot |
| 819 | + int MemSize = 2; |
| 820 | + Align Alignment = Align(2); |
| 821 | + MachinePointerInfo PtrInfo; |
| 822 | + auto StackTemp = Helper.createStackTemporary(TypeSize::getFixed(MemSize), |
| 823 | + Alignment, PtrInfo); |
| 824 | + Register StackPtr = StackTemp.getReg(0); |
| 825 | + |
| 826 | + auto StoreMMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, |
| 827 | + MemSize, Alignment); |
| 828 | + |
| 829 | + // Store FP Control Word to stack slot using G_FNSTCW16 |
| 830 | + MIRBuilder.buildInstr(X86::G_FNSTCW16) |
| 831 | + .addUse(StackPtr) |
| 832 | + .addMemOperand(StoreMMO); |
| 833 | + |
| 834 | + // Load FP Control Word from stack slot |
| 835 | + auto LoadMMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad, |
| 836 | + MemSize, Alignment); |
| 837 | + |
| 838 | + auto CWD32 = |
| 839 | + MIRBuilder.buildZExt(s32, MIRBuilder.buildLoad(s16, StackPtr, *LoadMMO)); |
| 840 | + auto Shifted8 = MIRBuilder.buildTrunc( |
| 841 | + s8, MIRBuilder.buildLShr(s32, CWD32, MIRBuilder.buildConstant(s8, 9))); |
| 842 | + auto Masked32 = MIRBuilder.buildZExt( |
| 843 | + s32, MIRBuilder.buildAnd(s8, Shifted8, MIRBuilder.buildConstant(s8, 6))); |
| 844 | + |
| 845 | + // LUT is a packed lookup table (0x2d) used to map the 2-bit x87 FPU rounding |
| 846 | + // mode (from bits 11:10 of the control word) to the values expected by |
| 847 | + // GET_ROUNDING. The mapping is performed by shifting LUT right by the |
| 848 | + // extracted rounding mode and masking the result with 3 to obtain the final |
| 849 | + auto LUT = MIRBuilder.buildConstant(s32, 0x2d); |
| 850 | + auto LUTShifted = MIRBuilder.buildLShr(s32, LUT, Masked32); |
| 851 | + auto RetVal = |
| 852 | + MIRBuilder.buildAnd(s32, LUTShifted, MIRBuilder.buildConstant(s32, 3)); |
| 853 | + auto RetValTrunc = MIRBuilder.buildZExtOrTrunc(DstTy, RetVal); |
| 854 | + |
| 855 | + MIRBuilder.buildCopy(Dst, RetValTrunc); |
| 856 | + |
| 857 | + MI.eraseFromParent(); |
| 858 | + return true; |
| 859 | +} |
| 860 | + |
780 | 861 | bool X86LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, |
781 | 862 | MachineInstr &MI) const { |
782 | 863 | return true; |
|
0 commit comments