@@ -75,6 +75,70 @@ static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
7575 return MaskVec;
7676}
7777
78+ static Value *emitX86Round (CodeGenFunction &CGF,
79+ Value *X,
80+ unsigned M) {
81+ unsigned RoundingMask = 0b11 ;
82+ unsigned UpdatePEBit = 0b100 ;
83+ unsigned UseMXCSRBit = 0b1000 ;
84+
85+ unsigned roundingMode = M & RoundingMask;
86+ bool updatePE = M & UpdatePEBit;
87+ bool useMXCSR = M & UseMXCSRBit;
88+
89+ Intrinsic::ID ID = Intrinsic::not_intrinsic;
90+ LLVMContext &Ctx = CGF.CGM .getLLVMContext ();
91+
92+ if (useMXCSR) {
93+ ID = Intrinsic::experimental_constrained_nearbyint;
94+
95+ auto PE_metatadata = updatePE ? " fpexcept.strict" : " fpexcept.ignore" ;
96+
97+ Value *ExceptMode = MetadataAsValue::get (
98+ Ctx,
99+ MDString::get (Ctx, PE_metatadata)
100+ );
101+
102+ Value *RoundingMode = MetadataAsValue::get (
103+ Ctx,
104+ MDString::get (Ctx, " rounding.dynamic" )
105+ );
106+
107+ Function *F = CGF.CGM .getIntrinsic (ID, X->getType ());
108+ return CGF.Builder .CreateCall (F, {X, ExceptMode, RoundingMode});
109+ }
110+
111+ if (updatePE) {
112+ switch (roundingMode) {
113+ case 0b00 : ID = Intrinsic::experimental_constrained_roundeven; break ;
114+ case 0b01 : ID = Intrinsic::experimental_constrained_floor; break ;
115+ case 0b10 : ID = Intrinsic::experimental_constrained_ceil; break ;
116+ case 0b11 : ID = Intrinsic::experimental_constrained_trunc; break ;
117+ default : llvm_unreachable (" Invalid rounding mode" );
118+ }
119+
120+ Value *ExceptMode =MetadataAsValue::get (
121+ Ctx,
122+ MDString::get (Ctx, " fpexcept.strict" )
123+ );
124+
125+ Function *F = CGF.CGM .getIntrinsic (ID, X->getType ());
126+ return CGF.Builder .CreateCall (F, {X, ExceptMode});
127+ }
128+
129+ // Otherwise we can use the standard ops
130+ switch (roundingMode) {
131+ case 0b00 : ID = Intrinsic::roundeven; break ;
132+ case 0b01 : ID = Intrinsic::floor; break ;
133+ case 0b10 : ID = Intrinsic::ceil; break ;
134+ case 0b11 : ID = Intrinsic::trunc; break ;
135+ default : llvm_unreachable (" Invalid rounding mode" );
136+ }
137+
138+ Function *F = CGF.CGM .getIntrinsic (ID, X->getType ());
139+ return CGF.Builder .CreateCall (F, {X});
140+ }
141+
78142static Value *EmitX86MaskedStore (CodeGenFunction &CGF, ArrayRef<Value *> Ops,
79143 Align Alignment) {
80144 Value *Ptr = Ops[0 ];
@@ -843,100 +907,19 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
843907 case X86::BI__builtin_ia32_roundps:
844908 case X86::BI__builtin_ia32_roundpd:
845909 case X86::BI__builtin_ia32_roundps256:
846- case X86::BI__builtin_ia32_roundpd256: {
910+ case X86::BI__builtin_ia32_roundpd256: {
847911 unsigned M = cast<ConstantInt>(Ops[1 ])->getZExtValue ();
848- unsigned roundingModeAndPE = M & 0b111 ;
849- unsigned updatePE = M & 0b100 ;
850- unsigned use_MXCSR = M & 0b1000 ;
851-
852- Intrinsic::ID ID;
853-
854- // Currently no ops for MXCSR bit set, so lower directly to SSE41 instruction
855- if (use_MXCSR) {
856- switch (BuiltinID) {
857- case X86::BI__builtin_ia32_roundps: ID = Intrinsic::x86_sse41_round_ps; break ;
858- case X86::BI__builtin_ia32_roundpd: ID = Intrinsic::x86_sse41_round_pd; break ;
859- }
860- return nullptr ;
861- } else {
862- switch (roundingModeAndPE) {
863- default : return nullptr ;
864- case 0b000 : ID = Intrinsic::nearbyint; break ;
865- case 0b001 : ID = Intrinsic::floor; break ;
866- case 0b010 : ID = Intrinsic::ceil; break ;
867- case 0b011 : ID = Intrinsic::trunc; break ;
868- case 0b100 : ID = Intrinsic::experimental_constrained_floor; break ; // TODO: replace with actual op
869- case 0b101 : ID = Intrinsic::experimental_constrained_floor; break ;
870- case 0b110 : ID = Intrinsic::experimental_constrained_ceil; break ;
871- case 0b111 : ID = Intrinsic::experimental_constrained_trunc; break ;
872- }
873- }
874-
875- Function *F = CGM.getIntrinsic (ID, Ops[0 ]->getType ());
876-
877- if (updatePE) {
878- LLVMContext &Ctx = CGM.getLLVMContext ();
879-
880- Value *ExceptMode =MetadataAsValue::get (
881- Ctx,
882- MDString::get (Ctx, " fpexcept.strict" )
883- );
884-
885- return Builder.CreateCall (F, {Ops[0 ], ExceptMode});
886- }
887-
888- return Builder.CreateCall (F, {Ops[0 ]});
912+ return emitX86Round (*this , Ops[0 ], M);
889913 }
890914 case X86::BI__builtin_ia32_roundss:
891915 case X86::BI__builtin_ia32_roundsd: {
892916 unsigned M = cast<ConstantInt>(Ops[2 ])->getZExtValue ();
893- unsigned roundingModeAndPE = M & 0b111 ;
894- unsigned updatePE = M & 0b100 ;
895- unsigned use_MXCSR = M & 0b1000 ;
896917
897- Intrinsic::ID ID;
898-
899- // Currently no ops for MXCSR bit set, so lower directly to SSE41 instruction
900- if (use_MXCSR) {
901- switch (BuiltinID) {
902- case X86::BI__builtin_ia32_roundss: ID = Intrinsic::x86_sse41_round_ss; break ;
903- case X86::BI__builtin_ia32_roundsd: ID = Intrinsic::x86_sse41_round_sd; break ;
904- }
905- return nullptr ;
906- } else {
907- switch (roundingModeAndPE) {
908- default : return nullptr ;
909- case 0b000 : ID = Intrinsic::nearbyint; break ;
910- case 0b001 : ID = Intrinsic::floor; break ;
911- case 0b010 : ID = Intrinsic::ceil; break ;
912- case 0b011 : ID = Intrinsic::trunc; break ;
913- case 0b100 : ID = Intrinsic::experimental_constrained_floor; break ; // TODO: replace with actual op
914- case 0b101 : ID = Intrinsic::experimental_constrained_floor; break ;
915- case 0b110 : ID = Intrinsic::experimental_constrained_ceil; break ;
916- case 0b111 : ID = Intrinsic::experimental_constrained_trunc; break ;
917- }
918- }
919-
920918 Value *idx = Builder.getInt32 (0 );
921- Value *b0 = Builder.CreateExtractElement (Ops[1 ], idx);
922- Value *rounded0;
923-
924- Function *F = CGM.getIntrinsic (ID, b0->getType ());
925-
926- if (updatePE) {
927- LLVMContext &Ctx = CGM.getLLVMContext ();
928-
929- Value *ExceptMode =MetadataAsValue::get (
930- Ctx,
931- MDString::get (Ctx, " fpexcept.strict" )
932- );
933-
934- rounded0 = Builder.CreateCall (F, {b0, ExceptMode});
935- } else {
936- rounded0 = Builder.CreateCall (F, {b0});
937- }
919+ Value *ValAt0 = Builder.CreateExtractElement (Ops[1 ], idx);
920+ Value *RoundedAt0 = emitX86Round (*this , ValAt0, M);
938921
939- return Builder.CreateInsertElement (Ops[0 ], rounded0 , idx);
922+ return Builder.CreateInsertElement (Ops[0 ], RoundedAt0 , idx);
940923 }
941924 case X86::BI__builtin_ia32_lzcnt_u16:
942925 case X86::BI__builtin_ia32_lzcnt_u32:
0 commit comments