@@ -840,6 +840,104 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
840840 Ops[0 ]);
841841 return Builder.CreateExtractValue (Call, 0 );
842842 }
843+ case X86::BI__builtin_ia32_roundps:
844+ case X86::BI__builtin_ia32_roundpd:
845+ case X86::BI__builtin_ia32_roundps256:
846+ case X86::BI__builtin_ia32_roundpd256: {
847+ unsigned M = cast<ConstantInt>(Ops[1 ])->getZExtValue ();
848+ unsigned roundingModeAndPE = M & 0b111 ;
849+ unsigned updatePE = M & 0b100 ;
850+ unsigned use_MXCSR = M & 0b1000 ;
851+
852+ Intrinsic::ID ID;
853+
854+ // Currently no ops for MXCSR bit set, so lower directly to SSE41 instruction
855+ if (use_MXCSR) {
856+ switch (BuiltinID) {
857+ case X86::BI__builtin_ia32_roundps: ID = Intrinsic::x86_sse41_round_ps; break ;
858+ case X86::BI__builtin_ia32_roundpd: ID = Intrinsic::x86_sse41_round_pd; break ;
859+ }
860+ return nullptr ;
861+ } else {
862+ switch (roundingModeAndPE) {
863+ default : return nullptr ;
864+ case 0b000 : ID = Intrinsic::nearbyint; break ;
865+ case 0b001 : ID = Intrinsic::floor; break ;
866+ case 0b010 : ID = Intrinsic::ceil; break ;
867+ case 0b011 : ID = Intrinsic::trunc; break ;
868+ case 0b100 : ID = Intrinsic::experimental_constrained_floor; break ; // TODO: replace with actual op
869+ case 0b101 : ID = Intrinsic::experimental_constrained_floor; break ;
870+ case 0b110 : ID = Intrinsic::experimental_constrained_ceil; break ;
871+ case 0b111 : ID = Intrinsic::experimental_constrained_trunc; break ;
872+ }
873+ }
874+
875+ Function *F = CGM.getIntrinsic (ID, Ops[0 ]->getType ());
876+
877+ if (updatePE) {
878+ LLVMContext &Ctx = CGM.getLLVMContext ();
879+
880+ Value *ExceptMode =MetadataAsValue::get (
881+ Ctx,
882+ MDString::get (Ctx, " fpexcept.strict" )
883+ );
884+
885+ return Builder.CreateCall (F, {Ops[0 ], ExceptMode});
886+ }
887+
888+ return Builder.CreateCall (F, {Ops[0 ]});
889+ }
890+ case X86::BI__builtin_ia32_roundss:
891+ case X86::BI__builtin_ia32_roundsd: {
892+ unsigned M = cast<ConstantInt>(Ops[2 ])->getZExtValue ();
893+ unsigned roundingModeAndPE = M & 0b111 ;
894+ unsigned updatePE = M & 0b100 ;
895+ unsigned use_MXCSR = M & 0b1000 ;
896+
897+ Intrinsic::ID ID;
898+
899+ // Currently no ops for MXCSR bit set, so lower directly to SSE41 instruction
900+ if (use_MXCSR) {
901+ switch (BuiltinID) {
902+ case X86::BI__builtin_ia32_roundss: ID = Intrinsic::x86_sse41_round_ss; break ;
903+ case X86::BI__builtin_ia32_roundsd: ID = Intrinsic::x86_sse41_round_sd; break ;
904+ }
905+ return nullptr ;
906+ } else {
907+ switch (roundingModeAndPE) {
908+ default : return nullptr ;
909+ case 0b000 : ID = Intrinsic::nearbyint; break ;
910+ case 0b001 : ID = Intrinsic::floor; break ;
911+ case 0b010 : ID = Intrinsic::ceil; break ;
912+ case 0b011 : ID = Intrinsic::trunc; break ;
913+ case 0b100 : ID = Intrinsic::experimental_constrained_floor; break ; // TODO: replace with actual op
914+ case 0b101 : ID = Intrinsic::experimental_constrained_floor; break ;
915+ case 0b110 : ID = Intrinsic::experimental_constrained_ceil; break ;
916+ case 0b111 : ID = Intrinsic::experimental_constrained_trunc; break ;
917+ }
918+ }
919+
920+ Value *idx = Builder.getInt32 (0 );
921+ Value *b0 = Builder.CreateExtractElement (Ops[1 ], idx);
922+ Value *rounded0;
923+
924+ Function *F = CGM.getIntrinsic (ID, b0->getType ());
925+
926+ if (updatePE) {
927+ LLVMContext &Ctx = CGM.getLLVMContext ();
928+
929+ Value *ExceptMode =MetadataAsValue::get (
930+ Ctx,
931+ MDString::get (Ctx, " fpexcept.strict" )
932+ );
933+
934+ rounded0 = Builder.CreateCall (F, {b0, ExceptMode});
935+ } else {
936+ rounded0 = Builder.CreateCall (F, {b0});
937+ }
938+
939+ return Builder.CreateInsertElement (Ops[0 ], rounded0, idx);
940+ }
843941 case X86::BI__builtin_ia32_lzcnt_u16:
844942 case X86::BI__builtin_ia32_lzcnt_u32:
845943 case X86::BI__builtin_ia32_lzcnt_u64: {
0 commit comments