@@ -38033,11 +38033,13 @@ static MachineBasicBlock *emitCTSelectI386WithConditionMaterialization(
3803338033 Register TmpMaskReg;
3803438034
3803538035 // Determine the register class for tmp_mask based on the data type
38036- if (InternalPseudoOpcode == X86::CTSELECT_I386_INT_GR8rr ||
38037- InternalPseudoOpcode == X86::CTSELECT_I386_INT_GR16rr ||
38038- InternalPseudoOpcode == X86::CTSELECT_I386_INT_GR32rr) {
38036+ if (InternalPseudoOpcode == X86::CTSELECT_I386_INT_GR8rr)
38037+ TmpMaskReg = MRI.createVirtualRegister(&X86::GR8RegClass);
38038+ else if (InternalPseudoOpcode == X86::CTSELECT_I386_INT_GR16rr)
38039+ TmpMaskReg = MRI.createVirtualRegister(&X86::GR16RegClass);
38040+ else if (InternalPseudoOpcode == X86::CTSELECT_I386_INT_GR32rr)
3803938041 TmpMaskReg = MRI.createVirtualRegister(&X86::GR32RegClass);
38040- } else {
38042+ else {
3804138043 llvm_unreachable("Unknown internal pseudo opcode");
3804238044 }
3804338045
@@ -38075,139 +38077,109 @@ static MachineBasicBlock *emitCTSelectI386WithFpType(MachineInstr &MI,
3807538077 Register CondByteReg = MRI.createVirtualRegister(&X86::GR8RegClass);
3807638078 BuildMI(*BB, MI, MIMD, TII->get(X86::SETCCr), CondByteReg).addImm(OppCC);
3807738079
38078- // Create mask from condition: 0x00000000 or 0xFFFFFFFF
38079- unsigned MaskReg = MRI.createVirtualRegister(&X86::GR32RegClass);
38080- unsigned ExtReg = MRI.createVirtualRegister(&X86::GR32RegClass);
38081-
38082- // Zero-extend i8 condition to i32
38083- BuildMI(*BB, MI, MIMD, TII->get(X86::MOVZX32rr8), ExtReg)
38084- .addReg(CondByteReg, RegState::Kill);
38085-
38086- // Negate to create mask
38087- BuildMI(*BB, MI, MIMD, TII->get(X86::NEG32r), MaskReg)
38088- .addReg(ExtReg, RegState::Kill);
38089-
38090- // Create inverted mask
38091- unsigned InvMaskReg = MRI.createVirtualRegister(&X86::GR32RegClass);
38092- BuildMI(*BB, MI, MIMD, TII->get(X86::NOT32r), InvMaskReg).addReg(MaskReg);
38093-
3809438080 auto storeFpToSlot = [&](unsigned Opcode, int Slot, Register Reg) {
3809538081 addFrameReference(BuildMI(*BB, MI, MIMD, TII->get(Opcode)), Slot)
3809638082 .addReg(Reg, RegState::Kill);
3809738083 };
3809838084
38099- auto emitCtSelect = [&](unsigned NumValues, int TrueSlot, int FalseSlot,
38100- int ResultSlot, bool KillMaskRegs) {
38085+ auto emitCtSelectWithPseudo = [&](unsigned NumValues, int TrueSlot, int FalseSlot, int ResultSlot) {
3810138086 for (unsigned Val = 0; Val < NumValues; ++Val) {
3810238087 unsigned Offset = Val * RegSizeInByte;
38103- unsigned TrueReg = MRI.createVirtualRegister(&X86::GR32RegClass);
38104- BuildMI(*BB, MI, MIMD, TII->get(X86::MOV32rm), TrueReg)
38088+
38089+ // Load true and false values from stack as 32-bit integers
38090+ unsigned TrueIntReg = MRI.createVirtualRegister(&X86::GR32RegClass);
38091+ BuildMI(*BB, MI, MIMD, TII->get(X86::MOV32rm), TrueIntReg)
3810538092 .addFrameIndex(TrueSlot)
3810638093 .addImm(1)
3810738094 .addReg(0)
3810838095 .addImm(Offset)
3810938096 .addReg(0);
3811038097
38111- unsigned FalseReg = MRI.createVirtualRegister(&X86::GR32RegClass);
38112- BuildMI(*BB, MI, MIMD, TII->get(X86::MOV32rm), FalseReg )
38098+ unsigned FalseIntReg = MRI.createVirtualRegister(&X86::GR32RegClass);
38099+ BuildMI(*BB, MI, MIMD, TII->get(X86::MOV32rm), FalseIntReg )
3811338100 .addFrameIndex(FalseSlot)
3811438101 .addImm(1)
3811538102 .addReg(0)
3811638103 .addImm(Offset)
3811738104 .addReg(0);
3811838105
38119- unsigned MaskedTrueReg = MRI.createVirtualRegister(&X86::GR32RegClass);
38120- unsigned MaskedFalseReg = MRI.createVirtualRegister(&X86::GR32RegClass);
38121- unsigned ResultReg = MRI.createVirtualRegister(&X86::GR32RegClass);
38122-
38123- bool KillMasksNow = KillMaskRegs && Val + 1 == NumValues;
38124-
38125- auto TrueMIB =
38126- BuildMI(*BB, MI, MIMD, TII->get(X86::AND32rr), MaskedTrueReg);
38127- TrueMIB.addReg(TrueReg, RegState::Kill);
38128- if (KillMasksNow)
38129- TrueMIB.addReg(MaskReg, RegState::Kill);
38130- else
38131- TrueMIB.addReg(MaskReg);
38132-
38133- auto FalseMIB =
38134- BuildMI(*BB, MI, MIMD, TII->get(X86::AND32rr), MaskedFalseReg);
38135- FalseMIB.addReg(FalseReg, RegState::Kill);
38136- if (KillMasksNow)
38137- FalseMIB.addReg(InvMaskReg, RegState::Kill);
38138- else
38139- FalseMIB.addReg(InvMaskReg);
38140-
38141- BuildMI(*BB, MI, MIMD, TII->get(X86::OR32rr), ResultReg)
38142- .addReg(MaskedTrueReg, RegState::Kill)
38143- .addReg(MaskedFalseReg, RegState::Kill);
38144-
38106+ // Use the CTSELECT_I386_INT_GR32rr pseudo instruction for constant-time selection
38107+ unsigned ResultIntReg = MRI.createVirtualRegister(&X86::GR32RegClass);
38108+ unsigned TmpByteReg = MRI.createVirtualRegister(&X86::GR8RegClass);
38109+ unsigned TmpMaskReg = MRI.createVirtualRegister(&X86::GR32RegClass);
38110+
38111+ BuildMI(*BB, MI, MIMD, TII->get(X86::CTSELECT_I386_INT_GR32rr))
38112+ .addDef(ResultIntReg) // dst (output)
38113+ .addDef(TmpByteReg) // tmp_byte (output)
38114+ .addDef(TmpMaskReg) // tmp_mask (output)
38115+ .addReg(FalseIntReg) // src1 (input) - false value
38116+ .addReg(TrueIntReg) // src2 (input) - true value
38117+ .addReg(CondByteReg); // pre-materialized condition byte (input)
38118+
38119+ // Store result back to result slot
3814538120 BuildMI(*BB, MI, MIMD, TII->get(X86::MOV32mr))
3814638121 .addFrameIndex(ResultSlot)
3814738122 .addImm(1)
3814838123 .addReg(0)
3814938124 .addImm(Offset)
3815038125 .addReg(0)
38151- .addReg(ResultReg , RegState::Kill);
38126+ .addReg(ResultIntReg , RegState::Kill);
3815238127 }
3815338128 };
3815438129
3815538130 switch (pseudoInstr) {
3815638131 case X86::CTSELECT_I386_FP32rr: {
38157-
38158- // Allocate stack slot for result (4 bytes for f32)
38132+ // Allocate stack slots (4 bytes for f32)
3815938133 int ResultSlot = MFI.CreateStackObject(RegSizeInByte, Align(4), false);
3816038134 int TrueSlot = MFI.CreateStackObject(RegSizeInByte, Align(4), false);
3816138135 int FalseSlot = MFI.CreateStackObject(RegSizeInByte, Align(4), false);
3816238136
38163- // Store f32 to stack using pseudo instruction (ST_Fp32m will be handled by
38164- // FP stackifier)
38137+ // Store f32 values to stack
3816538138 storeFpToSlot(X86::ST_Fp32m, TrueSlot, TrueReg);
3816638139 storeFpToSlot(X86::ST_Fp32m, FalseSlot, FalseReg);
3816738140
38168- emitCtSelect(1, TrueSlot, FalseSlot, ResultSlot, true);
38141+ // Use pseudo instruction for selection (1 x 32-bit value)
38142+ emitCtSelectWithPseudo(1, TrueSlot, FalseSlot, ResultSlot);
3816938143
38170- // Load as f32 to x87 stack using pseudo instruction
38144+ // Load result back as f32
3817138145 addFrameReference(BuildMI(*BB, MI, MIMD, TII->get(X86::LD_Fp32m), DestReg),
3817238146 ResultSlot);
3817338147 break;
3817438148 }
3817538149 case X86::CTSELECT_I386_FP64rr: {
3817638150 unsigned StackSlotSize = 8;
38177- // Allocate stack slots for temporaries (8 bytes for f64)
38151+ // Allocate stack slots (8 bytes for f64)
3817838152 int TrueSlot = MFI.CreateStackObject(StackSlotSize, Align(4), false);
3817938153 int FalseSlot = MFI.CreateStackObject(StackSlotSize, Align(4), false);
3818038154 int ResultSlot = MFI.CreateStackObject(StackSlotSize, Align(4), false);
3818138155
38182- // Store x87 values to stack using pseudo instruction
38183- // ST_Fp64m will be handled by the FP stackifier
38156+ // Store f64 values to stack
3818438157 storeFpToSlot(X86::ST_Fp64m, TrueSlot, TrueReg);
3818538158 storeFpToSlot(X86::ST_Fp64m, FalseSlot, FalseReg);
3818638159
38187- emitCtSelect(StackSlotSize/RegSizeInByte, TrueSlot, FalseSlot, ResultSlot, true);
38160+ // Use pseudo instruction for selection (2 x 32-bit values)
38161+ emitCtSelectWithPseudo(StackSlotSize/RegSizeInByte, TrueSlot, FalseSlot, ResultSlot);
3818838162
38189- // Load final f64 result back to x87 stack using pseudo instruction
38163+ // Load result back as f64
3819038164 addFrameReference(BuildMI(*BB, MI, MIMD, TII->get(X86::LD_Fp64m), DestReg),
3819138165 ResultSlot);
3819238166 break;
3819338167 }
3819438168 case X86::CTSELECT_I386_FP80rr: {
38195- // Allocate stack slots for temporaries
38196- unsigned StackObjctSize = 12;
38197- int TrueSlot = MFI.CreateStackObject(
38198- StackObjctSize, Align(4), false); // 80-bit = 10 bytes, aligned to 12
38199- int FalseSlot = MFI.CreateStackObject(StackObjctSize, Align(4), false);
38200- int ResultSlot = MFI.CreateStackObject(StackObjctSize, Align(4), false);
38201-
38202- // Store x87 values to stack using pseudo instruction
38203- // ST_FpP80m will be handled by the FP stackifier
38169+ // Allocate stack slots (f80 is 80 bits = 10 bytes, padded to 12 bytes so it splits into whole 32-bit words)
38170+ unsigned StackObjectSize = 12;
38171+ int TrueSlot = MFI.CreateStackObject(StackObjectSize, Align(4), false);
38172+ int FalseSlot = MFI.CreateStackObject(StackObjectSize, Align(4), false);
38173+ int ResultSlot = MFI.CreateStackObject(StackObjectSize, Align(4), false);
38174+
38175+ // Store f80 values to stack
3820438176 storeFpToSlot(X86::ST_FpP80m, TrueSlot, TrueReg);
3820538177 storeFpToSlot(X86::ST_FpP80m, FalseSlot, FalseReg);
3820638178
38207- // Process 3 x i32 parts (bytes 0-3, 4-7, 8-11 )
38208- emitCtSelect(StackObjctSize /RegSizeInByte, TrueSlot, FalseSlot, ResultSlot, true );
38179+ // Use pseudo instruction for selection (3 x 32-bit values)
38180+ emitCtSelectWithPseudo(StackObjectSize /RegSizeInByte, TrueSlot, FalseSlot, ResultSlot);
3820938181
38210- // Load final f80 result back to x87 stack using pseudo instruction
38182+ // Load result back as f80
3821138183 addFrameReference(BuildMI(*BB, MI, MIMD, TII->get(X86::LD_Fp80m), DestReg),
3821238184 ResultSlot);
3821338185 break;
@@ -38300,10 +38272,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
3830038272 return emitCTSelectI386WithFpType(MI, BB, X86::CTSELECT_I386_FP64rr);
3830138273 case X86::CTSELECT_I386_FP80rr:
3830238274 return emitCTSelectI386WithFpType(MI, BB, X86::CTSELECT_I386_FP80rr);
38303- case X86::CTSELECT_VR64rr:
38304- return EmitLoweredSelect(
38305- MI, BB); // TODO: Implement this to generate for Constant time version
38306-
38275+
3830738276 case X86::FP80_ADDr:
3830838277 case X86::FP80_ADDm32: {
3830938278 // Change the floating point control register to use double extended
0 commit comments