Skip to content

Commit dc9b327

Browse files
kumarakwizardengineer
authored and committed
[CT] fix mismatch of register type for CTSELECT_I386_GR16 pseudo instruction
1 parent a5702d4 commit dc9b327

File tree

7 files changed

+440
-332
lines changed

7 files changed

+440
-332
lines changed

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6834,7 +6834,6 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
68346834
return;
68356835
}
68366836
case Intrinsic::ct_select: {
6837-
68386837
SDLoc DL = getCurSDLoc();
68396838

68406839
SDValue Cond = getValue(I.getArgOperand(0)); // i1

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 49 additions & 80 deletions
Original file line number | Diff line number | Diff line change
@@ -38033,11 +38033,13 @@ static MachineBasicBlock *emitCTSelectI386WithConditionMaterialization(
3803338033
Register TmpMaskReg;
3803438034

3803538035
// Determine the register class for tmp_mask based on the data type
38036-
if (InternalPseudoOpcode == X86::CTSELECT_I386_INT_GR8rr ||
38037-
InternalPseudoOpcode == X86::CTSELECT_I386_INT_GR16rr ||
38038-
InternalPseudoOpcode == X86::CTSELECT_I386_INT_GR32rr) {
38036+
if (InternalPseudoOpcode == X86::CTSELECT_I386_INT_GR8rr)
38037+
TmpMaskReg = MRI.createVirtualRegister(&X86::GR8RegClass);
38038+
else if (InternalPseudoOpcode == X86::CTSELECT_I386_INT_GR16rr)
38039+
TmpMaskReg = MRI.createVirtualRegister(&X86::GR16RegClass);
38040+
else if (InternalPseudoOpcode == X86::CTSELECT_I386_INT_GR32rr)
3803938041
TmpMaskReg = MRI.createVirtualRegister(&X86::GR32RegClass);
38040-
} else {
38042+
else {
3804138043
llvm_unreachable("Unknown internal pseudo opcode");
3804238044
}
3804338045

@@ -38075,139 +38077,109 @@ static MachineBasicBlock *emitCTSelectI386WithFpType(MachineInstr &MI,
3807538077
Register CondByteReg = MRI.createVirtualRegister(&X86::GR8RegClass);
3807638078
BuildMI(*BB, MI, MIMD, TII->get(X86::SETCCr), CondByteReg).addImm(OppCC);
3807738079

38078-
// Create mask from condition: 0x00000000 or 0xFFFFFFFF
38079-
unsigned MaskReg = MRI.createVirtualRegister(&X86::GR32RegClass);
38080-
unsigned ExtReg = MRI.createVirtualRegister(&X86::GR32RegClass);
38081-
38082-
// Zero-extend i8 condition to i32
38083-
BuildMI(*BB, MI, MIMD, TII->get(X86::MOVZX32rr8), ExtReg)
38084-
.addReg(CondByteReg, RegState::Kill);
38085-
38086-
// Negate to create mask
38087-
BuildMI(*BB, MI, MIMD, TII->get(X86::NEG32r), MaskReg)
38088-
.addReg(ExtReg, RegState::Kill);
38089-
38090-
// Create inverted mask
38091-
unsigned InvMaskReg = MRI.createVirtualRegister(&X86::GR32RegClass);
38092-
BuildMI(*BB, MI, MIMD, TII->get(X86::NOT32r), InvMaskReg).addReg(MaskReg);
38093-
3809438080
auto storeFpToSlot = [&](unsigned Opcode, int Slot, Register Reg) {
3809538081
addFrameReference(BuildMI(*BB, MI, MIMD, TII->get(Opcode)), Slot)
3809638082
.addReg(Reg, RegState::Kill);
3809738083
};
3809838084

38099-
auto emitCtSelect = [&](unsigned NumValues, int TrueSlot, int FalseSlot,
38100-
int ResultSlot, bool KillMaskRegs) {
38085+
auto emitCtSelectWithPseudo = [&](unsigned NumValues, int TrueSlot, int FalseSlot, int ResultSlot) {
3810138086
for (unsigned Val = 0; Val < NumValues; ++Val) {
3810238087
unsigned Offset = Val * RegSizeInByte;
38103-
unsigned TrueReg = MRI.createVirtualRegister(&X86::GR32RegClass);
38104-
BuildMI(*BB, MI, MIMD, TII->get(X86::MOV32rm), TrueReg)
38088+
38089+
// Load true and false values from stack as 32-bit integers
38090+
unsigned TrueIntReg = MRI.createVirtualRegister(&X86::GR32RegClass);
38091+
BuildMI(*BB, MI, MIMD, TII->get(X86::MOV32rm), TrueIntReg)
3810538092
.addFrameIndex(TrueSlot)
3810638093
.addImm(1)
3810738094
.addReg(0)
3810838095
.addImm(Offset)
3810938096
.addReg(0);
3811038097

38111-
unsigned FalseReg = MRI.createVirtualRegister(&X86::GR32RegClass);
38112-
BuildMI(*BB, MI, MIMD, TII->get(X86::MOV32rm), FalseReg)
38098+
unsigned FalseIntReg = MRI.createVirtualRegister(&X86::GR32RegClass);
38099+
BuildMI(*BB, MI, MIMD, TII->get(X86::MOV32rm), FalseIntReg)
3811338100
.addFrameIndex(FalseSlot)
3811438101
.addImm(1)
3811538102
.addReg(0)
3811638103
.addImm(Offset)
3811738104
.addReg(0);
3811838105

38119-
unsigned MaskedTrueReg = MRI.createVirtualRegister(&X86::GR32RegClass);
38120-
unsigned MaskedFalseReg = MRI.createVirtualRegister(&X86::GR32RegClass);
38121-
unsigned ResultReg = MRI.createVirtualRegister(&X86::GR32RegClass);
38122-
38123-
bool KillMasksNow = KillMaskRegs && Val + 1 == NumValues;
38124-
38125-
auto TrueMIB =
38126-
BuildMI(*BB, MI, MIMD, TII->get(X86::AND32rr), MaskedTrueReg);
38127-
TrueMIB.addReg(TrueReg, RegState::Kill);
38128-
if (KillMasksNow)
38129-
TrueMIB.addReg(MaskReg, RegState::Kill);
38130-
else
38131-
TrueMIB.addReg(MaskReg);
38132-
38133-
auto FalseMIB =
38134-
BuildMI(*BB, MI, MIMD, TII->get(X86::AND32rr), MaskedFalseReg);
38135-
FalseMIB.addReg(FalseReg, RegState::Kill);
38136-
if (KillMasksNow)
38137-
FalseMIB.addReg(InvMaskReg, RegState::Kill);
38138-
else
38139-
FalseMIB.addReg(InvMaskReg);
38140-
38141-
BuildMI(*BB, MI, MIMD, TII->get(X86::OR32rr), ResultReg)
38142-
.addReg(MaskedTrueReg, RegState::Kill)
38143-
.addReg(MaskedFalseReg, RegState::Kill);
38144-
38106+
// Use CTSELECT_I386_INT_GR32 pseudo instruction for constant-time selection
38107+
unsigned ResultIntReg = MRI.createVirtualRegister(&X86::GR32RegClass);
38108+
unsigned TmpByteReg = MRI.createVirtualRegister(&X86::GR8RegClass);
38109+
unsigned TmpMaskReg = MRI.createVirtualRegister(&X86::GR32RegClass);
38110+
38111+
BuildMI(*BB, MI, MIMD, TII->get(X86::CTSELECT_I386_INT_GR32rr))
38112+
.addDef(ResultIntReg) // dst (output)
38113+
.addDef(TmpByteReg) // tmp_byte (output)
38114+
.addDef(TmpMaskReg) // tmp_mask (output)
38115+
.addReg(FalseIntReg) // src1 (input) - false value
38116+
.addReg(TrueIntReg) // src2 (input) - true value
38117+
.addReg(CondByteReg); // pre-materialized condition byte (input)
38118+
38119+
// Store result back to result slot
3814538120
BuildMI(*BB, MI, MIMD, TII->get(X86::MOV32mr))
3814638121
.addFrameIndex(ResultSlot)
3814738122
.addImm(1)
3814838123
.addReg(0)
3814938124
.addImm(Offset)
3815038125
.addReg(0)
38151-
.addReg(ResultReg, RegState::Kill);
38126+
.addReg(ResultIntReg, RegState::Kill);
3815238127
}
3815338128
};
3815438129

3815538130
switch (pseudoInstr) {
3815638131
case X86::CTSELECT_I386_FP32rr: {
38157-
38158-
// Allocate stack slot for result (4 bytes for f32)
38132+
// Allocate stack slots (4 bytes for f32)
3815938133
int ResultSlot = MFI.CreateStackObject(RegSizeInByte, Align(4), false);
3816038134
int TrueSlot = MFI.CreateStackObject(RegSizeInByte, Align(4), false);
3816138135
int FalseSlot = MFI.CreateStackObject(RegSizeInByte, Align(4), false);
3816238136

38163-
// Store f32 to stack using pseudo instruction (ST_Fp32m will be handled by
38164-
// FP stackifier)
38137+
// Store f32 values to stack
3816538138
storeFpToSlot(X86::ST_Fp32m, TrueSlot, TrueReg);
3816638139
storeFpToSlot(X86::ST_Fp32m, FalseSlot, FalseReg);
3816738140

38168-
emitCtSelect(1, TrueSlot, FalseSlot, ResultSlot, true);
38141+
// Use pseudo instruction for selection (1 x 32-bit value)
38142+
emitCtSelectWithPseudo(1, TrueSlot, FalseSlot, ResultSlot);
3816938143

38170-
// Load as f32 to x87 stack using pseudo instruction
38144+
// Load result back as f32
3817138145
addFrameReference(BuildMI(*BB, MI, MIMD, TII->get(X86::LD_Fp32m), DestReg),
3817238146
ResultSlot);
3817338147
break;
3817438148
}
3817538149
case X86::CTSELECT_I386_FP64rr: {
3817638150
unsigned StackSlotSize = 8;
38177-
// Allocate stack slots for temporaries (8 bytes for f64)
38151+
// Allocate stack slots (8 bytes for f64)
3817838152
int TrueSlot = MFI.CreateStackObject(StackSlotSize, Align(4), false);
3817938153
int FalseSlot = MFI.CreateStackObject(StackSlotSize, Align(4), false);
3818038154
int ResultSlot = MFI.CreateStackObject(StackSlotSize, Align(4), false);
3818138155

38182-
// Store x87 values to stack using pseudo instruction
38183-
// ST_Fp64m will be handled by the FP stackifier
38156+
// Store f64 values to stack
3818438157
storeFpToSlot(X86::ST_Fp64m, TrueSlot, TrueReg);
3818538158
storeFpToSlot(X86::ST_Fp64m, FalseSlot, FalseReg);
3818638159

38187-
emitCtSelect(StackSlotSize/RegSizeInByte, TrueSlot, FalseSlot, ResultSlot, true);
38160+
// Use pseudo instruction for selection (2 x 32-bit values)
38161+
emitCtSelectWithPseudo(StackSlotSize/RegSizeInByte, TrueSlot, FalseSlot, ResultSlot);
3818838162

38189-
// Load final f64 result back to x87 stack using pseudo instruction
38163+
// Load result back as f64
3819038164
addFrameReference(BuildMI(*BB, MI, MIMD, TII->get(X86::LD_Fp64m), DestReg),
3819138165
ResultSlot);
3819238166
break;
3819338167
}
3819438168
case X86::CTSELECT_I386_FP80rr: {
38195-
// Allocate stack slots for temporaries
38196-
unsigned StackObjctSize = 12;
38197-
int TrueSlot = MFI.CreateStackObject(
38198-
StackObjctSize, Align(4), false); // 80-bit = 10 bytes, aligned to 12
38199-
int FalseSlot = MFI.CreateStackObject(StackObjctSize, Align(4), false);
38200-
int ResultSlot = MFI.CreateStackObject(StackObjctSize, Align(4), false);
38201-
38202-
// Store x87 values to stack using pseudo instruction
38203-
// ST_FpP80m will be handled by the FP stackifier
38169+
// Allocate stack slots (12 bytes for f80 - 80-bit = 10 bytes, aligned to 12)
38170+
unsigned StackObjectSize = 12;
38171+
int TrueSlot = MFI.CreateStackObject(StackObjectSize, Align(4), false);
38172+
int FalseSlot = MFI.CreateStackObject(StackObjectSize, Align(4), false);
38173+
int ResultSlot = MFI.CreateStackObject(StackObjectSize, Align(4), false);
38174+
38175+
// Store f80 values to stack
3820438176
storeFpToSlot(X86::ST_FpP80m, TrueSlot, TrueReg);
3820538177
storeFpToSlot(X86::ST_FpP80m, FalseSlot, FalseReg);
3820638178

38207-
// Process 3 x i32 parts (bytes 0-3, 4-7, 8-11)
38208-
emitCtSelect(StackObjctSize/RegSizeInByte, TrueSlot, FalseSlot, ResultSlot, true);
38179+
// Use pseudo instruction for selection (3 x 32-bit values)
38180+
emitCtSelectWithPseudo(StackObjectSize/RegSizeInByte, TrueSlot, FalseSlot, ResultSlot);
3820938181

38210-
// Load final f80 result back to x87 stack using pseudo instruction
38182+
// Load result back as f80
3821138183
addFrameReference(BuildMI(*BB, MI, MIMD, TII->get(X86::LD_Fp80m), DestReg),
3821238184
ResultSlot);
3821338185
break;
@@ -38300,10 +38272,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
3830038272
return emitCTSelectI386WithFpType(MI, BB, X86::CTSELECT_I386_FP64rr);
3830138273
case X86::CTSELECT_I386_FP80rr:
3830238274
return emitCTSelectI386WithFpType(MI, BB, X86::CTSELECT_I386_FP80rr);
38303-
case X86::CTSELECT_VR64rr:
38304-
return EmitLoweredSelect(
38305-
MI, BB); // TODO: Implement this to generate for Constant time version
38306-
38275+
3830738276
case X86::FP80_ADDr:
3830838277
case X86::FP80_ADDm32: {
3830938278
// Change the floating point control register to use double extended

llvm/lib/Target/X86/X86InstrCompiler.td

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -753,9 +753,6 @@ let hasSideEffects = 1,
753753
defm CTSELECT_I386_FP64 : CTSELECT_I386_INITIAL<RFP64, f64>;
754754

755755
defm CTSELECT_I386_FP80 : CTSELECT_I386_INITIAL<RFP80, f80>;
756-
757-
let Predicates = [HasMMX] in
758-
defm CTSELECT_VR64 : CTSELECT_I386_INITIAL<VR64, x86mmx>;
759756
}
760757

761758
// Pattern matching for non-native-CMOV CTSELECT (routes to custom inserter for condition materialization)

llvm/lib/Target/X86/X86InstrInfo.cpp

Lines changed: 0 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -6999,15 +6999,6 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
69996999
case X86::CTSELECT_V4F64:
70007000
case X86::CTSELECT_V8F32:
70017001
return expandCtSelectVector(MI);
7002-
7003-
// i386-specific CTSELECT expansion (post-RA, constant-time)
7004-
//case X86::CTSELECT_I386_GR16rr:
7005-
//case X86::CTSELECT_I386_GR32rr:
7006-
// return expandCtSelectI386(MI);
7007-
7008-
// VR64-specific CTSELECT expansion (post-RA, constant-time)
7009-
//case X86::CTSELECT_I386_VR64rr:
7010-
// return expandCtSelectI386VR64(MI);
70117002
}
70127003
return false;
70137004
}

llvm/lib/Target/X86/X86InstrInfo.h

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -730,12 +730,6 @@ class X86InstrInfo final : public X86GenInstrInfo {
730730

731731
bool expandCtSelectVector(MachineInstr &MI) const;
732732

733-
/// Expand i386-specific CTSELECT pseudo instructions (post-RA, constant-time)
734-
bool expandCtSelectI386(MachineInstr &MI) const;
735-
736-
/// Expand VR64-specific CTSELECT pseudo instructions (post-RA, constant-time)
737-
bool expandCtSelectI386VR64(MachineInstr &MI) const;
738-
739733
/// Returns true iff the routine could find two commutable operands in the
740734
/// given machine instruction with 3 vector inputs.
741735
/// The 'SrcOpIdx1' and 'SrcOpIdx2' are INPUT and OUTPUT arguments. Their

0 commit comments

Comments
 (0)