@@ -106,6 +106,211 @@ let Predicates = [HasCMOV, HasNDD] in {
106106 def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, timm:$cond, EFLAGS),
107107 (CMOV64rm_ND GR64:$src2, addr:$src1, (inv_cond_XFORM timm:$cond))>;
108108}
109+
110+ // Scalar ctselect pseudo instructions; X86ctselect is pattern-matched directly
111+ // to them. A machine pass later lowers these pseudos into cmov, which keeps
112+ // backend optimizations from rewriting the select before it is expanded.
113+ let Uses = [EFLAGS], isNotDuplicable = 1, isPseudo = 1 in {
114+
115+ multiclass CTSELECT<X86TypeInfo t> {
116+ // register-register form: both candidate values are in t.RegClass registers
117+ let isCommutable = 0, SchedRW = [WriteCMOV], Predicates = [HasNativeCMOV],
118+ AsmString = "ctselect\t$dst, $src1, $src2, $cond" in {
119+ def rr : PseudoI<(outs t.RegClass:$dst),
120+ (ins t.RegClass:$src1, t.RegClass:$src2, i8imm:$cond),
121+ [(set t.RegClass:$dst, (X86ctselect t.RegClass:$src1, t.RegClass:$src2, timm:$cond, EFLAGS))]>;
122+ }
123+
124+ // register-memory form: the second candidate is loaded via t.LoadNode from addr:$src2
125+ let SchedRW = [WriteCMOV.Folded, WriteCMOV.ReadAfterFold], Predicates = [HasNativeCMOV],
126+ AsmString = "ctselect\t$dst, $src1, $src2, $cond" in {
127+ def rm : PseudoI<(outs t.RegClass:$dst),
128+ (ins t.RegClass:$src1, t.MemOperand:$src2, i8imm:$cond),
129+ [(set t.RegClass:$dst, (X86ctselect t.RegClass:$src1, (t.LoadNode addr:$src2), timm:$cond, EFLAGS))]>;
130+ }
131+ } // multiclass CTSELECT
132+ } // let Uses = [EFLAGS], isNotDuplicable = 1, isPseudo = 1
133+
134+ let isCodeGenOnly = 1, hasSideEffects = 1, ForceDisassemble = 1 in { // instantiate the scalar ctselect pseudos for the Xi16/Xi32/Xi64 type infos
135+ let Constraints = "$dst = $src1" in { // $dst is tied to $src1 for every rr/rm pseudo below
136+ defm CTSELECT16 : CTSELECT<Xi16>; // defines CTSELECT16rr / CTSELECT16rm
137+ defm CTSELECT32 : CTSELECT<Xi32>; // defines CTSELECT32rr / CTSELECT32rm
138+ defm CTSELECT64 : CTSELECT<Xi64>; // defines CTSELECT64rr / CTSELECT64rm
139+ }
140+ }
141+
142+ // CTSELECT_VEC base class: pseudo with an empty pattern list, parameterized by a vector register class (VRc) and a GPR class (GRc).
143+ class CTSELECT_VEC<RegisterClass VRc, RegisterClass GRc>
144+ : PseudoI<
145+ (outs VRc:$dst, VRc:$tmpx, GRc:$tmpg), // result plus one vector temp and one GPR temp, all declared as outputs
146+ (ins VRc:$t, VRc:$f, i8imm:$cond), // the two candidate values and an 8-bit immediate condition
147+ [] // no ISel pattern on the instruction itself; matching is done by separate Pat records
148+ > {
149+ let Uses = [EFLAGS]; // the pseudo reads EFLAGS
150+ let isPseudo = 1;
151+ let isNotDuplicable = 1;
152+ let hasSideEffects = 1;
153+ let AsmString = "ctselect\t$dst, $f, $t, $cond"; // NOTE(review): prints $f before $t although ins lists $t first; confirm this order is intended
154+ let SchedRW = []; // no scheduling read/write resources are attached
155+ }
156+
157+ // Width-specific class aliases; each pairs its vector register class with GR32 for the GPR temporary.
158+ class CTSELECT_VEC128 : CTSELECT_VEC<VR128, GR32>;
159+ class CTSELECT_VEC128X : CTSELECT_VEC<VR128X, GR32>;
160+ class CTSELECT_VEC256 : CTSELECT_VEC<VR256, GR32>;
161+ class CTSELECT_VEC512 : CTSELECT_VEC<VR512, GR32>; // NOTE(review): no pseudo in this hunk derives from this alias; confirm it is used elsewhere
162+
163+
164+ //===----------------------------------------------------------------------===//
165+ // 128-bit pseudos (SSE2 baseline; we use PXOR/PAND/MOVD/PSHUFD in the expander)
166+ //===----------------------------------------------------------------------===//
167+
168+ let Predicates = [HasSSE1] in { // NOTE(review): the header above says SSE2 baseline, yet this def is gated on HasSSE1; confirm the expander supports SSE1-only targets
169+
170+ def CTSELECT_V4F32 : CTSELECT_VEC128 {
171+ let Constraints = "@earlyclobber $dst,@earlyclobber $tmpx,@earlyclobber $tmpg"; // all three outputs are marked early-clobber
172+ }
173+ }
174+
175+ let Predicates = [HasSSE2] in { // remaining 128-bit VR128 pseudos, one per vector type; each marks all three outputs early-clobber
176+
177+ def CTSELECT_V2F64 : CTSELECT_VEC128 {
178+ let Constraints = "@earlyclobber $dst,@earlyclobber $tmpx,@earlyclobber $tmpg";
179+ }
180+ def CTSELECT_V4I32 : CTSELECT_VEC128 {
181+ let Constraints = "@earlyclobber $dst,@earlyclobber $tmpx,@earlyclobber $tmpg";
182+ }
183+ def CTSELECT_V2I64 : CTSELECT_VEC128 {
184+ let Constraints = "@earlyclobber $dst,@earlyclobber $tmpx,@earlyclobber $tmpg";
185+ }
186+ def CTSELECT_V8I16 : CTSELECT_VEC128 {
187+ let Constraints = "@earlyclobber $dst,@earlyclobber $tmpx,@earlyclobber $tmpg";
188+ }
189+ def CTSELECT_V16I8 : CTSELECT_VEC128 {
190+ let Constraints = "@earlyclobber $dst,@earlyclobber $tmpx,@earlyclobber $tmpg";
191+ }
192+
193+ // v8f16 variant: keep this def only if the build defines the v8f16 type; otherwise comment it out.
194+ def CTSELECT_V8F16 : CTSELECT_VEC128 {
195+ let Constraints = "@earlyclobber $dst,@earlyclobber $tmpx,@earlyclobber $tmpg";
196+ }
197+ }
198+
199+ let Predicates = [HasAVX] in { // VR128X-based 128-bit pseudos; NOTE(review): no Pat in this hunk selects the *X pseudos, so confirm where they are created
200+
201+ def CTSELECT_V4F32X : CTSELECT_VEC128X {
202+ let Constraints = "@earlyclobber $dst,@earlyclobber $tmpx,@earlyclobber $tmpg";
203+ }
204+ def CTSELECT_V2F64X : CTSELECT_VEC128X {
205+ let Constraints = "@earlyclobber $dst,@earlyclobber $tmpx,@earlyclobber $tmpg";
206+ }
207+ def CTSELECT_V4I32X : CTSELECT_VEC128X {
208+ let Constraints = "@earlyclobber $dst,@earlyclobber $tmpx,@earlyclobber $tmpg";
209+ }
210+ def CTSELECT_V2I64X : CTSELECT_VEC128X {
211+ let Constraints = "@earlyclobber $dst,@earlyclobber $tmpx,@earlyclobber $tmpg";
212+ }
213+ def CTSELECT_V8I16X : CTSELECT_VEC128X {
214+ let Constraints = "@earlyclobber $dst,@earlyclobber $tmpx,@earlyclobber $tmpg";
215+ }
216+ def CTSELECT_V16I8X : CTSELECT_VEC128X {
217+ let Constraints = "@earlyclobber $dst,@earlyclobber $tmpx,@earlyclobber $tmpg";
218+ }
219+
220+ // v8f16 variant: keep this def only if the build defines the v8f16 type; otherwise comment it out.
221+ def CTSELECT_V8F16X : CTSELECT_VEC128X {
222+ let Constraints = "@earlyclobber $dst,@earlyclobber $tmpx,@earlyclobber $tmpg";
223+ }
224+ }
225+
226+ //===----------------------------------------------------------------------===//
227+ // 256-bit pseudos (VR256 operands, GR32 temporary; all outputs early-clobber)
228+ //===----------------------------------------------------------------------===//
229+ let Predicates = [HasAVX] in { // one pseudo per 256-bit vector type
230+
231+ def CTSELECT_V8F32 : CTSELECT_VEC256 {
232+ let Constraints = "@earlyclobber $dst,@earlyclobber $tmpx,@earlyclobber $tmpg";
233+ }
234+ def CTSELECT_V4F64 : CTSELECT_VEC256 {
235+ let Constraints = "@earlyclobber $dst,@earlyclobber $tmpx,@earlyclobber $tmpg";
236+ }
237+ def CTSELECT_V8I32 : CTSELECT_VEC256 {
238+ let Constraints = "@earlyclobber $dst,@earlyclobber $tmpx,@earlyclobber $tmpg";
239+ }
240+ def CTSELECT_V4I64 : CTSELECT_VEC256 {
241+ let Constraints = "@earlyclobber $dst,@earlyclobber $tmpx,@earlyclobber $tmpg";
242+ }
243+ def CTSELECT_V16I16 : CTSELECT_VEC256 {
244+ let Constraints = "@earlyclobber $dst,@earlyclobber $tmpx,@earlyclobber $tmpg";
245+ }
246+ def CTSELECT_V32I8 : CTSELECT_VEC256 {
247+ let Constraints = "@earlyclobber $dst,@earlyclobber $tmpx,@earlyclobber $tmpg";
248+ }
249+
250+ // v16f16 variant: keep this def only if the build defines the v16f16 type; otherwise comment it out.
251+ def CTSELECT_V16F16 : CTSELECT_VEC256 {
252+ let Constraints = "@earlyclobber $dst,@earlyclobber $tmpx,@earlyclobber $tmpg";
253+ }
254+ }
255+
256+ //===----------------------------------------------------------------------===//
257+ // Selection patterns: X86ctselect(...), EFLAGS -> CTSELECT_V*
258+ //
259+ // NOTE:
260+ // * The SDNode carries Glue from CMP/TEST (due to SDNPInGlue).
261+ // * We list EFLAGS explicitly in the pattern (X86 style) to model the arch read.
262+ // * Temps (tmpx, tmpg) are not in the pattern; they're outs allocated by RA.
263+ //===----------------------------------------------------------------------===//
264+
265+ let Predicates = [HasSSE1] in { // same predicate as the CTSELECT_V4F32 pseudo definition
266+
267+ // 128-bit float (bitwise-equivalent ops in expander): maps v4f32 X86ctselect to CTSELECT_V4F32, forwarding $t, $f and the i8 condition immediate
268+ def : Pat<(v4f32 (X86ctselect VR128:$t, VR128:$f, (i8 timm:$cc), EFLAGS)),
269+ (CTSELECT_V4F32 VR128:$t, VR128:$f, timm:$cc)>;
270+ }
271+
272+ let Predicates = [HasSSE2] in { // one Pat per 128-bit type; each forwards $t, $f and the i8 condition immediate to the matching pseudo
273+
274+ // 128-bit integer types, plus v2f64
275+ def : Pat<(v4i32 (X86ctselect VR128:$t, VR128:$f, (i8 timm:$cc), EFLAGS)),
276+ (CTSELECT_V4I32 VR128:$t, VR128:$f, timm:$cc)>;
277+ def : Pat<(v2i64 (X86ctselect VR128:$t, VR128:$f, (i8 timm:$cc), EFLAGS)),
278+ (CTSELECT_V2I64 VR128:$t, VR128:$f, timm:$cc)>;
279+ def : Pat<(v8i16 (X86ctselect VR128:$t, VR128:$f, (i8 timm:$cc), EFLAGS)),
280+ (CTSELECT_V8I16 VR128:$t, VR128:$f, timm:$cc)>;
281+ def : Pat<(v16i8 (X86ctselect VR128:$t, VR128:$f, (i8 timm:$cc), EFLAGS)),
282+ (CTSELECT_V16I8 VR128:$t, VR128:$f, timm:$cc)>;
283+ def : Pat<(v2f64 (X86ctselect VR128:$t, VR128:$f, (i8 timm:$cc), EFLAGS)),
284+ (CTSELECT_V2F64 VR128:$t, VR128:$f, timm:$cc)>;
285+
286+ // 128-bit f16 (optional; pairs with the CTSELECT_V8F16 pseudo)
287+ def : Pat<(v8f16 (X86ctselect VR128:$t, VR128:$f, (i8 timm:$cc), EFLAGS)),
288+ (CTSELECT_V8F16 VR128:$t, VR128:$f, timm:$cc)>;
289+ }
290+
291+ let Predicates = [HasAVX] in { // one Pat per 256-bit type; each forwards $t, $f and the i8 condition immediate to the matching pseudo
292+
293+ // 256-bit integer
294+ def : Pat<(v8i32 (X86ctselect VR256:$t, VR256:$f, (i8 timm:$cc), EFLAGS)),
295+ (CTSELECT_V8I32 VR256:$t, VR256:$f, timm:$cc)>;
296+ def : Pat<(v4i64 (X86ctselect VR256:$t, VR256:$f, (i8 timm:$cc), EFLAGS)),
297+ (CTSELECT_V4I64 VR256:$t, VR256:$f, timm:$cc)>;
298+ def : Pat<(v16i16 (X86ctselect VR256:$t, VR256:$f, (i8 timm:$cc), EFLAGS)),
299+ (CTSELECT_V16I16 VR256:$t, VR256:$f, timm:$cc)>;
300+ def : Pat<(v32i8 (X86ctselect VR256:$t, VR256:$f, (i8 timm:$cc), EFLAGS)),
301+ (CTSELECT_V32I8 VR256:$t, VR256:$f, timm:$cc)>;
302+
303+ // 256-bit float (bitwise-equivalent ops in expander)
304+ def : Pat<(v8f32 (X86ctselect VR256:$t, VR256:$f, (i8 timm:$cc), EFLAGS)),
305+ (CTSELECT_V8F32 VR256:$t, VR256:$f, timm:$cc)>;
306+ def : Pat<(v4f64 (X86ctselect VR256:$t, VR256:$f, (i8 timm:$cc), EFLAGS)),
307+ (CTSELECT_V4F64 VR256:$t, VR256:$f, timm:$cc)>;
308+
309+ // 256-bit f16 (optional; pairs with the CTSELECT_V16F16 pseudo)
310+ def : Pat<(v16f16 (X86ctselect VR256:$t, VR256:$f, (i8 timm:$cc), EFLAGS)),
311+ (CTSELECT_V16F16 VR256:$t, VR256:$f, timm:$cc)>;
312+ }
313+
109314let Predicates = [HasCMOV, HasCF] in {
110315 def : Pat<(X86cmov GR16:$src1, 0, timm:$cond, EFLAGS),
111316 (CFCMOV16rr GR16:$src1, (inv_cond_XFORM timm:$cond))>;
0 commit comments