@@ -5688,6 +5688,13 @@ let Predicates = [UseSSE41, OptForSize] in {
56885688// SSE4.1 - Packed Bit Test
56895689//===----------------------------------------------------------------------===//
56905690
5691+ // Matches X86ptest only when ZF is the sole flag used, so its operands may be
5692+ // commuted. If CF is used, commuting would change which operand is inverted.
5693+ def X86ptest_commutable : PatFrag<(ops node:$src1, node:$src2),
5694+ (X86ptest node:$src1, node:$src2), [{
5695+ return onlyUsesZeroFlag(SDValue(Node, 0)); // predicate checks result 0 of the matched node
5696+ }]>;
5697+
56915698// ptest instruction we'll lower to this in X86ISelLowering primarily from
56925699// the intel intrinsic that corresponds to this.
56935700let Defs = [EFLAGS], Predicates = [HasAVX] in {
@@ -5723,6 +5730,17 @@ def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
57235730 Sched<[SchedWriteVecTest.XMM.Folded, SchedWriteVecTest.XMM.ReadAfterFold]>;
57245731}
57255732
5733+ let Predicates = [HasAVX] in { // commuted ptest, VEX-encoded 128/256-bit forms
5734+ def : Pat<(X86ptest_commutable (loadv2i64 addr:$src2), VR128:$src1), // load is the node's FIRST operand
5735+ (VPTESTrm VR128:$src1, addr:$src2)>; // emitted swapped: register first, memory second
5736+ def : Pat<(X86ptest_commutable (loadv4i64 addr:$src2), VR256:$src1),
5737+ (VPTESTYrm VR256:$src1, addr:$src2)>;
5738+ }
5739+ let Predicates = [UseSSE41] in { // same swap for the non-VEX PTESTrm
5740+ def : Pat<(X86ptest_commutable (memopv2i64 addr:$src2), VR128:$src1), // memop, not load: presumably an alignment requirement -- confirm
5741+ (PTESTrm VR128:$src1, addr:$src2)>;
5742+ }
5743+
57265744// The bit test instructions below are AVX only
57275745multiclass avx_bittest<bits<8> opc, string OpcodeStr, RegisterClass RC,
57285746 X86MemOperand x86memop, PatFrag mem_frag, ValueType vt,
@@ -5737,6 +5755,13 @@ multiclass avx_bittest<bits<8> opc, string OpcodeStr, RegisterClass RC,
57375755 Sched<[sched.Folded, sched.ReadAfterFold]>, VEX;
57385756}
57395757
5758+ // Matches X86testp (testps/testpd) only when ZF is the sole flag used, so its operands
5759+ // may be commuted. If CF is used, commuting would change which operand is inverted.
5760+ def X86testp_commutable : PatFrag<(ops node:$src1, node:$src2),
5761+ (X86testp node:$src1, node:$src2), [{
5762+ return onlyUsesZeroFlag(SDValue(Node, 0)); // predicate checks result 0 of the matched node
5763+ }]>;
5764+
57405765let Defs = [EFLAGS], Predicates = [HasAVX] in {
57415766let ExeDomain = SSEPackedSingle in {
57425767defm VTESTPS : avx_bittest<0x0E, "vtestps", VR128, f128mem, loadv4f32, v4f32,
@@ -5752,6 +5777,18 @@ defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, loadv4f64, v4f64,
57525777}
57535778}
57545779
5780+ let Predicates = [HasAVX] in { // commuted testps/testpd: load is the node's first operand, the instruction's second
5781+ def : Pat<(X86testp_commutable (loadv4f32 addr:$src2), VR128:$src), // v4f32 -> VTESTPSrm
5782+ (VTESTPSrm VR128:$src, addr:$src2)>;
5783+ def : Pat<(X86testp_commutable (loadv8f32 addr:$src2), VR256:$src), // v8f32 -> VTESTPSYrm
5784+ (VTESTPSYrm VR256:$src, addr:$src2)>;
5785+
5786+ def : Pat<(X86testp_commutable (loadv2f64 addr:$src2), VR128:$src), // v2f64 -> VTESTPDrm
5787+ (VTESTPDrm VR128:$src, addr:$src2)>;
5788+ def : Pat<(X86testp_commutable (loadv4f64 addr:$src2), VR256:$src), // v4f64 -> VTESTPDYrm
5789+ (VTESTPDYrm VR256:$src, addr:$src2)>;
5790+ }
5791+
57555792//===----------------------------------------------------------------------===//
57565793// SSE4.1 - Misc Instructions
57575794//===----------------------------------------------------------------------===//
0 commit comments