[DAG] Peek through bitcasts when canonicalizing constants to the RHS on commutable instructions #112682
Conversation
…on commutable instructions

X86 is the beneficiary here as it often rematerializes 0/-1 vector constants as vXi32 and bitcasts them to the requested type.

Minor cleanup that helps with llvm#107423
@llvm/pr-subscribers-backend-x86 @llvm/pr-subscribers-llvm-selectiondag

Author: Simon Pilgrim (RKSimon)

Changes

X86 is the beneficiary here as it often rematerializes 0/-1 vector constants as vXi32 and bitcasts them to the requested type.

Minor cleanup that helps with #107423

Patch is 50.23 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/112682.diff

8 Files Affected:
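For context, every hunk below applies the same small tweak at a "canonicalize constant to RHS" site: look through no-op bitcasts before testing whether an operand is a constant. A minimal sketch of the pattern (the ISD::AND case is one representative from the hunks below; the bitcast all-ones operand is an illustrative example of what X86 commonly produces):

```cpp
// N0 may be e.g. (v2i64 bitcast (v4i32 build_vector <-1,-1,-1,-1>)): a
// constant, but hidden behind a bitcast, so a plain constant check on N0
// fails and the constant stays on the LHS. peekThroughBitcasts() walks
// through ISD::BITCAST nodes and returns the first non-bitcast value,
// exposing the BUILD_VECTOR underneath.
if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N0)) &&
    !DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1)))
  return DAG.getNode(ISD::AND, DL, VT, N1, N0); // commute: constant moves to the RHS
```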
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index ca91d35573c3ec..b09d33eb20296a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2668,8 +2668,8 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
return C;
// canonicalize constant to RHS
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
- !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N0)) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1)))
return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
if (areBitwiseNotOfEachother(N0, N1))
@@ -3048,8 +3048,8 @@ SDValue DAGCombiner::visitADDSAT(SDNode *N) {
return C;
// canonicalize constant to RHS
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
- !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N0)) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1)))
return DAG.getNode(Opcode, DL, VT, N1, N0);
// fold vector ops
@@ -3306,8 +3306,8 @@ SDValue DAGCombiner::visitADDO(SDNode *N) {
DAG.getUNDEF(CarryVT));
// canonicalize constant to RHS.
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
- !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N0)) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1)))
return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
// fold (addo x, 0) -> x + no carry out
@@ -4381,8 +4381,8 @@ SDValue DAGCombiner::visitMULFIX(SDNode *N) {
return DAG.getConstant(0, SDLoc(N), VT);
// Canonicalize constant to RHS (vector doesn't have to splat)
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
- !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N0)) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1)))
return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
// fold (mulfix x, 0, scale) -> 0
@@ -4410,8 +4410,8 @@ template <class MatchContextClass> SDValue DAGCombiner::visitMUL(SDNode *N) {
return C;
// canonicalize constant to RHS (vector doesn't have to splat)
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
- !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N0)) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1)))
return Matcher.getNode(ISD::MUL, DL, VT, N1, N0);
bool N1IsConst = false;
@@ -5156,8 +5156,8 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {
return C;
// canonicalize constant to RHS.
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
- !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N0)) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1)))
return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0);
if (VT.isVector()) {
@@ -5215,8 +5215,8 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
return C;
// canonicalize constant to RHS.
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
- !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N0)) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1)))
return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0);
if (VT.isVector()) {
@@ -5293,8 +5293,8 @@ SDValue DAGCombiner::visitAVG(SDNode *N) {
return C;
// canonicalize constant to RHS.
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
- !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N0)) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1)))
return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
if (VT.isVector())
@@ -5367,8 +5367,8 @@ SDValue DAGCombiner::visitABD(SDNode *N) {
return C;
// canonicalize constant to RHS.
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
- !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N0)) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1)))
return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
if (VT.isVector())
@@ -5465,8 +5465,8 @@ SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N0, N1);
// canonicalize constant to RHS (vector doesn't have to splat)
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
- !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N0)) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1)))
return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N1, N0);
// If the type is twice as wide is legal, transform the mulhu to a wider
@@ -5506,8 +5506,8 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N0, N1);
// canonicalize constant to RHS (vector doesn't have to splat)
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
- !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N0)) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1)))
return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N1, N0);
// (umul_lohi N0, 0) -> (0, 0)
@@ -5570,8 +5570,8 @@ SDValue DAGCombiner::visitMULO(SDNode *N) {
}
// canonicalize constant to RHS.
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
- !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N0)) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1)))
return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
// fold (mulo x, 0) -> 0 + no carry out
@@ -5784,8 +5784,8 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
return N0;
// canonicalize constant to RHS
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
- !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N0)) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1)))
return DAG.getNode(Opcode, DL, VT, N1, N0);
// fold vector ops
@@ -7048,8 +7048,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
return C;
// canonicalize constant to RHS
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
- !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N0)) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1)))
return DAG.getNode(ISD::AND, DL, VT, N1, N0);
if (areBitwiseNotOfEachother(N0, N1))
@@ -7945,8 +7945,8 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
return C;
// canonicalize constant to RHS
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
- !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N0)) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1)))
return DAG.getNode(ISD::OR, DL, VT, N1, N0);
// fold vector ops
@@ -9501,8 +9501,8 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
return C;
// canonicalize constant to RHS
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
- !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N0)) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1)))
return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
// fold vector ops
diff --git a/llvm/test/CodeGen/X86/avx2-arith.ll b/llvm/test/CodeGen/X86/avx2-arith.ll
index 90733dfb8465ef..44ab33ad67f272 100644
--- a/llvm/test/CodeGen/X86/avx2-arith.ll
+++ b/llvm/test/CodeGen/X86/avx2-arith.ll
@@ -122,7 +122,7 @@ define <32 x i8> @mul_v32i8(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
; CHECK-LABEL: mul_v32i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vpbroadcastw {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; CHECK-NEXT: vpand %ymm1, %ymm2, %ymm3
+; CHECK-NEXT: vpand %ymm2, %ymm1, %ymm3
; CHECK-NEXT: vpmaddubsw %ymm3, %ymm0, %ymm3
; CHECK-NEXT: vpand %ymm2, %ymm3, %ymm3
; CHECK-NEXT: vpandn %ymm1, %ymm2, %ymm1
diff --git a/llvm/test/CodeGen/X86/midpoint-int-vec-128.ll b/llvm/test/CodeGen/X86/midpoint-int-vec-128.ll
index 6fd3db3464decb..ee83a79b6dd550 100644
--- a/llvm/test/CodeGen/X86/midpoint-int-vec-128.ll
+++ b/llvm/test/CodeGen/X86/midpoint-int-vec-128.ll
@@ -2369,8 +2369,8 @@ define <16 x i8> @vec128_i8_signed_reg_reg(<16 x i8> %a1, <16 x i8> %a2) nounwin
; SSE41-NEXT: psrlw $1, %xmm1
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
-; SSE41-NEXT: movdqa %xmm3, %xmm4
-; SSE41-NEXT: pand %xmm2, %xmm4
+; SSE41-NEXT: movdqa %xmm2, %xmm4
+; SSE41-NEXT: pand %xmm3, %xmm4
; SSE41-NEXT: movdqa %xmm1, %xmm5
; SSE41-NEXT: pmaddubsw %xmm4, %xmm5
; SSE41-NEXT: pand %xmm3, %xmm5
@@ -2391,7 +2391,7 @@ define <16 x i8> @vec128_i8_signed_reg_reg(<16 x i8> %a1, <16 x i8> %a2) nounwin
; AVX1-NEXT: vpsrlw $1, %xmm1, %xmm1
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
-; AVX1-NEXT: vpand %xmm2, %xmm3, %xmm4
+; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX1-NEXT: vpmaddubsw %xmm4, %xmm1, %xmm4
; AVX1-NEXT: vpand %xmm3, %xmm4, %xmm4
; AVX1-NEXT: vpandn %xmm2, %xmm3, %xmm2
@@ -2432,7 +2432,7 @@ define <16 x i8> @vec128_i8_signed_reg_reg(<16 x i8> %a1, <16 x i8> %a2) nounwin
; XOP-FALLBACK-NEXT: vbroadcastss {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
; XOP-FALLBACK-NEXT: vpandn %xmm2, %xmm3, %xmm4
; XOP-FALLBACK-NEXT: vpmaddubsw %xmm4, %xmm1, %xmm4
-; XOP-FALLBACK-NEXT: vpand %xmm2, %xmm3, %xmm2
+; XOP-FALLBACK-NEXT: vpand %xmm3, %xmm2, %xmm2
; XOP-FALLBACK-NEXT: vpmaddubsw %xmm2, %xmm1, %xmm1
; XOP-FALLBACK-NEXT: vpperm {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[2],xmm4[2],xmm1[4],xmm4[4],xmm1[6],xmm4[6],xmm1[8],xmm4[8],xmm1[10],xmm4[10],xmm1[12],xmm4[12],xmm1[14],xmm4[14]
; XOP-FALLBACK-NEXT: vpaddb %xmm0, %xmm1, %xmm0
@@ -2450,7 +2450,7 @@ define <16 x i8> @vec128_i8_signed_reg_reg(<16 x i8> %a1, <16 x i8> %a2) nounwin
; XOPAVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
; XOPAVX1-NEXT: vpandn %xmm2, %xmm3, %xmm4
; XOPAVX1-NEXT: vpmaddubsw %xmm4, %xmm1, %xmm4
-; XOPAVX1-NEXT: vpand %xmm2, %xmm3, %xmm2
+; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
; XOPAVX1-NEXT: vpmaddubsw %xmm2, %xmm1, %xmm1
; XOPAVX1-NEXT: vpperm {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[2],xmm4[2],xmm1[4],xmm4[4],xmm1[6],xmm4[6],xmm1[8],xmm4[8],xmm1[10],xmm4[10],xmm1[12],xmm4[12],xmm1[14],xmm4[14]
; XOPAVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
@@ -2592,8 +2592,8 @@ define <16 x i8> @vec128_i8_unsigned_reg_reg(<16 x i8> %a1, <16 x i8> %a2) nounw
; SSE41-NEXT: psrlw $1, %xmm1
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
-; SSE41-NEXT: movdqa %xmm2, %xmm3
-; SSE41-NEXT: pand %xmm4, %xmm3
+; SSE41-NEXT: movdqa %xmm4, %xmm3
+; SSE41-NEXT: pand %xmm2, %xmm3
; SSE41-NEXT: movdqa %xmm1, %xmm5
; SSE41-NEXT: pmaddubsw %xmm3, %xmm5
; SSE41-NEXT: pand %xmm2, %xmm5
@@ -2616,7 +2616,7 @@ define <16 x i8> @vec128_i8_unsigned_reg_reg(<16 x i8> %a1, <16 x i8> %a2) nounw
; AVX1-NEXT: vpsrlw $1, %xmm1, %xmm1
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
-; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm4
+; AVX1-NEXT: vpand %xmm2, %xmm3, %xmm4
; AVX1-NEXT: vpmaddubsw %xmm4, %xmm1, %xmm4
; AVX1-NEXT: vpand %xmm2, %xmm4, %xmm4
; AVX1-NEXT: vpandn %xmm3, %xmm2, %xmm2
@@ -2659,7 +2659,7 @@ define <16 x i8> @vec128_i8_unsigned_reg_reg(<16 x i8> %a1, <16 x i8> %a2) nounw
; XOP-FALLBACK-NEXT: vbroadcastss {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
; XOP-FALLBACK-NEXT: vpandn %xmm2, %xmm3, %xmm4
; XOP-FALLBACK-NEXT: vpmaddubsw %xmm4, %xmm1, %xmm4
-; XOP-FALLBACK-NEXT: vpand %xmm2, %xmm3, %xmm2
+; XOP-FALLBACK-NEXT: vpand %xmm3, %xmm2, %xmm2
; XOP-FALLBACK-NEXT: vpmaddubsw %xmm2, %xmm1, %xmm1
; XOP-FALLBACK-NEXT: vpperm {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[2],xmm4[2],xmm1[4],xmm4[4],xmm1[6],xmm4[6],xmm1[8],xmm4[8],xmm1[10],xmm4[10],xmm1[12],xmm4[12],xmm1[14],xmm4[14]
; XOP-FALLBACK-NEXT: vpaddb %xmm0, %xmm1, %xmm0
@@ -2677,7 +2677,7 @@ define <16 x i8> @vec128_i8_unsigned_reg_reg(<16 x i8> %a1, <16 x i8> %a2) nounw
; XOPAVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
; XOPAVX1-NEXT: vpandn %xmm2, %xmm3, %xmm4
; XOPAVX1-NEXT: vpmaddubsw %xmm4, %xmm1, %xmm4
-; XOPAVX1-NEXT: vpand %xmm2, %xmm3, %xmm2
+; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
; XOPAVX1-NEXT: vpmaddubsw %xmm2, %xmm1, %xmm1
; XOPAVX1-NEXT: vpperm {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[2],xmm4[2],xmm1[4],xmm4[4],xmm1[6],xmm4[6],xmm1[8],xmm4[8],xmm1[10],xmm4[10],xmm1[12],xmm4[12],xmm1[14],xmm4[14]
; XOPAVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
@@ -2823,8 +2823,8 @@ define <16 x i8> @vec128_i8_signed_mem_reg(ptr %a1_addr, <16 x i8> %a2) nounwind
; SSE41-NEXT: psrlw $1, %xmm0
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
-; SSE41-NEXT: movdqa %xmm3, %xmm4
-; SSE41-NEXT: pand %xmm2, %xmm4
+; SSE41-NEXT: movdqa %xmm2, %xmm4
+; SSE41-NEXT: pand %xmm3, %xmm4
; SSE41-NEXT: movdqa %xmm0, %xmm5
; SSE41-NEXT: pmaddubsw %xmm4, %xmm5
; SSE41-NEXT: pand %xmm3, %xmm5
@@ -2846,7 +2846,7 @@ define <16 x i8> @vec128_i8_signed_mem_reg(ptr %a1_addr, <16 x i8> %a2) nounwind
; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm0
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
-; AVX1-NEXT: vpand %xmm2, %xmm3, %xmm4
+; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX1-NEXT: vpmaddubsw %xmm4, %xmm0, %xmm4
; AVX1-NEXT: vpand %xmm3, %xmm4, %xmm4
; AVX1-NEXT: vpandn %xmm2, %xmm3, %xmm2
@@ -2889,7 +2889,7 @@ define <16 x i8> @vec128_i8_signed_mem_reg(ptr %a1_addr, <16 x i8> %a2) nounwind
; XOP-FALLBACK-NEXT: vbroadcastss {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
; XOP-FALLBACK-NEXT: vpandn %xmm2, %xmm3, %xmm4
; XOP-FALLBACK-NEXT: vpmaddubsw %xmm4, %xmm0, %xmm4
-; XOP-FALLBACK-NEXT: vpand %xmm2, %xmm3, %xmm2
+; XOP-FALLBACK-NEXT: vpand %xmm3, %xmm2, %xmm2
; XOP-FALLBACK-NEXT: vpmaddubsw %xmm2, %xmm0, %xmm0
; XOP-FALLBACK-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[2],xmm4[2],xmm0[4],xmm4[4],xmm0[6],xmm4[6],xmm0[8],xmm4[8],xmm0[10],xmm4[10],xmm0[12],xmm4[12],xmm0[14],xmm4[14]
; XOP-FALLBACK-NEXT: vpaddb %xmm1, %xmm0, %xmm0
@@ -2908,7 +2908,7 @@ define <16 x i8> @vec128_i8_signed_mem_reg(ptr %a1_addr, <16 x i8> %a2) nounwind
; XOPAVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
; XOPAVX1-NEXT: vpandn %xmm2, %xmm3, %xmm4
; XOPAVX1-NEXT: vpmaddubsw %xmm4, %xmm0, %xmm4
-; XOPAVX1-NEXT: vpand %xmm2, %xmm3, %xmm2
+; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
; XOPAVX1-NEXT: vpmaddubsw %xmm2, %xmm0, %xmm0
; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[2],xmm4[2],xmm0[4],xmm4[4],xmm0[6],xmm4[6],xmm0[8],xmm4[8],xmm0[10],xmm4[10],xmm0[12],xmm4[12],xmm0[14],xmm4[14]
; XOPAVX1-NEXT: vpaddb %xmm1, %xmm0, %xmm0
@@ -3054,8 +3054,8 @@ define <16 x i8> @vec128_i8_signed_reg_mem(<16 x i8> %a1, ptr %a2_addr) nounwind
; SSE41-NEXT: psrlw $1, %xmm1
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
-; SSE41-NEXT: movdqa %xmm3, %xmm4
-; SSE41-NEXT: pand %xmm2, %xmm4
+; SSE41-NEXT: movdqa %xmm2, %xmm4
+; SSE41-NEXT: pand %xmm3, %xmm4
; SSE41-NEXT: movdqa %xmm1, %xmm5
; SSE41-NEXT: pmaddubsw %xmm4, %xmm5
; SSE41-NEXT: pand %xmm3, %xmm5
@@ -3077,7 +3077,7 @@ define <16 x i8> @vec128_i8_signed_reg_mem(<16 x i8> %a1, ptr %a2_addr) nounwind
; AVX1-NEXT: vpsrlw $1, %xmm1, %xmm1
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
-; AVX1-NEXT: vpand %xmm2, %xmm3, %xmm4
+; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX1-NEXT: vpmaddubsw %xmm4, %xmm1, %xmm4
; AVX1-NEXT: vpand %xmm3, %xmm4, %xmm4
; AVX1-NEXT: vpandn %xmm2, %xmm3, %xmm2
@@ -3120,7 +3120,7 @@ define <16 x i8> @vec128_i8_signed_reg_mem(<16 x i8> %a1, ptr %a2_addr) nounwind
; XOP-FALLBACK-NEXT: vbroadcastss {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
; XOP-FALLBACK-NEXT: vpandn %xmm2, %xmm3, %xmm4
; XOP-FALLBACK-NEXT: vpmaddubsw %xmm4, %xmm1, %xmm4
-; XOP-FALLBACK-NEXT: vpand %xmm2, %xmm3, %xmm2
+; XOP-FALLBACK-NEXT: vpand %xmm3, %xmm2, %xmm2
; XOP-FALLBACK-NEXT: vpmaddubsw %xmm2, %xmm1, %xmm1
; XOP-FALLBACK-NEXT: vpperm {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[2],xmm4[2],xmm1[4],xmm4[4],xmm1[6],xmm4[6],xmm1[8],xmm4[8],xmm1[10],xmm4[10],xmm1[12],xmm4[12],xmm1[14],xmm4[14]
; XOP-FALLBACK-NEXT: vpaddb %xmm0, %xmm1, %xmm0
@@ -3139,7 +3139,7 @@ define <16 x i8> @vec128_i8_signed_reg_mem(<16 x i8> %a1, ptr %a2_addr) nounwind
; XOPAVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
; XOPAVX1-NEXT: vpandn %xmm2, %xmm3, %xmm4
; XOPAVX1-NEXT: vpmaddubsw %xmm4, %xmm1, %xmm4
-; XOPAVX1-NEXT: vpand %xmm2, %xmm3, %xmm2
+; XOPAVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
; XOPAVX1-NEXT: vpmaddubsw %xmm2, %xmm1, %xmm1
; XOPAVX1-NEXT: vpperm {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[2],xmm4[2],xmm1[4],xmm4[4],xmm1[6],xmm4[6],xmm1[8],xmm4[8],xmm1[10],xmm4[10],xmm1[12],xmm4[12],xmm1[14],xmm4[14]
; XOPAVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
@@ -3287,8 +3287,8 @@ define <16 x i8> @vec128_i8_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind
; SSE41-NEXT: psrlw $1, %xmm0
; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
-; SSE41-NEXT: movdqa %xmm3, %xmm4
-; SSE41-NEXT: pand %xmm2, %xmm4
+; SSE41-NEXT: movdqa %xmm2, %xmm4
+; SSE41-NEXT: pand %xmm3, %xmm4
; SSE41-NEXT: movdqa %xmm0, %xmm5
; SSE41-NEXT: pmaddubsw %xmm4, %xmm5
; SSE41-NEXT: pand %xmm3, %xmm5
@@ -3311,7 +3311,7 @@ define <16 x i8> @vec128_i8_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind
; AVX1-NEXT: vpsrlw $1, %xmm1, %xmm1
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
-; AVX1-NEXT: vpand %xmm2, %xmm3, %xmm4
+; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX1-NEXT: vpmaddubsw %xmm4, %xmm1, %xmm4
; AVX1-NEXT: vpand %xmm3, %xmm4, %xmm4
; AVX1-NEXT: vpandn %xmm2, %xmm3, %xmm2
@@ -3356,7 +3356,7 @@ define <16 x i8> @vec128_i8_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind
; XOP-FALLBACK-NEXT: vbroadcastss {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
; XOP-FALLBACK-NEXT: vpandn %xmm2, %xmm3, %xmm4
; XOP-FALLBACK-NEXT: vpmaddubsw %xmm4, %xmm1, %xmm4
-; XOP-FALLBACK-NEXT: vpand %xmm2, %xmm3, %xmm2
+; XOP-FALLBACK-NEXT: vpand %xmm3, %xmm2, %xmm2
; XOP-FALLBACK-NEXT: vpmaddubsw %xmm2, %xmm1, %xmm1
; XOP-FALLBACK-NEXT: vpperm {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[2],xmm4[2],xmm1[4],xmm4[4],xmm1[6],xmm4[6],xmm1[8],xmm4[8],xmm1[10],xmm4[10],xmm1[12],xmm4[12],xmm1[14],xmm4[14]
; XOP-FALLBACK-NEXT: vpaddb %xmm0, %xmm1, %xm...
[truncated]
// canonicalize constant to RHS
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
    !DAG.isConstantIntBuildVectorOrConstantInt(N1))
if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N0)) &&
Can we do it in isConstantIntBuildVectorOrConstantInt instead?
Yes, although we would need to make it return bool instead of the SDNode (which is barely used tbh).
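A rough sketch of that alternative, assuming the helper stays a SelectionDAG member and simply folds the bitcast peek into itself (the exact shape landed in #112710 may differ; the SPLAT_VECTOR case follows what the existing helper already accepts):

```cpp
// Hypothetical bool-returning variant that peeks through bitcasts itself,
// so every "canonicalize constant to RHS" call site benefits unchanged.
bool SelectionDAG::isConstantIntBuildVectorOrConstantInt(SDValue N) const {
  N = peekThroughBitcasts(N);
  if (isa<ConstantSDNode>(N))
    return true;
  if (ISD::isBuildVectorOfConstantSDNodes(N.getNode()))
    return true;
  if (N.getOpcode() == ISD::SPLAT_VECTOR &&
      isa<ConstantSDNode>(N.getOperand(0)))
    return true;
  return false;
}
```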
…mpt to constant fold

Don't rely on isConstantIntBuildVectorOrConstantInt followed by getNode() to constant fold - FoldConstantArithmetic will do all of this for us. Cleanup for #112682

…mpt to constant fold

Don't rely on isConstantFPBuildVectorOrConstantFP followed by getNode() to constant fold - FoldConstantArithmetic will do all of this for us. Cleanup for #112682

Alter both isConstantIntBuildVectorOrConstantInt + isConstantFPBuildVectorOrConstantFP to return a bool instead of the underlying SDNode, and adjust usage to account for this. Update isConstantIntBuildVectorOrConstantInt to peek through bitcasts when attempting to find a constant; in particular this improves canonicalization of constants to the RHS on commutable instructions. X86 is the beneficiary here as it often rematerializes 0/-1 vector constants as vXi32 and bitcasts them to the requested type. Minor cleanup that helps with llvm#107423. Alternative to llvm#112682
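For reference, the constant-folding cleanup mentioned in those commits amounts to a change of this shape (a sketch with ISD::ADD as a stand-in, not the exact diff): rather than proving both operands constant and relying on getNode() to fold them, call FoldConstantArithmetic directly, which returns an empty SDValue when nothing can be folded.

```cpp
// Before: check that both operands are constants, then hope getNode() folds.
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
    DAG.isConstantIntBuildVectorOrConstantInt(N1))
  return DAG.getNode(ISD::ADD, DL, VT, N0, N1);

// After: FoldConstantArithmetic performs the constant checks and the fold;
// it returns a null SDValue if the operands cannot be folded.
if (SDValue C = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1}))
  return C;
```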
closing this in preference for #112710 (the FoldConstantArithmetic cleanup is just as valid for that PR btw)
X86 is the beneficiary here as it often rematerializes 0/-1 vector constants as vXi32 and bitcasts them to the requested type.
Minor cleanup that helps with #107423