diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 12843e16d0da1..a04db101276bc 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -146,25 +146,11 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
       .legalFor(HasSVE, {nxv16s8, nxv8s16, nxv4s32, nxv2s64})
       .widenScalarToNextPow2(0)
       .clampScalar(0, s32, s64)
-      .clampMaxNumElements(0, s8, 16)
-      .clampMaxNumElements(0, s16, 8)
+      .clampNumElements(0, v8s8, v16s8)
+      .clampNumElements(0, v4s16, v8s16)
       .clampNumElements(0, v2s32, v4s32)
       .clampNumElements(0, v2s64, v2s64)
-      .minScalarOrEltIf(
-          [=](const LegalityQuery &Query) {
-            return Query.Types[0].getNumElements() <= 2;
-          },
-          0, s32)
-      .minScalarOrEltIf(
-          [=](const LegalityQuery &Query) {
-            return Query.Types[0].getNumElements() <= 4;
-          },
-          0, s16)
-      .minScalarOrEltIf(
-          [=](const LegalityQuery &Query) {
-            return Query.Types[0].getNumElements() <= 16;
-          },
-          0, s8)
+      .widenScalarOrEltToNextPow2OrMinSize(0, 8)
       .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
       .moreElementsToNextPow2(0);
 
@@ -172,25 +158,11 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
       .legalFor({s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32, v2s64})
       .widenScalarToNextPow2(0)
       .clampScalar(0, s32, s64)
-      .clampMaxNumElements(0, s8, 16)
-      .clampMaxNumElements(0, s16, 8)
+      .clampNumElements(0, v8s8, v16s8)
+      .clampNumElements(0, v4s16, v8s16)
       .clampNumElements(0, v2s32, v4s32)
       .clampNumElements(0, v2s64, v2s64)
-      .minScalarOrEltIf(
-          [=](const LegalityQuery &Query) {
-            return Query.Types[0].getNumElements() <= 2;
-          },
-          0, s32)
-      .minScalarOrEltIf(
-          [=](const LegalityQuery &Query) {
-            return Query.Types[0].getNumElements() <= 4;
-          },
-          0, s16)
-      .minScalarOrEltIf(
-          [=](const LegalityQuery &Query) {
-            return Query.Types[0].getNumElements() <= 16;
-          },
-          0, s8)
+      .widenScalarOrEltToNextPow2OrMinSize(0, 8)
       .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
       .moreElementsToNextPow2(0);
 
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-add.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-add.mir
index ceddb4bca7255..b061c2af1ec3b 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-add.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-add.mir
@@ -331,8 +331,21 @@ body: |
     ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s32>) = COPY $d3
     ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<2 x s32>) = G_ICMP intpred(eq), [[COPY]](<2 x s32>), [[COPY1]]
     ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(<2 x s32>) = G_ICMP intpred(eq), [[COPY2]](<2 x s32>), [[COPY3]]
-    ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(<2 x s32>) = G_ADD [[ICMP]], [[ICMP1]]
-    ; CHECK-NEXT: $d0 = COPY [[ADD]](<2 x s32>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ICMP]](<2 x s32>)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s32)
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[UV1]](s32)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ICMP1]](<2 x s32>)
+    ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s32)
+    ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[UV3]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[TRUNC2]](s8), [[TRUNC3]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
+    ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(<8 x s8>) = G_ADD [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+    ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[ADD]](<8 x s8>)
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8)
+    ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s8)
+    ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32)
+    ; CHECK-NEXT: $d0 = COPY [[BUILD_VECTOR2]](<2 x s32>)
     ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:_(<2 x s32>) = COPY $d0
     %1:_(<2 x s32>) = COPY $d1
@@ -358,15 +371,11 @@ body: |
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY $b0
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s8) = COPY $b1
     ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s8) = COPY $b2
-    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY]](s8)
-    ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY1]](s8)
-    ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY2]](s8)
-    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[ANYEXT]](s16), [[ANYEXT1]](s16), [[ANYEXT2]](s16), [[DEF]](s16)
-    ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(<4 x s16>) = G_ADD [[BUILD_VECTOR]], [[BUILD_VECTOR]]
-    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[ADD]](<4 x s16>)
-    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s16)
-    ; CHECK-NEXT: $b0 = COPY [[TRUNC]](s8)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[COPY]](s8), [[COPY1]](s8), [[COPY2]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
+    ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(<8 x s8>) = G_ADD [[BUILD_VECTOR]], [[BUILD_VECTOR]]
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[ADD]](<8 x s8>)
+    ; CHECK-NEXT: $b0 = COPY [[UV]](s8)
     ; CHECK-NEXT: RET_ReallyLR implicit $b0
     %1:_(s8) = COPY $b0
     %2:_(s8) = COPY $b1
@@ -395,8 +404,23 @@ body: |
     ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s16>) = COPY $d3
     ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<4 x s16>) = G_ICMP intpred(eq), [[COPY]](<4 x s16>), [[COPY1]]
     ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(<4 x s16>) = G_ICMP intpred(eq), [[COPY2]](<4 x s16>), [[COPY3]]
-    ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(<4 x s16>) = G_ADD [[ICMP]], [[ICMP1]]
-    ; CHECK-NEXT: $d0 = COPY [[ADD]](<4 x s16>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[ICMP]](<4 x s16>)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s16)
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[UV1]](s16)
+    ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s16)
+    ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[UV3]](s16)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
+    ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[ICMP1]](<4 x s16>)
+    ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[UV4]](s16)
+    ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[UV5]](s16)
+    ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[UV6]](s16)
+    ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[UV7]](s16)
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[TRUNC4]](s8), [[TRUNC5]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
+    ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(<8 x s8>) = G_ADD [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[ADD]](<8 x s8>)
+    ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<4 x s16>), [[UV9:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT]](<8 x s16>)
+    ; CHECK-NEXT: $d0 = COPY [[UV8]](<4 x s16>)
     ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:_(<4 x s16>) = COPY $d0
    %1:_(<4 x s16>) = COPY $d1
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-and.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-and.mir
index fa1700ac4fc52..9fd9e83c7e7f1 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-and.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-and.mir
@@ -216,8 +216,21 @@ body: |
    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s32>) = COPY $d3
    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<2 x s32>) = G_ICMP intpred(eq), [[COPY]](<2 x s32>), [[COPY1]]
    ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(<2 x s32>) = G_ICMP intpred(eq), [[COPY2]](<2 x s32>), [[COPY3]]
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[ICMP]], [[ICMP1]]
-    ; CHECK-NEXT: $d0 = COPY [[AND]](<2 x s32>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ICMP]](<2 x s32>)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s32)
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[UV1]](s32)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ICMP1]](<2 x s32>)
+    ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s32)
+    ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[UV3]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[TRUNC2]](s8), [[TRUNC3]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<8 x s8>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+    ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[AND]](<8 x s8>)
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8)
+    ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s8)
+    ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32)
+    ; CHECK-NEXT: $d0 = COPY [[BUILD_VECTOR2]](<2 x s32>)
    ; CHECK-NEXT: RET_ReallyLR implicit $d0
    %0:_(<2 x s32>) = COPY $d0
    %1:_(<2 x s32>) = COPY $d1
@@ -243,15 +256,11 @@ body: |
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY $b0
    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s8) = COPY $b1
    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s8) = COPY $b2
-    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY]](s8)
-    ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY1]](s8)
-    ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY2]](s8)
-    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[ANYEXT]](s16), [[ANYEXT1]](s16), [[ANYEXT2]](s16), [[DEF]](s16)
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR]]
-    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[AND]](<4 x s16>)
-    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s16)
-    ; CHECK-NEXT: $b0 = COPY [[TRUNC]](s8)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[COPY]](s8), [[COPY1]](s8), [[COPY2]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<8 x s8>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR]]
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[AND]](<8 x s8>)
+    ; CHECK-NEXT: $b0 = COPY [[UV]](s8)
    ; CHECK-NEXT: RET_ReallyLR implicit $b0
    %1:_(s8) = COPY $b0
    %2:_(s8) = COPY $b1
@@ -280,8 +289,23 @@ body: |
    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s16>) = COPY $d3
    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<4 x s16>) = G_ICMP intpred(eq), [[COPY]](<4 x s16>), [[COPY1]]
    ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(<4 x s16>) = G_ICMP intpred(eq), [[COPY2]](<4 x s16>), [[COPY3]]
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[ICMP]], [[ICMP1]]
-    ; CHECK-NEXT: $d0 = COPY [[AND]](<4 x s16>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[ICMP]](<4 x s16>)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s16)
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[UV1]](s16)
+    ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s16)
+    ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[UV3]](s16)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
+    ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[ICMP1]](<4 x s16>)
+    ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[UV4]](s16)
+    ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[UV5]](s16)
+    ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[UV6]](s16)
+    ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[UV7]](s16)
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[TRUNC4]](s8), [[TRUNC5]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<8 x s8>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[AND]](<8 x s8>)
+    ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<4 x s16>), [[UV9:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT]](<8 x s16>)
+    ; CHECK-NEXT: $d0 = COPY [[UV8]](<4 x s16>)
    ; CHECK-NEXT: RET_ReallyLR implicit $d0
    %0:_(<4 x s16>) = COPY $d0
    %1:_(<4 x s16>) = COPY $d1
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-freeze.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-freeze.mir
index 03c28efe7e09f..c87f219565156 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-freeze.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-freeze.mir
@@ -192,12 +192,36 @@ body: |
    ; CHECK-NEXT: [[FCMP1:%[0-9]+]]:_(<4 x s32>) = nnan ninf nsz arcp contract afn reassoc G_FCMP floatpred(ogt), [[COPY1]](<4 x s32>), [[COPY]]
    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[FCMP1]](<4 x s32>)
    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<4 x s16>) = G_FREEZE [[TRUNC]]
-    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[FCMP]](<4 x s32>)
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[TRUNC1]], [[FREEZE]]
-    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<4 x s32>) = G_ANYEXT [[AND]](<4 x s16>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FCMP]](<4 x s32>)
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s32)
+    ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[UV1]](s32)
+    ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s32)
+    ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[UV3]](s32)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8), [[TRUNC4]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
+    ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[FREEZE]](<4 x s16>)
+    ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[UV4]](s16)
+    ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[UV5]](s16)
+    ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[UV6]](s16)
+    ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[UV7]](s16)
+    ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[TRUNC5]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8), [[TRUNC8]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<8 x s8>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR2]]
+    ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8), [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[AND]](<8 x s8>)
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV8]](s8)
+    ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV9]](s8)
+    ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV10]](s8)
+    ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV11]](s8)
+    ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[ANYEXT]](s16), [[ANYEXT1]](s16), [[DEF1]](s16), [[DEF1]](s16)
+    ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(<4 x s32>) = G_ANYEXT [[BUILD_VECTOR3]](<4 x s16>)
+    ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(<2 x s32>), [[UV17:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[ANYEXT4]](<4 x s32>)
+    ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[ANYEXT2]](s16), [[ANYEXT3]](s16), [[DEF1]](s16), [[DEF1]](s16)
+    ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(<4 x s32>) = G_ANYEXT [[BUILD_VECTOR4]](<4 x s16>)
+    ; CHECK-NEXT: [[UV18:%[0-9]+]]:_(<2 x s32>), [[UV19:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[ANYEXT5]](<4 x s32>)
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[UV16]](<2 x s32>), [[UV18]](<2 x s32>)
    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32)
-    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<4 x s32>) = G_AND [[ANYEXT]], [[BUILD_VECTOR1]]
+    ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32)
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<4 x s32>) = G_AND [[CONCAT_VECTORS]], [[BUILD_VECTOR5]]
    ; CHECK-NEXT: $q0 = COPY [[AND1]](<4 x s32>)
    %1:_(<4 x s32>) = COPY $q0
    %2:_(<4 x s32>) = COPY $q1
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir
index b29670a89c8cf..dc683db6ab128 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir
@@ -511,8 +511,21 @@ body: |
    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s32>) = COPY $d3
    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<2 x s32>) = G_ICMP intpred(eq), [[COPY]](<2 x s32>), [[COPY1]]
    ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(<2 x s32>) = G_ICMP intpred(eq), [[COPY2]](<2 x s32>), [[COPY3]]
-    ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(<2 x s32>) = G_MUL [[ICMP]], [[ICMP1]]
-    ; CHECK-NEXT: $d0 = COPY [[MUL]](<2 x s32>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ICMP]](<2 x s32>)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s32)
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[UV1]](s32)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ICMP1]](<2 x s32>)
+    ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s32)
+    ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[UV3]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[TRUNC2]](s8), [[TRUNC3]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
+    ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(<8 x s8>) = G_MUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+    ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[MUL]](<8 x s8>)
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8)
+    ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s8)
+    ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32)
+    ; CHECK-NEXT: $d0 = COPY [[BUILD_VECTOR2]](<2 x s32>)
    ; CHECK-NEXT: RET_ReallyLR implicit $d0
    %0:_(<2 x s32>) = COPY $d0
    %1:_(<2 x s32>) = COPY $d1
@@ -538,15 +551,11 @@ body: |
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY $b0
    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s8) = COPY $b1
    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s8) = COPY $b2
-    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY]](s8)
-    ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY1]](s8)
-    ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY2]](s8)
-    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[ANYEXT]](s16), [[ANYEXT1]](s16), [[ANYEXT2]](s16), [[DEF]](s16)
-    ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(<4 x s16>) = G_MUL [[BUILD_VECTOR]], [[BUILD_VECTOR]]
-    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[MUL]](<4 x s16>)
-    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s16)
-    ; CHECK-NEXT: $b0 = COPY [[TRUNC]](s8)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[COPY]](s8), [[COPY1]](s8), [[COPY2]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
+    ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(<8 x s8>) = G_MUL [[BUILD_VECTOR]], [[BUILD_VECTOR]]
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[MUL]](<8 x s8>)
+    ; CHECK-NEXT: $b0 = COPY [[UV]](s8)
    ; CHECK-NEXT: RET_ReallyLR implicit $b0
    %1:_(s8) = COPY $b0
    %2:_(s8) = COPY $b1
@@ -575,8 +584,23 @@ body: |
    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s16>) = COPY $d3
    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<4 x s16>) = G_ICMP intpred(eq), [[COPY]](<4 x s16>), [[COPY1]]
    ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(<4 x s16>) = G_ICMP intpred(eq), [[COPY2]](<4 x s16>), [[COPY3]]
-    ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(<4 x s16>) = G_MUL [[ICMP]], [[ICMP1]]
-    ; CHECK-NEXT: $d0 = COPY [[MUL]](<4 x s16>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[ICMP]](<4 x s16>)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s16)
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[UV1]](s16)
+    ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s16)
+    ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[UV3]](s16)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
+    ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[ICMP1]](<4 x s16>)
+    ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[UV4]](s16)
+    ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[UV5]](s16)
+    ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[UV6]](s16)
+    ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[UV7]](s16)
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[TRUNC4]](s8), [[TRUNC5]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
+    ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(<8 x s8>) = G_MUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[MUL]](<8 x s8>)
+    ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<4 x s16>), [[UV9:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT]](<8 x s16>)
+    ; CHECK-NEXT: $d0 = COPY [[UV8]](<4 x s16>)
    ; CHECK-NEXT: RET_ReallyLR implicit $d0
    %0:_(<4 x s16>) = COPY $d0
    %1:_(<4 x s16>) = COPY $d1
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-or.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-or.mir
index 7b3be3468b93a..fc6f2e71ae794 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-or.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-or.mir
@@ -160,8 +160,21 @@ body: |
    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s32>) = COPY $d3
    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<2 x s32>) = G_ICMP intpred(eq), [[COPY]](<2 x s32>), [[COPY1]]
    ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(<2 x s32>) = G_ICMP intpred(eq), [[COPY2]](<2 x s32>), [[COPY3]]
-    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[ICMP]], [[ICMP1]]
-    ; CHECK-NEXT: $d0 = COPY [[OR]](<2 x s32>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ICMP]](<2 x s32>)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s32)
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[UV1]](s32)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ICMP1]](<2 x s32>)
+    ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s32)
+    ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[UV3]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[TRUNC2]](s8), [[TRUNC3]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<8 x s8>) = G_OR [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+    ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[OR]](<8 x s8>)
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8)
+    ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s8)
+    ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32)
+    ; CHECK-NEXT: $d0 = COPY [[BUILD_VECTOR2]](<2 x s32>)
    ; CHECK-NEXT: RET_ReallyLR implicit $d0
    %0:_(<2 x s32>) = COPY $d0
    %1:_(<2 x s32>) = COPY $d1
@@ -187,19 +200,12 @@ body: |
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY $b0
    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s8) = COPY $b1
    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s8) = COPY $b2
-    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY]](s8)
-    ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY1]](s8)
-    ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY2]](s8)
-    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[ANYEXT]](s16), [[ANYEXT1]](s16), [[ANYEXT2]](s16), [[DEF]](s16)
-    ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY]](s8)
-    ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY1]](s8)
-    ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY2]](s8)
-    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[ANYEXT3]](s16), [[ANYEXT4]](s16), [[ANYEXT5]](s16), [[DEF]](s16)
-    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
-    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[OR]](<4 x s16>)
-    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s16)
-    ; CHECK-NEXT: $b0 = COPY [[TRUNC]](s8)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[COPY]](s8), [[COPY1]](s8), [[COPY2]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[COPY]](s8), [[COPY1]](s8), [[COPY2]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<8 x s8>) = G_OR [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[OR]](<8 x s8>)
+    ; CHECK-NEXT: $b0 = COPY [[UV]](s8)
    ; CHECK-NEXT: RET_ReallyLR implicit $b0
    %1:_(s8) = COPY $b0
    %2:_(s8) = COPY $b1
@@ -228,8 +234,23 @@ body: |
    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s16>) = COPY $d3
    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<4 x s16>) = G_ICMP intpred(eq), [[COPY]](<4 x s16>), [[COPY1]]
    ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(<4 x s16>) = G_ICMP intpred(eq), [[COPY2]](<4 x s16>), [[COPY3]]
-    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[ICMP]], [[ICMP1]]
-    ; CHECK-NEXT: $d0 = COPY [[OR]](<4 x s16>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[ICMP]](<4 x s16>)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s16)
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[UV1]](s16)
+    ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s16)
+    ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[UV3]](s16)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
+    ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[ICMP1]](<4 x s16>)
+    ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[UV4]](s16)
+    ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[UV5]](s16)
+    ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[UV6]](s16)
+    ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[UV7]](s16)
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[TRUNC4]](s8), [[TRUNC5]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<8 x s8>) = G_OR [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[OR]](<8 x s8>)
+    ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<4 x s16>), [[UV9:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT]](<8 x s16>)
+    ; CHECK-NEXT: $d0 = COPY [[UV8]](<4 x s16>)
    ; CHECK-NEXT: RET_ReallyLR implicit $d0
    %0:_(<4 x s16>) = COPY $d0
    %1:_(<4 x s16>) = COPY $d1
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir
index 52a28ad37e362..3ba920df069eb 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir
@@ -308,23 +308,39 @@ body: |
    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[TRUNC5]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8), [[TRUNC8]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8)
    ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<8 x s8>) = G_SHUFFLE_VECTOR [[BUILD_VECTOR]](<8 x s8>), [[BUILD_VECTOR1]], shufflemask(0, 0, 0, 0, undef, undef, undef, undef)
    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s8) = G_CONSTANT i8 1
-    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[SHUF]](<8 x s8>)
-    ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<4 x s16>), [[UV9:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT]](<8 x s16>)
    ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8)
-    ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR2]](<8 x s8>)
-    ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(<4 x s16>), [[UV11:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT1]](<8 x s16>)
-    ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[UV8]], [[UV10]]
-    ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP]](<4 x s32>)
-    ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[SHUF]](<8 x s8>)
-    ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(<4 x s16>), [[UV13:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT2]](<8 x s16>)
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[TRUNC9]], [[UV12]]
-    ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP1]](<4 x s32>)
-    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<4 x s16>) = G_AND [[TRUNC10]], [[XOR]]
-    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[AND]], [[AND1]]
-    ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(<4 x s32>) = G_ANYEXT [[OR]](<4 x s16>)
+    ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<8 x s8>) = G_XOR [[SHUF]], [[BUILD_VECTOR2]]
+    ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ICMP]](<4 x s32>)
+    ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[UV8]](s32)
+    ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s8) = G_TRUNC [[UV9]](s32)
+    ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s8) = G_TRUNC [[UV10]](s32)
+    ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(s8) = G_TRUNC [[UV11]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[TRUNC9]](s8), [[TRUNC10]](s8), [[TRUNC11]](s8), [[TRUNC12]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8)
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<8 x s8>) = G_AND [[BUILD_VECTOR3]], [[SHUF]]
+    ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ICMP1]](<4 x s32>)
+    ; CHECK-NEXT: [[TRUNC13:%[0-9]+]]:_(s8) = G_TRUNC [[UV12]](s32)
+    ; CHECK-NEXT: [[TRUNC14:%[0-9]+]]:_(s8) = G_TRUNC [[UV13]](s32)
+    ; CHECK-NEXT: [[TRUNC15:%[0-9]+]]:_(s8) = G_TRUNC [[UV14]](s32)
+    ; CHECK-NEXT: [[TRUNC16:%[0-9]+]]:_(s8) = G_TRUNC [[UV15]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[TRUNC13]](s8), [[TRUNC14]](s8), [[TRUNC15]](s8), [[TRUNC16]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8)
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<8 x s8>) = G_AND [[BUILD_VECTOR4]], [[XOR]]
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<8 x s8>) = G_OR [[AND]], [[AND1]]
+    ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s8), [[UV17:%[0-9]+]]:_(s8), [[UV18:%[0-9]+]]:_(s8), [[UV19:%[0-9]+]]:_(s8), [[UV20:%[0-9]+]]:_(s8), [[UV21:%[0-9]+]]:_(s8), [[UV22:%[0-9]+]]:_(s8), [[UV23:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[OR]](<8 x s8>)
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV16]](s8)
+    ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV17]](s8)
+    ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV18]](s8)
+    ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV19]](s8)
+    ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[ANYEXT]](s16), [[ANYEXT1]](s16), [[DEF2]](s16), [[DEF2]](s16)
+    ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(<4 x s32>) = G_ANYEXT [[BUILD_VECTOR5]](<4 x s16>)
+    ; CHECK-NEXT: [[UV24:%[0-9]+]]:_(<2 x s32>), [[UV25:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[ANYEXT4]](<4 x s32>)
+    ; CHECK-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[ANYEXT2]](s16), [[ANYEXT3]](s16), [[DEF2]](s16), [[DEF2]](s16)
+    ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(<4 x s32>) = G_ANYEXT [[BUILD_VECTOR6]](<4 x s16>)
+    ; CHECK-NEXT: [[UV26:%[0-9]+]]:_(<2 x s32>), [[UV27:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[ANYEXT5]](<4 x s32>)
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[UV24]](<2 x s32>), [[UV26]](<2 x s32>)
    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C3]](s32), [[C3]](s32), [[C3]](s32), [[C3]](s32)
-    ; CHECK-NEXT: %zext_select:_(<4 x s32>) = G_AND [[ANYEXT3]], [[BUILD_VECTOR3]]
+    ; CHECK-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C3]](s32), [[C3]](s32), [[C3]](s32), [[C3]](s32)
+    ; CHECK-NEXT: %zext_select:_(<4 x s32>) = G_AND [[CONCAT_VECTORS]], [[BUILD_VECTOR7]]
    ; CHECK-NEXT: $q0 = COPY %zext_select(<4 x s32>)
    ; CHECK-NEXT: RET_ReallyLR implicit $q0
    %w0:_(s32) = COPY $w0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-sub.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-sub.mir
index 205c32f6971ac..776753738daea 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-sub.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-sub.mir
@@ -131,8 +131,21 @@ body: |
    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s32>) = COPY $d3
    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<2 x s32>) = G_ICMP intpred(eq), [[COPY]](<2 x s32>), [[COPY1]]
    ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(<2 x s32>) = G_ICMP intpred(eq), [[COPY2]](<2 x s32>), [[COPY3]]
-    ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(<2 x s32>) = G_SUB [[ICMP]], [[ICMP1]]
-    ; CHECK-NEXT: $d0 = COPY [[SUB]](<2 x s32>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ICMP]](<2 x s32>)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s32)
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[UV1]](s32)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ICMP1]](<2 x s32>)
+    ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s32)
+    ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[UV3]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[TRUNC2]](s8), [[TRUNC3]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
+    ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(<8 x s8>) = G_SUB [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+    ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[SUB]](<8 x s8>)
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8)
+    ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s8)
+    ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32)
+    ; CHECK-NEXT: $d0 = COPY [[BUILD_VECTOR2]](<2 x s32>)
    ; CHECK-NEXT: RET_ReallyLR implicit $d0
    %0:_(<2 x s32>) = COPY $d0
    %1:_(<2 x s32>) = COPY $d1
@@ -158,15 +171,11 @@ body: |
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY $b0
    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s8) = COPY $b1
    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s8) = COPY $b2
-    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY]](s8)
-    ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY1]](s8)
-    ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY2]](s8)
-    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[ANYEXT]](s16), [[ANYEXT1]](s16), [[ANYEXT2]](s16), [[DEF]](s16)
-    ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(<4 x s16>) = G_SUB [[BUILD_VECTOR]], [[BUILD_VECTOR]]
-    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[SUB]](<4 x s16>)
-    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s16)
-    ; CHECK-NEXT: $b0 = COPY [[TRUNC]](s8)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[COPY]](s8), [[COPY1]](s8), [[COPY2]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
+    ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(<8 x s8>) = G_SUB [[BUILD_VECTOR]], [[BUILD_VECTOR]]
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[SUB]](<8 x s8>)
+    ; CHECK-NEXT: $b0 = COPY [[UV]](s8)
    ; CHECK-NEXT: RET_ReallyLR implicit $b0
    %1:_(s8) = COPY $b0
    %2:_(s8) = COPY $b1
@@ -195,8 +204,23 @@ body: |
    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s16>) = COPY $d3
    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<4 x s16>) = G_ICMP intpred(eq), [[COPY]](<4 x s16>), [[COPY1]]
    ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(<4 x s16>) = G_ICMP intpred(eq), [[COPY2]](<4 x s16>), [[COPY3]]
-    ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(<4 x s16>) = G_SUB [[ICMP]], [[ICMP1]]
-    ; CHECK-NEXT: $d0 = COPY [[SUB]](<4 x s16>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[ICMP]](<4 x s16>)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s16)
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[UV1]](s16)
+    ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s16)
+    ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[UV3]](s16)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
+    ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[ICMP1]](<4 x s16>)
+    ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[UV4]](s16)
+    ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[UV5]](s16)
+    ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[UV6]](s16)
+    ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[UV7]](s16)
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[TRUNC4]](s8), [[TRUNC5]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
+    ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(<8 x s8>) = G_SUB [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[SUB]](<8 x s8>)
+    ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<4 x s16>), [[UV9:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT]](<8 x s16>)
+    ; CHECK-NEXT: $d0 = COPY [[UV8]](<4 x s16>)
    ; CHECK-NEXT: RET_ReallyLR implicit $d0
    %0:_(<4 x s16>) = COPY $d0
    %1:_(<4 x s16>) = COPY $d1
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-threeway-cmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-threeway-cmp.mir
index ae16e40671785..bc1afa93fd5c2 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-threeway-cmp.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-threeway-cmp.mir
@@ -63,11 +63,34 @@ body: |
    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<4 x s32>) = G_ICMP intpred(ugt), [[BUILD_VECTOR]](<4 x s32>), [[BUILD_VECTOR1]]
    ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(<4 x s32>) = G_ICMP intpred(ult), [[BUILD_VECTOR]](<4 x s32>), [[BUILD_VECTOR1]]
-    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP1]](<4 x s32>)
-    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP]](<4 x s32>)
-    ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(<4 x s16>) = G_SUB [[TRUNC]], [[TRUNC1]]
-    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<4 x s32>) = G_ANYEXT [[SUB]](<4 x s16>)
-    ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(<4 x s32>) = G_SEXT_INREG [[ANYEXT]], 2
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ICMP1]](<4 x s32>)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s32)
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[UV1]](s32)
+    ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s32)
+    ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[UV3]](s32)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
+    ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ICMP]](<4 x s32>)
+    ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[UV4]](s32)
+    ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[UV5]](s32)
+    ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[UV6]](s32)
+    ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[UV7]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[TRUNC4]](s8), [[TRUNC5]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
+    ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(<8 x s8>) = G_SUB [[BUILD_VECTOR2]], [[BUILD_VECTOR3]]
+    ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8), [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[SUB]](<8 x s8>)
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV8]](s8)
+    ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV9]](s8)
+    ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV10]](s8)
+    ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV11]](s8)
+    ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[ANYEXT]](s16), [[ANYEXT1]](s16), [[DEF1]](s16), [[DEF1]](s16)
+    ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(<4 x s32>) = G_ANYEXT [[BUILD_VECTOR4]](<4 x s16>)
+    ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(<2 x s32>), [[UV17:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[ANYEXT4]](<4 x s32>)
+    ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[ANYEXT2]](s16), [[ANYEXT3]](s16), [[DEF1]](s16), [[DEF1]](s16)
+    ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(<4 x s32>) = G_ANYEXT [[BUILD_VECTOR5]](<4 x s16>)
+    ; CHECK-NEXT: [[UV18:%[0-9]+]]:_(<2 x s32>), [[UV19:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[ANYEXT5]](<4 x s32>)
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[UV16]](<2 x s32>), [[UV18]](<2 x s32>)
+    ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(<4 x s32>) = G_SEXT_INREG [[CONCAT_VECTORS]], 2
    ; CHECK-NEXT: $q0 = COPY [[SEXT_INREG]](<4 x s32>)
    %0:_(s32) = COPY $w0
    %1:_(s32) = COPY $w1
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-xor.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-xor.mir
index 9c528623eca23..7a6dbd5525156 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-xor.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-xor.mir
@@ -203,8 +203,21 @@ body: |
    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s32>) = COPY $d3
    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<2 x s32>) = G_ICMP intpred(eq), [[COPY]](<2 x s32>), [[COPY1]]
    ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(<2 x s32>) = G_ICMP intpred(eq), [[COPY2]](<2 x s32>), [[COPY3]]
-    ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<2 x s32>) = G_XOR [[ICMP]], [[ICMP1]]
-    ; CHECK-NEXT: $d0 = COPY [[XOR]](<2 x s32>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ICMP]](<2 x s32>)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s32)
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[UV1]](s32)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ICMP1]](<2 x s32>)
+    ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s32)
+    ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[UV3]](s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[TRUNC2]](s8), [[TRUNC3]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
+    ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<8 x s8>) = G_XOR [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+    ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[XOR]](<8 x s8>)
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8)
+    ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s8)
+    ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32)
+    ; CHECK-NEXT: $d0 = COPY [[BUILD_VECTOR2]](<2 x s32>)
    ; CHECK-NEXT: RET_ReallyLR implicit $d0
    %0:_(<2 x s32>) = COPY $d0
    %1:_(<2 x s32>) = COPY $d1
@@ -230,15 +243,11 @@ body: |
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY $b0
    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s8) = COPY $b1
    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s8) = COPY $b2
-    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY]](s8)
-    ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY1]](s8)
-    ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY2]](s8)
-    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[ANYEXT]](s16), [[ANYEXT1]](s16), [[ANYEXT2]](s16), [[DEF]](s16)
-    ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[BUILD_VECTOR]], [[BUILD_VECTOR]]
-    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[XOR]](<4 x s16>)
-    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s16)
-    ; CHECK-NEXT: $b0 = COPY [[TRUNC]](s8)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[COPY]](s8), [[COPY1]](s8), [[COPY2]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
+    ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<8 x s8>) = G_XOR [[BUILD_VECTOR]], [[BUILD_VECTOR]]
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[XOR]](<8 x s8>)
+    ; CHECK-NEXT: $b0 = COPY [[UV]](s8)
    ; CHECK-NEXT: RET_ReallyLR implicit $b0
    %1:_(s8) = COPY $b0
    %2:_(s8) = COPY $b1
@@ -267,8 +276,23 @@ body: |
    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s16>) = COPY $d3
    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<4 x s16>) = G_ICMP intpred(eq), [[COPY]](<4 x s16>), [[COPY1]]
    ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(<4 x s16>) = G_ICMP intpred(eq), [[COPY2]](<4 x s16>), [[COPY3]]
-    ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[ICMP]], [[ICMP1]]
-    ; CHECK-NEXT: $d0 = COPY [[XOR]](<4 x s16>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[ICMP]](<4 x s16>)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s16)
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[UV1]](s16)
+    ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s16)
+    ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[UV3]](s16)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
+    ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[ICMP1]](<4 x s16>)
+    ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[UV4]](s16)
+    ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[UV5]](s16)
+    ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[UV6]](s16)
+    ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[UV7]](s16)
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[TRUNC4]](s8), [[TRUNC5]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
+    ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<8 x s8>) = G_XOR [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[XOR]](<8 x s8>)
+    ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<4 x s16>), [[UV9:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT]](<8 x s16>)
+    ; CHECK-NEXT: $d0 = COPY [[UV8]](<4 x s16>)
    ; CHECK-NEXT: RET_ReallyLR implicit $d0
    %0:_(<4 x s16>) = COPY $d0
    %1:_(<4 x s16>) = COPY $d1
diff --git a/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll b/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll
index be79135c8b831..1382ae5ec593d 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll
@@ -184,13 +184,15 @@ define <2 x i16> @dupsext_v2i8_v2i16(i8 %src, <2 x i8> %b) {
 ;
 ; CHECK-GI-LABEL: dupsext_v2i8_v2i16:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    uzp1 v0.4h, v0.4h, v0.4h
 ; CHECK-GI-NEXT:    lsl w8, w0, #8
-; CHECK-GI-NEXT:    shl v0.2s, v0.2s, #24
 ; CHECK-GI-NEXT:    sbfx w8, w8, #8, #8
-; CHECK-GI-NEXT:    sshr v0.2s, v0.2s, #24
+; CHECK-GI-NEXT:    shl v0.4h, v0.4h, #8
 ; CHECK-GI-NEXT:    dup v1.4h, w8
-; CHECK-GI-NEXT:    ushll v1.4s, v1.4h, #0
-; CHECK-GI-NEXT:    mul v0.2s, v1.2s, v0.2s
+; CHECK-GI-NEXT:    sshr v0.4h, v0.4h, #8
+; CHECK-GI-NEXT:    mul v0.4h, v1.4h, v0.4h
+; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %in = sext i8 %src to i16
diff --git a/llvm/test/CodeGen/AArch64/add.ll b/llvm/test/CodeGen/AArch64/add.ll
index d5bd1b712a2a6..0b86a7c3038a2 100644
--- a/llvm/test/CodeGen/AArch64/add.ll
+++ b/llvm/test/CodeGen/AArch64/add.ll
@@ -70,16 +70,15 @@ define void @v2i8(ptr %p1, ptr %p2) {
 ;
 ; CHECK-GI-LABEL: v2i8:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    ld1 { v0.b }[0], [x0]
-; CHECK-GI-NEXT:    ld1 { v1.b }[0], [x1]
-; CHECK-GI-NEXT:    ldr b2, [x0, #1]
-; CHECK-GI-NEXT:    ldr b3, [x1, #1]
-; CHECK-GI-NEXT:    mov v0.s[1], v2.s[0]
-; CHECK-GI-NEXT:    mov v1.s[1], v3.s[0]
-; CHECK-GI-NEXT:    add v0.2s, v0.2s, v1.2s
-; CHECK-GI-NEXT:    mov s1, v0.s[1]
-; CHECK-GI-NEXT:    str b0, [x0]
-; CHECK-GI-NEXT:    str b1, [x0, #1]
+; CHECK-GI-NEXT:    ldr b0, [x0]
+; CHECK-GI-NEXT:    ldr b1, [x1]
+; CHECK-GI-NEXT:    add x8, x0, #1
+; CHECK-GI-NEXT:    add x9, x1, #1
+; CHECK-GI-NEXT:    ld1 { v0.b }[1], [x8]
+; CHECK-GI-NEXT:    ld1 { v1.b }[1], [x9]
+; CHECK-GI-NEXT:    add v0.8b, v0.8b, v1.8b
+; CHECK-GI-NEXT:    st1 { v0.b }[0], [x0]
+; CHECK-GI-NEXT:    st1 { v0.b }[1], [x8]
 ; CHECK-GI-NEXT:    ret
 entry:
   %d = load <2 x i8>, ptr %p1
@@ -110,24 +109,20 @@ define void @v3i8(ptr %p1, ptr %p2) {
 ;
 ; CHECK-GI-LABEL: v3i8:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    ldrb w8, [x0]
-; CHECK-GI-NEXT:    ldrb w9, [x1]
-; CHECK-GI-NEXT:    ldrb w10, [x0, #1]
-; CHECK-GI-NEXT:    ldrb w11, [x1, #1]
-; CHECK-GI-NEXT:    fmov s0, w8
-; CHECK-GI-NEXT:    fmov s1, w9
-; CHECK-GI-NEXT:    ldrb w8, [x0, #2]
-; CHECK-GI-NEXT:    ldrb w9, [x1, #2]
-; CHECK-GI-NEXT:    mov v0.h[1], w10
-; CHECK-GI-NEXT:    mov v1.h[1], w11
-; CHECK-GI-NEXT:    mov v0.h[2], w8
-; CHECK-GI-NEXT:    mov v1.h[2], w9
-; CHECK-GI-NEXT:    add v0.4h, v0.4h, v1.4h
-; CHECK-GI-NEXT:    mov h1, v0.h[1]
-; CHECK-GI-NEXT:    mov h2, v0.h[2]
-; CHECK-GI-NEXT:    str b0, [x0]
-; CHECK-GI-NEXT:    str b1, [x0, #1]
-; CHECK-GI-NEXT:    str b2, [x0, #2]
+; CHECK-GI-NEXT:    ldr b0, [x0]
+; CHECK-GI-NEXT:    ldr b1, [x1]
+; CHECK-GI-NEXT:    add x8, x0, #1
+; CHECK-GI-NEXT:    add x9, x1, #1
+; CHECK-GI-NEXT:    add x10, x1, #2
+; CHECK-GI-NEXT:    ld1 { v0.b }[1], [x8]
+; CHECK-GI-NEXT:    ld1 { v1.b }[1], [x9]
+; CHECK-GI-NEXT:    add x9, x0, #2
+; CHECK-GI-NEXT:    ld1 { v0.b }[2], [x9]
+; CHECK-GI-NEXT:    ld1 { v1.b }[2], [x10]
+; CHECK-GI-NEXT:    add v0.8b, v0.8b, v1.8b
+; CHECK-GI-NEXT:    st1 { v0.b }[0], [x0]
+; CHECK-GI-NEXT:    st1 { v0.b }[1], [x8]
+; CHECK-GI-NEXT:    st1 { v0.b }[2], [x9]
 ; CHECK-GI-NEXT:    ret
 entry:
   %d = load <3 x i8>, ptr %p1
@@ -154,25 +149,20 @@ define void @v4i8(ptr %p1, ptr %p2) {
 ; CHECK-GI-NEXT:    fmov s0, w8
 ; CHECK-GI-NEXT:    fmov s1, w9
 ; CHECK-GI-NEXT:    mov b2, v0.b[1]
-; CHECK-GI-NEXT:    mov b3, v1.b[1]
-; CHECK-GI-NEXT:    mov b4, v0.b[2]
-; CHECK-GI-NEXT:    mov b5, v0.b[3]
-; CHECK-GI-NEXT:    fmov w8, s2
-; CHECK-GI-NEXT:    mov b2, v1.b[2]
-; CHECK-GI-NEXT:    fmov w9, s3
-; CHECK-GI-NEXT:    mov b3, v1.b[3]
-; CHECK-GI-NEXT:    mov v0.h[1], w8
-; CHECK-GI-NEXT:    mov v1.h[1], w9
-; CHECK-GI-NEXT:    fmov w8, s4
-; CHECK-GI-NEXT:    fmov w9, s2
-; CHECK-GI-NEXT:    mov v0.h[2], w8
-; CHECK-GI-NEXT:    mov v1.h[2], w9
-; CHECK-GI-NEXT:    fmov w8, s5
-; CHECK-GI-NEXT:    fmov w9, s3
-; CHECK-GI-NEXT:    mov v0.h[3], w8
-; CHECK-GI-NEXT:    mov v1.h[3], w9
-; CHECK-GI-NEXT:    add v0.4h, v0.4h, v1.4h
-; CHECK-GI-NEXT:    uzp1 v0.8b, v0.8b, v0.8b
+; CHECK-GI-NEXT:    mov v3.b[0], v0.b[0]
+; CHECK-GI-NEXT:    mov b4, v1.b[1]
+; CHECK-GI-NEXT:    mov v5.b[0], v1.b[0]
+; CHECK-GI-NEXT:    mov v3.b[1], v2.b[0]
+; CHECK-GI-NEXT:    mov b2, v0.b[2]
+; CHECK-GI-NEXT:    mov b0, v0.b[3]
+; CHECK-GI-NEXT:    mov v5.b[1], v4.b[0]
+; CHECK-GI-NEXT:    mov b4, v1.b[2]
+; CHECK-GI-NEXT:    mov b1, v1.b[3]
+; CHECK-GI-NEXT:    mov v3.b[2], v2.b[0]
+; CHECK-GI-NEXT:    mov v5.b[2], v4.b[0]
+; CHECK-GI-NEXT:    mov v3.b[3], v0.b[0]
+; CHECK-GI-NEXT:    mov v5.b[3], v1.b[0]
+; CHECK-GI-NEXT:    add v0.8b, v3.8b, v5.8b
 ; CHECK-GI-NEXT:    fmov w8, s0
 ; CHECK-GI-NEXT:    str w8, [x0]
 ; CHECK-GI-NEXT:    ret
@@ -238,16 +228,15 @@ define void @v2i16(ptr %p1, ptr %p2) {
 ;
 ; CHECK-GI-LABEL: v2i16:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    ld1 { v0.h }[0], [x0]
-; CHECK-GI-NEXT:    ld1 { v1.h }[0], [x1]
-; CHECK-GI-NEXT:    ldr h2, [x0, #2]
-; CHECK-GI-NEXT:    ldr h3, [x1, #2]
-; CHECK-GI-NEXT:    mov v0.s[1], v2.s[0]
-; CHECK-GI-NEXT:    mov v1.s[1], v3.s[0]
-; CHECK-GI-NEXT:    add v0.2s, v0.2s, v1.2s
-; CHECK-GI-NEXT:    mov s1, v0.s[1]
+; CHECK-GI-NEXT:    ldr h0, [x0]
+; CHECK-GI-NEXT:    ldr h1, [x1]
+; CHECK-GI-NEXT:    add x8, x0, #2
+; CHECK-GI-NEXT:    add x9, x1, #2
+; CHECK-GI-NEXT:    ld1 { v0.h }[1], [x8]
+; CHECK-GI-NEXT:    ld1 { v1.h }[1], [x9]
+; CHECK-GI-NEXT:    add v0.4h, v0.4h, v1.4h
 ; CHECK-GI-NEXT:    str h0, [x0]
-; CHECK-GI-NEXT:    str h1, [x0, #2]
+; CHECK-GI-NEXT:    st1 { v0.h }[1], [x8]
 ; CHECK-GI-NEXT:    ret
 entry:
   %d = load <2 x i16>, ptr %p1
diff --git a/llvm/test/CodeGen/AArch64/andorxor.ll b/llvm/test/CodeGen/AArch64/andorxor.ll
index f7df1092287bd..9fc9d902bf286 100644
--- a/llvm/test/CodeGen/AArch64/andorxor.ll
+++ b/llvm/test/CodeGen/AArch64/andorxor.ll
@@ -190,16 +190,15 @@ define void @and_v2i8(ptr %p1, ptr %p2) {
 ;
 ; CHECK-GI-LABEL: and_v2i8:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    ld1 { v0.b }[0], [x0]
-; CHECK-GI-NEXT:    ld1 { v1.b }[0], [x1]
-; CHECK-GI-NEXT:    ldr b2, [x0, #1]
-; CHECK-GI-NEXT:    ldr b3, [x1, #1]
-; CHECK-GI-NEXT:    mov v0.s[1], v2.s[0]
-; CHECK-GI-NEXT:    mov v1.s[1], v3.s[0]
+; CHECK-GI-NEXT:    ldr b0, [x0]
+; CHECK-GI-NEXT:    ldr b1, [x1]
+; CHECK-GI-NEXT:    add x8, x0, #1
+; CHECK-GI-NEXT:    add x9, x1, #1
+; CHECK-GI-NEXT:    ld1 { v0.b }[1], [x8]
+; CHECK-GI-NEXT:    ld1 { v1.b }[1], [x9]
 ; CHECK-GI-NEXT:    and v0.8b, v0.8b, v1.8b
-; CHECK-GI-NEXT:    mov s1, v0.s[1]
-; CHECK-GI-NEXT:    str b0, [x0]
-; CHECK-GI-NEXT:    str b1, [x0, #1]
+; CHECK-GI-NEXT:    st1 { v0.b }[0], [x0]
+; CHECK-GI-NEXT:    st1 { v0.b }[1], [x8]
 ; CHECK-GI-NEXT:    ret
 entry:
   %d = load <2 x i8>, ptr %p1
@@ -226,16 +225,15 @@ define void @or_v2i8(ptr %p1, ptr %p2) {
 ;
 ; CHECK-GI-LABEL: or_v2i8:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    ld1 { v0.b }[0], [x0]
-; CHECK-GI-NEXT:    ld1 { v1.b }[0], [x1]
-; CHECK-GI-NEXT:    ldr b2, [x0, #1]
-; CHECK-GI-NEXT:    ldr b3, [x1, #1]
-; CHECK-GI-NEXT:    mov v0.s[1], v2.s[0]
-; CHECK-GI-NEXT:    mov v1.s[1], v3.s[0]
+; CHECK-GI-NEXT:    ldr b0, [x0]
+; CHECK-GI-NEXT:    ldr b1, [x1]
+; CHECK-GI-NEXT:    add x8, x0, #1
+; CHECK-GI-NEXT:    add x9, x1, #1
+; CHECK-GI-NEXT:    ld1 { v0.b }[1], [x8]
+; CHECK-GI-NEXT:    ld1 { v1.b }[1], [x9]
 ; CHECK-GI-NEXT:    orr v0.8b, v0.8b, v1.8b
-; CHECK-GI-NEXT:    mov s1, v0.s[1]
-; CHECK-GI-NEXT:    str b0, [x0]
-; CHECK-GI-NEXT:    str b1, [x0, #1]
+; CHECK-GI-NEXT:    st1 { v0.b }[0], [x0]
+; CHECK-GI-NEXT:    st1 { v0.b }[1], [x8]
 ; CHECK-GI-NEXT:    ret
 entry:
   %d = load <2 x i8>, ptr %p1
@@ -262,16 +260,15 @@ define void @xor_v2i8(ptr %p1, ptr %p2) {
 ;
 ; CHECK-GI-LABEL: xor_v2i8:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    ld1 { v0.b }[0], [x0]
-; CHECK-GI-NEXT:    ld1 { v1.b }[0], [x1]
-; CHECK-GI-NEXT:    ldr b2, [x0, #1]
-; CHECK-GI-NEXT:    ldr b3, [x1, #1]
-; CHECK-GI-NEXT:    mov v0.s[1], v2.s[0]
-; CHECK-GI-NEXT:    mov v1.s[1], v3.s[0]
+; CHECK-GI-NEXT:    ldr b0, [x0]
+; CHECK-GI-NEXT:    ldr b1, [x1]
+; CHECK-GI-NEXT:    add x8, x0, #1
+; CHECK-GI-NEXT:    add x9, x1, #1
+; CHECK-GI-NEXT:    ld1 { v0.b }[1], [x8]
+; CHECK-GI-NEXT:    ld1 { v1.b }[1], [x9]
 ; CHECK-GI-NEXT:    eor v0.8b, v0.8b, v1.8b
-; CHECK-GI-NEXT:    mov s1, v0.s[1]
-; CHECK-GI-NEXT:    str b0, [x0]
-; CHECK-GI-NEXT:    str b1, [x0, #1]
+; CHECK-GI-NEXT:    st1 { v0.b }[0], [x0]
+; CHECK-GI-NEXT:    st1 { v0.b }[1], [x8]
 ; CHECK-GI-NEXT:    ret
 entry:
   %d = load <2 x i8>, ptr %p1
@@ -302,24 +299,20 @@ define void @and_v3i8(ptr %p1, ptr %p2) {
 ;
 ; CHECK-GI-LABEL: and_v3i8:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    ldrb w8, [x0]
-; CHECK-GI-NEXT:    ldrb w9, [x1]
-; CHECK-GI-NEXT:    ldrb w10, [x0, #1]
-; CHECK-GI-NEXT:    ldrb w11, [x1, #1]
-; CHECK-GI-NEXT:    fmov s0, w8
-; CHECK-GI-NEXT:    fmov s1, w9
-; CHECK-GI-NEXT:    ldrb w8, [x0, #2]
-; CHECK-GI-NEXT:    ldrb w9, [x1, #2]
-; CHECK-GI-NEXT:    mov v0.h[1], w10
-; CHECK-GI-NEXT:    mov v1.h[1], w11
-; CHECK-GI-NEXT:    mov v0.h[2], w8
-; CHECK-GI-NEXT:    mov v1.h[2], w9
+; CHECK-GI-NEXT:    ldr b0, [x0]
+; CHECK-GI-NEXT:    ldr b1, [x1]
+; CHECK-GI-NEXT:    add x8, x0, #1
+; CHECK-GI-NEXT:    add x9, x1, #1
+; CHECK-GI-NEXT:    add x10, x1, #2
+; CHECK-GI-NEXT:    ld1 { v0.b }[1], [x8]
+; CHECK-GI-NEXT:    ld1 { v1.b }[1], [x9]
+; CHECK-GI-NEXT:    add x9, x0, #2
+; CHECK-GI-NEXT:    ld1 { v0.b }[2], [x9]
+; CHECK-GI-NEXT:    ld1 { v1.b }[2], [x10]
 ; CHECK-GI-NEXT:    and v0.8b, v0.8b, v1.8b
-; CHECK-GI-NEXT:    mov h1, v0.h[1]
-; CHECK-GI-NEXT:    mov h2, v0.h[2]
-; CHECK-GI-NEXT:    str b0, [x0]
-; CHECK-GI-NEXT:    str b1, [x0, #1]
-; CHECK-GI-NEXT:    str b2, [x0, #2]
+; CHECK-GI-NEXT:    st1 { v0.b }[0], [x0]
+; CHECK-GI-NEXT:    st1 { v0.b }[1], [x8]
+; CHECK-GI-NEXT:    st1 { v0.b }[2], [x9]
 ; CHECK-GI-NEXT:    ret
 entry:
   %d = load <3 x i8>, ptr %p1
@@ -350,24 +343,20 @@ define void @or_v3i8(ptr %p1, ptr %p2) {
 ;
 ; CHECK-GI-LABEL: or_v3i8:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    ldrb w8, [x0]
-; CHECK-GI-NEXT:    ldrb w9, [x1]
-; CHECK-GI-NEXT:    ldrb w10, [x0, #1]
-; CHECK-GI-NEXT:    ldrb w11, [x1, #1]
-; CHECK-GI-NEXT:    fmov s0, w8
-; CHECK-GI-NEXT:    fmov s1, w9
-; CHECK-GI-NEXT:    ldrb w8, [x0, #2]
-; CHECK-GI-NEXT:    ldrb w9, [x1, #2]
-; CHECK-GI-NEXT:    mov v0.h[1], w10
-; CHECK-GI-NEXT:    mov v1.h[1], w11
-; CHECK-GI-NEXT:    mov v0.h[2], w8
-; CHECK-GI-NEXT:    mov v1.h[2], w9
+; CHECK-GI-NEXT:    ldr b0, [x0]
+; CHECK-GI-NEXT:    ldr b1, [x1]
+; CHECK-GI-NEXT:    add x8, x0, #1
+; CHECK-GI-NEXT:    add x9, x1, #1
+; CHECK-GI-NEXT:    add x10, x1, #2
+; CHECK-GI-NEXT:    ld1 { v0.b }[1], [x8]
+; CHECK-GI-NEXT:    ld1 { v1.b }[1], [x9]
+; CHECK-GI-NEXT:    add x9, x0, #2
+; CHECK-GI-NEXT:    ld1 { v0.b }[2], [x9]
+; CHECK-GI-NEXT:    ld1 { v1.b }[2], [x10]
 ; CHECK-GI-NEXT:    orr v0.8b, v0.8b, v1.8b
-; CHECK-GI-NEXT:    mov h1, v0.h[1]
-; CHECK-GI-NEXT:    mov h2, v0.h[2]
-; CHECK-GI-NEXT:    str b0, [x0]
-; CHECK-GI-NEXT:    str b1, [x0, #1]
-; CHECK-GI-NEXT:    str b2, [x0, #2]
+; CHECK-GI-NEXT:    st1 { v0.b }[0], [x0]
+; CHECK-GI-NEXT:    st1 { v0.b }[1], [x8]
+; CHECK-GI-NEXT:    st1 { v0.b }[2], [x9]
 ; CHECK-GI-NEXT:    ret
 entry:
   %d = load <3 x i8>, ptr %p1
@@ -398,24 +387,20 @@ define void @xor_v3i8(ptr %p1, ptr %p2) {
 ;
 ; CHECK-GI-LABEL: xor_v3i8:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    ldrb w8, [x0]
-; CHECK-GI-NEXT:    ldrb w9, [x1]
-; CHECK-GI-NEXT:    ldrb w10, [x0, #1]
-; CHECK-GI-NEXT:    ldrb w11, [x1, #1]
-; CHECK-GI-NEXT:    fmov s0, w8
-; CHECK-GI-NEXT:    fmov s1, w9
-; CHECK-GI-NEXT:    ldrb w8, [x0, #2]
-; CHECK-GI-NEXT:    ldrb w9, [x1, #2]
-; CHECK-GI-NEXT:    mov v0.h[1], w10
-; CHECK-GI-NEXT:    mov v1.h[1], w11
-; CHECK-GI-NEXT:    mov v0.h[2], w8
-; CHECK-GI-NEXT:    mov v1.h[2], w9
+; CHECK-GI-NEXT:    ldr b0, [x0]
+; CHECK-GI-NEXT:    ldr b1, [x1]
+; CHECK-GI-NEXT:    add x8, x0, #1
+; CHECK-GI-NEXT:    add x9, x1, #1
+; CHECK-GI-NEXT:    add x10, x1, #2
+; CHECK-GI-NEXT:    ld1 { v0.b }[1], [x8]
+; CHECK-GI-NEXT:    ld1 { v1.b }[1], [x9]
+; CHECK-GI-NEXT:    add x9, x0, #2
+; CHECK-GI-NEXT:    ld1 { v0.b }[2], [x9]
+; CHECK-GI-NEXT:    ld1 { v1.b }[2], [x10]
 ; CHECK-GI-NEXT:    eor v0.8b, v0.8b, v1.8b
-; CHECK-GI-NEXT:    mov h1, v0.h[1]
-; CHECK-GI-NEXT:    mov h2, v0.h[2]
-; CHECK-GI-NEXT:    str b0, [x0]
-; CHECK-GI-NEXT:    str b1, [x0, #1]
-; CHECK-GI-NEXT:    str b2, [x0, #2]
+; CHECK-GI-NEXT:    st1 { v0.b }[0], [x0]
+; CHECK-GI-NEXT:    st1 { v0.b }[1], [x8]
+; CHECK-GI-NEXT:    st1 { v0.b }[2], [x9]
 ; CHECK-GI-NEXT:    ret
 entry:
   %d = load <3 x i8>, ptr %p1
@@ -444,25 +429,20 @@ define void @and_v4i8(ptr %p1, ptr %p2) {
 ; CHECK-GI-NEXT:    fmov s0, w8
 ; CHECK-GI-NEXT:    fmov s1, w9
 ; CHECK-GI-NEXT:    mov b2, v0.b[1]
-; CHECK-GI-NEXT:    mov b3, v1.b[1]
-; CHECK-GI-NEXT:    mov b4, v0.b[2]
-; CHECK-GI-NEXT:    mov b5, v0.b[3]
-; CHECK-GI-NEXT:    fmov w8, s2
-; CHECK-GI-NEXT:    mov b2, v1.b[2]
-; CHECK-GI-NEXT:    fmov w9, s3
-; CHECK-GI-NEXT:    mov b3, v1.b[3]
-; CHECK-GI-NEXT:    mov v0.h[1], w8
-; CHECK-GI-NEXT:    mov v1.h[1], w9
-; CHECK-GI-NEXT:    fmov w8, s4
-; CHECK-GI-NEXT:    fmov w9, s2
-; CHECK-GI-NEXT:    mov v0.h[2], w8
-; CHECK-GI-NEXT:    mov v1.h[2], w9
-; CHECK-GI-NEXT:    fmov w8, s5
-; CHECK-GI-NEXT:    fmov w9, s3
-; CHECK-GI-NEXT:    mov v0.h[3], w8
-; CHECK-GI-NEXT:    mov v1.h[3], w9
-; CHECK-GI-NEXT:    and v0.8b, v0.8b, v1.8b
-; CHECK-GI-NEXT:    uzp1 v0.8b, v0.8b, v0.8b
+; CHECK-GI-NEXT:    mov v3.b[0], v0.b[0]
+; CHECK-GI-NEXT:    mov b4, v1.b[1]
+; CHECK-GI-NEXT:    mov v5.b[0], v1.b[0]
+; CHECK-GI-NEXT:    mov v3.b[1], v2.b[0]
+; CHECK-GI-NEXT:    mov b2, v0.b[2]
+; CHECK-GI-NEXT:    mov b0, v0.b[3]
+; CHECK-GI-NEXT:    mov v5.b[1], v4.b[0]
+; CHECK-GI-NEXT:    mov b4, v1.b[2]
+; CHECK-GI-NEXT:    mov b1, v1.b[3]
+; CHECK-GI-NEXT:    mov v3.b[2], v2.b[0]
+; CHECK-GI-NEXT:    mov v5.b[2], v4.b[0]
+; CHECK-GI-NEXT:    mov v3.b[3], v0.b[0]
+; CHECK-GI-NEXT:    mov v5.b[3], v1.b[0]
+; CHECK-GI-NEXT:    and v0.8b, v3.8b, v5.8b
 ; CHECK-GI-NEXT:    fmov w8, s0
 ; CHECK-GI-NEXT:    str w8, [x0]
 ; CHECK-GI-NEXT:    ret
@@ -493,25 +473,20 @@ define void @or_v4i8(ptr %p1, ptr %p2) {
 ; CHECK-GI-NEXT:    fmov s0, w8
 ; CHECK-GI-NEXT:    fmov s1, w9
 ; CHECK-GI-NEXT:    mov b2, v0.b[1]
-; CHECK-GI-NEXT:    mov b3, v1.b[1]
-; CHECK-GI-NEXT:    mov b4, v0.b[2]
-; CHECK-GI-NEXT:    mov b5, v0.b[3]
-; CHECK-GI-NEXT:    fmov w8, s2
-; CHECK-GI-NEXT:    mov b2, v1.b[2]
-; CHECK-GI-NEXT:    fmov w9, s3
-; CHECK-GI-NEXT:    mov b3, v1.b[3]
-; CHECK-GI-NEXT:    mov v0.h[1], w8
-; CHECK-GI-NEXT:    mov v1.h[1], w9
-; CHECK-GI-NEXT:    fmov w8, s4
-; CHECK-GI-NEXT:    fmov w9, s2
-; CHECK-GI-NEXT:    mov v0.h[2], w8
-; CHECK-GI-NEXT:    mov v1.h[2], w9
-; CHECK-GI-NEXT:    fmov w8, s5
-; CHECK-GI-NEXT:    fmov w9, s3
-; CHECK-GI-NEXT:    mov v0.h[3], w8
-; CHECK-GI-NEXT:    mov v1.h[3], w9
-; CHECK-GI-NEXT:    orr v0.8b, v0.8b, v1.8b
-; CHECK-GI-NEXT:    uzp1 v0.8b, v0.8b, v0.8b
+; CHECK-GI-NEXT:    mov v3.b[0], v0.b[0]
+; CHECK-GI-NEXT:    mov b4, v1.b[1]
+; CHECK-GI-NEXT:    mov v5.b[0], v1.b[0]
+; CHECK-GI-NEXT:    mov v3.b[1], v2.b[0]
+; CHECK-GI-NEXT:    mov b2, v0.b[2]
+; CHECK-GI-NEXT:    mov b0, v0.b[3]
+; CHECK-GI-NEXT:    mov v5.b[1], v4.b[0]
+; CHECK-GI-NEXT:    mov b4, v1.b[2]
+; CHECK-GI-NEXT:    mov b1, v1.b[3]
+; CHECK-GI-NEXT:    mov v3.b[2], v2.b[0]
+; CHECK-GI-NEXT:    mov v5.b[2], v4.b[0]
+; CHECK-GI-NEXT:    mov v3.b[3], v0.b[0]
+; CHECK-GI-NEXT:    mov v5.b[3], v1.b[0]
+; CHECK-GI-NEXT:    orr v0.8b, v3.8b, v5.8b
 ; CHECK-GI-NEXT:    fmov w8, s0
 ; CHECK-GI-NEXT:    str w8, [x0]
 ; CHECK-GI-NEXT:    ret
@@ -542,25 +517,20 @@ define void @xor_v4i8(ptr %p1, ptr %p2) {
 ; CHECK-GI-NEXT:    fmov s0, w8
 ; CHECK-GI-NEXT:    fmov s1, w9
 ; CHECK-GI-NEXT:    mov b2, v0.b[1]
-; CHECK-GI-NEXT:    mov b3, v1.b[1]
-; CHECK-GI-NEXT:    mov b4, v0.b[2]
-; CHECK-GI-NEXT:    mov b5, v0.b[3]
-; CHECK-GI-NEXT:    fmov w8, s2
-; CHECK-GI-NEXT:    mov b2, v1.b[2]
-; CHECK-GI-NEXT:    fmov w9, s3
-; CHECK-GI-NEXT:    mov b3, v1.b[3]
-; CHECK-GI-NEXT:    mov v0.h[1], w8
-; CHECK-GI-NEXT:    mov v1.h[1], w9
-; CHECK-GI-NEXT:    fmov w8, s4
-; CHECK-GI-NEXT:    fmov w9, s2
-; CHECK-GI-NEXT:    mov v0.h[2], w8
-; CHECK-GI-NEXT:    mov v1.h[2], w9
-; CHECK-GI-NEXT:    fmov w8, s5
-; CHECK-GI-NEXT:    fmov w9, s3
-; CHECK-GI-NEXT:    mov v0.h[3], w8
-; CHECK-GI-NEXT:    mov v1.h[3], w9
-; CHECK-GI-NEXT:    eor v0.8b, v0.8b, v1.8b
-; CHECK-GI-NEXT:    uzp1 v0.8b, v0.8b, v0.8b
+; CHECK-GI-NEXT:    mov v3.b[0], v0.b[0]
+;
CHECK-GI-NEXT: mov b4, v1.b[1] +; CHECK-GI-NEXT: mov v5.b[0], v1.b[0] +; CHECK-GI-NEXT: mov v3.b[1], v2.b[0] +; CHECK-GI-NEXT: mov b2, v0.b[2] +; CHECK-GI-NEXT: mov b0, v0.b[3] +; CHECK-GI-NEXT: mov v5.b[1], v4.b[0] +; CHECK-GI-NEXT: mov b4, v1.b[2] +; CHECK-GI-NEXT: mov b1, v1.b[3] +; CHECK-GI-NEXT: mov v3.b[2], v2.b[0] +; CHECK-GI-NEXT: mov v5.b[2], v4.b[0] +; CHECK-GI-NEXT: mov v3.b[3], v0.b[0] +; CHECK-GI-NEXT: mov v5.b[3], v1.b[0] +; CHECK-GI-NEXT: eor v0.8b, v3.8b, v5.8b ; CHECK-GI-NEXT: fmov w8, s0 ; CHECK-GI-NEXT: str w8, [x0] ; CHECK-GI-NEXT: ret @@ -700,16 +670,15 @@ define void @and_v2i16(ptr %p1, ptr %p2) { ; ; CHECK-GI-LABEL: and_v2i16: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: ld1 { v0.h }[0], [x0] -; CHECK-GI-NEXT: ld1 { v1.h }[0], [x1] -; CHECK-GI-NEXT: ldr h2, [x0, #2] -; CHECK-GI-NEXT: ldr h3, [x1, #2] -; CHECK-GI-NEXT: mov v0.s[1], v2.s[0] -; CHECK-GI-NEXT: mov v1.s[1], v3.s[0] +; CHECK-GI-NEXT: ldr h0, [x0] +; CHECK-GI-NEXT: ldr h1, [x1] +; CHECK-GI-NEXT: add x8, x0, #2 +; CHECK-GI-NEXT: add x9, x1, #2 +; CHECK-GI-NEXT: ld1 { v0.h }[1], [x8] +; CHECK-GI-NEXT: ld1 { v1.h }[1], [x9] ; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b -; CHECK-GI-NEXT: mov s1, v0.s[1] ; CHECK-GI-NEXT: str h0, [x0] -; CHECK-GI-NEXT: str h1, [x0, #2] +; CHECK-GI-NEXT: st1 { v0.h }[1], [x8] ; CHECK-GI-NEXT: ret entry: %d = load <2 x i16>, ptr %p1 @@ -736,16 +705,15 @@ define void @or_v2i16(ptr %p1, ptr %p2) { ; ; CHECK-GI-LABEL: or_v2i16: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: ld1 { v0.h }[0], [x0] -; CHECK-GI-NEXT: ld1 { v1.h }[0], [x1] -; CHECK-GI-NEXT: ldr h2, [x0, #2] -; CHECK-GI-NEXT: ldr h3, [x1, #2] -; CHECK-GI-NEXT: mov v0.s[1], v2.s[0] -; CHECK-GI-NEXT: mov v1.s[1], v3.s[0] +; CHECK-GI-NEXT: ldr h0, [x0] +; CHECK-GI-NEXT: ldr h1, [x1] +; CHECK-GI-NEXT: add x8, x0, #2 +; CHECK-GI-NEXT: add x9, x1, #2 +; CHECK-GI-NEXT: ld1 { v0.h }[1], [x8] +; CHECK-GI-NEXT: ld1 { v1.h }[1], [x9] ; CHECK-GI-NEXT: orr v0.8b, v0.8b, v1.8b -; CHECK-GI-NEXT: mov s1, v0.s[1] ; CHECK-GI-NEXT: str h0, [x0] -; CHECK-GI-NEXT: str h1, [x0, #2] +; CHECK-GI-NEXT: st1 { v0.h }[1], [x8] ; CHECK-GI-NEXT: ret entry: %d = load <2 x i16>, ptr %p1 @@ -772,16 +740,15 @@ define void @xor_v2i16(ptr %p1, ptr %p2) { ; ; CHECK-GI-LABEL: xor_v2i16: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: ld1 { v0.h }[0], [x0] -; CHECK-GI-NEXT: ld1 { v1.h }[0], [x1] -; CHECK-GI-NEXT: ldr h2, [x0, #2] -; CHECK-GI-NEXT: ldr h3, [x1, #2] -; CHECK-GI-NEXT: mov v0.s[1], v2.s[0] -; CHECK-GI-NEXT: mov v1.s[1], v3.s[0] +; CHECK-GI-NEXT: ldr h0, [x0] +; CHECK-GI-NEXT: ldr h1, [x1] +; CHECK-GI-NEXT: add x8, x0, #2 +; CHECK-GI-NEXT: add x9, x1, #2 +; CHECK-GI-NEXT: ld1 { v0.h }[1], [x8] +; CHECK-GI-NEXT: ld1 { v1.h }[1], [x9] ; CHECK-GI-NEXT: eor v0.8b, v0.8b, v1.8b -; CHECK-GI-NEXT: mov s1, v0.s[1] ; CHECK-GI-NEXT: str h0, [x0] -; CHECK-GI-NEXT: str h1, [x0, #2] +; CHECK-GI-NEXT: st1 { v0.h }[1], [x8] ; CHECK-GI-NEXT: ret entry: %d = load <2 x i16>, ptr %p1 diff --git a/llvm/test/CodeGen/AArch64/bitcast.ll b/llvm/test/CodeGen/AArch64/bitcast.ll index d2f72ecacc86c..5d14f7e2d1c1f 100644 --- a/llvm/test/CodeGen/AArch64/bitcast.ll +++ b/llvm/test/CodeGen/AArch64/bitcast.ll @@ -59,8 +59,9 @@ define i32 @bitcast_v4i8_i32(<4 x i8> %a, <4 x i8> %b){ ; ; CHECK-GI-LABEL: bitcast_v4i8_i32: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: add v0.4h, v0.4h, v1.4h ; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b +; CHECK-GI-NEXT: uzp1 v1.8b, v1.8b, v0.8b +; CHECK-GI-NEXT: add v0.8b, v0.8b, v1.8b ; CHECK-GI-NEXT: fmov w0, s0 ; CHECK-GI-NEXT: ret %c = add <4 x i8> %a, 
%b @@ -111,8 +112,9 @@ define i32 @bitcast_v2i16_i32(<2 x i16> %a, <2 x i16> %b){ ; ; CHECK-GI-LABEL: bitcast_v2i16_i32: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: add v0.2s, v0.2s, v1.2s ; CHECK-GI-NEXT: uzp1 v0.4h, v0.4h, v0.4h +; CHECK-GI-NEXT: uzp1 v1.4h, v1.4h, v0.4h +; CHECK-GI-NEXT: add v0.4h, v0.4h, v1.4h ; CHECK-GI-NEXT: fmov w0, s0 ; CHECK-GI-NEXT: ret %c = add <2 x i16> %a, %b @@ -409,8 +411,9 @@ define <4 x i8> @bitcast_v2i16_v4i8(<2 x i16> %a, <2 x i16> %b){ ; ; CHECK-GI-LABEL: bitcast_v2i16_v4i8: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: add v0.2s, v0.2s, v1.2s ; CHECK-GI-NEXT: uzp1 v0.4h, v0.4h, v0.4h +; CHECK-GI-NEXT: uzp1 v1.4h, v1.4h, v0.4h +; CHECK-GI-NEXT: add v0.4h, v0.4h, v1.4h ; CHECK-GI-NEXT: mov b1, v0.b[1] ; CHECK-GI-NEXT: mov b2, v0.b[2] ; CHECK-GI-NEXT: fmov w8, s1 @@ -445,8 +448,9 @@ define <2 x i16> @bitcast_v4i8_v2i16(<4 x i8> %a, <4 x i8> %b){ ; ; CHECK-GI-LABEL: bitcast_v4i8_v2i16: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: add v0.4h, v0.4h, v1.4h ; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b +; CHECK-GI-NEXT: uzp1 v1.8b, v1.8b, v0.8b +; CHECK-GI-NEXT: add v0.8b, v0.8b, v1.8b ; CHECK-GI-NEXT: mov h1, v0.h[1] ; CHECK-GI-NEXT: fmov w8, s1 ; CHECK-GI-NEXT: mov v0.s[1], w8 diff --git a/llvm/test/CodeGen/AArch64/cttz.ll b/llvm/test/CodeGen/AArch64/cttz.ll index 93ac97e20dabd..ecafbb6a00171 100644 --- a/llvm/test/CodeGen/AArch64/cttz.ll +++ b/llvm/test/CodeGen/AArch64/cttz.ll @@ -23,18 +23,18 @@ define void @v2i8(ptr %p1) { ; ; CHECK-GI-LABEL: v2i8: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: ld1 { v1.b }[0], [x0] -; CHECK-GI-NEXT: ldr b2, [x0, #1] -; CHECK-GI-NEXT: movi d0, #0xffffffffffffffff -; CHECK-GI-NEXT: mov v1.s[1], v2.s[0] -; CHECK-GI-NEXT: add v0.2s, v1.2s, v0.2s -; CHECK-GI-NEXT: bic v0.8b, v0.8b, v1.8b -; CHECK-GI-NEXT: mov w8, v0.s[1] -; CHECK-GI-NEXT: mov v0.b[1], w8 -; CHECK-GI-NEXT: add x8, x0, #1 +; CHECK-GI-NEXT: mov w8, #255 // =0xff +; CHECK-GI-NEXT: ldr b0, [x0] +; CHECK-GI-NEXT: add x9, x0, #1 +; CHECK-GI-NEXT: fmov s1, w8 +; CHECK-GI-NEXT: ld1 { v0.b }[1], [x9] +; CHECK-GI-NEXT: mov v1.b[1], w8 +; CHECK-GI-NEXT: eor v2.8b, v0.8b, v1.8b +; CHECK-GI-NEXT: add v0.8b, v0.8b, v1.8b +; CHECK-GI-NEXT: and v0.8b, v2.8b, v0.8b ; CHECK-GI-NEXT: cnt v0.8b, v0.8b ; CHECK-GI-NEXT: st1 { v0.b }[0], [x0] -; CHECK-GI-NEXT: st1 { v0.b }[1], [x8] +; CHECK-GI-NEXT: st1 { v0.b }[1], [x9] ; CHECK-GI-NEXT: ret entry: %d = load <2 x i8>, ptr %p1 @@ -68,26 +68,22 @@ define void @v3i8(ptr %p1) { ; ; CHECK-GI-LABEL: v3i8: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: ldrb w9, [x0] -; CHECK-GI-NEXT: mov w8, #65535 // =0xffff -; CHECK-GI-NEXT: ldrb w10, [x0, #1] +; CHECK-GI-NEXT: mov w8, #255 // =0xff +; CHECK-GI-NEXT: ldr b1, [x0] +; CHECK-GI-NEXT: add x9, x0, #1 ; CHECK-GI-NEXT: fmov s0, w8 -; CHECK-GI-NEXT: fmov s1, w9 -; CHECK-GI-NEXT: ldrb w9, [x0, #2] -; CHECK-GI-NEXT: mov v0.h[1], w8 -; CHECK-GI-NEXT: mov v1.h[1], w10 -; CHECK-GI-NEXT: mov v0.h[2], w8 -; CHECK-GI-NEXT: add x8, x0, #1 -; CHECK-GI-NEXT: mov v1.h[2], w9 -; CHECK-GI-NEXT: add x9, x0, #2 +; CHECK-GI-NEXT: add x10, x0, #2 +; CHECK-GI-NEXT: ld1 { v1.b }[1], [x9] +; CHECK-GI-NEXT: mov v0.b[1], w8 +; CHECK-GI-NEXT: ld1 { v1.b }[2], [x10] +; CHECK-GI-NEXT: mov v0.b[2], w8 ; CHECK-GI-NEXT: eor v2.8b, v1.8b, v0.8b -; CHECK-GI-NEXT: add v0.4h, v1.4h, v0.4h +; CHECK-GI-NEXT: add v0.8b, v1.8b, v0.8b ; CHECK-GI-NEXT: and v0.8b, v2.8b, v0.8b -; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b ; CHECK-GI-NEXT: cnt v0.8b, v0.8b ; CHECK-GI-NEXT: st1 { v0.b }[0], [x0] -; CHECK-GI-NEXT: st1 { v0.b }[1], [x8] -; CHECK-GI-NEXT: 
st1 { v0.b }[2], [x9] +; CHECK-GI-NEXT: st1 { v0.b }[1], [x9] +; CHECK-GI-NEXT: st1 { v0.b }[2], [x10] ; CHECK-GI-NEXT: ret entry: %d = load <3 x i8>, ptr %p1 @@ -116,24 +112,21 @@ define void @v4i8(ptr %p1) { ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: ldr w9, [x0] ; CHECK-GI-NEXT: mov w8, #255 // =0xff +; CHECK-GI-NEXT: fmov s3, w8 ; CHECK-GI-NEXT: fmov s0, w9 +; CHECK-GI-NEXT: mov v3.b[1], w8 ; CHECK-GI-NEXT: mov b1, v0.b[1] -; CHECK-GI-NEXT: mov b2, v0.b[2] -; CHECK-GI-NEXT: mov b3, v0.b[3] -; CHECK-GI-NEXT: fmov w9, s1 -; CHECK-GI-NEXT: fmov s1, w8 -; CHECK-GI-NEXT: mov v0.h[1], w9 -; CHECK-GI-NEXT: mov v1.h[1], w8 -; CHECK-GI-NEXT: fmov w9, s2 -; CHECK-GI-NEXT: mov v0.h[2], w9 -; CHECK-GI-NEXT: mov v1.h[2], w8 -; CHECK-GI-NEXT: fmov w9, s3 -; CHECK-GI-NEXT: mov v0.h[3], w9 -; CHECK-GI-NEXT: mov v1.h[3], w8 -; CHECK-GI-NEXT: eor v2.8b, v0.8b, v1.8b -; CHECK-GI-NEXT: add v0.4h, v0.4h, v1.4h -; CHECK-GI-NEXT: and v0.8b, v2.8b, v0.8b -; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b +; CHECK-GI-NEXT: mov v2.b[0], v0.b[0] +; CHECK-GI-NEXT: mov v3.b[2], w8 +; CHECK-GI-NEXT: mov v2.b[1], v1.b[0] +; CHECK-GI-NEXT: mov b1, v0.b[2] +; CHECK-GI-NEXT: mov b0, v0.b[3] +; CHECK-GI-NEXT: mov v3.b[3], w8 +; CHECK-GI-NEXT: mov v2.b[2], v1.b[0] +; CHECK-GI-NEXT: mov v2.b[3], v0.b[0] +; CHECK-GI-NEXT: eor v0.8b, v2.8b, v3.8b +; CHECK-GI-NEXT: add v1.8b, v2.8b, v3.8b +; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b ; CHECK-GI-NEXT: cnt v0.8b, v0.8b ; CHECK-GI-NEXT: fmov w8, s0 ; CHECK-GI-NEXT: str w8, [x0] @@ -235,20 +228,18 @@ define void @v2i16(ptr %p1) { ; CHECK-GI-LABEL: v2i16: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: mov w8, #65535 // =0xffff -; CHECK-GI-NEXT: ld1 { v0.h }[0], [x0] -; CHECK-GI-NEXT: ldr h1, [x0, #2] -; CHECK-GI-NEXT: fmov s2, w8 -; CHECK-GI-NEXT: mov v0.s[1], v1.s[0] -; CHECK-GI-NEXT: mov v2.s[1], w8 -; CHECK-GI-NEXT: add x8, x0, #2 -; CHECK-GI-NEXT: eor v1.8b, v0.8b, v2.8b -; CHECK-GI-NEXT: add v0.2s, v0.2s, v2.2s -; CHECK-GI-NEXT: and v0.8b, v1.8b, v0.8b -; CHECK-GI-NEXT: uzp1 v0.4h, v0.4h, v0.4h +; CHECK-GI-NEXT: ldr h0, [x0] +; CHECK-GI-NEXT: add x9, x0, #2 +; CHECK-GI-NEXT: fmov s1, w8 +; CHECK-GI-NEXT: ld1 { v0.h }[1], [x9] +; CHECK-GI-NEXT: mov v1.h[1], w8 +; CHECK-GI-NEXT: eor v2.8b, v0.8b, v1.8b +; CHECK-GI-NEXT: add v0.4h, v0.4h, v1.4h +; CHECK-GI-NEXT: and v0.8b, v2.8b, v0.8b ; CHECK-GI-NEXT: cnt v0.8b, v0.8b ; CHECK-GI-NEXT: uaddlp v0.4h, v0.8b ; CHECK-GI-NEXT: str h0, [x0] -; CHECK-GI-NEXT: st1 { v0.h }[1], [x8] +; CHECK-GI-NEXT: st1 { v0.h }[1], [x9] ; CHECK-GI-NEXT: ret entry: %d = load <2 x i16>, ptr %p1 diff --git a/llvm/test/CodeGen/AArch64/freeze.ll b/llvm/test/CodeGen/AArch64/freeze.ll index 0c56e1b66e81f..7082b75740b50 100644 --- a/llvm/test/CodeGen/AArch64/freeze.ll +++ b/llvm/test/CodeGen/AArch64/freeze.ll @@ -58,15 +58,14 @@ define <3 x i8> @freeze_v3i8() { ; CHECK-GI-LABEL: freeze_v3i8: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: mov b0, v0.b[1] -; CHECK-GI-NEXT: mov b1, v0.b[2] -; CHECK-GI-NEXT: fmov w8, s0 -; CHECK-GI-NEXT: mov v0.h[1], w8 -; CHECK-GI-NEXT: fmov w8, s1 -; CHECK-GI-NEXT: mov v0.h[2], w8 -; CHECK-GI-NEXT: add v0.4h, v0.4h, v0.4h -; CHECK-GI-NEXT: umov w0, v0.h[0] -; CHECK-GI-NEXT: umov w1, v0.h[1] -; CHECK-GI-NEXT: umov w2, v0.h[2] +; CHECK-GI-NEXT: mov v1.b[0], v0.b[0] +; CHECK-GI-NEXT: mov b2, v0.b[2] +; CHECK-GI-NEXT: mov v1.b[1], v0.b[0] +; CHECK-GI-NEXT: mov v1.b[2], v2.b[0] +; CHECK-GI-NEXT: add v0.8b, v1.8b, v1.8b +; CHECK-GI-NEXT: umov w0, v0.b[0] +; CHECK-GI-NEXT: umov w1, v0.b[1] +; CHECK-GI-NEXT: umov w2, v0.b[2] ; CHECK-GI-NEXT: ret 
%y1 = freeze <3 x i8> undef %t1 = add <3 x i8> %y1, %y1 @@ -82,15 +81,15 @@ define <4 x i8> @freeze_v4i8() { ; CHECK-GI-LABEL: freeze_v4i8: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: mov b0, v0.b[1] -; CHECK-GI-NEXT: fmov w8, s0 -; CHECK-GI-NEXT: mov b1, v0.b[2] -; CHECK-GI-NEXT: mov v0.h[1], w8 -; CHECK-GI-NEXT: fmov w8, s1 -; CHECK-GI-NEXT: mov b2, v0.b[3] -; CHECK-GI-NEXT: mov v0.h[2], w8 -; CHECK-GI-NEXT: fmov w8, s2 -; CHECK-GI-NEXT: mov v0.h[3], w8 -; CHECK-GI-NEXT: add v0.4h, v0.4h, v0.4h +; CHECK-GI-NEXT: mov v1.b[0], v0.b[0] +; CHECK-GI-NEXT: mov b2, v0.b[2] +; CHECK-GI-NEXT: mov v1.b[1], v0.b[0] +; CHECK-GI-NEXT: mov b0, v0.b[3] +; CHECK-GI-NEXT: mov v1.b[2], v2.b[0] +; CHECK-GI-NEXT: mov v1.b[3], v0.b[0] +; CHECK-GI-NEXT: add v0.8b, v1.8b, v1.8b +; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret %y1 = freeze <4 x i8> undef %t1 = add <4 x i8> %y1, %y1 @@ -137,9 +136,10 @@ define <2 x i16> @freeze_v2i16() { ; CHECK-GI-LABEL: freeze_v2i16: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: mov h0, v0.h[1] -; CHECK-GI-NEXT: fmov w8, s0 -; CHECK-GI-NEXT: mov v0.s[1], w8 -; CHECK-GI-NEXT: add v0.2s, v0.2s, v0.2s +; CHECK-GI-NEXT: mov v0.h[1], v0.h[0] +; CHECK-GI-NEXT: add v0.4h, v0.4h, v0.4h +; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret %y1 = freeze <2 x i16> undef %t1 = add <2 x i16> %y1, %y1 diff --git a/llvm/test/CodeGen/AArch64/itofp.ll b/llvm/test/CodeGen/AArch64/itofp.ll index e8194b9bd9b27..c72b644ca2fc9 100644 --- a/llvm/test/CodeGen/AArch64/itofp.ll +++ b/llvm/test/CodeGen/AArch64/itofp.ll @@ -8182,9 +8182,11 @@ define <2 x half> @utofp_v2i8_v2f16(<2 x i8> %a) { ; ; CHECK-GI-FP16-LABEL: utofp_v2i8_v2f16: ; CHECK-GI-FP16: // %bb.0: // %entry -; CHECK-GI-FP16-NEXT: movi d1, #0x0000ff000000ff -; CHECK-GI-FP16-NEXT: and v0.8b, v0.8b, v1.8b +; CHECK-GI-FP16-NEXT: mov w8, #255 // =0xff ; CHECK-GI-FP16-NEXT: uzp1 v0.4h, v0.4h, v0.4h +; CHECK-GI-FP16-NEXT: fmov s1, w8 +; CHECK-GI-FP16-NEXT: mov v1.h[1], w8 +; CHECK-GI-FP16-NEXT: and v0.8b, v0.8b, v1.8b ; CHECK-GI-FP16-NEXT: ucvtf v0.4h, v0.4h ; CHECK-GI-FP16-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/mul.ll b/llvm/test/CodeGen/AArch64/mul.ll index 1558043f7f40a..d5c50ea7b7b20 100644 --- a/llvm/test/CodeGen/AArch64/mul.ll +++ b/llvm/test/CodeGen/AArch64/mul.ll @@ -82,16 +82,15 @@ define void @v2i8(ptr %p1, ptr %p2) { ; ; CHECK-GI-LABEL: v2i8: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: ld1 { v0.b }[0], [x0] -; CHECK-GI-NEXT: ld1 { v1.b }[0], [x1] -; CHECK-GI-NEXT: ldr b2, [x0, #1] -; CHECK-GI-NEXT: ldr b3, [x1, #1] -; CHECK-GI-NEXT: mov v0.s[1], v2.s[0] -; CHECK-GI-NEXT: mov v1.s[1], v3.s[0] -; CHECK-GI-NEXT: mul v0.2s, v0.2s, v1.2s -; CHECK-GI-NEXT: mov s1, v0.s[1] -; CHECK-GI-NEXT: str b0, [x0] -; CHECK-GI-NEXT: str b1, [x0, #1] +; CHECK-GI-NEXT: ldr b0, [x0] +; CHECK-GI-NEXT: ldr b1, [x1] +; CHECK-GI-NEXT: add x8, x0, #1 +; CHECK-GI-NEXT: add x9, x1, #1 +; CHECK-GI-NEXT: ld1 { v0.b }[1], [x8] +; CHECK-GI-NEXT: ld1 { v1.b }[1], [x9] +; CHECK-GI-NEXT: mul v0.8b, v0.8b, v1.8b +; CHECK-GI-NEXT: st1 { v0.b }[0], [x0] +; CHECK-GI-NEXT: st1 { v0.b }[1], [x8] ; CHECK-GI-NEXT: ret entry: %d = load <2 x i8>, ptr %p1 @@ -122,24 +121,20 @@ define void @v3i8(ptr %p1, ptr %p2) { ; ; CHECK-GI-LABEL: v3i8: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: ldrb w8, [x0] -; CHECK-GI-NEXT: ldrb w9, [x1] -; CHECK-GI-NEXT: ldrb w10, [x0, #1] -; CHECK-GI-NEXT: ldrb w11, [x1, #1] -; CHECK-GI-NEXT: fmov s0, w8 -; 
CHECK-GI-NEXT: fmov s1, w9 -; CHECK-GI-NEXT: ldrb w8, [x0, #2] -; CHECK-GI-NEXT: ldrb w9, [x1, #2] -; CHECK-GI-NEXT: mov v0.h[1], w10 -; CHECK-GI-NEXT: mov v1.h[1], w11 -; CHECK-GI-NEXT: mov v0.h[2], w8 -; CHECK-GI-NEXT: mov v1.h[2], w9 -; CHECK-GI-NEXT: mul v0.4h, v0.4h, v1.4h -; CHECK-GI-NEXT: mov h1, v0.h[1] -; CHECK-GI-NEXT: mov h2, v0.h[2] -; CHECK-GI-NEXT: str b0, [x0] -; CHECK-GI-NEXT: str b1, [x0, #1] -; CHECK-GI-NEXT: str b2, [x0, #2] +; CHECK-GI-NEXT: ldr b0, [x0] +; CHECK-GI-NEXT: ldr b1, [x1] +; CHECK-GI-NEXT: add x8, x0, #1 +; CHECK-GI-NEXT: add x9, x1, #1 +; CHECK-GI-NEXT: add x10, x1, #2 +; CHECK-GI-NEXT: ld1 { v0.b }[1], [x8] +; CHECK-GI-NEXT: ld1 { v1.b }[1], [x9] +; CHECK-GI-NEXT: add x9, x0, #2 +; CHECK-GI-NEXT: ld1 { v0.b }[2], [x9] +; CHECK-GI-NEXT: ld1 { v1.b }[2], [x10] +; CHECK-GI-NEXT: mul v0.8b, v0.8b, v1.8b +; CHECK-GI-NEXT: st1 { v0.b }[0], [x0] +; CHECK-GI-NEXT: st1 { v0.b }[1], [x8] +; CHECK-GI-NEXT: st1 { v0.b }[2], [x9] ; CHECK-GI-NEXT: ret entry: %d = load <3 x i8>, ptr %p1 @@ -166,25 +161,20 @@ define void @v4i8(ptr %p1, ptr %p2) { ; CHECK-GI-NEXT: fmov s0, w8 ; CHECK-GI-NEXT: fmov s1, w9 ; CHECK-GI-NEXT: mov b2, v0.b[1] -; CHECK-GI-NEXT: mov b3, v1.b[1] -; CHECK-GI-NEXT: mov b4, v0.b[2] -; CHECK-GI-NEXT: mov b5, v0.b[3] -; CHECK-GI-NEXT: fmov w8, s2 -; CHECK-GI-NEXT: mov b2, v1.b[2] -; CHECK-GI-NEXT: fmov w9, s3 -; CHECK-GI-NEXT: mov b3, v1.b[3] -; CHECK-GI-NEXT: mov v0.h[1], w8 -; CHECK-GI-NEXT: mov v1.h[1], w9 -; CHECK-GI-NEXT: fmov w8, s4 -; CHECK-GI-NEXT: fmov w9, s2 -; CHECK-GI-NEXT: mov v0.h[2], w8 -; CHECK-GI-NEXT: mov v1.h[2], w9 -; CHECK-GI-NEXT: fmov w8, s5 -; CHECK-GI-NEXT: fmov w9, s3 -; CHECK-GI-NEXT: mov v0.h[3], w8 -; CHECK-GI-NEXT: mov v1.h[3], w9 -; CHECK-GI-NEXT: mul v0.4h, v0.4h, v1.4h -; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b +; CHECK-GI-NEXT: mov v3.b[0], v0.b[0] +; CHECK-GI-NEXT: mov b4, v1.b[1] +; CHECK-GI-NEXT: mov v5.b[0], v1.b[0] +; CHECK-GI-NEXT: mov v3.b[1], v2.b[0] +; CHECK-GI-NEXT: mov b2, v0.b[2] +; CHECK-GI-NEXT: mov b0, v0.b[3] +; CHECK-GI-NEXT: mov v5.b[1], v4.b[0] +; CHECK-GI-NEXT: mov b4, v1.b[2] +; CHECK-GI-NEXT: mov b1, v1.b[3] +; CHECK-GI-NEXT: mov v3.b[2], v2.b[0] +; CHECK-GI-NEXT: mov v5.b[2], v4.b[0] +; CHECK-GI-NEXT: mov v3.b[3], v0.b[0] +; CHECK-GI-NEXT: mov v5.b[3], v1.b[0] +; CHECK-GI-NEXT: mul v0.8b, v3.8b, v5.8b ; CHECK-GI-NEXT: fmov w8, s0 ; CHECK-GI-NEXT: str w8, [x0] ; CHECK-GI-NEXT: ret @@ -250,16 +240,15 @@ define void @v2i16(ptr %p1, ptr %p2) { ; ; CHECK-GI-LABEL: v2i16: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: ld1 { v0.h }[0], [x0] -; CHECK-GI-NEXT: ld1 { v1.h }[0], [x1] -; CHECK-GI-NEXT: ldr h2, [x0, #2] -; CHECK-GI-NEXT: ldr h3, [x1, #2] -; CHECK-GI-NEXT: mov v0.s[1], v2.s[0] -; CHECK-GI-NEXT: mov v1.s[1], v3.s[0] -; CHECK-GI-NEXT: mul v0.2s, v0.2s, v1.2s -; CHECK-GI-NEXT: mov s1, v0.s[1] +; CHECK-GI-NEXT: ldr h0, [x0] +; CHECK-GI-NEXT: ldr h1, [x1] +; CHECK-GI-NEXT: add x8, x0, #2 +; CHECK-GI-NEXT: add x9, x1, #2 +; CHECK-GI-NEXT: ld1 { v0.h }[1], [x8] +; CHECK-GI-NEXT: ld1 { v1.h }[1], [x9] +; CHECK-GI-NEXT: mul v0.4h, v0.4h, v1.4h ; CHECK-GI-NEXT: str h0, [x0] -; CHECK-GI-NEXT: str h1, [x0, #2] +; CHECK-GI-NEXT: st1 { v0.h }[1], [x8] ; CHECK-GI-NEXT: ret entry: %d = load <2 x i16>, ptr %p1 diff --git a/llvm/test/CodeGen/AArch64/select-with-and-or.ll b/llvm/test/CodeGen/AArch64/select-with-and-or.ll index 60f2add81b45c..e5f5d493c68aa 100644 --- a/llvm/test/CodeGen/AArch64/select-with-and-or.ll +++ b/llvm/test/CodeGen/AArch64/select-with-and-or.ll @@ -105,8 +105,17 @@ define <4 x i1> 
@and_vec(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32> %w) ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: cmeq v0.4s, v0.4s, v1.4s ; CHECK-GI-NEXT: cmgt v1.4s, v2.4s, v3.4s -; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b -; CHECK-GI-NEXT: xtn v0.4h, v0.4s +; CHECK-GI-NEXT: mov w8, v0.s[1] +; CHECK-GI-NEXT: mov w9, v0.s[2] +; CHECK-GI-NEXT: mov w10, v0.s[3] +; CHECK-GI-NEXT: xtn v1.4h, v1.4s +; CHECK-GI-NEXT: mov v0.b[1], w8 +; CHECK-GI-NEXT: mov v0.b[2], w9 +; CHECK-GI-NEXT: mov v0.b[3], w10 +; CHECK-GI-NEXT: uzp1 v1.8b, v1.8b, v0.8b +; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b +; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret %a = icmp eq <4 x i32> %x, %y %b = icmp sgt <4 x i32> %z, %w @@ -127,8 +136,17 @@ define <4 x i1> @or_vec(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32> %w) ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: cmeq v0.4s, v0.4s, v1.4s ; CHECK-GI-NEXT: cmgt v1.4s, v2.4s, v3.4s -; CHECK-GI-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-GI-NEXT: xtn v0.4h, v0.4s +; CHECK-GI-NEXT: mov w8, v0.s[1] +; CHECK-GI-NEXT: mov w9, v0.s[2] +; CHECK-GI-NEXT: mov w10, v0.s[3] +; CHECK-GI-NEXT: xtn v1.4h, v1.4s +; CHECK-GI-NEXT: mov v0.b[1], w8 +; CHECK-GI-NEXT: mov v0.b[2], w9 +; CHECK-GI-NEXT: mov v0.b[3], w10 +; CHECK-GI-NEXT: uzp1 v1.8b, v1.8b, v0.8b +; CHECK-GI-NEXT: orr v0.8b, v0.8b, v1.8b +; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret %a = icmp eq <4 x i32> %x, %y %b = icmp sgt <4 x i32> %z, %w @@ -149,8 +167,18 @@ define <4 x i1> @and_not_vec(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32> ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: cmeq v0.4s, v0.4s, v1.4s ; CHECK-GI-NEXT: cmgt v1.4s, v2.4s, v3.4s -; CHECK-GI-NEXT: bic v0.16b, v1.16b, v0.16b -; CHECK-GI-NEXT: xtn v0.4h, v0.4s +; CHECK-GI-NEXT: mvn v0.16b, v0.16b +; CHECK-GI-NEXT: xtn v1.4h, v1.4s +; CHECK-GI-NEXT: mov w8, v0.s[1] +; CHECK-GI-NEXT: mov w9, v0.s[2] +; CHECK-GI-NEXT: mov w10, v0.s[3] +; CHECK-GI-NEXT: mov v0.b[1], w8 +; CHECK-GI-NEXT: mov v0.b[2], w9 +; CHECK-GI-NEXT: mov v0.b[3], w10 +; CHECK-GI-NEXT: uzp1 v1.8b, v1.8b, v0.8b +; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b +; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret %a = icmp eq <4 x i32> %x, %y %b = icmp sgt <4 x i32> %z, %w @@ -171,8 +199,18 @@ define <4 x i1> @or_not_vec(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32> ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: cmeq v0.4s, v0.4s, v1.4s ; CHECK-GI-NEXT: cmgt v1.4s, v2.4s, v3.4s -; CHECK-GI-NEXT: orn v0.16b, v1.16b, v0.16b -; CHECK-GI-NEXT: xtn v0.4h, v0.4s +; CHECK-GI-NEXT: mvn v0.16b, v0.16b +; CHECK-GI-NEXT: xtn v1.4h, v1.4s +; CHECK-GI-NEXT: mov w8, v0.s[1] +; CHECK-GI-NEXT: mov w9, v0.s[2] +; CHECK-GI-NEXT: mov w10, v0.s[3] +; CHECK-GI-NEXT: mov v0.b[1], w8 +; CHECK-GI-NEXT: mov v0.b[2], w9 +; CHECK-GI-NEXT: mov v0.b[3], w10 +; CHECK-GI-NEXT: uzp1 v1.8b, v1.8b, v0.8b +; CHECK-GI-NEXT: orr v0.8b, v0.8b, v1.8b +; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret %a = icmp eq <4 x i32> %x, %y %b = icmp sgt <4 x i32> %z, %w @@ -193,8 +231,17 @@ define <4 x i1> @and_vec_undef(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i3 ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: cmeq v0.4s, v0.4s, v1.4s ; CHECK-GI-NEXT: cmgt v1.4s, v2.4s, v3.4s -; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b -; CHECK-GI-NEXT: xtn v0.4h, v0.4s +; CHECK-GI-NEXT: mov w8, v0.s[1] +; CHECK-GI-NEXT: mov 
w9, v0.s[2] +; CHECK-GI-NEXT: mov w10, v0.s[3] +; CHECK-GI-NEXT: xtn v1.4h, v1.4s +; CHECK-GI-NEXT: mov v0.b[1], w8 +; CHECK-GI-NEXT: mov v0.b[2], w9 +; CHECK-GI-NEXT: mov v0.b[3], w10 +; CHECK-GI-NEXT: uzp1 v1.8b, v1.8b, v0.8b +; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b +; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret %a = icmp eq <4 x i32> %x, %y %b = icmp sgt <4 x i32> %z, %w @@ -215,8 +262,17 @@ define <4 x i1> @or_vec_undef(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32 ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: cmeq v0.4s, v0.4s, v1.4s ; CHECK-GI-NEXT: cmgt v1.4s, v2.4s, v3.4s -; CHECK-GI-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-GI-NEXT: xtn v0.4h, v0.4s +; CHECK-GI-NEXT: mov w8, v0.s[1] +; CHECK-GI-NEXT: mov w9, v0.s[2] +; CHECK-GI-NEXT: mov w10, v0.s[3] +; CHECK-GI-NEXT: xtn v1.4h, v1.4s +; CHECK-GI-NEXT: mov v0.b[1], w8 +; CHECK-GI-NEXT: mov v0.b[2], w9 +; CHECK-GI-NEXT: mov v0.b[3], w10 +; CHECK-GI-NEXT: uzp1 v1.8b, v1.8b, v0.8b +; CHECK-GI-NEXT: orr v0.8b, v0.8b, v1.8b +; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret %a = icmp eq <4 x i32> %x, %y %b = icmp sgt <4 x i32> %z, %w @@ -237,8 +293,18 @@ define <4 x i1> @and_not_vec_undef(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: cmeq v0.4s, v0.4s, v1.4s ; CHECK-GI-NEXT: cmgt v1.4s, v2.4s, v3.4s -; CHECK-GI-NEXT: bic v0.16b, v1.16b, v0.16b -; CHECK-GI-NEXT: xtn v0.4h, v0.4s +; CHECK-GI-NEXT: mvn v0.16b, v0.16b +; CHECK-GI-NEXT: xtn v1.4h, v1.4s +; CHECK-GI-NEXT: mov w8, v0.s[1] +; CHECK-GI-NEXT: mov w9, v0.s[2] +; CHECK-GI-NEXT: mov w10, v0.s[3] +; CHECK-GI-NEXT: mov v0.b[1], w8 +; CHECK-GI-NEXT: mov v0.b[2], w9 +; CHECK-GI-NEXT: mov v0.b[3], w10 +; CHECK-GI-NEXT: uzp1 v1.8b, v1.8b, v0.8b +; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b +; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret %a = icmp eq <4 x i32> %x, %y %b = icmp sgt <4 x i32> %z, %w @@ -259,8 +325,18 @@ define <4 x i1> @or_not_vec_undef(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: cmeq v0.4s, v0.4s, v1.4s ; CHECK-GI-NEXT: cmgt v1.4s, v2.4s, v3.4s -; CHECK-GI-NEXT: orn v0.16b, v1.16b, v0.16b -; CHECK-GI-NEXT: xtn v0.4h, v0.4s +; CHECK-GI-NEXT: mvn v0.16b, v0.16b +; CHECK-GI-NEXT: xtn v1.4h, v1.4s +; CHECK-GI-NEXT: mov w8, v0.s[1] +; CHECK-GI-NEXT: mov w9, v0.s[2] +; CHECK-GI-NEXT: mov w10, v0.s[3] +; CHECK-GI-NEXT: mov v0.b[1], w8 +; CHECK-GI-NEXT: mov v0.b[2], w9 +; CHECK-GI-NEXT: mov v0.b[3], w10 +; CHECK-GI-NEXT: uzp1 v1.8b, v1.8b, v0.8b +; CHECK-GI-NEXT: orr v0.8b, v0.8b, v1.8b +; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret %a = icmp eq <4 x i32> %x, %y %b = icmp sgt <4 x i32> %z, %w diff --git a/llvm/test/CodeGen/AArch64/sub.ll b/llvm/test/CodeGen/AArch64/sub.ll index 7a436eddb23a6..c4d6db0e80b1b 100644 --- a/llvm/test/CodeGen/AArch64/sub.ll +++ b/llvm/test/CodeGen/AArch64/sub.ll @@ -70,16 +70,15 @@ define void @v2i8(ptr %p1, ptr %p2) { ; ; CHECK-GI-LABEL: v2i8: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: ld1 { v0.b }[0], [x0] -; CHECK-GI-NEXT: ld1 { v1.b }[0], [x1] -; CHECK-GI-NEXT: ldr b2, [x0, #1] -; CHECK-GI-NEXT: ldr b3, [x1, #1] -; CHECK-GI-NEXT: mov v0.s[1], v2.s[0] -; CHECK-GI-NEXT: mov v1.s[1], v3.s[0] -; CHECK-GI-NEXT: sub v0.2s, v0.2s, v1.2s -; CHECK-GI-NEXT: mov s1, v0.s[1] -; CHECK-GI-NEXT: str b0, 
[x0] -; CHECK-GI-NEXT: str b1, [x0, #1] +; CHECK-GI-NEXT: ldr b0, [x0] +; CHECK-GI-NEXT: ldr b1, [x1] +; CHECK-GI-NEXT: add x8, x0, #1 +; CHECK-GI-NEXT: add x9, x1, #1 +; CHECK-GI-NEXT: ld1 { v0.b }[1], [x8] +; CHECK-GI-NEXT: ld1 { v1.b }[1], [x9] +; CHECK-GI-NEXT: sub v0.8b, v0.8b, v1.8b +; CHECK-GI-NEXT: st1 { v0.b }[0], [x0] +; CHECK-GI-NEXT: st1 { v0.b }[1], [x8] ; CHECK-GI-NEXT: ret entry: %d = load <2 x i8>, ptr %p1 @@ -110,24 +109,20 @@ define void @v3i8(ptr %p1, ptr %p2) { ; ; CHECK-GI-LABEL: v3i8: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: ldrb w8, [x0] -; CHECK-GI-NEXT: ldrb w9, [x1] -; CHECK-GI-NEXT: ldrb w10, [x0, #1] -; CHECK-GI-NEXT: ldrb w11, [x1, #1] -; CHECK-GI-NEXT: fmov s0, w8 -; CHECK-GI-NEXT: fmov s1, w9 -; CHECK-GI-NEXT: ldrb w8, [x0, #2] -; CHECK-GI-NEXT: ldrb w9, [x1, #2] -; CHECK-GI-NEXT: mov v0.h[1], w10 -; CHECK-GI-NEXT: mov v1.h[1], w11 -; CHECK-GI-NEXT: mov v0.h[2], w8 -; CHECK-GI-NEXT: mov v1.h[2], w9 -; CHECK-GI-NEXT: sub v0.4h, v0.4h, v1.4h -; CHECK-GI-NEXT: mov h1, v0.h[1] -; CHECK-GI-NEXT: mov h2, v0.h[2] -; CHECK-GI-NEXT: str b0, [x0] -; CHECK-GI-NEXT: str b1, [x0, #1] -; CHECK-GI-NEXT: str b2, [x0, #2] +; CHECK-GI-NEXT: ldr b0, [x0] +; CHECK-GI-NEXT: ldr b1, [x1] +; CHECK-GI-NEXT: add x8, x0, #1 +; CHECK-GI-NEXT: add x9, x1, #1 +; CHECK-GI-NEXT: add x10, x1, #2 +; CHECK-GI-NEXT: ld1 { v0.b }[1], [x8] +; CHECK-GI-NEXT: ld1 { v1.b }[1], [x9] +; CHECK-GI-NEXT: add x9, x0, #2 +; CHECK-GI-NEXT: ld1 { v0.b }[2], [x9] +; CHECK-GI-NEXT: ld1 { v1.b }[2], [x10] +; CHECK-GI-NEXT: sub v0.8b, v0.8b, v1.8b +; CHECK-GI-NEXT: st1 { v0.b }[0], [x0] +; CHECK-GI-NEXT: st1 { v0.b }[1], [x8] +; CHECK-GI-NEXT: st1 { v0.b }[2], [x9] ; CHECK-GI-NEXT: ret entry: %d = load <3 x i8>, ptr %p1 @@ -154,25 +149,20 @@ define void @v4i8(ptr %p1, ptr %p2) { ; CHECK-GI-NEXT: fmov s0, w8 ; CHECK-GI-NEXT: fmov s1, w9 ; CHECK-GI-NEXT: mov b2, v0.b[1] -; CHECK-GI-NEXT: mov b3, v1.b[1] -; CHECK-GI-NEXT: mov b4, v0.b[2] -; CHECK-GI-NEXT: mov b5, v0.b[3] -; CHECK-GI-NEXT: fmov w8, s2 -; CHECK-GI-NEXT: mov b2, v1.b[2] -; CHECK-GI-NEXT: fmov w9, s3 -; CHECK-GI-NEXT: mov b3, v1.b[3] -; CHECK-GI-NEXT: mov v0.h[1], w8 -; CHECK-GI-NEXT: mov v1.h[1], w9 -; CHECK-GI-NEXT: fmov w8, s4 -; CHECK-GI-NEXT: fmov w9, s2 -; CHECK-GI-NEXT: mov v0.h[2], w8 -; CHECK-GI-NEXT: mov v1.h[2], w9 -; CHECK-GI-NEXT: fmov w8, s5 -; CHECK-GI-NEXT: fmov w9, s3 -; CHECK-GI-NEXT: mov v0.h[3], w8 -; CHECK-GI-NEXT: mov v1.h[3], w9 -; CHECK-GI-NEXT: sub v0.4h, v0.4h, v1.4h -; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b +; CHECK-GI-NEXT: mov v3.b[0], v0.b[0] +; CHECK-GI-NEXT: mov b4, v1.b[1] +; CHECK-GI-NEXT: mov v5.b[0], v1.b[0] +; CHECK-GI-NEXT: mov v3.b[1], v2.b[0] +; CHECK-GI-NEXT: mov b2, v0.b[2] +; CHECK-GI-NEXT: mov b0, v0.b[3] +; CHECK-GI-NEXT: mov v5.b[1], v4.b[0] +; CHECK-GI-NEXT: mov b4, v1.b[2] +; CHECK-GI-NEXT: mov b1, v1.b[3] +; CHECK-GI-NEXT: mov v3.b[2], v2.b[0] +; CHECK-GI-NEXT: mov v5.b[2], v4.b[0] +; CHECK-GI-NEXT: mov v3.b[3], v0.b[0] +; CHECK-GI-NEXT: mov v5.b[3], v1.b[0] +; CHECK-GI-NEXT: sub v0.8b, v3.8b, v5.8b ; CHECK-GI-NEXT: fmov w8, s0 ; CHECK-GI-NEXT: str w8, [x0] ; CHECK-GI-NEXT: ret @@ -238,16 +228,15 @@ define void @v2i16(ptr %p1, ptr %p2) { ; ; CHECK-GI-LABEL: v2i16: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: ld1 { v0.h }[0], [x0] -; CHECK-GI-NEXT: ld1 { v1.h }[0], [x1] -; CHECK-GI-NEXT: ldr h2, [x0, #2] -; CHECK-GI-NEXT: ldr h3, [x1, #2] -; CHECK-GI-NEXT: mov v0.s[1], v2.s[0] -; CHECK-GI-NEXT: mov v1.s[1], v3.s[0] -; CHECK-GI-NEXT: sub v0.2s, v0.2s, v1.2s -; CHECK-GI-NEXT: mov s1, v0.s[1] 
+; CHECK-GI-NEXT: ldr h0, [x0] +; CHECK-GI-NEXT: ldr h1, [x1] +; CHECK-GI-NEXT: add x8, x0, #2 +; CHECK-GI-NEXT: add x9, x1, #2 +; CHECK-GI-NEXT: ld1 { v0.h }[1], [x8] +; CHECK-GI-NEXT: ld1 { v1.h }[1], [x9] +; CHECK-GI-NEXT: sub v0.4h, v0.4h, v1.4h ; CHECK-GI-NEXT: str h0, [x0] -; CHECK-GI-NEXT: str h1, [x0, #2] +; CHECK-GI-NEXT: st1 { v0.h }[1], [x8] ; CHECK-GI-NEXT: ret entry: %d = load <2 x i16>, ptr %p1 diff --git a/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll b/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll index d6d323530946e..83f0fe46dcec0 100644 --- a/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll +++ b/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll @@ -274,18 +274,30 @@ define i4 @convert_to_bitmask_with_compare_chain(<4 x i32> %vec1, <4 x i32> %vec ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: sub sp, sp, #16 ; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 -; CHECK-GI-NEXT: cmeq.4s v2, v0, #0 +; CHECK-GI-NEXT: cmtst.4s v2, v0, v0 ; CHECK-GI-NEXT: cmeq.4s v0, v0, v1 -; CHECK-GI-NEXT: bic.16b v0, v0, v2 -; CHECK-GI-NEXT: mov.s w8, v0[1] -; CHECK-GI-NEXT: mov.s w9, v0[2] -; CHECK-GI-NEXT: fmov w11, s0 -; CHECK-GI-NEXT: mov.s w10, v0[3] +; CHECK-GI-NEXT: mov.s w9, v0[1] +; CHECK-GI-NEXT: mov.s w12, v0[3] +; CHECK-GI-NEXT: mov.s w8, v2[1] +; CHECK-GI-NEXT: mov.s w10, v2[2] +; CHECK-GI-NEXT: mov.s w11, v2[3] +; CHECK-GI-NEXT: mov.b v2[1], w8 +; CHECK-GI-NEXT: mov.s w8, v0[2] +; CHECK-GI-NEXT: mov.b v0[1], w9 +; CHECK-GI-NEXT: mov.b v2[2], w10 +; CHECK-GI-NEXT: mov.b v0[2], w8 +; CHECK-GI-NEXT: mov.b v2[3], w11 +; CHECK-GI-NEXT: mov.b v0[3], w12 +; CHECK-GI-NEXT: and.8b v0, v2, v0 +; CHECK-GI-NEXT: umov.b w8, v0[1] +; CHECK-GI-NEXT: umov.b w9, v0[0] +; CHECK-GI-NEXT: umov.b w10, v0[2] +; CHECK-GI-NEXT: umov.b w11, v0[3] ; CHECK-GI-NEXT: and w8, w8, #0x1 -; CHECK-GI-NEXT: bfi w11, w8, #1, #31 -; CHECK-GI-NEXT: and w8, w9, #0x1 -; CHECK-GI-NEXT: and w9, w10, #0x1 -; CHECK-GI-NEXT: orr w8, w11, w8, lsl #2 +; CHECK-GI-NEXT: bfi w9, w8, #1, #31 +; CHECK-GI-NEXT: and w8, w10, #0x1 +; CHECK-GI-NEXT: orr w8, w9, w8, lsl #2 +; CHECK-GI-NEXT: and w9, w11, #0x1 ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #3 ; CHECK-GI-NEXT: strb w8, [sp, #15] ; CHECK-GI-NEXT: and w0, w8, #0xff @@ -318,17 +330,29 @@ define i4 @convert_to_bitmask_with_trunc_in_chain(<4 x i32> %vec1, <4 x i32> %ve ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: sub sp, sp, #16 ; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 -; CHECK-GI-NEXT: cmeq.4s v0, v0, #0 -; CHECK-GI-NEXT: bic.16b v0, v1, v0 +; CHECK-GI-NEXT: cmtst.4s v0, v0, v0 +; CHECK-GI-NEXT: mov.s w9, v1[1] +; CHECK-GI-NEXT: mov.s w12, v1[3] ; CHECK-GI-NEXT: mov.s w8, v0[1] -; CHECK-GI-NEXT: mov.s w9, v0[2] -; CHECK-GI-NEXT: fmov w11, s0 -; CHECK-GI-NEXT: mov.s w10, v0[3] +; CHECK-GI-NEXT: mov.s w10, v0[2] +; CHECK-GI-NEXT: mov.s w11, v0[3] +; CHECK-GI-NEXT: mov.b v0[1], w8 +; CHECK-GI-NEXT: mov.s w8, v1[2] +; CHECK-GI-NEXT: mov.b v1[1], w9 +; CHECK-GI-NEXT: mov.b v0[2], w10 +; CHECK-GI-NEXT: mov.b v1[2], w8 +; CHECK-GI-NEXT: mov.b v0[3], w11 +; CHECK-GI-NEXT: mov.b v1[3], w12 +; CHECK-GI-NEXT: and.8b v0, v0, v1 +; CHECK-GI-NEXT: umov.b w8, v0[1] +; CHECK-GI-NEXT: umov.b w9, v0[0] +; CHECK-GI-NEXT: umov.b w10, v0[2] +; CHECK-GI-NEXT: umov.b w11, v0[3] ; CHECK-GI-NEXT: and w8, w8, #0x1 -; CHECK-GI-NEXT: bfi w11, w8, #1, #31 -; CHECK-GI-NEXT: and w8, w9, #0x1 -; CHECK-GI-NEXT: and w9, w10, #0x1 -; CHECK-GI-NEXT: orr w8, w11, w8, lsl #2 +; CHECK-GI-NEXT: bfi w9, w8, #1, #31 +; CHECK-GI-NEXT: and w8, w10, #0x1 +; CHECK-GI-NEXT: orr w8, w9, w8, lsl #2 +; 
CHECK-GI-NEXT: and w9, w11, #0x1 ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #3 ; CHECK-GI-NEXT: strb w8, [sp, #15] ; CHECK-GI-NEXT: and w0, w8, #0xff @@ -373,49 +397,53 @@ define i4 @convert_to_bitmask_with_unknown_type_in_long_chain(<4 x i32> %vec1, < ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: sub sp, sp, #16 ; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: cmtst.4s v0, v0, v0 +; CHECK-GI-NEXT: cmeq.4s v1, v1, #0 ; CHECK-GI-NEXT: mov w8, #1 ; =0x1 -; CHECK-GI-NEXT: mov w9, #0 ; =0x0 -; CHECK-GI-NEXT: cmeq.4s v0, v0, #0 ; CHECK-GI-NEXT: fmov s2, w8 -; CHECK-GI-NEXT: fmov s4, w9 -; CHECK-GI-NEXT: cmeq.4s v1, v1, #0 +; CHECK-GI-NEXT: mov.s w12, v1[1] +; CHECK-GI-NEXT: mov.s w14, v1[2] +; CHECK-GI-NEXT: mov.s w11, v1[3] +; CHECK-GI-NEXT: mov.s w10, v0[1] +; CHECK-GI-NEXT: mov.s w13, v0[2] +; CHECK-GI-NEXT: mov.s w9, v0[3] ; CHECK-GI-NEXT: mov.16b v3, v2 +; CHECK-GI-NEXT: mov.16b v6, v2 +; CHECK-GI-NEXT: mov.b v2[1], w8 +; CHECK-GI-NEXT: mov.b v1[1], w12 +; CHECK-GI-NEXT: mov.b v0[1], w10 +; CHECK-GI-NEXT: mov w10, #0 ; =0x0 +; CHECK-GI-NEXT: mov.b v3[1], w8 +; CHECK-GI-NEXT: fmov s4, w10 +; CHECK-GI-NEXT: mov.b v6[1], w8 +; CHECK-GI-NEXT: mov.b v2[2], w8 +; CHECK-GI-NEXT: mov.b v1[2], w14 ; CHECK-GI-NEXT: mov.16b v5, v4 -; CHECK-GI-NEXT: mov.h v4[1], w8 -; CHECK-GI-NEXT: bic.16b v0, v1, v0 -; CHECK-GI-NEXT: mov.16b v1, v2 -; CHECK-GI-NEXT: mov.h v2[1], w8 -; CHECK-GI-NEXT: mov.h v3[1], w8 -; CHECK-GI-NEXT: mov.h v5[1], w8 -; CHECK-GI-NEXT: mov.h v1[1], w8 -; CHECK-GI-NEXT: mov.h v4[2], w8 -; CHECK-GI-NEXT: xtn.4h v0, v0 -; CHECK-GI-NEXT: mov.h v2[2], w8 -; CHECK-GI-NEXT: mov.h v3[2], w9 -; CHECK-GI-NEXT: mov.h v5[2], w9 -; CHECK-GI-NEXT: mov.h v1[2], w9 -; CHECK-GI-NEXT: mov.h v4[3], w9 -; CHECK-GI-NEXT: mov.h v2[3], w9 -; CHECK-GI-NEXT: mov.h v3[3], w9 -; CHECK-GI-NEXT: mov.h v5[3], w8 -; CHECK-GI-NEXT: mov.h v1[3], w8 +; CHECK-GI-NEXT: mov.b v0[2], w13 +; CHECK-GI-NEXT: mov.b v4[1], w8 +; CHECK-GI-NEXT: mov.b v3[2], w10 +; CHECK-GI-NEXT: mov.b v6[2], w10 +; CHECK-GI-NEXT: mov.b v2[3], w10 +; CHECK-GI-NEXT: mov.b v5[1], w8 +; CHECK-GI-NEXT: mov.b v1[3], w11 +; CHECK-GI-NEXT: mov.b v0[3], w9 +; CHECK-GI-NEXT: mov.b v4[2], w8 +; CHECK-GI-NEXT: mov.b v3[3], w10 +; CHECK-GI-NEXT: mov.b v6[3], w8 +; CHECK-GI-NEXT: mov.b v5[2], w10 +; CHECK-GI-NEXT: and.8b v0, v0, v1 +; CHECK-GI-NEXT: mov.b v4[3], w10 +; CHECK-GI-NEXT: mov.b v5[3], w8 ; CHECK-GI-NEXT: orr.8b v0, v0, v3 -; CHECK-GI-NEXT: eor.8b v3, v0, v5 +; CHECK-GI-NEXT: eor.8b v1, v0, v5 ; CHECK-GI-NEXT: eor.8b v0, v4, v0 -; CHECK-GI-NEXT: and.8b v1, v3, v1 +; CHECK-GI-NEXT: and.8b v1, v1, v6 ; CHECK-GI-NEXT: orr.8b v0, v2, v0 ; CHECK-GI-NEXT: orr.8b v0, v1, v0 -; CHECK-GI-NEXT: ushll.4s v0, v0, #0 -; CHECK-GI-NEXT: mov.s w8, v0[1] -; CHECK-GI-NEXT: mov.s w9, v0[2] -; CHECK-GI-NEXT: fmov w11, s0 -; CHECK-GI-NEXT: mov.s w10, v0[3] +; CHECK-GI-NEXT: umov.b w8, v0[3] ; CHECK-GI-NEXT: and w8, w8, #0x1 -; CHECK-GI-NEXT: bfi w11, w8, #1, #31 -; CHECK-GI-NEXT: and w8, w9, #0x1 -; CHECK-GI-NEXT: and w9, w10, #0x1 -; CHECK-GI-NEXT: orr w8, w11, w8, lsl #2 -; CHECK-GI-NEXT: orr w8, w8, w9, lsl #3 +; CHECK-GI-NEXT: lsl w8, w8, #3 +; CHECK-GI-NEXT: orr w8, w8, #0x7 ; CHECK-GI-NEXT: strb w8, [sp, #15] ; CHECK-GI-NEXT: and w0, w8, #0xff ; CHECK-GI-NEXT: add sp, sp, #16 @@ -456,19 +484,24 @@ define i4 @convert_to_bitmask_with_different_types_in_chain(<4 x i16> %vec1, <4 ; CHECK-GI-NEXT: sub sp, sp, #16 ; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 ; CHECK-GI-NEXT: cmeq.4s v1, v1, #0 -; CHECK-GI-NEXT: cmeq.4h v0, v0, #0 -; CHECK-GI-NEXT: xtn.4h v1, v1 -; CHECK-GI-NEXT: 
orn.8b v0, v1, v0 -; CHECK-GI-NEXT: ushll.4s v0, v0, #0 -; CHECK-GI-NEXT: mov.s w8, v0[1] -; CHECK-GI-NEXT: mov.s w9, v0[2] -; CHECK-GI-NEXT: fmov w11, s0 -; CHECK-GI-NEXT: mov.s w10, v0[3] +; CHECK-GI-NEXT: cmtst.4h v0, v0, v0 +; CHECK-GI-NEXT: mov.s w8, v1[1] +; CHECK-GI-NEXT: mov.s w9, v1[2] +; CHECK-GI-NEXT: mov.s w10, v1[3] +; CHECK-GI-NEXT: uzp1.8b v0, v0, v0 +; CHECK-GI-NEXT: mov.b v1[1], w8 +; CHECK-GI-NEXT: mov.b v1[2], w9 +; CHECK-GI-NEXT: mov.b v1[3], w10 +; CHECK-GI-NEXT: orr.8b v0, v0, v1 +; CHECK-GI-NEXT: umov.b w8, v0[1] +; CHECK-GI-NEXT: umov.b w9, v0[0] +; CHECK-GI-NEXT: umov.b w10, v0[2] +; CHECK-GI-NEXT: umov.b w11, v0[3] ; CHECK-GI-NEXT: and w8, w8, #0x1 -; CHECK-GI-NEXT: bfi w11, w8, #1, #31 -; CHECK-GI-NEXT: and w8, w9, #0x1 -; CHECK-GI-NEXT: and w9, w10, #0x1 -; CHECK-GI-NEXT: orr w8, w11, w8, lsl #2 +; CHECK-GI-NEXT: bfi w9, w8, #1, #31 +; CHECK-GI-NEXT: and w8, w10, #0x1 +; CHECK-GI-NEXT: orr w8, w9, w8, lsl #2 +; CHECK-GI-NEXT: and w9, w11, #0x1 ; CHECK-GI-NEXT: orr w8, w8, w9, lsl #3 ; CHECK-GI-NEXT: strb w8, [sp, #15] ; CHECK-GI-NEXT: and w0, w8, #0xff