
Commit ebdb903

[X86] Handle X86ISD::EXPAND/COMPRESS nodes as target shuffles (#171119)
Allows for shuffle simplification. Required a minor fix to the overly reduced compress-undef-float-passthrough.ll regression test.
1 parent b08c72b commit ebdb903

3 files changed (+59, -58 lines)

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 46 additions & 0 deletions
@@ -2888,6 +2888,8 @@ static bool isTargetShuffle(unsigned Opcode) {
   case X86ISD::VPERMV:
   case X86ISD::VPERMV3:
   case X86ISD::VZEXT_MOVL:
+  case X86ISD::COMPRESS:
+  case X86ISD::EXPAND:
     return true;
   }
 }
@@ -5839,6 +5841,48 @@ static bool getTargetShuffleMask(SDValue N, bool AllowSentinelZero,
     }
     return false;
   }
+  case X86ISD::COMPRESS: {
+    SDValue CmpVec = N.getOperand(0);
+    SDValue PassThru = N.getOperand(1);
+    SDValue CmpMask = N.getOperand(2);
+    APInt UndefElts;
+    SmallVector<APInt> EltBits;
+    if (!getTargetConstantBitsFromNode(CmpMask, 1, UndefElts, EltBits))
+      return false;
+    assert(UndefElts.getBitWidth() == NumElems && EltBits.size() == NumElems &&
+           "Illegal compression mask");
+    for (unsigned I = 0; I != NumElems; ++I) {
+      if (!EltBits[I].isZero())
+        Mask.push_back(I);
+    }
+    while (Mask.size() != NumElems) {
+      Mask.push_back(NumElems + Mask.size());
+    }
+    Ops.push_back(CmpVec);
+    Ops.push_back(PassThru);
+    return true;
+  }
+  case X86ISD::EXPAND: {
+    SDValue ExpVec = N.getOperand(0);
+    SDValue PassThru = N.getOperand(1);
+    SDValue ExpMask = N.getOperand(2);
+    APInt UndefElts;
+    SmallVector<APInt> EltBits;
+    if (!getTargetConstantBitsFromNode(ExpMask, 1, UndefElts, EltBits))
+      return false;
+    assert(UndefElts.getBitWidth() == NumElems && EltBits.size() == NumElems &&
+           "Illegal expansion mask");
+    unsigned ExpIndex = 0;
+    for (unsigned I = 0; I != NumElems; ++I) {
+      if (EltBits[I].isZero())
+        Mask.push_back(I + NumElems);
+      else
+        Mask.push_back(ExpIndex++);
+    }
+    Ops.push_back(ExpVec);
+    Ops.push_back(PassThru);
+    return true;
+  }
   default:
     llvm_unreachable("unknown target shuffle node");
   }
@@ -61325,6 +61369,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   case X86ISD::VPERM2X128:
   case X86ISD::SHUF128:
   case X86ISD::VZEXT_MOVL:
+  case X86ISD::COMPRESS:
+  case X86ISD::EXPAND:
   case ISD::VECTOR_SHUFFLE: return combineShuffle(N, DAG, DCI,Subtarget);
   case X86ISD::FMADD_RND:
   case X86ISD::FMSUB:
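
As a quick illustration of the mapping the new getTargetShuffleMask cases perform, here is a minimal standalone sketch (plain std:: containers instead of LLVM's SDValue/APInt/SmallVector; the helper names compressShuffleMask/expandShuffleMask are made up for this example): COMPRESS packs the selected source elements at the bottom of the vector and fills the tail from the passthru operand, while EXPAND scatters consecutive source elements into the set mask positions and takes the passthru element everywhere else. Shuffle indices below NumElems refer to the source vector; indices at or above NumElems refer to the passthru.

// Minimal standalone sketch (not the LLVM code itself): how a constant
// compress/expand mask becomes a two-input shuffle mask. Indices < NumElems
// select from the source vector, indices >= NumElems from the passthru.
#include <cstdio>
#include <vector>

static std::vector<int> compressShuffleMask(const std::vector<bool> &MaskBits) {
  unsigned NumElems = MaskBits.size();
  std::vector<int> Mask;
  for (unsigned I = 0; I != NumElems; ++I)
    if (MaskBits[I])
      Mask.push_back(I); // selected source elements are packed at the bottom
  while (Mask.size() != NumElems)
    Mask.push_back(NumElems + Mask.size()); // tail is taken from the passthru
  return Mask;
}

static std::vector<int> expandShuffleMask(const std::vector<bool> &MaskBits) {
  unsigned NumElems = MaskBits.size();
  std::vector<int> Mask;
  unsigned ExpIndex = 0;
  for (unsigned I = 0; I != NumElems; ++I)
    Mask.push_back(MaskBits[I] ? int(ExpIndex++)      // next packed source element
                               : int(I + NumElems));  // keep the passthru element
  return Mask;
}

int main() {
  // Mask 0b00100101: elements 0, 2 and 5 are active.
  std::vector<bool> Bits = {true, false, true, false, false, true, false, false};
  for (int M : compressShuffleMask(Bits))
    std::printf("%d ", M); // prints: 0 2 5 11 12 13 14 15
  std::printf("\n");
  for (int M : expandShuffleMask(Bits))
    std::printf("%d ", M); // prints: 0 9 1 11 12 2 14 15
  std::printf("\n");
  return 0;
}

With a zero passthru, the combine_vexpandd_as_valignd test below decomposes to exactly this kind of mask ([16,0,1,...,14] for a v16i32 expand with only bit 0 clear), which the generic shuffle combiner can then lower as the valignd shown in the updated checks.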

llvm/test/CodeGen/X86/compress-undef-float-passthrough.ll

Lines changed: 2 additions & 2 deletions
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=x86_64-- -mattr=avx512f,avx512vl | FileCheck %s --check-prefix=CHECK
 
-define void @test_compress_undef_float_passthrough() {
+define void @test_compress_undef_float_passthrough(<4 x double> %a0) {
 ; CHECK-LABEL: test_compress_undef_float_passthrough:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: movb $5, %al
@@ -12,7 +12,7 @@ define void @test_compress_undef_float_passthrough() {
 ; CHECK-NEXT: retq
 entry: ; preds = %loop.50
   %0 = bitcast i4 undef to <4 x i1>
-  %1 = call <4 x double> @llvm.x86.avx512.mask.compress.v4f64(<4 x double> undef, <4 x double> undef, <4 x i1> <i1 1, i1 0, i1 1, i1 0>)
+  %1 = call <4 x double> @llvm.x86.avx512.mask.compress.v4f64(<4 x double> %a0, <4 x double> undef, <4 x i1> <i1 1, i1 0, i1 1, i1 0>)
   call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> %1, <4 x ptr> undef, i32 0, <4 x i1> %0)
   ret void
 }

llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll

Lines changed: 11 additions & 56 deletions
@@ -1035,68 +1035,23 @@ define <8 x double> @concat_vpermilvar_v8f64_v4f64(<4 x double> %a0, <4 x double
   ret <8 x double> %res
 }
 
-; TODO - shift elements up by one
+; shift elements up by one
 define <16 x i32> @combine_vexpandd_as_valignd(<16 x i32> %x) {
-; X86-AVX512F-LABEL: combine_vexpandd_as_valignd:
-; X86-AVX512F: # %bb.0:
-; X86-AVX512F-NEXT: movw $-2, %ax
-; X86-AVX512F-NEXT: kmovw %eax, %k1
-; X86-AVX512F-NEXT: vpexpandd %zmm0, %zmm0 {%k1} {z}
-; X86-AVX512F-NEXT: retl
-;
-; X86-AVX512BW-LABEL: combine_vexpandd_as_valignd:
-; X86-AVX512BW: # %bb.0:
-; X86-AVX512BW-NEXT: movw $-2, %ax
-; X86-AVX512BW-NEXT: kmovd %eax, %k1
-; X86-AVX512BW-NEXT: vpexpandd %zmm0, %zmm0 {%k1} {z}
-; X86-AVX512BW-NEXT: retl
-;
-; X64-AVX512F-LABEL: combine_vexpandd_as_valignd:
-; X64-AVX512F: # %bb.0:
-; X64-AVX512F-NEXT: movw $-2, %ax
-; X64-AVX512F-NEXT: kmovw %eax, %k1
-; X64-AVX512F-NEXT: vpexpandd %zmm0, %zmm0 {%k1} {z}
-; X64-AVX512F-NEXT: retq
-;
-; X64-AVX512BW-LABEL: combine_vexpandd_as_valignd:
-; X64-AVX512BW: # %bb.0:
-; X64-AVX512BW-NEXT: movw $-2, %ax
-; X64-AVX512BW-NEXT: kmovd %eax, %k1
-; X64-AVX512BW-NEXT: vpexpandd %zmm0, %zmm0 {%k1} {z}
-; X64-AVX512BW-NEXT: retq
+; CHECK-LABEL: combine_vexpandd_as_valignd:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: valignd {{.*#+}} zmm0 = zmm1[15],zmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
+; CHECK-NEXT: ret{{[l|q]}}
   %res = call <16 x i32> @llvm.x86.avx512.mask.expand.v16i32(<16 x i32> %x, <16 x i32> zeroinitializer, <16 x i1> <i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
   ret <16 x i32> %res
 }
 
-; TODO - zero upper half of vector
+; zero upper half of vector
 define <16 x i32> @combine_vcompressd_as_vmov(<16 x i32> %x) {
-; X86-AVX512F-LABEL: combine_vcompressd_as_vmov:
-; X86-AVX512F: # %bb.0:
-; X86-AVX512F-NEXT: movw $255, %ax
-; X86-AVX512F-NEXT: kmovw %eax, %k1
-; X86-AVX512F-NEXT: vpcompressd %zmm0, %zmm0 {%k1} {z}
-; X86-AVX512F-NEXT: retl
-;
-; X86-AVX512BW-LABEL: combine_vcompressd_as_vmov:
-; X86-AVX512BW: # %bb.0:
-; X86-AVX512BW-NEXT: movw $255, %ax
-; X86-AVX512BW-NEXT: kmovd %eax, %k1
-; X86-AVX512BW-NEXT: vpcompressd %zmm0, %zmm0 {%k1} {z}
-; X86-AVX512BW-NEXT: retl
-;
-; X64-AVX512F-LABEL: combine_vcompressd_as_vmov:
-; X64-AVX512F: # %bb.0:
-; X64-AVX512F-NEXT: movw $255, %ax
-; X64-AVX512F-NEXT: kmovw %eax, %k1
-; X64-AVX512F-NEXT: vpcompressd %zmm0, %zmm0 {%k1} {z}
-; X64-AVX512F-NEXT: retq
-;
-; X64-AVX512BW-LABEL: combine_vcompressd_as_vmov:
-; X64-AVX512BW: # %bb.0:
-; X64-AVX512BW-NEXT: movw $255, %ax
-; X64-AVX512BW-NEXT: kmovd %eax, %k1
-; X64-AVX512BW-NEXT: vpcompressd %zmm0, %zmm0 {%k1} {z}
-; X64-AVX512BW-NEXT: retq
+; CHECK-LABEL: combine_vcompressd_as_vmov:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmovaps %ymm0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
   %res = call <16 x i32> @llvm.x86.avx512.mask.compress.v16i32(<16 x i32> %x, <16 x i32> zeroinitializer, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>)
   ret <16 x i32> %res
 }

0 commit comments