Skip to content

Commit 3a6781e

Browse files
authored
[X86] vector-shuffle-combining-avx512f.ll - add tests showing failure to simplify expand/compress nodes (#171113)
1 parent 32ff710 commit 3a6781e

File tree

1 file changed

+69
-0
lines changed

1 file changed

+69
-0
lines changed

llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@ declare <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float>, <16 x
2222
declare <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
2323
declare <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
2424

25+
declare <16 x i32> @llvm.x86.avx512.mask.expand.v16i32(<16 x i32>, <16 x i32>, <16 x i1>)
26+
declare <16 x i32> @llvm.x86.avx512.mask.compress.v16i32(<16 x i32>, <16 x i32>, <16 x i1>)
27+
2528
define <8 x double> @combine_permvar_8f64_identity(<8 x double> %x0, <8 x double> %x1) {
2629
; CHECK-LABEL: combine_permvar_8f64_identity:
2730
; CHECK: # %bb.0:
@@ -1031,3 +1034,69 @@ define <8 x double> @concat_vpermilvar_v8f64_v4f64(<4 x double> %a0, <4 x double
10311034
%res = shufflevector <4 x double> %v0, <4 x double> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
10321035
ret <8 x double> %res
10331036
}
1037+
1038+
; TODO - shift elements up by one
; The expand mask below has only lane 0 clear (llc materialises it as
; movw $-2) and the passthru operand is zeroinitializer. The assertions
; record that a masked, zeroing vpexpandd is still emitted on every run
; configuration; per the test name the intended simplification is a
; VALIGND-style element shift (NOTE(review): confirm the target lowering
; against the follow-up combine patch).
1039+
define <16 x i32> @combine_vexpandd_as_valignd(<16 x i32> %x) {
1040+
; X86-AVX512F-LABEL: combine_vexpandd_as_valignd:
1041+
; X86-AVX512F: # %bb.0:
1042+
; X86-AVX512F-NEXT: movw $-2, %ax
1043+
; X86-AVX512F-NEXT: kmovw %eax, %k1
1044+
; X86-AVX512F-NEXT: vpexpandd %zmm0, %zmm0 {%k1} {z}
1045+
; X86-AVX512F-NEXT: retl
1046+
;
1047+
; X86-AVX512BW-LABEL: combine_vexpandd_as_valignd:
1048+
; X86-AVX512BW: # %bb.0:
1049+
; X86-AVX512BW-NEXT: movw $-2, %ax
1050+
; X86-AVX512BW-NEXT: kmovd %eax, %k1
1051+
; X86-AVX512BW-NEXT: vpexpandd %zmm0, %zmm0 {%k1} {z}
1052+
; X86-AVX512BW-NEXT: retl
1053+
;
1054+
; X64-AVX512F-LABEL: combine_vexpandd_as_valignd:
1055+
; X64-AVX512F: # %bb.0:
1056+
; X64-AVX512F-NEXT: movw $-2, %ax
1057+
; X64-AVX512F-NEXT: kmovw %eax, %k1
1058+
; X64-AVX512F-NEXT: vpexpandd %zmm0, %zmm0 {%k1} {z}
1059+
; X64-AVX512F-NEXT: retq
1060+
;
1061+
; X64-AVX512BW-LABEL: combine_vexpandd_as_valignd:
1062+
; X64-AVX512BW: # %bb.0:
1063+
; X64-AVX512BW-NEXT: movw $-2, %ax
1064+
; X64-AVX512BW-NEXT: kmovd %eax, %k1
1065+
; X64-AVX512BW-NEXT: vpexpandd %zmm0, %zmm0 {%k1} {z}
1066+
; X64-AVX512BW-NEXT: retq
1067+
%res = call <16 x i32> @llvm.x86.avx512.mask.expand.v16i32(<16 x i32> %x, <16 x i32> zeroinitializer, <16 x i1> <i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
1068+
ret <16 x i32> %res
1069+
}
1070+
1071+
; TODO - zero upper half of vector
; The compress mask below sets only the low 8 lanes (llc materialises it
; as movw $255) and the passthru operand is zeroinitializer. The
; assertions record that a masked, zeroing vpcompressd is still emitted
; on every run configuration; per the test name the intended
; simplification is a plain move that leaves the upper half zeroed
; (NOTE(review): confirm the target lowering against the follow-up
; combine patch).
1072+
define <16 x i32> @combine_vcompressd_as_vmov(<16 x i32> %x) {
1073+
; X86-AVX512F-LABEL: combine_vcompressd_as_vmov:
1074+
; X86-AVX512F: # %bb.0:
1075+
; X86-AVX512F-NEXT: movw $255, %ax
1076+
; X86-AVX512F-NEXT: kmovw %eax, %k1
1077+
; X86-AVX512F-NEXT: vpcompressd %zmm0, %zmm0 {%k1} {z}
1078+
; X86-AVX512F-NEXT: retl
1079+
;
1080+
; X86-AVX512BW-LABEL: combine_vcompressd_as_vmov:
1081+
; X86-AVX512BW: # %bb.0:
1082+
; X86-AVX512BW-NEXT: movw $255, %ax
1083+
; X86-AVX512BW-NEXT: kmovd %eax, %k1
1084+
; X86-AVX512BW-NEXT: vpcompressd %zmm0, %zmm0 {%k1} {z}
1085+
; X86-AVX512BW-NEXT: retl
1086+
;
1087+
; X64-AVX512F-LABEL: combine_vcompressd_as_vmov:
1088+
; X64-AVX512F: # %bb.0:
1089+
; X64-AVX512F-NEXT: movw $255, %ax
1090+
; X64-AVX512F-NEXT: kmovw %eax, %k1
1091+
; X64-AVX512F-NEXT: vpcompressd %zmm0, %zmm0 {%k1} {z}
1092+
; X64-AVX512F-NEXT: retq
1093+
;
1094+
; X64-AVX512BW-LABEL: combine_vcompressd_as_vmov:
1095+
; X64-AVX512BW: # %bb.0:
1096+
; X64-AVX512BW-NEXT: movw $255, %ax
1097+
; X64-AVX512BW-NEXT: kmovd %eax, %k1
1098+
; X64-AVX512BW-NEXT: vpcompressd %zmm0, %zmm0 {%k1} {z}
1099+
; X64-AVX512BW-NEXT: retq
1100+
%res = call <16 x i32> @llvm.x86.avx512.mask.compress.v16i32(<16 x i32> %x, <16 x i32> zeroinitializer, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>)
1101+
ret <16 x i32> %res
1102+
}

0 commit comments

Comments
 (0)