@@ -1092,3 +1092,116 @@ define void @packss_zext_v8i1() {
   store <16 x i16> %tmp11, ptr undef, align 2
   ret void
 }
+
+define <32 x i16> @PR158415(<8 x i8> %arg) {
+; X86-AVX2-LABEL: PR158415:
+; X86-AVX2: # %bb.0: # %entry
+; X86-AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,u],zero,xmm0[u,u,u,0,2,u,u,u,u,u,u,u,4]
+; X86-AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,2,1]
+; X86-AVX2-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; X86-AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,24],zero,ymm0[25],zero,ymm0[30],zero,ymm0[31],zero,ymm0[u,u,u,u,u,u,u,u]
+; X86-AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
+; X86-AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3]
+; X86-AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[12,13,14,15],zero,zero,ymm1[4,5,u,u,u,u,u,u,u,u,28,29,30,31],zero,zero,ymm1[20,21],zero,zero,ymm1[26,27,28,29,30,31]
+; X86-AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,3,0,2]
+; X86-AVX2-NEXT: vpbroadcastw {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; X86-AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm1
+; X86-AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm2
+; X86-AVX2-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm2[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
+; X86-AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpbroadcastw %xmm1, %ymm3
+; X86-AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm3[2],ymm0[3,4,5,6,7,8,9],ymm3[10],ymm0[11,12,13,14,15]
+; X86-AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; X86-AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm3[0,1,2,3],ymm0[4,5,6,7]
+; X86-AVX2-NEXT: vpshufb {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,xmm2[14,15],zero,zero,zero,zero,xmm2[u,u],zero,zero
+; X86-AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1
+; X86-AVX2-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
+; X86-AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3,4,5],xmm1[6],xmm2[7]
+; X86-AVX2-NEXT: retl
+;
+; X86-AVX512-LABEL: PR158415:
+; X86-AVX512: # %bb.0: # %entry
+; X86-AVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,u],zero,xmm0[u,u,u,0,2,u,u,u,u,u,u,u,4]
+; X86-AVX512-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,2,1]
+; X86-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
+; X86-AVX512-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; X86-AVX512-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; X86-AVX512-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
+; X86-AVX512-NEXT: vextracti128 $1, %ymm1, %xmm1
+; X86-AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
+; X86-AVX512-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; X86-AVX512-NEXT: vpbroadcastd %xmm0, %ymm0
+; X86-AVX512-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; X86-AVX512-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}{1to16}, %zmm0, %zmm0
+; X86-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; X86-AVX512-NEXT: vextracti128 $1, %ymm1, %xmm2
+; X86-AVX512-NEXT: vpsrld $16, %xmm2, %xmm2
+; X86-AVX512-NEXT: vpalignr {{.*#+}} xmm2 = xmm0[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
+; X86-AVX512-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X86-AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,2,3]
+; X86-AVX512-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7]
+; X86-AVX512-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X86-AVX512-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; X86-AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}, %zmm0, %zmm0
+; X86-AVX512-NEXT: retl
+;
+; X64-AVX2-LABEL: PR158415:
+; X64-AVX2: # %bb.0: # %entry
+; X64-AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,u],zero,xmm0[u,u,u,0,2,u,u,u,u,u,u,u,4]
+; X64-AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,2,1]
+; X64-AVX2-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; X64-AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,24],zero,ymm0[25],zero,ymm0[30],zero,ymm0[31],zero,ymm0[u,u,u,u,u,u,u,u]
+; X64-AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
+; X64-AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3]
+; X64-AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[12,13,14,15],zero,zero,ymm1[4,5,u,u,u,u,u,u,u,u,28,29,30,31],zero,zero,ymm1[20,21],zero,zero,ymm1[26,27,28,29,30,31]
+; X64-AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,3,0,2]
+; X64-AVX2-NEXT: vpbroadcastw {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; X64-AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm1
+; X64-AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm2
+; X64-AVX2-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm2[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
+; X64-AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; X64-AVX2-NEXT: vpbroadcastw %xmm1, %ymm3
+; X64-AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm3[2],ymm0[3,4,5,6,7,8,9],ymm3[10],ymm0[11,12,13,14,15]
+; X64-AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; X64-AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm3[0,1,2,3],ymm0[4,5,6,7]
+; X64-AVX2-NEXT: vpshufb {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,xmm2[14,15],zero,zero,zero,zero,xmm2[u,u],zero,zero
+; X64-AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1
+; X64-AVX2-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
+; X64-AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3,4,5],xmm1[6],xmm2[7]
+; X64-AVX2-NEXT: retq
+;
+; X64-AVX512-LABEL: PR158415:
+; X64-AVX512: # %bb.0: # %entry
+; X64-AVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,u],zero,xmm0[u,u,u,0,2,u,u,u,u,u,u,u,4]
+; X64-AVX512-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,2,1]
+; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
+; X64-AVX512-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; X64-AVX512-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; X64-AVX512-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
+; X64-AVX512-NEXT: vextracti128 $1, %ymm1, %xmm1
+; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
+; X64-AVX512-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; X64-AVX512-NEXT: vpbroadcastd %xmm0, %ymm0
+; X64-AVX512-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; X64-AVX512-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
+; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; X64-AVX512-NEXT: vextracti128 $1, %ymm1, %xmm2
+; X64-AVX512-NEXT: vpsrld $16, %xmm2, %xmm2
+; X64-AVX512-NEXT: vpalignr {{.*#+}} xmm2 = xmm0[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
+; X64-AVX512-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,2,3]
+; X64-AVX512-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7]
+; X64-AVX512-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX512-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; X64-AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
+; X64-AVX512-NEXT: retq
+entry:
+  %shuffle2 = shufflevector <8 x i8> %arg, <8 x i8> zeroinitializer, <32 x i32> <i32 2, i32 2, i32 9, i32 3, i32 1, i32 0, i32 0, i32 2, i32 0, i32 5, i32 9, i32 6, i32 5, i32 4, i32 7, i32 2, i32 7, i32 9, i32 4, i32 0, i32 9, i32 2, i32 4, i32 3, i32 3, i32 2, i32 2, i32 3, i32 9, i32 0, i32 6, i32 4>
+  %conv3 = zext <32 x i8> %shuffle2 to <32 x i16>
+  %shuffle4 = shufflevector <32 x i16> zeroinitializer, <32 x i16> %conv3, <32 x i32> <i32 5, i32 3, i32 4, i32 47, i32 5, i32 5, i32 3, i32 63, i32 4, i32 4, i32 60, i32 2, i32 2, i32 5, i32 4, i32 0, i32 38, i32 1, i32 0, i32 3, i32 59, i32 2, i32 3, i32 1, i32 1, i32 0, i32 3, i32 34, i32 0, i32 0, i32 62, i32 5>
+  %not = xor <32 x i16> %shuffle4, splat (i16 1)
+  %shuffle5 = shufflevector <32 x i16> zeroinitializer, <32 x i16> %not, <32 x i32> <i32 3, i32 9, i32 3, i32 1, i32 9, i32 8, i32 9, i32 2, i32 0, i32 8, i32 48, i32 8, i32 35, i32 3, i32 0, i32 4, i32 4, i32 7, i32 4, i32 39, i32 9, i32 0, i32 59, i32 6, i32 0, i32 4, i32 9, i32 1, i32 1, i32 2, i32 8, i32 9>
+  ret <32 x i16> %shuffle5
+}