@@ -5045,6 +5045,64 @@ define <32 x i8> @shuffle_v32i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_
50455045 ret <32 x i8 > %5
50465046}
50475047
5048+ ; PR116931
; Regression test for a <32 x i8> "byte transpose" shuffle: the mask selects
; bytes 0,8,16,24, then 1,9,17,25, ..., i.e. the j-th byte of each of the four
; 8-byte groups, for j = 0..7.  The shuffled value is only stored through
; ptr %out (align 32), never returned.
; The CHECK lines below pin the expected lowering on every subtarget
; (AVX1, AVX2/AVX512VL, XOPAVX1, XOPAVX2): extract the high 128-bit half,
; vpshufb both halves with the same index vector [0,8,1,9,...,7,15], recombine
; with vpunpcklwd/vpunpckhwd, and emit two separate 16-byte vmovdqa stores at
; (%rdi) and 16(%rdi), followed by vzeroupper before retq.
; NOTE(review): the CHECK blocks look auto-generated (update_llc_test_checks.py
; style) -- TODO confirm before hand-editing them.  Plain ";" comment lines
; like these are not FileCheck directives and do not affect matching.
; NOTE(review): the leading "NNNN+" tokens on each line are diff/extraction
; line-number residue from the chunk this was taken from, preserved verbatim.
5049+ define void @shuffle_v32i8_store_00_08_16_24_01_09_17_25_02_10_18_26_03_11_19_27_04_12_20_28_05_13_21_29_06_14_22_30_07_15_23_31 (ptr %out , <32 x i8 > %a0 ) {
5050+ ; AVX1-LABEL: shuffle_v32i8_store_00_08_16_24_01_09_17_25_02_10_18_26_03_11_19_27_04_12_20_28_05_13_21_29_06_14_22_30_07_15_23_31:
5051+ ; AVX1: # %bb.0:
5052+ ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
5053+ ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,8,1,9,2,10,3,11,4,12,5,13,6,14,7,15]
5054+ ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
5055+ ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
5056+ ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
5057+ ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
5058+ ; AVX1-NEXT: vmovdqa %xmm0, 16(%rdi)
5059+ ; AVX1-NEXT: vmovdqa %xmm2, (%rdi)
5060+ ; AVX1-NEXT: vzeroupper
5061+ ; AVX1-NEXT: retq
5062+ ;
5063+ ; AVX2OR512VL-LABEL: shuffle_v32i8_store_00_08_16_24_01_09_17_25_02_10_18_26_03_11_19_27_04_12_20_28_05_13_21_29_06_14_22_30_07_15_23_31:
5064+ ; AVX2OR512VL: # %bb.0:
5065+ ; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
5066+ ; AVX2OR512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [0,8,1,9,2,10,3,11,4,12,5,13,6,14,7,15]
5067+ ; AVX2OR512VL-NEXT: vpshufb %xmm2, %xmm0, %xmm0
5068+ ; AVX2OR512VL-NEXT: vpshufb %xmm2, %xmm1, %xmm1
5069+ ; AVX2OR512VL-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
5070+ ; AVX2OR512VL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
5071+ ; AVX2OR512VL-NEXT: vmovdqa %xmm0, 16(%rdi)
5072+ ; AVX2OR512VL-NEXT: vmovdqa %xmm2, (%rdi)
5073+ ; AVX2OR512VL-NEXT: vzeroupper
5074+ ; AVX2OR512VL-NEXT: retq
5075+ ;
5076+ ; XOPAVX1-LABEL: shuffle_v32i8_store_00_08_16_24_01_09_17_25_02_10_18_26_03_11_19_27_04_12_20_28_05_13_21_29_06_14_22_30_07_15_23_31:
5077+ ; XOPAVX1: # %bb.0:
5078+ ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
5079+ ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,8,1,9,2,10,3,11,4,12,5,13,6,14,7,15]
5080+ ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
5081+ ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
5082+ ; XOPAVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
5083+ ; XOPAVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
5084+ ; XOPAVX1-NEXT: vmovdqa %xmm0, 16(%rdi)
5085+ ; XOPAVX1-NEXT: vmovdqa %xmm2, (%rdi)
5086+ ; XOPAVX1-NEXT: vzeroupper
5087+ ; XOPAVX1-NEXT: retq
5088+ ;
5089+ ; XOPAVX2-LABEL: shuffle_v32i8_store_00_08_16_24_01_09_17_25_02_10_18_26_03_11_19_27_04_12_20_28_05_13_21_29_06_14_22_30_07_15_23_31:
5090+ ; XOPAVX2: # %bb.0:
5091+ ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
5092+ ; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [0,8,1,9,2,10,3,11,4,12,5,13,6,14,7,15]
5093+ ; XOPAVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
5094+ ; XOPAVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
5095+ ; XOPAVX2-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
5096+ ; XOPAVX2-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
5097+ ; XOPAVX2-NEXT: vmovdqa %xmm0, 16(%rdi)
5098+ ; XOPAVX2-NEXT: vmovdqa %xmm2, (%rdi)
5099+ ; XOPAVX2-NEXT: vzeroupper
5100+ ; XOPAVX2-NEXT: retq
; Single source shuffle (%a0 with a poison second operand): result element k
; takes byte mask[k] of %a0.
5101+ %r = shufflevector <32 x i8 > %a0 , <32 x i8 > poison, <32 x i32 > <i32 0 , i32 8 , i32 16 , i32 24 , i32 1 , i32 9 , i32 17 , i32 25 , i32 2 , i32 10 , i32 18 , i32 26 , i32 3 , i32 11 , i32 19 , i32 27 , i32 4 , i32 12 , i32 20 , i32 28 , i32 5 , i32 13 , i32 21 , i32 29 , i32 6 , i32 14 , i32 22 , i32 30 , i32 7 , i32 15 , i32 23 , i32 31 >
; The store (not a returned value) is what makes the split-store lowering
; checked above legal/profitable for this pattern.
5102+ store <32 x i8 > %r , ptr %out , align 32
5103+ ret void
5104+ }
5105+
50485106define <4 x i64 > @PR28136 (<32 x i8 > %a0 , <32 x i8 > %a1 ) {
50495107; AVX1-LABEL: PR28136:
50505108; AVX1: # %bb.0:
; 0 commit comments  (GitHub page-footer text captured by extraction; not part of the test file)