@@ -7376,6 +7376,160 @@ define <16 x i16> @shuffle_v16i16_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_1
7376
7376
ret <16 x i16 > %shuffle
7377
7377
}
7378
7378
7379
; Splat test - every one of the 16 result lanes takes element 8 of %x (the
; shuffle mask is splat(i32 8); the second shuffle operand is poison).
; In a 16 x i16 vector, index 8 is the first i16 of the upper 128-bit half, so
; the expected sequences below either extract half 1 (vextractf128 or
; vextracti128 with $1) and then broadcast word 0, or perform one vpermw with
; an all-8 index vector.
; NOTE(review) - the assertion lines look autogenerated; presumably they are
; meant to be regenerated by script rather than edited by hand. Confirm.
+ define <16 x i16 > @shuffle_v16i16_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08 (<16 x i16 > noundef %x ) {
7380
+ ; AVX1-LABEL: shuffle_v16i16_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08:
7381
+ ; AVX1: # %bb.0:
7382
+ ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
7383
+ ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
7384
+ ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
7385
+ ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
7386
+ ; AVX1-NEXT: retq
7387
+ ;
7388
+ ; AVX2-LABEL: shuffle_v16i16_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08:
7389
+ ; AVX2: # %bb.0:
7390
+ ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
7391
+ ; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
7392
+ ; AVX2-NEXT: retq
7393
+ ;
7394
+ ; AVX512VL-SLOW-LABEL: shuffle_v16i16_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08:
7395
+ ; AVX512VL-SLOW: # %bb.0:
7396
+ ; AVX512VL-SLOW-NEXT: vextracti128 $1, %ymm0, %xmm0
7397
+ ; AVX512VL-SLOW-NEXT: vpbroadcastw %xmm0, %ymm0
7398
+ ; AVX512VL-SLOW-NEXT: retq
7399
+ ;
7400
+ ; AVX512VL-FAST-CROSSLANE-LABEL: shuffle_v16i16_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08:
7401
+ ; AVX512VL-FAST-CROSSLANE: # %bb.0:
7402
+ ; AVX512VL-FAST-CROSSLANE-NEXT: vpbroadcastw {{.*#+}} ymm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
7403
+ ; AVX512VL-FAST-CROSSLANE-NEXT: vpermw %ymm0, %ymm1, %ymm0
7404
+ ; AVX512VL-FAST-CROSSLANE-NEXT: retq
7405
+ ;
7406
+ ; AVX512VL-FAST-PERLANE-LABEL: shuffle_v16i16_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08:
7407
+ ; AVX512VL-FAST-PERLANE: # %bb.0:
7408
+ ; AVX512VL-FAST-PERLANE-NEXT: vextracti128 $1, %ymm0, %xmm0
7409
+ ; AVX512VL-FAST-PERLANE-NEXT: vpbroadcastw %xmm0, %ymm0
7410
+ ; AVX512VL-FAST-PERLANE-NEXT: retq
7411
+ ;
7412
+ ; XOPAVX1-LABEL: shuffle_v16i16_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08:
7413
+ ; XOPAVX1: # %bb.0:
7414
+ ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
7415
+ ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
7416
+ ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
7417
+ ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
7418
+ ; XOPAVX1-NEXT: retq
7419
+ ;
7420
+ ; XOPAVX2-LABEL: shuffle_v16i16_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08:
7421
+ ; XOPAVX2: # %bb.0:
7422
+ ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
7423
+ ; XOPAVX2-NEXT: vpbroadcastw %xmm0, %ymm0
7424
+ ; XOPAVX2-NEXT: retq
7425
+ %r = shufflevector <16 x i16 > %x , <16 x i16 > poison, <16 x i32 > splat(i32 8 )
7426
+ ret <16 x i16 > %r
7427
+ }
7428
+
7429
; Splat test - every one of the 16 result lanes takes element 11 of %x (the
; shuffle mask is splat(i32 11); the second shuffle operand is poison).
; Index 11 is word 3 of the upper 128-bit half. The expected sequences below
; therefore replicate word 3 within the low quadword (vpshuflw with 3,3,3,3)
; and then spread that across the register - via vpshufd plus vinsertf128 on
; the 128-bit paths, via vpermq selecting quadword 2 on the 256-bit paths - or
; use one vpermw with an all-11 index vector.
; NOTE(review) - the assertion lines look autogenerated; presumably they are
; meant to be regenerated by script rather than edited by hand. Confirm.
+ define <16 x i16 > @shuffle_v16i16_11_11_11_11_11_11_11_11_11_11_11_11_11_11_11_11 (<16 x i16 > noundef %x ) {
7430
+ ; AVX1-LABEL: shuffle_v16i16_11_11_11_11_11_11_11_11_11_11_11_11_11_11_11_11:
7431
+ ; AVX1: # %bb.0:
7432
+ ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
7433
+ ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
7434
+ ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
7435
+ ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
7436
+ ; AVX1-NEXT: retq
7437
+ ;
7438
+ ; AVX2-LABEL: shuffle_v16i16_11_11_11_11_11_11_11_11_11_11_11_11_11_11_11_11:
7439
+ ; AVX2: # %bb.0:
7440
+ ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,3,3,3,4,5,6,7,11,11,11,11,12,13,14,15]
7441
+ ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,2,2]
7442
+ ; AVX2-NEXT: retq
7443
+ ;
7444
+ ; AVX512VL-SLOW-LABEL: shuffle_v16i16_11_11_11_11_11_11_11_11_11_11_11_11_11_11_11_11:
7445
+ ; AVX512VL-SLOW: # %bb.0:
7446
+ ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,3,3,3,4,5,6,7,11,11,11,11,12,13,14,15]
7447
+ ; AVX512VL-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,2,2]
7448
+ ; AVX512VL-SLOW-NEXT: retq
7449
+ ;
7450
+ ; AVX512VL-FAST-CROSSLANE-LABEL: shuffle_v16i16_11_11_11_11_11_11_11_11_11_11_11_11_11_11_11_11:
7451
+ ; AVX512VL-FAST-CROSSLANE: # %bb.0:
7452
+ ; AVX512VL-FAST-CROSSLANE-NEXT: vpbroadcastw {{.*#+}} ymm1 = [11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11]
7453
+ ; AVX512VL-FAST-CROSSLANE-NEXT: vpermw %ymm0, %ymm1, %ymm0
7454
+ ; AVX512VL-FAST-CROSSLANE-NEXT: retq
7455
+ ;
7456
+ ; AVX512VL-FAST-PERLANE-LABEL: shuffle_v16i16_11_11_11_11_11_11_11_11_11_11_11_11_11_11_11_11:
7457
+ ; AVX512VL-FAST-PERLANE: # %bb.0:
7458
+ ; AVX512VL-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,3,3,3,4,5,6,7,11,11,11,11,12,13,14,15]
7459
+ ; AVX512VL-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,2,2]
7460
+ ; AVX512VL-FAST-PERLANE-NEXT: retq
7461
+ ;
7462
+ ; XOPAVX1-LABEL: shuffle_v16i16_11_11_11_11_11_11_11_11_11_11_11_11_11_11_11_11:
7463
+ ; XOPAVX1: # %bb.0:
7464
+ ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
7465
+ ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
7466
+ ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
7467
+ ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
7468
+ ; XOPAVX1-NEXT: retq
7469
+ ;
7470
+ ; XOPAVX2-LABEL: shuffle_v16i16_11_11_11_11_11_11_11_11_11_11_11_11_11_11_11_11:
7471
+ ; XOPAVX2: # %bb.0:
7472
+ ; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,3,3,3,4,5,6,7,11,11,11,11,12,13,14,15]
7473
+ ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,2,2]
7474
+ ; XOPAVX2-NEXT: retq
7475
+ %r = shufflevector <16 x i16 > %x , <16 x i16 > poison, <16 x i32 > splat(i32 11 )
7476
+ ret <16 x i16 > %r
7477
+ }
7478
+
7479
+ ; PR129276
7480
; Splat test - every one of the 16 result lanes takes element 15 of %x, the
; last i16 of the vector (the shuffle mask is splat(i32 15); the second shuffle
; operand is poison). Index 15 is word 7 of the upper 128-bit half, so the
; expected sequences below replicate word 7 in the high quadword (vpshufhw
; with 7,7,7,7, or a vpshufb picking bytes 30 and 31) and then spread it
; across the register, or use one vpermw with an all-15 index vector.
; Unlike the element-8 and element-11 splat tests, the expectations here are
; split into separate AVX2-SLOW, AVX2-FAST-ALL and AVX2-FAST-PERLANE groups
; and share a single AVX512VL group.
; NOTE(review) - the assertion lines look autogenerated; presumably they are
; meant to be regenerated by script rather than edited by hand. Confirm.
+ define <16 x i16 > @shuffle_v16i16_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15 (<16 x i16 > noundef %x ) {
7481
+ ; AVX1-LABEL: shuffle_v16i16_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15:
7482
+ ; AVX1: # %bb.0:
7483
+ ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
7484
+ ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7]
7485
+ ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,2,2]
7486
+ ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
7487
+ ; AVX1-NEXT: retq
7488
+ ;
7489
+ ; AVX2-SLOW-LABEL: shuffle_v16i16_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15:
7490
+ ; AVX2-SLOW: # %bb.0:
7491
+ ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,7,7,7,8,9,10,11,15,15,15,15]
7492
+ ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,2,2,2,6,6,6,6]
7493
+ ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,2,2]
7494
+ ; AVX2-SLOW-NEXT: retq
7495
+ ;
7496
+ ; AVX2-FAST-ALL-LABEL: shuffle_v16i16_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15:
7497
+ ; AVX2-FAST-ALL: # %bb.0:
7498
+ ; AVX2-FAST-ALL-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,7,7,7,8,9,10,11,15,15,15,15]
7499
+ ; AVX2-FAST-ALL-NEXT: vpbroadcastd {{.*#+}} ymm1 = [6,6,6,6,6,6,6,6]
7500
+ ; AVX2-FAST-ALL-NEXT: vpermd %ymm0, %ymm1, %ymm0
7501
+ ; AVX2-FAST-ALL-NEXT: retq
7502
+ ;
7503
+ ; AVX2-FAST-PERLANE-LABEL: shuffle_v16i16_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15:
7504
+ ; AVX2-FAST-PERLANE: # %bb.0:
7505
+ ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,30,31,30,31,30,31,30,31,u,u,u,u,u,u,u,u]
7506
+ ; AVX2-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,2,2]
7507
+ ; AVX2-FAST-PERLANE-NEXT: retq
7508
+ ;
7509
+ ; AVX512VL-LABEL: shuffle_v16i16_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15:
7510
+ ; AVX512VL: # %bb.0:
7511
+ ; AVX512VL-NEXT: vpbroadcastw {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
7512
+ ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
7513
+ ; AVX512VL-NEXT: retq
7514
+ ;
7515
+ ; XOPAVX1-LABEL: shuffle_v16i16_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15:
7516
+ ; XOPAVX1: # %bb.0:
7517
+ ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
7518
+ ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7]
7519
+ ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,2,2]
7520
+ ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
7521
+ ; XOPAVX1-NEXT: retq
7522
+ ;
7523
+ ; XOPAVX2-LABEL: shuffle_v16i16_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15:
7524
+ ; XOPAVX2: # %bb.0:
7525
+ ; XOPAVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,7,7,7,8,9,10,11,15,15,15,15]
7526
+ ; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,2,2,2,6,6,6,6]
7527
+ ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,2,2]
7528
+ ; XOPAVX2-NEXT: retq
7529
+ %r = shufflevector <16 x i16 > %x , <16 x i16 > poison, <16 x i32 > splat(i32 15 )
7530
+ ret <16 x i16 > %r
7531
+ }
7532
+
7379
7533
define <16 x i16 > @insert_v16i16_0elt_into_zero_vector (ptr %ptr ) {
7380
7534
; ALL-LABEL: insert_v16i16_0elt_into_zero_vector:
7381
7535
; ALL: # %bb.0:
0 commit comments