@@ -3287,3 +3287,131 @@ define void @PR45604(<32 x i16>* %dst, <8 x i16>* %src) {
3287
3287
store <32 x i16 > %v3 , <32 x i16 >* %dst , align 16
3288
3288
ret void
3289
3289
}
3290
+
3291
; Test case reported on D105827
;
; Reduced reproducer operating entirely on undef pointers: it stores 1.0,
; reloads scalars, splats them into <2 x float>, multiplies by <0.0, -2.0>,
; widens the products into lanes 1 and 2 of a <4 x float>, and adds the result
; to a vector whose lane 3 holds the first reloaded scalar. Two <4 x float>
; stores are emitted, one per fadd.
;
; The assertion lines below pin the exact llc output for each of the SSE2,
; SSSE3, SSE4.1, AVX1 and AVX2 check prefixes. Their format suggests they were
; produced by utils/update_llc_test_checks.py (TODO confirm against the file
; header); if so, regenerate them with that script instead of editing by hand.
; Note that the tail of the SSE2 sequence is not identical to the SSSE3 one,
; so those expectations are kept as separate per-prefix blocks.
define void @SpinningCube() {
; SSE2-LABEL: SpinningCube:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movl $1065353216, (%rax) # imm = 0x3F800000
; SSE2-NEXT:    movaps {{.*#+}} xmm0 = <u,u,u,1.0E+0>
; SSE2-NEXT:    movaps {{.*#+}} xmm1 = <0.0E+0,-2.0E+0,u,u>
; SSE2-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-NEXT:    movaps %xmm2, %xmm3
; SSE2-NEXT:    shufps {{.*#+}} xmm3 = xmm3[0,1],xmm1[1,3]
; SSE2-NEXT:    xorps %xmm4, %xmm4
; SSE2-NEXT:    shufps {{.*#+}} xmm4 = xmm4[0,1],xmm3[2,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0],xmm0[2,3]
; SSE2-NEXT:    addps %xmm4, %xmm2
; SSE2-NEXT:    movaps %xmm2, (%rax)
; SSE2-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0,0,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0,1,3]
; SSE2-NEXT:    mulps %xmm2, %xmm1
; SSE2-NEXT:    addps %xmm0, %xmm1
; SSE2-NEXT:    movaps %xmm1, (%rax)
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: SpinningCube:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movl $1065353216, (%rax) # imm = 0x3F800000
; SSSE3-NEXT:    movaps {{.*#+}} xmm0 = <u,u,u,1.0E+0>
; SSSE3-NEXT:    movaps {{.*#+}} xmm1 = <0.0E+0,-2.0E+0,u,u>
; SSSE3-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSSE3-NEXT:    movaps %xmm2, %xmm3
; SSSE3-NEXT:    shufps {{.*#+}} xmm3 = xmm3[0,1],xmm1[1,3]
; SSSE3-NEXT:    xorps %xmm4, %xmm4
; SSSE3-NEXT:    shufps {{.*#+}} xmm4 = xmm4[0,1],xmm3[2,0]
; SSSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0],xmm0[2,3]
; SSSE3-NEXT:    addps %xmm4, %xmm2
; SSSE3-NEXT:    movaps %xmm2, (%rax)
; SSSE3-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0,1,3]
; SSSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0,0,2]
; SSSE3-NEXT:    mulps %xmm1, %xmm2
; SSSE3-NEXT:    addps %xmm0, %xmm2
; SSSE3-NEXT:    movaps %xmm2, (%rax)
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: SpinningCube:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    movl $1065353216, (%rax) # imm = 0x3F800000
; SSE41-NEXT:    movaps {{.*#+}} xmm0 = <u,u,u,1.0E+0>
; SSE41-NEXT:    movaps {{.*#+}} xmm1 = <0.0E+0,-2.0E+0,u,u>
; SSE41-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0,1,3]
; SSE41-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE41-NEXT:    movaps %xmm1, %xmm3
; SSE41-NEXT:    insertps {{.*#+}} xmm3 = xmm3[0,1,2],xmm2[0]
; SSE41-NEXT:    movaps %xmm0, %xmm4
; SSE41-NEXT:    insertps {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[2,3]
; SSE41-NEXT:    addps %xmm3, %xmm4
; SSE41-NEXT:    movaps %xmm4, (%rax)
; SSE41-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE41-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0,0,2]
; SSE41-NEXT:    mulps %xmm1, %xmm2
; SSE41-NEXT:    addps %xmm0, %xmm2
; SSE41-NEXT:    movaps %xmm2, (%rax)
; SSE41-NEXT:    retq
;
; AVX1-LABEL: SpinningCube:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    movl $1065353216, (%rax) # imm = 0x3F800000
; AVX1-NEXT:    vmovaps {{.*#+}} xmm0 = <u,u,u,1.0E+0>
; AVX1-NEXT:    vmovaps {{.*#+}} xmm1 = <0.0E+0,-2.0E+0,u,u>
; AVX1-NEXT:    vpermilps {{.*#+}} xmm2 = xmm1[0,0,1,3]
; AVX1-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; AVX1-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[0]
; AVX1-NEXT:    vinsertps {{.*#+}} xmm3 = xmm0[0],xmm3[0],xmm0[2,3]
; AVX1-NEXT:    vaddps %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vmovaps %xmm2, (%rax)
; AVX1-NEXT:    vbroadcastss (%rax), %xmm2
; AVX1-NEXT:    vmulps %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[0,0,1,3]
; AVX1-NEXT:    vaddps %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    vmovaps %xmm0, (%rax)
; AVX1-NEXT:    retq
;
; AVX2-LABEL: SpinningCube:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    movl $1065353216, (%rax) # imm = 0x3F800000
; AVX2-NEXT:    vbroadcastss {{.*#+}} xmm0 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX2-NEXT:    vmovaps {{.*#+}} xmm1 = <0.0E+0,-2.0E+0,u,u>
; AVX2-NEXT:    vpermilps {{.*#+}} xmm2 = xmm1[0,0,1,3]
; AVX2-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; AVX2-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[0]
; AVX2-NEXT:    vinsertps {{.*#+}} xmm3 = xmm0[0],xmm3[0],xmm0[2,3]
; AVX2-NEXT:    vaddps %xmm3, %xmm2, %xmm2
; AVX2-NEXT:    vmovaps %xmm2, (%rax)
; AVX2-NEXT:    vbroadcastss (%rax), %xmm2
; AVX2-NEXT:    vmulps %xmm1, %xmm2, %xmm1
; AVX2-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[0,0,1,3]
; AVX2-NEXT:    vaddps %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    vmovaps %xmm0, (%rax)
; AVX2-NEXT:    retq
entry:
  ; Seed memory with 1.0, then reload it; %2 carries that value in lane 3 and
  ; is reused by both vector adds below.
  store float 1.000000e+00, float* undef, align 4
  %0 = load float, float* undef, align 4
  %1 = fmul float undef, 0.000000e+00
  %2 = insertelement <4 x float> poison, float %0, i32 3
  ; First half: splat a reloaded scalar, scale by <0.0, -2.0>, and place the
  ; two products in lanes 1 and 2 of a 4-wide vector.
  %3 = load float, float* undef, align 4
  %4 = insertelement <2 x float> poison, float %3, i32 0
  %5 = shufflevector <2 x float> %4, <2 x float> poison, <2 x i32> zeroinitializer
  %6 = fmul <2 x float> %5, <float 0.000000e+00, float -2.000000e+00>
  %7 = fadd float %1, undef
  %8 = shufflevector <2 x float> %6, <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %9 = shufflevector <4 x float> undef, <4 x float> %8, <4 x i32> <i32 0, i32 4, i32 5, i32 undef>
  %10 = insertelement <4 x float> %9, float %7, i32 3
  ; Second addend: %2 with a NaN constant in lane 1 and undef in lanes 0 and 2.
  %11 = insertelement <4 x float> %2, float 0x7FF8000000000000, i32 1
  %12 = insertelement <4 x float> %11, float undef, i32 0
  %13 = insertelement <4 x float> %12, float undef, i32 2
  %14 = fadd <4 x float> %10, %13
  store <4 x float> %14, <4 x float>* undef, align 16
  ; Second half: same splat / scale / widen sequence on a fresh load, added
  ; directly to %2.
  %15 = load float, float* undef, align 4
  %16 = insertelement <2 x float> poison, float %15, i32 0
  %17 = shufflevector <2 x float> %16, <2 x float> poison, <2 x i32> zeroinitializer
  %18 = fmul <2 x float> %17, <float 0.000000e+00, float -2.000000e+00>
  %19 = shufflevector <2 x float> %18, <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %20 = shufflevector <4 x float> undef, <4 x float> %19, <4 x i32> <i32 0, i32 4, i32 5, i32 undef>
  %21 = fadd <4 x float> %20, %2
  store <4 x float> %21, <4 x float>* undef, align 16
  ret void
}
0 commit comments