|
| 1 | +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 |
| 2 | +; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake-avx512 | FileCheck %s |
| 3 | +; Eight scalar fadds (six identical %a + %v, two using floats loaded from %x) feed an insertelement chain that writes lanes 0-3 of an <8 x float> twice. |
| 4 | +define <8 x float> @test(ptr %x, float %v, float %a) { |
| 5 | +; CHECK-LABEL: define <8 x float> @test( |
| 6 | +; CHECK-SAME: ptr [[X:%.*]], float [[V:%.*]], float [[A:%.*]]) #[[ATTR0:[0-9]+]] { |
| 7 | +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> poison, float [[A]], i32 0 |
| 8 | +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <4 x i32> zeroinitializer |
| 9 | +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> poison, float [[V]], i32 0 |
| 10 | +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> poison, <4 x i32> zeroinitializer |
| 11 | +; CHECK-NEXT: [[TMP8:%.*]] = fadd <4 x float> [[TMP3]], [[TMP7]] |
| 12 | +; CHECK-NEXT: [[TMP6:%.*]] = load <2 x float>, ptr [[X]], align 4 |
| 13 | +; CHECK-NEXT: [[TMP11:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> [[TMP7]], <2 x float> [[TMP6]], i64 0) |
| 14 | +; CHECK-NEXT: [[TMP12:%.*]] = fadd <4 x float> [[TMP3]], [[TMP11]] |
| 15 | +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> |
| 16 | +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x float> [[TMP12]], <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison> |
| 17 | +; CHECK-NEXT: [[I71:%.*]] = shufflevector <8 x float> [[TMP9]], <8 x float> [[TMP10]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7> |
| 18 | +; CHECK-NEXT: ret <8 x float> [[I71]] |
| 19 | +; |
| 20 | + %gep1 = getelementptr inbounds <4 x float>, ptr %x, i64 0, i64 1 ; address of float element 1 of the <4 x float> at %x |
| 21 | + %x0 = load float, ptr %x, align 4 ; scalar load of element 0 |
| 22 | + %x1 = load float, ptr %gep1, align 4 ; scalar load of element 1 |
| 23 | + %add1 = fadd float %a, %v ; %add1-%add4, %add7 and %add8 all compute the same %a + %v |
| 24 | + %add2 = fadd float %a, %v |
| 25 | + %add3 = fadd float %a, %v |
| 26 | + %add4 = fadd float %a, %v |
| 27 | + %add5 = fadd float %a, %x0 ; %a + loaded element 0 |
| 28 | + %add6 = fadd float %a, %x1 ; %a + loaded element 1 |
| 29 | + %add7 = fadd float %a, %v |
| 30 | + %add8 = fadd float %a, %v |
| 31 | + %i0 = insertelement <8 x float> undef, float %add1, i32 0 ; first pass: fill lanes 0-3 of an undef vector |
| 32 | + %i1 = insertelement <8 x float> %i0, float %add2, i32 1 |
| 33 | + %i2 = insertelement <8 x float> %i1, float %add3, i32 2 |
| 34 | + %i3 = insertelement <8 x float> %i2, float %add4, i32 3 |
| 35 | + %i4 = insertelement <8 x float> %i3, float %add5, i32 0 ; second pass reuses lane indices 0-3, replacing the values inserted by %i0-%i3; NOTE(review): the repeated indices look deliberate for this test - confirm |
| 36 | + %i5 = insertelement <8 x float> %i4, float %add6, i32 1 |
| 37 | + %i6 = insertelement <8 x float> %i5, float %add7, i32 2 |
| 38 | + %i7 = insertelement <8 x float> %i6, float %add8, i32 3 ; lanes 4-7 are never written by this chain |
| 39 | + ret <8 x float> %i7 |
| 40 | +} |
0 commit comments