From 55d5f62faca57cb80628bd3ba8fafcd724a93d5e Mon Sep 17 00:00:00 2001
From: Hassnaa Hamdi <hassnaa.hamdi@arm.com>
Date: Tue, 22 Oct 2024 06:28:59 +0000
Subject: [PATCH] [Associativity]: Make FMul associative operator

Change-Id: I8231774b6fa7c446d221a4e32098788af32072bb
---
 llvm/include/llvm/IR/Instruction.h            |  4 +--
 .../identical-subrange-spill-infloop.ll       |  8 +++--
 .../InstCombine/2006-10-26-VectorReassoc.ll   | 11 +++----
 llvm/test/Transforms/InstCombine/fast-math.ll |  3 +-
 llvm/test/Transforms/InstCombine/fdiv.ll      |  3 +-
 llvm/test/Transforms/LICM/hoist-binop.ll      |  5 +--
 .../X86/extractelement-multi-register-use.ll  | 15 +++++----
 .../buildvector-nodes-dependency.ll           | 32 +++++++------------
 .../Transforms/SLPVectorizer/reschedule.ll    |  8 ++---
 9 files changed, 39 insertions(+), 50 deletions(-)
diff --git a/llvm/include/llvm/IR/Instruction.h b/llvm/include/llvm/IR/Instruction.h
index 61dba265dc948..cfbdf248edf8e 100644
--- a/llvm/include/llvm/IR/Instruction.h
+++ b/llvm/include/llvm/IR/Instruction.h
@@ -696,8 +696,8 @@ class Instruction : public User,
   ///
   bool isAssociative() const LLVM_READONLY;
   static bool isAssociative(unsigned Opcode) {
-    return Opcode == And || Opcode == Or || Opcode == Xor ||
-           Opcode == Add || Opcode == Mul;
+    return Opcode == And || Opcode == Or || Opcode == Xor || Opcode == Add ||
+           Opcode == Mul || Opcode == FMul;
   }
 
   /// Return true if the instruction is commutative:
diff --git a/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll b/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll
index a4a8f43646d4b..61783a7e82c5d 100644
--- a/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll
+++ b/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll
@@ -139,11 +139,14 @@ define void @main(i1 %arg) #0 {
 ; CHECK-NEXT:    v_readlane_b32 s38, v7, 2
 ; CHECK-NEXT:    v_readlane_b32 s39, v7, 3
 ; CHECK-NEXT:    v_readlane_b32 s40, v7, 4
-; CHECK-NEXT:    image_sample_lz v3, v[1:2], s[44:51], s[20:23] dmask:0x1
-; CHECK-NEXT:    v_mov_b32_e32 v2, 0
+; CHECK-NEXT:    image_sample_lz v2, v[1:2], s[44:51], s[20:23] dmask:0x1
 ; CHECK-NEXT:    v_readlane_b32 s41, v7, 5
 ; CHECK-NEXT:    v_readlane_b32 s42, v7, 6
 ; CHECK-NEXT:    v_readlane_b32 s43, v7, 7
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:    v_mul_f32_e32 v3, v0, v2
+; CHECK-NEXT:    v_mov_b32_e32 v2, 0
+; CHECK-NEXT:    ; implicit-def: $vgpr0
 ; CHECK-NEXT:  .LBB0_2: ; %bb50
 ; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    v_readlane_b32 s36, v7, 32
@@ -162,7 +165,6 @@ define void @main(i1 %arg) #0 {
 ; CHECK-NEXT:    image_sample_lz v1, v[1:2], s[12:19], s[20:23] dmask:0x1
 ; CHECK-NEXT:    s_waitcnt vmcnt(0)
 ; CHECK-NEXT:    v_sub_f32_e32 v1, v1, v4
-; CHECK-NEXT:    v_mul_f32_e32 v1, v1, v0
 ; CHECK-NEXT:    v_mul_f32_e32 v1, v1, v3
 ; CHECK-NEXT:    s_mov_b64 vcc, vcc
 ; CHECK-NEXT:    s_cbranch_vccnz .LBB0_2
diff --git a/llvm/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll b/llvm/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll
index fb860a5e7bdf3..8a9f023aef36a 100644
--- a/llvm/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll
+++ b/llvm/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll
@@ -4,9 +4,8 @@
 ; Verify this doesn't fold when no fast-math-flags are specified
 define <4 x float> @test_fmul(<4 x float> %V) {
 ; CHECK-LABEL: @test_fmul(
-; CHECK-NEXT:     [[TMP1:%.*]] = fmul <4 x float> [[V:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
-; CHECK-NEXT:     [[TMP2:%.*]] = fmul <4 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00>
-; CHECK-NEXT:     ret <4 x float> [[TMP2]]
+; CHECK-NEXT:     [[TMP1:%.*]] = fmul <4 x float> [[V:%.*]], <float 1.000000e+00, float 4.000000e+05, float -9.000000e+00, float 1.600000e+01>
+; CHECK-NEXT:     ret <4 x float> [[TMP1]]
         %Y = fmul <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
         %Z = fmul <4 x float> %Y, < float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00 >
         ret <4 x float> %Z
@@ -35,12 +34,10 @@ define <4 x float> @test_fmul_reassoc_nsz(<4 x float> %V) {
 }
 
 ; (V * C1) * C2 => V * (C1 * C2)
-; TODO: This doesn't require 'nsz'.  It should fold to V * { 1.0, 4.0e+05, -9.0, 16.0 }
 define <4 x float> @test_fmul_reassoc(<4 x float> %V) {
 ; CHECK-LABEL: @test_fmul_reassoc(
-; CHECK-NEXT:     [[TMP1:%.*]] = fmul reassoc <4 x float> [[V:%.*]], <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
-; CHECK-NEXT:     [[TMP2:%.*]] = fmul reassoc <4 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00>
-; CHECK-NEXT:     ret <4 x float> [[TMP2]]
+; CHECK-NEXT:     [[TMP1:%.*]] = fmul reassoc <4 x float> [[V]], <float 1.000000e+00, float 4.000000e+05, float -9.000000e+00, float 1.600000e+01>
+; CHECK-NEXT:     ret <4 x float> [[TMP1]]
         %Y = fmul reassoc <4 x float> %V, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >
         %Z = fmul reassoc <4 x float> %Y, < float 1.000000e+00, float 2.000000e+05, float -3.000000e+00, float 4.000000e+00 >
         ret <4 x float> %Z
diff --git a/llvm/test/Transforms/InstCombine/fast-math.ll b/llvm/test/Transforms/InstCombine/fast-math.ll
index 32f136d53fab4..c17fd9a8c5a68 100644
--- a/llvm/test/Transforms/InstCombine/fast-math.ll
+++ b/llvm/test/Transforms/InstCombine/fast-math.ll
@@ -17,8 +17,7 @@ define float @fold(float %a) {
 ; fixed FP mode.
 define float @notfold(float %a) {
 ; CHECK-LABEL: @notfold(
-; CHECK-NEXT:    [[MUL:%.*]] = fmul fast float [[A:%.*]], 0x3FF3333340000000
-; CHECK-NEXT:    [[MUL1:%.*]] = fmul float [[MUL]], 0x4002666660000000
+; CHECK-NEXT:    [[MUL1:%.*]] = fmul float [[A:%.*]], 0x4006147AE0000000
 ; CHECK-NEXT:    ret float [[MUL1]]
 ;
   %mul = fmul fast float %a, 0x3FF3333340000000
diff --git a/llvm/test/Transforms/InstCombine/fdiv.ll b/llvm/test/Transforms/InstCombine/fdiv.ll
index 12d6e6463de65..a35338ad3f370 100644
--- a/llvm/test/Transforms/InstCombine/fdiv.ll
+++ b/llvm/test/Transforms/InstCombine/fdiv.ll
@@ -524,8 +524,7 @@ define <2 x float> @div_constant_dividend2_reassoc_only(<2 x float> %x) {
 
 define <2 x float> @div_constant_dividend3(<2 x float> %x) {
 ; CHECK-LABEL: @div_constant_dividend3(
-; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc arcp <2 x float> [[X:%.*]], <float 1.500000e+01, float -7.000000e+00>
-; CHECK-NEXT:    [[T2:%.*]] = fmul reassoc arcp <2 x float> [[TMP1]], <float 0x3FD5555560000000, float 0x3FC24924A0000000>
+; CHECK-NEXT:    [[T2:%.*]] = fmul reassoc arcp <2 x float> [[X:%.*]], <float 5.000000e+00, float -1.000000e+00>
 ; CHECK-NEXT:    ret <2 x float> [[T2]]
 ;
   %t1 = fdiv <2 x float> <float 3.0e0, float 7.0e0>, %x
diff --git a/llvm/test/Transforms/LICM/hoist-binop.ll b/llvm/test/Transforms/LICM/hoist-binop.ll
index ea7d96c07d5ff..77f91ac095e13 100644
--- a/llvm/test/Transforms/LICM/hoist-binop.ll
+++ b/llvm/test/Transforms/LICM/hoist-binop.ll
@@ -562,12 +562,13 @@ loop:
 define void @fmul_noassoc(float %c1, float %c2) {
 ; CHECK-LABEL: @fmul_noassoc(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[INVARIANT_OP:%.*]] = fmul nsz float [[C1:%.*]], [[C2:%.*]]
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT:    [[STEP_ADD:%.*]] = fmul reassoc nsz float [[INDEX]], [[C1:%.*]]
+; CHECK-NEXT:    [[STEP_ADD:%.*]] = fmul reassoc nsz float [[INDEX]], [[C1]]
 ; CHECK-NEXT:    call void @use(float [[STEP_ADD]])
-; CHECK-NEXT:    [[INDEX_NEXT]] = fmul nsz float [[STEP_ADD]], [[C2:%.*]]
+; CHECK-NEXT:    [[INDEX_NEXT]] = fmul nsz float [[INDEX]], [[INVARIANT_OP]]
 ; CHECK-NEXT:    br label [[LOOP]]
 ;
 entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extractelement-multi-register-use.ll b/llvm/test/Transforms/SLPVectorizer/X86/extractelement-multi-register-use.ll
index 73b73735da021..fd839732de91c 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/extractelement-multi-register-use.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/extractelement-multi-register-use.ll
@@ -11,13 +11,14 @@ define void @test(double %i) {
 ; CHECK-NEXT:    [[TMP3:%.*]] = fsub <2 x double> zeroinitializer, [[TMP2]]
 ; CHECK-NEXT:    [[I75:%.*]] = fsub double 0.000000e+00, [[I]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = fsub <2 x double> [[TMP0]], zeroinitializer
-; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> [[TMP5]], <4 x i32> <i32 poison, i32 0, i32 2, i32 poison>
-; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP28:%.*]] = shufflevector <4 x double> [[TMP6]], <4 x double> [[TMP7]], <8 x i32> <i32 poison, i32 poison, i32 0, i32 poison, i32 poison, i32 5, i32 6, i32 poison>
-; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <8 x double> [[TMP28]], <8 x double> <double 0.000000e+00, double 0.000000e+00, double poison, double poison, double 0.000000e+00, double poison, double poison, double poison>, <8 x i32> <i32 8, i32 9, i32 2, i32 poison, i32 12, i32 5, i32 6, i32 poison>
-; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <8 x double> [[TMP8]], double [[I75]], i32 3
-; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <8 x double> [[TMP9]], <8 x double> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 3>
-; CHECK-NEXT:    [[TMP11:%.*]] = fmul <8 x double> zeroinitializer, [[TMP10]]
+; CHECK-NEXT:    [[TMP28:%.*]] = insertelement <2 x double> [[TMP5]], double [[I75]], i32 1
+; CHECK-NEXT:    [[TMP6:%.*]] = fmul <2 x double> zeroinitializer, [[TMP28]]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x double> [[TMP1]], i32 0
+; CHECK-NEXT:    [[I87:%.*]] = fmul double 0.000000e+00, [[TMP7]]
+; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <8 x double> <double 0.000000e+00, double 0.000000e+00, double poison, double poison, double 0.000000e+00, double poison, double poison, double poison>, <8 x double> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <8 x double> [[TMP9]], double [[I87]], i32 5
+; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <8 x double> [[TMP10]], <8 x double> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 2, i32 3>
 ; CHECK-NEXT:    [[TMP12:%.*]] = fadd <8 x double> zeroinitializer, [[TMP11]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = fadd <8 x double> [[TMP12]], zeroinitializer
 ; CHECK-NEXT:    [[TMP14:%.*]] = fcmp ult <8 x double> [[TMP13]], zeroinitializer
diff --git a/llvm/test/Transforms/SLPVectorizer/buildvector-nodes-dependency.ll b/llvm/test/Transforms/SLPVectorizer/buildvector-nodes-dependency.ll
index 36abe96567bb2..5e9637af7e76a 100644
--- a/llvm/test/Transforms/SLPVectorizer/buildvector-nodes-dependency.ll
+++ b/llvm/test/Transforms/SLPVectorizer/buildvector-nodes-dependency.ll
@@ -8,29 +8,21 @@ define double @test() {
 ; CHECK-NEXT:    [[TMP0:%.*]] = load double, ptr null, align 8
 ; CHECK-NEXT:    br label [[COND_TRUE:%.*]]
 ; CHECK:       cond.true:
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> <double 0.000000e+00, double poison>, double [[TMP0]], i32 1
+; CHECK-NEXT:    [[MUL13:%.*]] = fmul double [[TMP0]], 0.000000e+00
+; CHECK-NEXT:    [[OP_RDX4:%.*]] = fmul double 0.000000e+00, [[TMP0]]
+; CHECK-NEXT:    [[ADD17:%.*]] = fadd double [[MUL13]], [[OP_RDX4]]
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> poison, double [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = fmul <2 x double> zeroinitializer, [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 1, i32 1>
-; CHECK-NEXT:    [[TMP4:%.*]] = fmul <2 x double> [[TMP3]], zeroinitializer
-; CHECK-NEXT:    [[TMP5:%.*]] = fmul <2 x double> [[TMP3]], zeroinitializer
-; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> [[TMP1]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT:    [[TMP7:%.*]] = fmul <2 x double> [[TMP6]], zeroinitializer
-; CHECK-NEXT:    [[TMP8:%.*]] = fsub <2 x double> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP9:%.*]] = fmul <2 x double> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <2 x double> [[TMP8]], <2 x double> [[TMP9]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT:    [[TMP11:%.*]] = fadd <2 x double> zeroinitializer, [[TMP10]]
-; CHECK-NEXT:    [[TMP12:%.*]] = fmul <2 x double> zeroinitializer, [[TMP10]]
-; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <2 x double> [[TMP11]], <2 x double> [[TMP12]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT:    [[TMP14:%.*]] = fsub <2 x double> [[TMP13]], [[TMP2]]
-; CHECK-NEXT:    [[TMP15:%.*]] = fadd <2 x double> [[TMP13]], [[TMP2]]
-; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <2 x double> [[TMP14]], <2 x double> [[TMP15]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:    [[TMP23:%.*]] = fmul <2 x double> zeroinitializer, [[TMP1]]
+; CHECK-NEXT:    [[SUB:%.*]] = fsub double 0.000000e+00, 0.000000e+00
+; CHECK-NEXT:    [[ADD:%.*]] = fadd double 0.000000e+00, [[SUB]]
+; CHECK-NEXT:    [[SUB2:%.*]] = fsub double [[ADD]], 0.000000e+00
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x double> poison, double [[SUB2]], i32 0
+; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <2 x double> [[TMP5]], double [[ADD17]], i32 1
 ; CHECK-NEXT:    [[TMP17:%.*]] = fsub <2 x double> [[TMP16]], zeroinitializer
-; CHECK-NEXT:    [[TMP18:%.*]] = fmul <2 x double> [[TMP4]], zeroinitializer
-; CHECK-NEXT:    [[TMP19:%.*]] = fmul <2 x double> zeroinitializer, [[TMP18]]
-; CHECK-NEXT:    [[TMP20:%.*]] = fadd <2 x double> [[TMP19]], [[TMP17]]
+; CHECK-NEXT:    [[TMP20:%.*]] = fadd <2 x double> [[TMP2]], [[TMP17]]
 ; CHECK-NEXT:    [[TMP21:%.*]] = fsub <2 x double> [[TMP20]], zeroinitializer
-; CHECK-NEXT:    [[TMP22:%.*]] = fmul <2 x double> [[TMP5]], zeroinitializer
-; CHECK-NEXT:    [[TMP23:%.*]] = fmul <2 x double> zeroinitializer, [[TMP22]]
 ; CHECK-NEXT:    [[TMP24:%.*]] = fadd <2 x double> [[TMP23]], [[TMP21]]
 ; CHECK-NEXT:    [[TMP25:%.*]] = extractelement <2 x double> [[TMP24]], i32 0
 ; CHECK-NEXT:    [[TMP26:%.*]] = extractelement <2 x double> [[TMP24]], i32 1
diff --git a/llvm/test/Transforms/SLPVectorizer/reschedule.ll b/llvm/test/Transforms/SLPVectorizer/reschedule.ll
index 825329b5af97e..c148a73408878 100644
--- a/llvm/test/Transforms/SLPVectorizer/reschedule.ll
+++ b/llvm/test/Transforms/SLPVectorizer/reschedule.ll
@@ -8,19 +8,17 @@ declare void @use(double, double)
 define void @test() {
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT:  for.body602:
-; CHECK-NEXT:    [[MUL701:%.*]] = fmul double 0.000000e+00, 0.000000e+00
-; CHECK-NEXT:    [[MUL703:%.*]] = fmul double 0.000000e+00, 0.000000e+00
-; CHECK-NEXT:    [[I4:%.*]] = call double @llvm.fmuladd.f64(double [[MUL701]], double 0.000000e+00, double [[MUL703]])
+; CHECK-NEXT:    [[I4:%.*]] = call double @llvm.fmuladd.f64(double 0.000000e+00, double 0.000000e+00, double 0.000000e+00)
 ; CHECK-NEXT:    store double [[I4]], ptr null, align 8
 ; CHECK-NEXT:    [[I5:%.*]] = load double, ptr null, align 8
 ; CHECK-NEXT:    [[I6:%.*]] = load double, ptr null, align 8
 ; CHECK-NEXT:    [[MUL746:%.*]] = fmul double 0.000000e+00, [[I6]]
 ; CHECK-NEXT:    [[MUL747:%.*]] = fmul double 0.000000e+00, [[I5]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> poison, double [[MUL746]], i32 0
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[MUL701]], i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double 0.000000e+00, i32 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> zeroinitializer, <2 x double> zeroinitializer, <2 x double> [[TMP1]])
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> poison, double [[MUL747]], i32 0
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[MUL703]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double 0.000000e+00, i32 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[TMP2]], <2 x double> zeroinitializer, <2 x double> [[TMP4]])
 ; CHECK-NEXT:    [[TMP6:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[TMP5]], <2 x double> zeroinitializer, <2 x double> zeroinitializer)
 ; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> zeroinitializer, <2 x double> [[TMP6]], <2 x double> zeroinitializer)