11; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2- ; RUN: llc < %s -mtriple=x86_64-unknown-unknown --fp-contract=fast --enable-no-signed-zeros-fp-math -mattr=avx512fp16,avx512vl | FileCheck %s --check-prefixes=CHECK,NO-SZ
3- ; RUN: llc < %s -mtriple=x86_64-unknown-unknown --fp-contract=fast -mattr=avx512fp16,avx512vl | FileCheck %s --check-prefixes=CHECK,HAS-SZ
2+ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown --enable-no-signed-zeros-fp-math -mattr=avx512fp16,avx512vl | FileCheck %s --check-prefixes=CHECK,NO-SZ
3+ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16,avx512vl | FileCheck %s --check-prefixes=CHECK,HAS-SZ
44
55; FADD(acc, FMA(a, b, +0.0)) can be combined to FMA(a, b, acc) if the nsz flag is set.
66define dso_local <32 x half > @test1 (<32 x half > %acc , <32 x half > %a , <32 x half > %b ) {
@@ -18,9 +18,9 @@ define dso_local <32 x half> @test1(<32 x half> %acc, <32 x half> %a, <32 x half
1818entry:
1919 %0 = bitcast <32 x half > %a to <16 x float >
2020 %1 = bitcast <32 x half > %b to <16 x float >
21- %2 = tail call <16 x float > @llvm.x86.avx512fp16.mask.vfcmadd.cph.512 (<16 x float > %0 , <16 x float > %1 , <16 x float > zeroinitializer , i16 -1 , i32 4 )
21+ %2 = tail call contract <16 x float > @llvm.x86.avx512fp16.mask.vfcmadd.cph.512 (<16 x float > %0 , <16 x float > %1 , <16 x float > zeroinitializer , i16 -1 , i32 4 )
2222 %3 = bitcast <16 x float > %2 to <32 x half >
23- %add.i = fadd <32 x half > %3 , %acc
23+ %add.i = fadd contract <32 x half > %3 , %acc
2424 ret <32 x half > %add.i
2525}
2626
@@ -39,9 +39,9 @@ define dso_local <32 x half> @test2(<32 x half> %acc, <32 x half> %a, <32 x half
3939entry:
4040 %0 = bitcast <32 x half > %a to <16 x float >
4141 %1 = bitcast <32 x half > %b to <16 x float >
42- %2 = tail call <16 x float > @llvm.x86.avx512fp16.mask.vfmadd.cph.512 (<16 x float > %0 , <16 x float > %1 , <16 x float > zeroinitializer , i16 -1 , i32 4 )
42+ %2 = tail call contract <16 x float > @llvm.x86.avx512fp16.mask.vfmadd.cph.512 (<16 x float > %0 , <16 x float > %1 , <16 x float > zeroinitializer , i16 -1 , i32 4 )
4343 %3 = bitcast <16 x float > %2 to <32 x half >
44- %add.i = fadd <32 x half > %3 , %acc
44+ %add.i = fadd contract <32 x half > %3 , %acc
4545 ret <32 x half > %add.i
4646}
4747
@@ -60,9 +60,9 @@ define dso_local <16 x half> @test3(<16 x half> %acc, <16 x half> %a, <16 x half
6060entry:
6161 %0 = bitcast <16 x half > %a to <8 x float >
6262 %1 = bitcast <16 x half > %b to <8 x float >
63- %2 = tail call <8 x float > @llvm.x86.avx512fp16.mask.vfcmadd.cph.256 (<8 x float > %0 , <8 x float > %1 , <8 x float > zeroinitializer , i8 -1 )
63+ %2 = tail call contract <8 x float > @llvm.x86.avx512fp16.mask.vfcmadd.cph.256 (<8 x float > %0 , <8 x float > %1 , <8 x float > zeroinitializer , i8 -1 )
6464 %3 = bitcast <8 x float > %2 to <16 x half >
65- %add.i = fadd <16 x half > %3 , %acc
65+ %add.i = fadd contract <16 x half > %3 , %acc
6666 ret <16 x half > %add.i
6767}
6868
@@ -81,9 +81,9 @@ define dso_local <16 x half> @test4(<16 x half> %acc, <16 x half> %a, <16 x half
8181entry:
8282 %0 = bitcast <16 x half > %a to <8 x float >
8383 %1 = bitcast <16 x half > %b to <8 x float >
84- %2 = tail call <8 x float > @llvm.x86.avx512fp16.mask.vfmadd.cph.256 (<8 x float > %0 , <8 x float > %1 , <8 x float > zeroinitializer , i8 -1 )
84+ %2 = tail call contract <8 x float > @llvm.x86.avx512fp16.mask.vfmadd.cph.256 (<8 x float > %0 , <8 x float > %1 , <8 x float > zeroinitializer , i8 -1 )
8585 %3 = bitcast <8 x float > %2 to <16 x half >
86- %add.i = fadd <16 x half > %3 , %acc
86+ %add.i = fadd contract <16 x half > %3 , %acc
8787 ret <16 x half > %add.i
8888}
8989
@@ -102,9 +102,9 @@ define dso_local <8 x half> @test5(<8 x half> %acc, <8 x half> %a, <8 x half> %b
102102entry:
103103 %0 = bitcast <8 x half > %a to <4 x float >
104104 %1 = bitcast <8 x half > %b to <4 x float >
105- %2 = tail call <4 x float > @llvm.x86.avx512fp16.mask.vfcmadd.cph.128 (<4 x float > %0 , <4 x float > %1 , <4 x float > zeroinitializer , i8 -1 )
105+ %2 = tail call contract <4 x float > @llvm.x86.avx512fp16.mask.vfcmadd.cph.128 (<4 x float > %0 , <4 x float > %1 , <4 x float > zeroinitializer , i8 -1 )
106106 %3 = bitcast <4 x float > %2 to <8 x half >
107- %add.i = fadd <8 x half > %3 , %acc
107+ %add.i = fadd contract <8 x half > %3 , %acc
108108 ret <8 x half > %add.i
109109}
110110
@@ -123,9 +123,9 @@ define dso_local <8 x half> @test6(<8 x half> %acc, <8 x half> %a, <8 x half> %b
123123entry:
124124 %0 = bitcast <8 x half > %a to <4 x float >
125125 %1 = bitcast <8 x half > %b to <4 x float >
126- %2 = tail call <4 x float > @llvm.x86.avx512fp16.mask.vfmadd.cph.128 (<4 x float > %0 , <4 x float > %1 , <4 x float > zeroinitializer , i8 -1 )
126+ %2 = tail call contract <4 x float > @llvm.x86.avx512fp16.mask.vfmadd.cph.128 (<4 x float > %0 , <4 x float > %1 , <4 x float > zeroinitializer , i8 -1 )
127127 %3 = bitcast <4 x float > %2 to <8 x half >
128- %add.i = fadd <8 x half > %3 , %acc
128+ %add.i = fadd contract <8 x half > %3 , %acc
129129 ret <8 x half > %add.i
130130}
131131
@@ -138,9 +138,9 @@ define dso_local <32 x half> @test13(<32 x half> %acc, <32 x half> %a, <32 x hal
138138entry:
139139 %0 = bitcast <32 x half > %a to <16 x float >
140140 %1 = bitcast <32 x half > %b to <16 x float >
141- %2 = tail call <16 x float > @llvm.x86.avx512fp16.mask.vfcmadd.cph.512 (<16 x float > %0 , <16 x float > %1 , <16 x float > <float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 >, i16 -1 , i32 4 )
141+ %2 = tail call contract <16 x float > @llvm.x86.avx512fp16.mask.vfcmadd.cph.512 (<16 x float > %0 , <16 x float > %1 , <16 x float > <float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 >, i16 -1 , i32 4 )
142142 %3 = bitcast <16 x float > %2 to <32 x half >
143- %add.i = fadd <32 x half > %3 , %acc
143+ %add.i = fadd contract <32 x half > %3 , %acc
144144 ret <32 x half > %add.i
145145}
146146
@@ -152,9 +152,9 @@ define dso_local <32 x half> @test14(<32 x half> %acc, <32 x half> %a, <32 x hal
152152entry:
153153 %0 = bitcast <32 x half > %a to <16 x float >
154154 %1 = bitcast <32 x half > %b to <16 x float >
155- %2 = tail call <16 x float > @llvm.x86.avx512fp16.mask.vfmadd.cph.512 (<16 x float > %0 , <16 x float > %1 , <16 x float > <float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 >, i16 -1 , i32 4 )
155+ %2 = tail call contract <16 x float > @llvm.x86.avx512fp16.mask.vfmadd.cph.512 (<16 x float > %0 , <16 x float > %1 , <16 x float > <float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 >, i16 -1 , i32 4 )
156156 %3 = bitcast <16 x float > %2 to <32 x half >
157- %add.i = fadd <32 x half > %3 , %acc
157+ %add.i = fadd contract <32 x half > %3 , %acc
158158 ret <32 x half > %add.i
159159}
160160
@@ -166,9 +166,9 @@ define dso_local <16 x half> @test15(<16 x half> %acc, <16 x half> %a, <16 x hal
166166entry:
167167 %0 = bitcast <16 x half > %a to <8 x float >
168168 %1 = bitcast <16 x half > %b to <8 x float >
169- %2 = tail call <8 x float > @llvm.x86.avx512fp16.mask.vfcmadd.cph.256 (<8 x float > %0 , <8 x float > %1 , <8 x float > <float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 >, i8 -1 )
169+ %2 = tail call contract <8 x float > @llvm.x86.avx512fp16.mask.vfcmadd.cph.256 (<8 x float > %0 , <8 x float > %1 , <8 x float > <float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 >, i8 -1 )
170170 %3 = bitcast <8 x float > %2 to <16 x half >
171- %add.i = fadd <16 x half > %3 , %acc
171+ %add.i = fadd contract <16 x half > %3 , %acc
172172 ret <16 x half > %add.i
173173}
174174
@@ -180,9 +180,9 @@ define dso_local <16 x half> @test16(<16 x half> %acc, <16 x half> %a, <16 x hal
180180entry:
181181 %0 = bitcast <16 x half > %a to <8 x float >
182182 %1 = bitcast <16 x half > %b to <8 x float >
183- %2 = tail call <8 x float > @llvm.x86.avx512fp16.mask.vfmadd.cph.256 (<8 x float > %0 , <8 x float > %1 , <8 x float > <float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 >, i8 -1 )
183+ %2 = tail call contract <8 x float > @llvm.x86.avx512fp16.mask.vfmadd.cph.256 (<8 x float > %0 , <8 x float > %1 , <8 x float > <float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 >, i8 -1 )
184184 %3 = bitcast <8 x float > %2 to <16 x half >
185- %add.i = fadd <16 x half > %3 , %acc
185+ %add.i = fadd contract <16 x half > %3 , %acc
186186 ret <16 x half > %add.i
187187}
188188
@@ -194,9 +194,9 @@ define dso_local <8 x half> @test17(<8 x half> %acc, <8 x half> %a, <8 x half> %
194194entry:
195195 %0 = bitcast <8 x half > %a to <4 x float >
196196 %1 = bitcast <8 x half > %b to <4 x float >
197- %2 = tail call <4 x float > @llvm.x86.avx512fp16.mask.vfcmadd.cph.128 (<4 x float > %0 , <4 x float > %1 , <4 x float > <float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 >, i8 -1 )
197+ %2 = tail call contract <4 x float > @llvm.x86.avx512fp16.mask.vfcmadd.cph.128 (<4 x float > %0 , <4 x float > %1 , <4 x float > <float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 >, i8 -1 )
198198 %3 = bitcast <4 x float > %2 to <8 x half >
199- %add.i = fadd <8 x half > %3 , %acc
199+ %add.i = fadd contract <8 x half > %3 , %acc
200200 ret <8 x half > %add.i
201201}
202202
@@ -208,9 +208,9 @@ define dso_local <8 x half> @test18(<8 x half> %acc, <8 x half> %a, <8 x half> %
208208entry:
209209 %0 = bitcast <8 x half > %a to <4 x float >
210210 %1 = bitcast <8 x half > %b to <4 x float >
211- %2 = tail call <4 x float > @llvm.x86.avx512fp16.mask.vfmadd.cph.128 (<4 x float > %0 , <4 x float > %1 , <4 x float > <float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 >, i8 -1 )
211+ %2 = tail call contract <4 x float > @llvm.x86.avx512fp16.mask.vfmadd.cph.128 (<4 x float > %0 , <4 x float > %1 , <4 x float > <float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 , float 0xB790000000000000 >, i8 -1 )
212212 %3 = bitcast <4 x float > %2 to <8 x half >
213- %add.i = fadd <8 x half > %3 , %acc
213+ %add.i = fadd contract <8 x half > %3 , %acc
214214 ret <8 x half > %add.i
215215}
216216
0 commit comments