Skip to content

Commit 175bab3

Browse files
[X86] Don't rely on global -fp-contract=fast on X86 CodeGen tests (#158026)
IR has the `contract` fast-math flag to indicate that contraction is allowed. Tests shouldn't rely on a global flag to perform contraction. This is a prerequisite for making backends rely only on the IR to perform contraction. See more here: https://discourse.llvm.org/t/allowfpopfusion-vs-sdnodeflags-hasallowcontract/80909/5
1 parent eeced0d commit 175bab3

10 files changed

+622
-585
lines changed

llvm/test/CodeGen/X86/avx512-fma.ll

Lines changed: 31 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=KNL
3-
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=SKX
2+
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=KNL
3+
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=ALL --check-prefix=SKX
44

55
define <16 x float> @test_x86_fmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
66
; ALL-LABEL: test_x86_fmadd_ps_z:
77
; ALL: ## %bb.0:
88
; ALL-NEXT: vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
99
; ALL-NEXT: retq
10-
%x = fmul <16 x float> %a0, %a1
11-
%res = fadd <16 x float> %x, %a2
10+
%x = fmul contract <16 x float> %a0, %a1
11+
%res = fadd contract <16 x float> %x, %a2
1212
ret <16 x float> %res
1313
}
1414

@@ -17,8 +17,8 @@ define <16 x float> @test_x86_fmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16
1717
; ALL: ## %bb.0:
1818
; ALL-NEXT: vfmsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm2
1919
; ALL-NEXT: retq
20-
%x = fmul <16 x float> %a0, %a1
21-
%res = fsub <16 x float> %x, %a2
20+
%x = fmul contract <16 x float> %a0, %a1
21+
%res = fsub contract <16 x float> %x, %a2
2222
ret <16 x float> %res
2323
}
2424

@@ -27,8 +27,8 @@ define <16 x float> @test_x86_fnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <1
2727
; ALL: ## %bb.0:
2828
; ALL-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm2
2929
; ALL-NEXT: retq
30-
%x = fmul <16 x float> %a0, %a1
31-
%res = fsub <16 x float> %a2, %x
30+
%x = fmul contract <16 x float> %a0, %a1
31+
%res = fsub contract <16 x float> %a2, %x
3232
ret <16 x float> %res
3333
}
3434

@@ -37,12 +37,12 @@ define <16 x float> @test_x86_fnmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <1
3737
; ALL: ## %bb.0:
3838
; ALL-NEXT: vfnmsub213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
3939
; ALL-NEXT: retq
40-
%x = fmul <16 x float> %a0, %a1
41-
%y = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
40+
%x = fmul contract <16 x float> %a0, %a1
41+
%y = fsub contract <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
4242
float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
4343
float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
4444
float -0.000000e+00>, %x
45-
%res = fsub <16 x float> %y, %a2
45+
%res = fsub contract <16 x float> %y, %a2
4646
ret <16 x float> %res
4747
}
4848

@@ -51,8 +51,8 @@ define <8 x double> @test_x86_fmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8
5151
; ALL: ## %bb.0:
5252
; ALL-NEXT: vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
5353
; ALL-NEXT: retq
54-
%x = fmul <8 x double> %a0, %a1
55-
%res = fadd <8 x double> %x, %a2
54+
%x = fmul contract <8 x double> %a0, %a1
55+
%res = fadd contract <8 x double> %x, %a2
5656
ret <8 x double> %res
5757
}
5858

@@ -61,8 +61,8 @@ define <8 x double> @test_x86_fmsub_pd_z(<8 x double> %a0, <8 x double> %a1, <8
6161
; ALL: ## %bb.0:
6262
; ALL-NEXT: vfmsub213pd {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm2
6363
; ALL-NEXT: retq
64-
%x = fmul <8 x double> %a0, %a1
65-
%res = fsub <8 x double> %x, %a2
64+
%x = fmul contract <8 x double> %a0, %a1
65+
%res = fsub contract <8 x double> %x, %a2
6666
ret <8 x double> %res
6767
}
6868

@@ -71,8 +71,8 @@ define double @test_x86_fmsub_213(double %a0, double %a1, double %a2) {
7171
; ALL: ## %bb.0:
7272
; ALL-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
7373
; ALL-NEXT: retq
74-
%x = fmul double %a0, %a1
75-
%res = fsub double %x, %a2
74+
%x = fmul contract double %a0, %a1
75+
%res = fsub contract double %x, %a2
7676
ret double %res
7777
}
7878

@@ -82,8 +82,8 @@ define double @test_x86_fmsub_213_m(double %a0, double %a1, ptr %a2_ptr) {
8282
; ALL-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem
8383
; ALL-NEXT: retq
8484
%a2 = load double , ptr%a2_ptr
85-
%x = fmul double %a0, %a1
86-
%res = fsub double %x, %a2
85+
%x = fmul contract double %a0, %a1
86+
%res = fsub contract double %x, %a2
8787
ret double %res
8888
}
8989

@@ -93,8 +93,8 @@ define double @test_x86_fmsub_231_m(double %a0, double %a1, ptr %a2_ptr) {
9393
; ALL-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1
9494
; ALL-NEXT: retq
9595
%a2 = load double , ptr%a2_ptr
96-
%x = fmul double %a0, %a2
97-
%res = fsub double %x, %a1
96+
%x = fmul contract double %a0, %a2
97+
%res = fsub contract double %x, %a1
9898
ret double %res
9999
}
100100

@@ -103,8 +103,8 @@ define <16 x float> @test231_br(<16 x float> %a1, <16 x float> %a2) nounwind {
103103
; ALL: ## %bb.0:
104104
; ALL-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * mem) + zmm1
105105
; ALL-NEXT: retq
106-
%b1 = fmul <16 x float> %a1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
107-
%b2 = fadd <16 x float> %b1, %a2
106+
%b1 = fmul contract <16 x float> %a1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
107+
%b2 = fadd contract <16 x float> %b1, %a2
108108
ret <16 x float> %b2
109109
}
110110

@@ -113,8 +113,8 @@ define <16 x float> @test213_br(<16 x float> %a1, <16 x float> %a2) nounwind {
113113
; ALL: ## %bb.0:
114114
; ALL-NEXT: vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + mem
115115
; ALL-NEXT: retq
116-
%b1 = fmul <16 x float> %a1, %a2
117-
%b2 = fadd <16 x float> %b1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
116+
%b1 = fmul contract <16 x float> %a1, %a2
117+
%b2 = fadd contract <16 x float> %b1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
118118
ret <16 x float> %b2
119119
}
120120

@@ -135,8 +135,8 @@ define <16 x float> @test_x86_fmadd132_ps(<16 x float> %a0, <16 x float> %a1, pt
135135
; SKX-NEXT: vfmadd132ps {{.*#+}} zmm0 {%k1} = (zmm0 * mem) + zmm1
136136
; SKX-NEXT: retq
137137
%a2 = load <16 x float>,ptr%a2_ptrt,align 1
138-
%x = fmul <16 x float> %a0, %a2
139-
%y = fadd <16 x float> %x, %a1
138+
%x = fmul contract <16 x float> %a0, %a2
139+
%y = fadd contract <16 x float> %x, %a1
140140
%res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a0
141141
ret <16 x float> %res
142142
}
@@ -160,8 +160,8 @@ define <16 x float> @test_x86_fmadd231_ps(<16 x float> %a0, <16 x float> %a1, pt
160160
; SKX-NEXT: vmovaps %zmm1, %zmm0
161161
; SKX-NEXT: retq
162162
%a2 = load <16 x float>,ptr%a2_ptrt,align 1
163-
%x = fmul <16 x float> %a0, %a2
164-
%y = fadd <16 x float> %x, %a1
163+
%x = fmul contract <16 x float> %a0, %a2
164+
%y = fadd contract <16 x float> %x, %a1
165165
%res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a1
166166
ret <16 x float> %res
167167
}
@@ -185,8 +185,8 @@ define <16 x float> @test_x86_fmadd213_ps(<16 x float> %a0, <16 x float> %a1, pt
185185
; SKX-NEXT: vmovaps %zmm1, %zmm0
186186
; SKX-NEXT: retq
187187
%a2 = load <16 x float>,ptr%a2_ptrt,align 1
188-
%x = fmul <16 x float> %a1, %a0
189-
%y = fadd <16 x float> %x, %a2
188+
%x = fmul contract <16 x float> %a1, %a0
189+
%y = fadd contract <16 x float> %x, %a2
190190
%res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a1
191191
ret <16 x float> %res
192192
}

llvm/test/CodeGen/X86/avx512fp16-combine-vfmac-fadd.ll

Lines changed: 26 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown --fp-contract=fast --enable-no-signed-zeros-fp-math -mattr=avx512fp16,avx512vl | FileCheck %s --check-prefixes=CHECK,NO-SZ
3-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown --fp-contract=fast -mattr=avx512fp16,avx512vl | FileCheck %s --check-prefixes=CHECK,HAS-SZ
2+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown --enable-no-signed-zeros-fp-math -mattr=avx512fp16,avx512vl | FileCheck %s --check-prefixes=CHECK,NO-SZ
3+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16,avx512vl | FileCheck %s --check-prefixes=CHECK,HAS-SZ
44

55
; FADD(acc, FMA(a, b, +0.0)) can be combined to FMA(a, b, acc) if the nsz flag is set.
66
define dso_local <32 x half> @test1(<32 x half> %acc, <32 x half> %a, <32 x half> %b) {
@@ -18,9 +18,9 @@ define dso_local <32 x half> @test1(<32 x half> %acc, <32 x half> %a, <32 x half
1818
entry:
1919
%0 = bitcast <32 x half> %a to <16 x float>
2020
%1 = bitcast <32 x half> %b to <16 x float>
21-
%2 = tail call <16 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.512(<16 x float> %0, <16 x float> %1, <16 x float> zeroinitializer, i16 -1, i32 4)
21+
%2 = tail call contract <16 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.512(<16 x float> %0, <16 x float> %1, <16 x float> zeroinitializer, i16 -1, i32 4)
2222
%3 = bitcast <16 x float> %2 to <32 x half>
23-
%add.i = fadd <32 x half> %3, %acc
23+
%add.i = fadd contract <32 x half> %3, %acc
2424
ret <32 x half> %add.i
2525
}
2626

@@ -39,9 +39,9 @@ define dso_local <32 x half> @test2(<32 x half> %acc, <32 x half> %a, <32 x half
3939
entry:
4040
%0 = bitcast <32 x half> %a to <16 x float>
4141
%1 = bitcast <32 x half> %b to <16 x float>
42-
%2 = tail call <16 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.512(<16 x float> %0, <16 x float> %1, <16 x float> zeroinitializer, i16 -1, i32 4)
42+
%2 = tail call contract <16 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.512(<16 x float> %0, <16 x float> %1, <16 x float> zeroinitializer, i16 -1, i32 4)
4343
%3 = bitcast <16 x float> %2 to <32 x half>
44-
%add.i = fadd <32 x half> %3, %acc
44+
%add.i = fadd contract <32 x half> %3, %acc
4545
ret <32 x half> %add.i
4646
}
4747

@@ -60,9 +60,9 @@ define dso_local <16 x half> @test3(<16 x half> %acc, <16 x half> %a, <16 x half
6060
entry:
6161
%0 = bitcast <16 x half> %a to <8 x float>
6262
%1 = bitcast <16 x half> %b to <8 x float>
63-
%2 = tail call <8 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.256(<8 x float> %0, <8 x float> %1, <8 x float> zeroinitializer, i8 -1)
63+
%2 = tail call contract <8 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.256(<8 x float> %0, <8 x float> %1, <8 x float> zeroinitializer, i8 -1)
6464
%3 = bitcast <8 x float> %2 to <16 x half>
65-
%add.i = fadd <16 x half> %3, %acc
65+
%add.i = fadd contract <16 x half> %3, %acc
6666
ret <16 x half> %add.i
6767
}
6868

@@ -81,9 +81,9 @@ define dso_local <16 x half> @test4(<16 x half> %acc, <16 x half> %a, <16 x half
8181
entry:
8282
%0 = bitcast <16 x half> %a to <8 x float>
8383
%1 = bitcast <16 x half> %b to <8 x float>
84-
%2 = tail call <8 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.256(<8 x float> %0, <8 x float> %1, <8 x float> zeroinitializer, i8 -1)
84+
%2 = tail call contract <8 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.256(<8 x float> %0, <8 x float> %1, <8 x float> zeroinitializer, i8 -1)
8585
%3 = bitcast <8 x float> %2 to <16 x half>
86-
%add.i = fadd <16 x half> %3, %acc
86+
%add.i = fadd contract <16 x half> %3, %acc
8787
ret <16 x half> %add.i
8888
}
8989

@@ -102,9 +102,9 @@ define dso_local <8 x half> @test5(<8 x half> %acc, <8 x half> %a, <8 x half> %b
102102
entry:
103103
%0 = bitcast <8 x half> %a to <4 x float>
104104
%1 = bitcast <8 x half> %b to <4 x float>
105-
%2 = tail call <4 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.128(<4 x float> %0, <4 x float> %1, <4 x float> zeroinitializer, i8 -1)
105+
%2 = tail call contract <4 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.128(<4 x float> %0, <4 x float> %1, <4 x float> zeroinitializer, i8 -1)
106106
%3 = bitcast <4 x float> %2 to <8 x half>
107-
%add.i = fadd <8 x half> %3, %acc
107+
%add.i = fadd contract <8 x half> %3, %acc
108108
ret <8 x half> %add.i
109109
}
110110

@@ -123,9 +123,9 @@ define dso_local <8 x half> @test6(<8 x half> %acc, <8 x half> %a, <8 x half> %b
123123
entry:
124124
%0 = bitcast <8 x half> %a to <4 x float>
125125
%1 = bitcast <8 x half> %b to <4 x float>
126-
%2 = tail call <4 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.128(<4 x float> %0, <4 x float> %1, <4 x float> zeroinitializer, i8 -1)
126+
%2 = tail call contract <4 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.128(<4 x float> %0, <4 x float> %1, <4 x float> zeroinitializer, i8 -1)
127127
%3 = bitcast <4 x float> %2 to <8 x half>
128-
%add.i = fadd <8 x half> %3, %acc
128+
%add.i = fadd contract <8 x half> %3, %acc
129129
ret <8 x half> %add.i
130130
}
131131

@@ -138,9 +138,9 @@ define dso_local <32 x half> @test13(<32 x half> %acc, <32 x half> %a, <32 x hal
138138
entry:
139139
%0 = bitcast <32 x half> %a to <16 x float>
140140
%1 = bitcast <32 x half> %b to <16 x float>
141-
%2 = tail call <16 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.512(<16 x float> %0, <16 x float> %1, <16 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, i16 -1, i32 4)
141+
%2 = tail call contract <16 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.512(<16 x float> %0, <16 x float> %1, <16 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, i16 -1, i32 4)
142142
%3 = bitcast <16 x float> %2 to <32 x half>
143-
%add.i = fadd <32 x half> %3, %acc
143+
%add.i = fadd contract <32 x half> %3, %acc
144144
ret <32 x half> %add.i
145145
}
146146

@@ -152,9 +152,9 @@ define dso_local <32 x half> @test14(<32 x half> %acc, <32 x half> %a, <32 x hal
152152
entry:
153153
%0 = bitcast <32 x half> %a to <16 x float>
154154
%1 = bitcast <32 x half> %b to <16 x float>
155-
%2 = tail call <16 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.512(<16 x float> %0, <16 x float> %1, <16 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, i16 -1, i32 4)
155+
%2 = tail call contract <16 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.512(<16 x float> %0, <16 x float> %1, <16 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, i16 -1, i32 4)
156156
%3 = bitcast <16 x float> %2 to <32 x half>
157-
%add.i = fadd <32 x half> %3, %acc
157+
%add.i = fadd contract <32 x half> %3, %acc
158158
ret <32 x half> %add.i
159159
}
160160

@@ -166,9 +166,9 @@ define dso_local <16 x half> @test15(<16 x half> %acc, <16 x half> %a, <16 x hal
166166
entry:
167167
%0 = bitcast <16 x half> %a to <8 x float>
168168
%1 = bitcast <16 x half> %b to <8 x float>
169-
%2 = tail call <8 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.256(<8 x float> %0, <8 x float> %1, <8 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, i8 -1)
169+
%2 = tail call contract <8 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.256(<8 x float> %0, <8 x float> %1, <8 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, i8 -1)
170170
%3 = bitcast <8 x float> %2 to <16 x half>
171-
%add.i = fadd <16 x half> %3, %acc
171+
%add.i = fadd contract <16 x half> %3, %acc
172172
ret <16 x half> %add.i
173173
}
174174

@@ -180,9 +180,9 @@ define dso_local <16 x half> @test16(<16 x half> %acc, <16 x half> %a, <16 x hal
180180
entry:
181181
%0 = bitcast <16 x half> %a to <8 x float>
182182
%1 = bitcast <16 x half> %b to <8 x float>
183-
%2 = tail call <8 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.256(<8 x float> %0, <8 x float> %1, <8 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, i8 -1)
183+
%2 = tail call contract <8 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.256(<8 x float> %0, <8 x float> %1, <8 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, i8 -1)
184184
%3 = bitcast <8 x float> %2 to <16 x half>
185-
%add.i = fadd <16 x half> %3, %acc
185+
%add.i = fadd contract <16 x half> %3, %acc
186186
ret <16 x half> %add.i
187187
}
188188

@@ -194,9 +194,9 @@ define dso_local <8 x half> @test17(<8 x half> %acc, <8 x half> %a, <8 x half> %
194194
entry:
195195
%0 = bitcast <8 x half> %a to <4 x float>
196196
%1 = bitcast <8 x half> %b to <4 x float>
197-
%2 = tail call <4 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.128(<4 x float> %0, <4 x float> %1, <4 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, i8 -1)
197+
%2 = tail call contract <4 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.128(<4 x float> %0, <4 x float> %1, <4 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, i8 -1)
198198
%3 = bitcast <4 x float> %2 to <8 x half>
199-
%add.i = fadd <8 x half> %3, %acc
199+
%add.i = fadd contract <8 x half> %3, %acc
200200
ret <8 x half> %add.i
201201
}
202202

@@ -208,9 +208,9 @@ define dso_local <8 x half> @test18(<8 x half> %acc, <8 x half> %a, <8 x half> %
208208
entry:
209209
%0 = bitcast <8 x half> %a to <4 x float>
210210
%1 = bitcast <8 x half> %b to <4 x float>
211-
%2 = tail call <4 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.128(<4 x float> %0, <4 x float> %1, <4 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, i8 -1)
211+
%2 = tail call contract <4 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.128(<4 x float> %0, <4 x float> %1, <4 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, i8 -1)
212212
%3 = bitcast <4 x float> %2 to <8 x half>
213-
%add.i = fadd <8 x half> %3, %acc
213+
%add.i = fadd contract <8 x half> %3, %acc
214214
ret <8 x half> %add.i
215215
}
216216

0 commit comments

Comments
 (0)