@@ -1766,7 +1766,6 @@ bb.1:
   ret void
 }

-; FIXME: This is a miscompile.
 ; The FMF on the reduction should match the incoming insts.

 define float @fadd_v4f32_fmf(float* %p) {
@@ -1776,7 +1775,7 @@ define float @fadd_v4f32_fmf(float* %p) {
 ; CHECK-NEXT:    [[P3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[P]] to <4 x float>*
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP2]])
+; CHECK-NEXT:    [[TMP3:%.*]] = call reassoc nsz float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP2]])
 ; CHECK-NEXT:    ret float [[TMP3]]
 ;
 ; STORE-LABEL: @fadd_v4f32_fmf(
@@ -1785,7 +1784,7 @@ define float @fadd_v4f32_fmf(float* %p) {
 ; STORE-NEXT:    [[P3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3
 ; STORE-NEXT:    [[TMP1:%.*]] = bitcast float* [[P]] to <4 x float>*
 ; STORE-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
-; STORE-NEXT:    [[TMP3:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP2]])
+; STORE-NEXT:    [[TMP3:%.*]] = call reassoc nsz float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP2]])
 ; STORE-NEXT:    ret float [[TMP3]]
 ;
   %p1 = getelementptr inbounds float, float* %p, i64 1
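The scalar body of @fadd_v4f32_fmf is only partially visible in the hunks above. For reference, a minimal sketch of the input this test runs against, consistent with the updated reduction flags (the value names %t0-%t3 and %add1-%add3 are illustrative, not taken from the patch):

define float @fadd_v4f32_fmf(float* %p) {
  %p1 = getelementptr inbounds float, float* %p, i64 1
  %p2 = getelementptr inbounds float, float* %p, i64 2
  %p3 = getelementptr inbounds float, float* %p, i64 3
  %t0 = load float, float* %p, align 4
  %t1 = load float, float* %p1, align 4
  %t2 = load float, float* %p2, align 4
  %t3 = load float, float* %p3, align 4
  ; Every fadd in the chain carries exactly "reassoc nsz", so the vectorized
  ; reduction call must carry "reassoc nsz" rather than the stronger "fast".
  %add1 = fadd reassoc nsz float %t1, %t0
  %add2 = fadd reassoc nsz float %t2, %add1
  %add3 = fadd reassoc nsz float %t3, %add2
  ret float %add3
}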
@@ -1801,14 +1800,18 @@ define float @fadd_v4f32_fmf(float* %p) {
   ret float %add3
 }

+; The minimal FMF for an fadd reduction are "reassoc nsz".
+; Only the common FMF of all operations in the reduction propagate to the result.
+; In this example, "contract nnan arcp" are dropped, but "ninf" transfers with the required flags.
+
 define float @fadd_v4f32_fmf_intersect(float* %p) {
 ; CHECK-LABEL: @fadd_v4f32_fmf_intersect(
 ; CHECK-NEXT:    [[P1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1
 ; CHECK-NEXT:    [[P2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2
 ; CHECK-NEXT:    [[P3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[P]] to <4 x float>*
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP2]])
+; CHECK-NEXT:    [[TMP3:%.*]] = call reassoc ninf nsz float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP2]])
 ; CHECK-NEXT:    ret float [[TMP3]]
 ;
 ; STORE-LABEL: @fadd_v4f32_fmf_intersect(
@@ -1817,7 +1820,7 @@ define float @fadd_v4f32_fmf_intersect(float* %p) {
 ; STORE-NEXT:    [[P3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3
 ; STORE-NEXT:    [[TMP1:%.*]] = bitcast float* [[P]] to <4 x float>*
 ; STORE-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
-; STORE-NEXT:    [[TMP3:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP2]])
+; STORE-NEXT:    [[TMP3:%.*]] = call reassoc ninf nsz float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP2]])
 ; STORE-NEXT:    ret float [[TMP3]]
 ;
   %p1 = getelementptr inbounds float, float* %p, i64 1
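Similarly, the scalar body of @fadd_v4f32_fmf_intersect is truncated above. A sketch of an fadd chain that exercises the intersection, assuming illustrative per-instruction flag sets chosen to match the new comment ("contract nnan arcp" dropped, "ninf" kept):

  ; Each fadd carries a different superset of "reassoc ninf nsz":
  %add1 = fadd reassoc ninf nsz nnan float %t1, %t0
  %add2 = fadd reassoc ninf nsz arcp float %t2, %add1
  %add3 = fadd reassoc ninf nsz contract float %t3, %add2
  ; The common subset across the chain is "reassoc ninf nsz", which is
  ; exactly what the CHECK/STORE lines expect on @llvm.vector.reduce.fadd.
  ret float %add3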