[RISCV] Use slideup to lower build_vector when all operands are (extract_element X, 0) (#154450)
The general lowering of build_vector starts by splatting the first
operand and then sliding the remaining operands down one-by-one. However,
if every operand is an extract_element of the first element of some
vector, we can instead take the original _vector_ (the source of the
extraction) of the last build_vector operand as the start value and slide
the other operands up, in reverse order, one-by-one. This avoids the
initial splat and also eliminates the vector-to-scalar move for that last
operand, which is something we cannot do with vslidedown/vslide1down.
---------
Co-authored-by: Craig Topper <[email protected]>
Co-authored-by: Luke Lau <[email protected]>
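
For illustration, here is a rough sketch of the two lowerings (not taken from the patch; the 4-element size, the register assignments, and the omitted vsetvli setup are all assumptions) for a build_vector whose operands e0..e3 each come from lane 0 of vectors v10..v13:

  # Slide-down style lowering (sketch): every element, including the last,
  # needs a vector-to-scalar move, and the result starts as a splat of e0.
  vfmv.f.s        fa0, v10          # e0 -> scalar
  vfmv.f.s        fa1, v11          # e1 -> scalar
  vfmv.f.s        fa2, v12          # e2 -> scalar
  vfmv.f.s        fa3, v13          # e3 -> scalar
  vfmv.v.f        v8, fa0           # v8 = {e0, e0, e0, e0}
  vfslide1down.vf v8, v8, fa1       # v8 = {e0, e0, e0, e1}
  vfslide1down.vf v8, v8, fa2       # v8 = {e0, e0, e1, e2}
  vfslide1down.vf v8, v8, fa3       # v8 = {e0, e1, e2, e3}

  # Slide-up lowering from this patch (sketch): start from v13, which
  # already holds e3 in lane 0, and slide the other scalars up in reverse
  # order; the splat and the vector-to-scalar move of e3 are not needed.
  vfmv.f.s        fa2, v12          # e2 -> scalar
  vfmv.f.s        fa1, v11          # e1 -> scalar
  vfmv.f.s        fa0, v10          # e0 -> scalar
  vfslide1up.vf   v14, v13, fa2     # v14 = {e2, e3, ...}
  vfslide1up.vf   v15, v14, fa1     # v15 = {e1, e2, e3, ...}
  vfslide1up.vf   v8, v15, fa0      # v8  = {e0, e1, e2, e3}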
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll (166 additions, 0 deletions)
@@ -1828,3 +1828,169 @@ define <8 x double> @buildvec_v8f64_zvl512(double %e0, double %e1, double %e2, d
   %v7 = insertelement <8 x double> %v6, double %e7, i64 7
   ret <8 x double> %v7
 }
+
+define <4 x float> @buildvec_vfredusum_slideup(float %start, <8 x float> %arg1, <8 x float> %arg2, <8 x float> %arg3, <8 x float> %arg4) nounwind {
+; CHECK-LABEL: buildvec_vfredusum_slideup:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vfmv.s.f v16, fa0
+; CHECK-NEXT:    vfredusum.vs v8, v8, v16
+; CHECK-NEXT:    vfredusum.vs v9, v10, v16
+; CHECK-NEXT:    vfredusum.vs v10, v12, v16
+; CHECK-NEXT:    vfmv.f.s fa5, v8
+; CHECK-NEXT:    vfmv.f.s fa4, v9
+; CHECK-NEXT:    vfmv.f.s fa3, v10
+; CHECK-NEXT:    vfredusum.vs v8, v14, v16
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vfslide1up.vf v9, v8, fa3
+; CHECK-NEXT:    vfslide1up.vf v10, v9, fa4
+; CHECK-NEXT:    vfslide1up.vf v8, v10, fa5
+; CHECK-NEXT:    ret
+  %247 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg1)
+  %248 = insertelement <4 x float> poison, float %247, i64 0
+  %250 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg2)
+  %251 = insertelement <4 x float> %248, float %250, i64 1
+  %252 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg3)
+  %253 = insertelement <4 x float> %251, float %252, i64 2
+  %254 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg4)
+  %255 = insertelement <4 x float> %253, float %254, i64 3
+  ret <4 x float> %255
+}
+
+define <8 x float> @buildvec_vfredusum_slideup_leading_undef(float %start, <8 x float> %arg1, <8 x float> %arg2, <8 x float> %arg3, <8 x float> %arg4) nounwind {
+  %252 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg1)
+  %253 = insertelement <8 x float> poison, float %252, i64 4
+  %254 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg2)
+  %255 = insertelement <8 x float> %253, float %254, i64 5
+  %256 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg3)
+  %257 = insertelement <8 x float> %255, float %256, i64 6
+  %258 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg4)
+  %259 = insertelement <8 x float> %257, float %258, i64 7
+  ret <8 x float> %259
+}
+
+define <8 x float> @buildvec_vfredusum_slideup_trailing_undef(float %start, <8 x float> %arg1, <8 x float> %arg2, <8 x float> %arg3, <8 x float> %arg4) nounwind {
+  %252 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg1)
+  %253 = insertelement <8 x float> poison, float %252, i64 0
+  %254 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg2)
+  %255 = insertelement <8 x float> %253, float %254, i64 1
+  %256 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg3)
+  %257 = insertelement <8 x float> %255, float %256, i64 2
+  %258 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg4)
+  %259 = insertelement <8 x float> %257, float %258, i64 3
+  ret <8 x float> %259
+}
+
+; Negative test case checking if we generate slideup only when all build_vec operands are extraction from the first vector element.
+define <8 x float> @buildvec_vfredusum_slideup_not_extract_first(float %start, <8 x float> %arg1, <8 x float> %arg2, <8 x float> %arg3, <8 x float> %arg4) nounwind {
+  %252 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg1)
+  %253 = insertelement <8 x float> poison, float %252, i64 0
+  %255 = insertelement <8 x float> %253, float %start, i64 1
+  %256 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg3)
+  %257 = insertelement <8 x float> %255, float %256, i64 2
+  %258 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg4)
+  %259 = insertelement <8 x float> %257, float %258, i64 3
+  ret <8 x float> %259
+}
+
+define <8 x float> @buildvec_vfredusum_slideup_mid_undef(float %start, <8 x float> %arg1, <8 x float> %arg2, <8 x float> %arg3, <8 x float> %arg4) nounwind {