@@ -1881,6 +1881,144 @@ exit: ; preds = %for.exit, %entry
18811881 ret void
18821882}
18831883
; Computes the sum of zext(a[i]) * zext(b) for i in [0, n) as an i64, and
; returns 0 when n == 0 (the entry block branches straight to exit).
; The zext of %b is performed once in for.ph, before the loop, so only the
; loaded element of %a is extended inside the loop body. All three check
; prefixes expect an ordinary widened mul + add reduction that is finished by
; llvm.vector.reduce.add in middle.block.
; NOTE(review): judging by the "not_dotp" name, the intent is presumably that
; no partial-reduction (dot-product) form is generated for this loop -- confirm.
1884+ define i64 @not_dotp_ext_outside_plan (ptr %a , i16 %b , i64 %n ) #0 {
1885+ ; CHECK-INTERLEAVE1-LABEL: define i64 @not_dotp_ext_outside_plan(
1886+ ; CHECK-INTERLEAVE1-SAME: ptr [[A:%.*]], i16 [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
1887+ ; CHECK-INTERLEAVE1-NEXT: entry:
1888+ ; CHECK-INTERLEAVE1-NEXT: [[CMP:%.*]] = icmp eq i64 [[N]], 0
1889+ ; CHECK-INTERLEAVE1-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[FOR_PH:%.*]]
1890+ ; CHECK-INTERLEAVE1: for.ph:
1891+ ; CHECK-INTERLEAVE1-NEXT: [[EXT_B:%.*]] = zext i16 [[B]] to i64
1892+ ; CHECK-INTERLEAVE1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 8
1893+ ; CHECK-INTERLEAVE1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1894+ ; CHECK-INTERLEAVE1: vector.ph:
1895+ ; CHECK-INTERLEAVE1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 8
1896+ ; CHECK-INTERLEAVE1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
1897+ ; CHECK-INTERLEAVE1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[EXT_B]], i64 0
1898+ ; CHECK-INTERLEAVE1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
1899+ ; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_BODY:%.*]]
1900+ ; CHECK-INTERLEAVE1: vector.body:
1901+ ; CHECK-INTERLEAVE1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1902+ ; CHECK-INTERLEAVE1-NEXT: [[VEC_PHI:%.*]] = phi <8 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
1903+ ; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
1904+ ; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i16, ptr [[A]], i64 [[TMP0]]
1905+ ; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i16, ptr [[TMP1]], i32 0
1906+ ; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[TMP2]], align 2
1907+ ; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = zext <8 x i16> [[WIDE_LOAD]] to <8 x i64>
1908+ ; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = mul nuw nsw <8 x i64> [[TMP3]], [[BROADCAST_SPLAT]]
1909+ ; CHECK-INTERLEAVE1-NEXT: [[TMP5]] = add <8 x i64> [[TMP4]], [[VEC_PHI]]
1910+ ; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
1911+ ; CHECK-INTERLEAVE1-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1912+ ; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
1913+ ; CHECK-INTERLEAVE1: middle.block:
1914+ ; CHECK-INTERLEAVE1-NEXT: [[TMP7:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP5]])
1915+ ; CHECK-INTERLEAVE1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
1916+ ; CHECK-INTERLEAVE1-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
1917+ ;
1918+ ; CHECK-INTERLEAVED-LABEL: define i64 @not_dotp_ext_outside_plan(
1919+ ; CHECK-INTERLEAVED-SAME: ptr [[A:%.*]], i16 [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
1920+ ; CHECK-INTERLEAVED-NEXT: entry:
1921+ ; CHECK-INTERLEAVED-NEXT: [[CMP:%.*]] = icmp eq i64 [[N]], 0
1922+ ; CHECK-INTERLEAVED-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[FOR_PH:%.*]]
1923+ ; CHECK-INTERLEAVED: for.ph:
1924+ ; CHECK-INTERLEAVED-NEXT: [[EXT_B:%.*]] = zext i16 [[B]] to i64
1925+ ; CHECK-INTERLEAVED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 16
1926+ ; CHECK-INTERLEAVED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1927+ ; CHECK-INTERLEAVED: vector.ph:
1928+ ; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16
1929+ ; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
1930+ ; CHECK-INTERLEAVED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[EXT_B]], i64 0
1931+ ; CHECK-INTERLEAVED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
1932+ ; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]]
1933+ ; CHECK-INTERLEAVED: vector.body:
1934+ ; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1935+ ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <8 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
1936+ ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <8 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
1937+ ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
1938+ ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i16, ptr [[A]], i64 [[TMP0]]
1939+ ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i16, ptr [[TMP1]], i32 0
1940+ ; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i16, ptr [[TMP1]], i32 8
1941+ ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[TMP2]], align 2
1942+ ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i16>, ptr [[TMP3]], align 2
1943+ ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = zext <8 x i16> [[WIDE_LOAD]] to <8 x i64>
1944+ ; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = zext <8 x i16> [[WIDE_LOAD2]] to <8 x i64>
1945+ ; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = mul nuw nsw <8 x i64> [[TMP4]], [[BROADCAST_SPLAT]]
1946+ ; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = mul nuw nsw <8 x i64> [[TMP5]], [[BROADCAST_SPLAT]]
1947+ ; CHECK-INTERLEAVED-NEXT: [[TMP8]] = add <8 x i64> [[TMP6]], [[VEC_PHI]]
1948+ ; CHECK-INTERLEAVED-NEXT: [[TMP9]] = add <8 x i64> [[TMP7]], [[VEC_PHI1]]
1949+ ; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
1950+ ; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1951+ ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
1952+ ; CHECK-INTERLEAVED: middle.block:
1953+ ; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add <8 x i64> [[TMP9]], [[TMP8]]
1954+ ; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[BIN_RDX]])
1955+ ; CHECK-INTERLEAVED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
1956+ ; CHECK-INTERLEAVED-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
1957+ ;
1958+ ; CHECK-MAXBW-LABEL: define i64 @not_dotp_ext_outside_plan(
1959+ ; CHECK-MAXBW-SAME: ptr [[A:%.*]], i16 [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
1960+ ; CHECK-MAXBW-NEXT: entry:
1961+ ; CHECK-MAXBW-NEXT: [[CMP:%.*]] = icmp eq i64 [[N]], 0
1962+ ; CHECK-MAXBW-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[FOR_PH:%.*]]
1963+ ; CHECK-MAXBW: for.ph:
1964+ ; CHECK-MAXBW-NEXT: [[EXT_B:%.*]] = zext i16 [[B]] to i64
1965+ ; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
1966+ ; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
1967+ ; CHECK-MAXBW-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
1968+ ; CHECK-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1969+ ; CHECK-MAXBW: vector.ph:
1970+ ; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
1971+ ; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
1972+ ; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
1973+ ; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
1974+ ; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
1975+ ; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
1976+ ; CHECK-MAXBW-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[EXT_B]], i64 0
1977+ ; CHECK-MAXBW-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
1978+ ; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]]
1979+ ; CHECK-MAXBW: vector.body:
1980+ ; CHECK-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1981+ ; CHECK-MAXBW-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
1982+ ; CHECK-MAXBW-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0
1983+ ; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i16, ptr [[A]], i64 [[TMP6]]
1984+ ; CHECK-MAXBW-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i16, ptr [[TMP7]], i32 0
1985+ ; CHECK-MAXBW-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i16>, ptr [[TMP8]], align 2
1986+ ; CHECK-MAXBW-NEXT: [[TMP9:%.*]] = zext <vscale x 4 x i16> [[WIDE_LOAD]] to <vscale x 4 x i64>
1987+ ; CHECK-MAXBW-NEXT: [[TMP10:%.*]] = mul nuw nsw <vscale x 4 x i64> [[TMP9]], [[BROADCAST_SPLAT]]
1988+ ; CHECK-MAXBW-NEXT: [[TMP11]] = add <vscale x 4 x i64> [[TMP10]], [[VEC_PHI]]
1989+ ; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
1990+ ; CHECK-MAXBW-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1991+ ; CHECK-MAXBW-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
1992+ ; CHECK-MAXBW: middle.block:
1993+ ; CHECK-MAXBW-NEXT: [[TMP13:%.*]] = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> [[TMP11]])
1994+ ; CHECK-MAXBW-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
1995+ ; CHECK-MAXBW-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
1996+ ;
1997+ entry:
1998+ %cmp = icmp eq i64 %n , 0
1999+ br i1 %cmp , label %exit , label %for.ph
2000+
2001+ for.ph: ; preds = %entry
2002+ %ext.b = zext i16 %b to i64 ; loop-invariant extend, done once before the loop
2003+ br label %for.body
2004+
2005+ for.body: ; preds = %for.ph, %for.body
2006+ %iv = phi i64 [ 0 , %for.ph ], [ %iv.next , %for.body ]
2007+ %accum = phi i64 [ 0 , %for.ph ], [ %add , %for.body ]
2008+ %gep.a = getelementptr inbounds nuw i16 , ptr %a , i64 %iv
2009+ %load.a = load i16 , ptr %gep.a , align 2
2010+ %ext.a = zext i16 %load.a to i64 ; the only extend inside the loop
2011+ %mul = mul nuw nsw i64 %ext.a , %ext.b
2012+ %add = add i64 %mul , %accum ; running sum carried by the %accum phi
2013+ %iv.next = add nuw nsw i64 %iv , 1
2014+ %cmp.1 = icmp eq i64 %iv.next , %n
2015+ br i1 %cmp.1 , label %exit , label %for.body
2016+
2017+ exit: ; preds = %for.body, %entry
2018+ %result = phi i64 [ 0 , %entry ], [ %add , %for.body ]
2019+ ret i64 %result
2020+ }
2021+
18842022!7 = distinct !{!7 , !8 , !9 , !10 }
18852023!8 = !{!"llvm.loop.mustprogress" }
18862024!9 = !{!"llvm.loop.vectorize.predicate.enable" , i1 true }
0 commit comments