@@ -899,3 +899,156 @@ latch:
899
899
for.end:
900
900
ret void
901
901
}
902
+
903
+ ; Test for https://github.com/llvm/llvm-project/issues/159402. For invariant divisors,
904
+ ; selects can be introduced outside the vector loop and their cost should not be
905
+ ; considered for each loop iteration.
;
; Input loop shape: %iv runs from -12 up to 0 in steps of 1 and the body branches
; on the function argument %c. When %c is true the divisions are skipped and the
; merged value is 0; otherwise it is sext(sdiv(zext(udiv(%narrow.iv, %x)), %y)).
; The function returns the merged value of the final iteration.
;
; Both expectation sets (CHECK uses <vscale x 2 x ...>, FIXED uses <4 x ...>)
; require the two selects that substitute 1 for the divisor where %c is true to
; appear in vector.ph, before the branch to vector.body, and the udiv/sdiv in
; vector.body to consume those pre-computed selects.
906
+ define i32 @udiv_sdiv_with_invariant_divisors (i8 %x , i16 %y , i1 %c ) {
907
+ ; CHECK-LABEL: @udiv_sdiv_with_invariant_divisors(
908
+ ; CHECK-NEXT: entry:
909
+ ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32()
910
+ ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i32 [[TMP0]], 1
911
+ ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 12, [[TMP1]]
912
+ ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
913
+ ; CHECK: vector.ph:
914
+ ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32()
915
+ ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i32 [[TMP2]], 2
916
+ ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 12, [[TMP3]]
917
+ ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 12, [[N_MOD_VF]]
918
+ ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i16> poison, i16 [[Y:%.*]], i64 0
919
+ ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i16> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
920
+ ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i8> poison, i8 [[X:%.*]], i64 0
921
+ ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x i8> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
922
+ ; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 2 x i1> poison, i1 [[C:%.*]], i64 0
923
+ ; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 2 x i1> [[BROADCAST_SPLATINSERT3]], <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
924
+ ; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i32 [[N_VEC]] to i16
925
+ ; CHECK-NEXT: [[TMP4:%.*]] = add i16 -12, [[DOTCAST]]
926
+ ; CHECK-NEXT: [[DOTCAST5:%.*]] = trunc i32 [[N_VEC]] to i8
927
+ ; CHECK-NEXT: [[TMP5:%.*]] = add i8 -12, [[DOTCAST5]]
928
+ ; CHECK-NEXT: [[TMP6:%.*]] = select <vscale x 2 x i1> [[BROADCAST_SPLAT4]], <vscale x 2 x i8> splat (i8 1), <vscale x 2 x i8> [[BROADCAST_SPLAT2]]
929
+ ; CHECK-NEXT: [[TMP7:%.*]] = select <vscale x 2 x i1> [[BROADCAST_SPLAT4]], <vscale x 2 x i16> splat (i16 1), <vscale x 2 x i16> [[BROADCAST_SPLAT]]
930
+ ; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 2 x i8> @llvm.stepvector.nxv2i8()
931
+ ; CHECK-NEXT: [[TMP9:%.*]] = mul <vscale x 2 x i8> [[TMP8]], splat (i8 1)
932
+ ; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i8> splat (i8 -12), [[TMP9]]
933
+ ; CHECK-NEXT: [[TMP10:%.*]] = trunc i32 [[TMP3]] to i8
934
+ ; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <vscale x 2 x i8> poison, i8 [[TMP10]], i64 0
935
+ ; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <vscale x 2 x i8> [[BROADCAST_SPLATINSERT6]], <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
936
+ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
937
+ ; CHECK: vector.body:
938
+ ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
939
+ ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i8> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
940
+ ; CHECK-NEXT: [[TMP11:%.*]] = udiv <vscale x 2 x i8> [[VEC_IND]], [[TMP6]]
941
+ ; CHECK-NEXT: [[TMP12:%.*]] = zext <vscale x 2 x i8> [[TMP11]] to <vscale x 2 x i16>
942
+ ; CHECK-NEXT: [[TMP13:%.*]] = sdiv <vscale x 2 x i16> [[TMP12]], [[TMP7]]
943
+ ; CHECK-NEXT: [[TMP14:%.*]] = sext <vscale x 2 x i16> [[TMP13]] to <vscale x 2 x i32>
944
+ ; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[BROADCAST_SPLAT4]], <vscale x 2 x i32> zeroinitializer, <vscale x 2 x i32> [[TMP14]]
945
+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP3]]
946
+ ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i8> [[VEC_IND]], [[BROADCAST_SPLAT7]]
947
+ ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
948
+ ; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
949
+ ; CHECK: middle.block:
950
+ ; CHECK-NEXT: [[TMP16:%.*]] = call i32 @llvm.vscale.i32()
951
+ ; CHECK-NEXT: [[TMP17:%.*]] = mul nuw i32 [[TMP16]], 2
952
+ ; CHECK-NEXT: [[TMP18:%.*]] = sub i32 [[TMP17]], 1
953
+ ; CHECK-NEXT: [[TMP19:%.*]] = extractelement <vscale x 2 x i32> [[PREDPHI]], i32 [[TMP18]]
954
+ ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 12, [[N_VEC]]
955
+ ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
956
+ ; CHECK: scalar.ph:
957
+ ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ -12, [[ENTRY:%.*]] ]
958
+ ; CHECK-NEXT: [[BC_RESUME_VAL8:%.*]] = phi i8 [ [[TMP5]], [[MIDDLE_BLOCK]] ], [ -12, [[ENTRY]] ]
959
+ ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]]
960
+ ; CHECK: loop.header:
961
+ ; CHECK-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
962
+ ; CHECK-NEXT: [[NARROW_IV:%.*]] = phi i8 [ [[BC_RESUME_VAL8]], [[SCALAR_PH]] ], [ [[IV_NEXT_TRUNC:%.*]], [[LOOP_LATCH]] ]
963
+ ; CHECK-NEXT: br i1 [[C]], label [[LOOP_LATCH]], label [[THEN:%.*]]
964
+ ; CHECK: then:
965
+ ; CHECK-NEXT: [[UD:%.*]] = udiv i8 [[NARROW_IV]], [[X]]
966
+ ; CHECK-NEXT: [[UD_EXT:%.*]] = zext i8 [[UD]] to i16
967
+ ; CHECK-NEXT: [[SD:%.*]] = sdiv i16 [[UD_EXT]], [[Y]]
968
+ ; CHECK-NEXT: [[SD_EXT:%.*]] = sext i16 [[SD]] to i32
969
+ ; CHECK-NEXT: br label [[LOOP_LATCH]]
970
+ ; CHECK: loop.latch:
971
+ ; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ 0, [[LOOP_HEADER]] ], [ [[SD_EXT]], [[THEN]] ]
972
+ ; CHECK-NEXT: [[IV_NEXT]] = add nsw i16 [[IV]], 1
973
+ ; CHECK-NEXT: [[EC:%.*]] = icmp eq i16 [[IV_NEXT]], 0
974
+ ; CHECK-NEXT: [[IV_NEXT_TRUNC]] = trunc i16 [[IV_NEXT]] to i8
975
+ ; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP13:![0-9]+]]
976
+ ; CHECK: exit:
977
+ ; CHECK-NEXT: [[MERGE_LCSSA:%.*]] = phi i32 [ [[MERGE]], [[LOOP_LATCH]] ], [ [[TMP19]], [[MIDDLE_BLOCK]] ]
978
+ ; CHECK-NEXT: ret i32 [[MERGE_LCSSA]]
979
+ ;
980
+ ; FIXED-LABEL: @udiv_sdiv_with_invariant_divisors(
981
+ ; FIXED-NEXT: entry:
982
+ ; FIXED-NEXT: br label [[VECTOR_PH:%.*]]
983
+ ; FIXED: vector.ph:
984
+ ; FIXED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[Y:%.*]], i64 0
985
+ ; FIXED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
986
+ ; FIXED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i8> poison, i8 [[X:%.*]], i64 0
987
+ ; FIXED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT1]], <4 x i8> poison, <4 x i32> zeroinitializer
988
+ ; FIXED-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x i1> poison, i1 [[C:%.*]], i64 0
989
+ ; FIXED-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT3]], <4 x i1> poison, <4 x i32> zeroinitializer
990
+ ; FIXED-NEXT: [[TMP0:%.*]] = select <4 x i1> [[BROADCAST_SPLAT4]], <4 x i8> splat (i8 1), <4 x i8> [[BROADCAST_SPLAT2]]
991
+ ; FIXED-NEXT: [[TMP1:%.*]] = select <4 x i1> [[BROADCAST_SPLAT4]], <4 x i16> splat (i16 1), <4 x i16> [[BROADCAST_SPLAT]]
992
+ ; FIXED-NEXT: br label [[VECTOR_BODY:%.*]]
993
+ ; FIXED: vector.body:
994
+ ; FIXED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
995
+ ; FIXED-NEXT: [[VEC_IND:%.*]] = phi <4 x i8> [ <i8 -12, i8 -11, i8 -10, i8 -9>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
996
+ ; FIXED-NEXT: [[TMP2:%.*]] = udiv <4 x i8> [[VEC_IND]], [[TMP0]]
997
+ ; FIXED-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i16>
998
+ ; FIXED-NEXT: [[TMP4:%.*]] = sdiv <4 x i16> [[TMP3]], [[TMP1]]
999
+ ; FIXED-NEXT: [[TMP5:%.*]] = sext <4 x i16> [[TMP4]] to <4 x i32>
1000
+ ; FIXED-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[BROADCAST_SPLAT4]], <4 x i32> zeroinitializer, <4 x i32> [[TMP5]]
1001
+ ; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
1002
+ ; FIXED-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 4)
1003
+ ; FIXED-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 12
1004
+ ; FIXED-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
1005
+ ; FIXED: middle.block:
1006
+ ; FIXED-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[PREDPHI]], i32 3
1007
+ ; FIXED-NEXT: br label [[EXIT:%.*]]
1008
+ ; FIXED: scalar.ph:
1009
+ ; FIXED-NEXT: br label [[LOOP_HEADER:%.*]]
1010
+ ; FIXED: loop.header:
1011
+ ; FIXED-NEXT: [[IV:%.*]] = phi i16 [ -12, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
1012
+ ; FIXED-NEXT: [[NARROW_IV:%.*]] = phi i8 [ -12, [[SCALAR_PH]] ], [ [[IV_NEXT_TRUNC:%.*]], [[LOOP_LATCH]] ]
1013
+ ; FIXED-NEXT: br i1 [[C]], label [[LOOP_LATCH]], label [[THEN:%.*]]
1014
+ ; FIXED: then:
1015
+ ; FIXED-NEXT: [[UD:%.*]] = udiv i8 [[NARROW_IV]], [[X]]
1016
+ ; FIXED-NEXT: [[UD_EXT:%.*]] = zext i8 [[UD]] to i16
1017
+ ; FIXED-NEXT: [[SD:%.*]] = sdiv i16 [[UD_EXT]], [[Y]]
1018
+ ; FIXED-NEXT: [[SD_EXT:%.*]] = sext i16 [[SD]] to i32
1019
+ ; FIXED-NEXT: br label [[LOOP_LATCH]]
1020
+ ; FIXED: loop.latch:
1021
+ ; FIXED-NEXT: [[MERGE:%.*]] = phi i32 [ 0, [[LOOP_HEADER]] ], [ [[SD_EXT]], [[THEN]] ]
1022
+ ; FIXED-NEXT: [[IV_NEXT]] = add nsw i16 [[IV]], 1
1023
+ ; FIXED-NEXT: [[EC:%.*]] = icmp eq i16 [[IV_NEXT]], 0
1024
+ ; FIXED-NEXT: [[IV_NEXT_TRUNC]] = trunc i16 [[IV_NEXT]] to i8
1025
+ ; FIXED-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP_HEADER]]
1026
+ ; FIXED: exit:
1027
+ ; FIXED-NEXT: [[MERGE_LCSSA:%.*]] = phi i32 [ [[MERGE]], [[LOOP_LATCH]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
1028
+ ; FIXED-NEXT: ret i32 [[MERGE_LCSSA]]
1029
+ ;
1030
+ entry:
1031
+ br label %loop.header
1032
+
1033
+ loop.header:
1034
; %iv is the i16 counter, starting at -12; the loop exits once %iv.next reaches 0.
+ %iv = phi i16 [ -12 , %entry ], [ %iv.next , %loop.latch ]
1035
; %narrow.iv carries the same counter truncated to i8 (fed by %iv.next.trunc).
+ %narrow.iv = phi i8 [ -12 , %entry ], [ %iv.next.trunc , %loop.latch ]
1036
; %c is a function argument, so every iteration takes the same side of this branch.
+ br i1 %c , label %loop.latch , label %then
1037
+
1038
+ then:
1039
; Only reached when %c is false: unsigned divide by %x, widen, signed divide by %y.
+ %ud = udiv i8 %narrow.iv , %x
1040
+ %ud.ext = zext i8 %ud to i16
1041
+ %sd = sdiv i16 %ud.ext , %y
1042
+ %sd.ext = sext i16 %sd to i32
1043
+ br label %loop.latch
1044
+
1045
+ loop.latch:
1046
; 0 when the divisions were skipped, otherwise the sign-extended quotient from %then.
+ %merge = phi i32 [ 0 , %loop.header ], [ %sd.ext , %then ]
1047
+ %iv.next = add nsw i16 %iv , 1
1048
+ %ec = icmp eq i16 %iv.next , 0
1049
+ %iv.next.trunc = trunc i16 %iv.next to i8
1050
+ br i1 %ec , label %exit , label %loop.header
1051
+
1052
+ exit:
1053
; Returns the %merge value produced by the final loop iteration.
+ ret i32 %merge
1054
+ }
0 commit comments