Skip to content

Commit edeee82

Browse files
authored
Reland [VectorUtils] Trivially vectorize ldexp, [l]lround (llvm#152476)
Changes since the original patch: none. The original patch, landed as 1336675, was reverted because a bug in LoopVectorize caused a crash. That bug has since been fixed by 95c32bf ([VPlan] Return invalid cost if any skeleton block has invalid costs), so this reland is identical to the original patch.
1 parent fee6e53 commit edeee82

File tree

5 files changed

+813
-0
lines changed

5 files changed

+813
-0
lines changed

llvm/lib/Analysis/VectorUtils.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
8181
case Intrinsic::exp:
8282
case Intrinsic::exp10:
8383
case Intrinsic::exp2:
84+
case Intrinsic::ldexp:
8485
case Intrinsic::log:
8586
case Intrinsic::log10:
8687
case Intrinsic::log2:
@@ -108,6 +109,8 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
108109
case Intrinsic::canonicalize:
109110
case Intrinsic::fptosi_sat:
110111
case Intrinsic::fptoui_sat:
112+
case Intrinsic::lround:
113+
case Intrinsic::llround:
111114
case Intrinsic::lrint:
112115
case Intrinsic::llrint:
113116
case Intrinsic::ucmp:
@@ -189,6 +192,8 @@ bool llvm::isVectorIntrinsicWithOverloadTypeAtArg(
189192
switch (ID) {
190193
case Intrinsic::fptosi_sat:
191194
case Intrinsic::fptoui_sat:
195+
case Intrinsic::lround:
196+
case Intrinsic::llround:
192197
case Intrinsic::lrint:
193198
case Intrinsic::llrint:
194199
case Intrinsic::vp_lrint:
@@ -203,6 +208,7 @@ bool llvm::isVectorIntrinsicWithOverloadTypeAtArg(
203208
case Intrinsic::vp_is_fpclass:
204209
return OpdIdx == 0;
205210
case Intrinsic::powi:
211+
case Intrinsic::ldexp:
206212
return OpdIdx == -1 || OpdIdx == 1;
207213
default:
208214
return OpdIdx == -1;

llvm/test/Transforms/LoopVectorize/intrinsic.ll

Lines changed: 201 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -324,6 +324,56 @@ for.end: ; preds = %for.body, %entry
324324

325325
declare double @llvm.exp2.f64(double)
326326

327+
define void @ldexp_f32i32(i32 %n, ptr %y, ptr %x, i32 %exp) {
328+
; CHECK-LABEL: @ldexp_f32i32(
329+
; CHECK: llvm.ldexp.v4f32.v4i32
330+
; CHECK: ret void
331+
;
332+
entry:
333+
br label %for.body
334+
335+
for.body: ; preds = %entry, %for.body
336+
%iv = phi i32 [ 0, %entry ], [ %iv.next, %for.body ]
337+
%arrayidx = getelementptr inbounds float, ptr %y, i32 %iv
338+
%0 = load float, ptr %arrayidx, align 4
339+
%call = tail call float @llvm.ldexp.f32.i32(float %0, i32 %exp)
340+
%arrayidx2 = getelementptr inbounds float, ptr %x, i32 %iv
341+
store float %call, ptr %arrayidx2, align 4
342+
%iv.next = add i32 %iv, 1
343+
%exitcond = icmp eq i32 %iv.next, %n
344+
br i1 %exitcond, label %for.end, label %for.body
345+
346+
for.end: ; preds = %for.body, %entry
347+
ret void
348+
}
349+
350+
declare float @llvm.ldexp.f32.i32(float, i32)
351+
352+
define void @ldexp_f64i32(i32 %n, ptr %y, ptr %x, i32 %exp) {
353+
; CHECK-LABEL: @ldexp_f64i32(
354+
; CHECK: llvm.ldexp.v4f64.v4i32
355+
; CHECK: ret void
356+
;
357+
entry:
358+
br label %for.body
359+
360+
for.body: ; preds = %entry, %for.body
361+
%iv = phi i32 [ 0, %entry ], [ %iv.next, %for.body ]
362+
%arrayidx = getelementptr inbounds double, ptr %y, i32 %iv
363+
%0 = load double, ptr %arrayidx, align 8
364+
%call = tail call double @llvm.ldexp.f64.i32(double %0, i32 %exp)
365+
%arrayidx2 = getelementptr inbounds double, ptr %x, i32 %iv
366+
store double %call, ptr %arrayidx2, align 8
367+
%iv.next = add i32 %iv, 1
368+
%exitcond = icmp eq i32 %iv.next, %n
369+
br i1 %exitcond, label %for.end, label %for.body
370+
371+
for.end: ; preds = %for.body, %entry
372+
ret void
373+
}
374+
375+
declare double @llvm.ldexp.f64.i32(double, i32)
376+
327377
define void @log_f32(i32 %n, ptr %y, ptr %x) {
328378
; CHECK-LABEL: @log_f32(
329379
; CHECK: llvm.log.v4f32
@@ -976,6 +1026,157 @@ for.end: ; preds = %for.body, %entry
9761026

9771027
declare double @llvm.roundeven.f64(double)
9781028

1029+
1030+
define void @lround_i32f32(i32 %n, ptr %y, ptr %x) {
1031+
; CHECK-LABEL: @lround_i32f32(
1032+
; CHECK: llvm.lround.v4i32.v4f32
1033+
; CHECK: ret void
1034+
;
1035+
entry:
1036+
br label %for.body
1037+
1038+
for.body: ; preds = %entry, %for.body
1039+
%iv = phi i32 [ 0, %entry ], [ %iv.next, %for.body ]
1040+
%arrayidx = getelementptr inbounds float, ptr %y, i32 %iv
1041+
%0 = load float, ptr %arrayidx, align 4
1042+
%call = tail call i32 @llvm.lround.i32.f32(float %0)
1043+
%arrayidx2 = getelementptr inbounds i32, ptr %x, i32 %iv
1044+
store i32 %call, ptr %arrayidx2, align 4
1045+
%iv.next = add i32 %iv, 1
1046+
%exitcond = icmp eq i32 %iv.next, %n
1047+
br i1 %exitcond, label %for.end, label %for.body
1048+
1049+
for.end: ; preds = %for.body, %entry
1050+
ret void
1051+
}
1052+
1053+
declare i32 @llvm.lround.i32.f32(float)
1054+
1055+
define void @lround_i32f64(i32 %n, ptr %y, ptr %x) {
1056+
; CHECK-LABEL: @lround_i32f64(
1057+
; CHECK: llvm.lround.v4i32.v4f64
1058+
; CHECK: ret void
1059+
;
1060+
entry:
1061+
br label %for.body
1062+
1063+
for.body: ; preds = %entry, %for.body
1064+
%iv = phi i32 [ 0, %entry ], [ %iv.next, %for.body ]
1065+
%arrayidx = getelementptr inbounds double, ptr %y, i32 %iv
1066+
%0 = load double, ptr %arrayidx, align 8
1067+
%call = tail call i32 @llvm.lround.i32.f64(double %0)
1068+
%arrayidx2 = getelementptr inbounds i32, ptr %x, i32 %iv
1069+
store i32 %call, ptr %arrayidx2, align 8
1070+
%iv.next = add i32 %iv, 1
1071+
%exitcond = icmp eq i32 %iv.next, %n
1072+
br i1 %exitcond, label %for.end, label %for.body
1073+
1074+
for.end: ; preds = %for.body, %entry
1075+
ret void
1076+
}
1077+
1078+
declare i32 @llvm.lround.i32.f64(double)
1079+
1080+
define void @lround_i64f32(i32 %n, ptr %y, ptr %x) {
1081+
; CHECK-LABEL: @lround_i64f32(
1082+
; CHECK: llvm.lround.v4i64.v4f32
1083+
; CHECK: ret void
1084+
;
1085+
entry:
1086+
br label %for.body
1087+
1088+
for.body: ; preds = %entry, %for.body
1089+
%iv = phi i32 [ 0, %entry ], [ %iv.next, %for.body ]
1090+
%arrayidx = getelementptr inbounds float, ptr %y, i32 %iv
1091+
%0 = load float, ptr %arrayidx, align 4
1092+
%call = tail call i64 @llvm.lround.i64.f32(float %0)
1093+
%arrayidx2 = getelementptr inbounds i64, ptr %x, i32 %iv
1094+
store i64 %call, ptr %arrayidx2, align 4
1095+
%iv.next = add i32 %iv, 1
1096+
%exitcond = icmp eq i32 %iv.next, %n
1097+
br i1 %exitcond, label %for.end, label %for.body
1098+
1099+
for.end: ; preds = %for.body, %entry
1100+
ret void
1101+
}
1102+
1103+
declare i64 @llvm.lround.i64.f32(float)
1104+
1105+
define void @lround_i64f64(i32 %n, ptr %y, ptr %x) {
1106+
; CHECK-LABEL: @lround_i64f64(
1107+
; CHECK: llvm.lround.v4i64.v4f64
1108+
; CHECK: ret void
1109+
;
1110+
entry:
1111+
br label %for.body
1112+
1113+
for.body: ; preds = %entry, %for.body
1114+
%iv = phi i32 [ 0, %entry ], [ %iv.next, %for.body ]
1115+
%arrayidx = getelementptr inbounds double, ptr %y, i32 %iv
1116+
%0 = load double, ptr %arrayidx, align 8
1117+
%call = tail call i64 @llvm.lround.i64.f64(double %0)
1118+
%arrayidx2 = getelementptr inbounds i64, ptr %x, i32 %iv
1119+
store i64 %call, ptr %arrayidx2, align 8
1120+
%iv.next = add i32 %iv, 1
1121+
%exitcond = icmp eq i32 %iv.next, %n
1122+
br i1 %exitcond, label %for.end, label %for.body
1123+
1124+
for.end: ; preds = %for.body, %entry
1125+
ret void
1126+
}
1127+
1128+
declare i64 @llvm.lround.i64.f64(double)
1129+
1130+
define void @llround_i64f32(i32 %n, ptr %y, ptr %x) {
1131+
; CHECK-LABEL: @llround_i64f32(
1132+
; CHECK: llvm.llround.v4i64.v4f32
1133+
; CHECK: ret void
1134+
;
1135+
entry:
1136+
br label %for.body
1137+
1138+
for.body: ; preds = %entry, %for.body
1139+
%iv = phi i32 [ 0, %entry ], [ %iv.next, %for.body ]
1140+
%arrayidx = getelementptr inbounds float, ptr %y, i32 %iv
1141+
%0 = load float, ptr %arrayidx, align 4
1142+
%call = tail call i64 @llvm.llround.i64.f32(float %0)
1143+
%arrayidx2 = getelementptr inbounds i64, ptr %x, i32 %iv
1144+
store i64 %call, ptr %arrayidx2, align 4
1145+
%iv.next = add i32 %iv, 1
1146+
%exitcond = icmp eq i32 %iv.next, %n
1147+
br i1 %exitcond, label %for.end, label %for.body
1148+
1149+
for.end: ; preds = %for.body, %entry
1150+
ret void
1151+
}
1152+
1153+
declare i64 @llvm.llround.i64.f32(float)
1154+
1155+
define void @llround_i64f64(i32 %n, ptr %y, ptr %x) {
1156+
; CHECK-LABEL: @llround_i64f64(
1157+
; CHECK: llvm.llround.v4i64.v4f64
1158+
; CHECK: ret void
1159+
;
1160+
entry:
1161+
br label %for.body
1162+
1163+
for.body: ; preds = %entry, %for.body
1164+
%iv = phi i32 [ 0, %entry ], [ %iv.next, %for.body ]
1165+
%arrayidx = getelementptr inbounds double, ptr %y, i32 %iv
1166+
%0 = load double, ptr %arrayidx, align 8
1167+
%call = tail call i64 @llvm.llround.i64.f64(double %0)
1168+
%arrayidx2 = getelementptr inbounds i64, ptr %x, i32 %iv
1169+
store i64 %call, ptr %arrayidx2, align 8
1170+
%iv.next = add i32 %iv, 1
1171+
%exitcond = icmp eq i32 %iv.next, %n
1172+
br i1 %exitcond, label %for.end, label %for.body
1173+
1174+
for.end: ; preds = %for.body, %entry
1175+
ret void
1176+
}
1177+
1178+
declare i64 @llvm.llround.i64.f64(double)
1179+
9791180
define void @fma_f32(i32 %n, ptr %y, ptr %x, ptr %z, ptr %w) {
9801181
; CHECK-LABEL: @fma_f32(
9811182
; CHECK: llvm.fma.v4f32

0 commit comments

Comments
 (0)