Skip to content

Commit 075b297

Browse files
fhahn authored and Lukacma committed
[LV] Check for TruncInsts in canTruncateToMinimalBitwidth.
A TruncInst must truncate at most to its destination type. Return false from canTruncateToMinimalBitwidth if MinBWs records a bit width for the trunc that is larger than the trunc's result type size. Fixes llvm#162688.
1 parent 3e0766c commit 075b297

File tree

2 files changed

+55
-0
lines changed

2 files changed

+55
-0
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1011,6 +1011,10 @@ class LoopVectorizationCostModel {
10111011
/// \returns True if instruction \p I can be truncated to a smaller bitwidth
10121012
/// for vectorization factor \p VF.
10131013
bool canTruncateToMinimalBitwidth(Instruction *I, ElementCount VF) const {
1014+
// Truncs must truncate at most to their destination type.
1015+
if (isa_and_nonnull<TruncInst>(I) && MinBWs.contains(I) &&
1016+
I->getType()->getScalarSizeInBits() < MinBWs.lookup(I))
1017+
return false;
10141018
return VF.isVector() && MinBWs.contains(I) &&
10151019
!isProfitableToScalarize(I, VF) &&
10161020
!isScalarAfterVectorization(I, VF);

llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,57 @@ exit:
246246
ret void
247247
}
248248

249+
; Test for https://github.com/llvm/llvm-project/issues/162688.
; The loop loads an i32 value and truncates that same value twice, once to i16
; and once to i8, storing both results (plus a constant i64 store) through %p2.
; The CHECK lines expect the loop to keep its original scalar form.
250+
define void @test_minbws_for_trunc(i32 %n, ptr noalias %p1, ptr noalias %p2) {
251+
; CHECK-LABEL: define void @test_minbws_for_trunc(
252+
; CHECK-SAME: i32 [[N:%.*]], ptr noalias [[P1:%.*]], ptr noalias [[P2:%.*]]) #[[ATTR0]] {
253+
; CHECK-NEXT: [[ENTRY:.*]]:
254+
; CHECK-NEXT: br label %[[LOOP:.*]]
255+
; CHECK: [[LOOP]]:
256+
; CHECK-NEXT: [[IV:%.*]] = phi i16 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
257+
; CHECK-NEXT: [[IV_EXT:%.*]] = sext i16 [[IV]] to i64
258+
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i32, ptr [[P1]], i64 [[IV_EXT]]
259+
; CHECK-NEXT: [[V1:%.*]] = load i32, ptr [[GEP1]], align 4
260+
; CHECK-NEXT: [[V1_TRUNC:%.*]] = trunc i32 [[V1]] to i16
261+
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr [1 x [1 x i16]], ptr [[P2]], i64 [[IV_EXT]]
262+
; CHECK-NEXT: store i16 [[V1_TRUNC]], ptr [[GEP2]], align 2
263+
; CHECK-NEXT: [[V1_TRUNC_I8:%.*]] = trunc i32 [[V1]] to i8
264+
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr i8, ptr [[P2]], i64 [[IV_EXT]]
265+
; CHECK-NEXT: store i8 [[V1_TRUNC_I8]], ptr [[GEP3]], align 1
266+
; CHECK-NEXT: [[GEP4:%.*]] = getelementptr [1 x i64], ptr [[P2]], i64 [[IV_EXT]]
267+
; CHECK-NEXT: store i64 0, ptr [[GEP4]], align 8
268+
; CHECK-NEXT: [[IV_NEXT]] = add i16 [[IV]], 4
269+
; CHECK-NEXT: [[IV_NEXT_EXT:%.*]] = sext i16 [[IV_NEXT]] to i32
270+
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[IV_NEXT_EXT]], 1024
271+
; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
272+
; CHECK: [[EXIT]]:
273+
; CHECK-NEXT: ret void
274+
;
275+
entry:
276+
br label %loop
277+
278+
loop:
279+
%iv = phi i16 [ 0, %entry ], [ %iv.next, %loop ]
280+
%iv.ext = sext i16 %iv to i64
281+
%gep1 = getelementptr i32, ptr %p1, i64 %iv.ext
282+
%v1 = load i32, ptr %gep1, align 4
283+
%v1.trunc = trunc i32 %v1 to i16
284+
%gep2 = getelementptr [1 x [1 x i16]], ptr %p2, i64 %iv.ext
285+
store i16 %v1.trunc, ptr %gep2, align 2
286+
%v1.trunc.i8 = trunc i32 %v1 to i8
287+
%gep3 = getelementptr i8, ptr %p2, i64 %iv.ext
288+
store i8 %v1.trunc.i8, ptr %gep3, align 1
289+
%gep4 = getelementptr [1 x i64], ptr %p2, i64 %iv.ext
290+
store i64 0, ptr %gep4, align 8
291+
%iv.next = add i16 %iv, 4
292+
%iv.next.ext = sext i16 %iv.next to i32
293+
%cmp = icmp ne i32 %iv.next.ext, 1024
294+
br i1 %cmp, label %loop, label %exit
295+
296+
exit:
297+
ret void
298+
}
299+
249300
attributes #0 = { "target-features"="+64bit,+v,+zvl256b" }
250301
attributes #1 = { "target-features"="+64bit,+v" }
251302

0 commit comments

Comments
 (0)