Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions llvm/lib/Transforms/Utils/SCCPSolver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2098,6 +2098,32 @@ void SCCPInstVisitor::handleCallResult(CallBase &CB) {
return (void)mergeInValue(ValueState[II], II,
ValueLatticeElement::getRange(Result));
}
if (II->getIntrinsicID() == Intrinsic::experimental_get_vector_length) {
unsigned BitWidth = CB.getType()->getScalarSizeInBits();
Value *CountArg = II->getArgOperand(0);
Value *VF = II->getArgOperand(1);
bool Scalable = cast<ConstantInt>(II->getArgOperand(2))->isOne();
ConstantRange Count = getValueState(CountArg)
.asConstantRange(CountArg->getType(), false)
.zextOrTrunc(BitWidth);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i32 @llvm.experimental.get.vector.length.i64(i64 2**33, i32 4, i1 false) will be folded to zero.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, changed it to use the larger of the two types in 67801c5. I think that matches how SelectionDAGBuilder.cpp expands the intrinsic if the target doesn't natively support it.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
.zextOrTrunc(BitWidth);
.zext(BitWidth);

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unfortunately, ConstantRange::zeroExtend asserts that the new bit width is strictly larger than the old one; equal widths are rejected. So I used zextOrTrunc, which returns the input unchanged when OldBitWidth == NewBitWidth.

Should we relax the assertion? It seems to date from the early days, in 2004. Allowing equal bit widths would make it more consistent with IRBuilder::CreateZExt.

ConstantRange MaxLanes =
getValueState(VF).asConstantRange(BitWidth, false);
if (Scalable)
MaxLanes =
MaxLanes.multiply(getVScaleRange(II->getFunction(), BitWidth));

// The result is never greater than either Count or MaxLanes.
ConstantRange Result(
APInt::getZero(BitWidth),
APIntOps::umin(Count.getUpper(), MaxLanes.getUpper()));

// If Count <= MaxLanes, getvectorlength(Count, MaxLanes) = Count
if (Count.icmp(CmpInst::ICMP_ULE, MaxLanes))
Result = Count;

return (void)mergeInValue(ValueState[II], II,
ValueLatticeElement::getRange(Result));
}

if (ConstantRange::isIntrinsicSupported(II->getIntrinsicID())) {
// Compute result range for intrinsics supported by ConstantRange.
Expand Down
136 changes: 136 additions & 0 deletions llvm/test/Transforms/SCCP/get_vector_length-intrinsic.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt < %s -p sccp -S | FileCheck %s

; The requested count is the constant 3 and the lane limit is the constant 4
; (non-scalable). The result can never exceed the count, so the compare against
; 3 is expected to fold to true.
define i1 @result_le_count() {
; CHECK-LABEL: define i1 @result_le_count() {
; CHECK-NEXT: ret i1 true
;
%x = call i32 @llvm.experimental.get.vector.length(i32 3, i32 4, i1 false)
%res = icmp ule i32 %x, 3
ret i1 %res
}

; The count is an unknown function argument, but the lane limit is the constant
; 3 (non-scalable). The result can never exceed the lane limit, so the compare
; against 3 is expected to fold to true while the call itself is kept.
define i1 @result_le_max_lanes(i32 %count) {
; CHECK-LABEL: define i1 @result_le_max_lanes(
; CHECK-SAME: i32 [[COUNT:%.*]]) {
; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[COUNT]], i32 3, i1 false)
; CHECK-NEXT: ret i1 true
;
%x = call i32 @llvm.experimental.get.vector.length(i32 %count, i32 3, i1 false)
%res = icmp ule i32 %x, 3
ret i1 %res
}

; Scalable variant of the lane-limit test: the count is unknown, the lane
; operand is 4 and the scalable flag is set. With vscale_range(2, 4) the
; effective lane limit is at most 4 * 4 = 16, so the compare against 16 is
; expected to fold to true while the call itself is kept.
define i1 @result_le_max_lanes_scalable(i32 %count) vscale_range(2, 4) {
; CHECK-LABEL: define i1 @result_le_max_lanes_scalable(
; CHECK-SAME: i32 [[COUNT:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[COUNT]], i32 4, i1 true)
; CHECK-NEXT: ret i1 true
;
%x = call i32 @llvm.experimental.get.vector.length(i32 %count, i32 4, i1 true)
%res = icmp ule i32 %x, 16
ret i1 %res
}

; Strip-mined style loop whose induction variable starts at 4 with a
; non-scalable lane limit of 4. Because the count is <= the lane limit, the
; intrinsic result equals the count; the expected output returns the constant 4
; and the loop body no longer branches back to itself.
define i32 @count_le_max_lanes() {
; CHECK-LABEL: define i32 @count_le_max_lanes() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: br label %[[EXIT:.*]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret i32 4
;
entry:
br label %loop

loop:
%iv = phi i32 [4, %entry], [%iv.next, %loop]
%x = call i32 @llvm.experimental.get.vector.length(i32 %iv, i32 4, i1 false)
%iv.next = sub i32 %iv, %x
%ec = icmp eq i32 %iv.next, 0
br i1 %ec, label %exit, label %loop

exit:
ret i32 %x
}

; Can't simplify because %iv isn't <= max lanes.
; Same loop shape as @count_le_max_lanes, but the induction variable starts at
; 6, which exceeds the non-scalable lane limit of 4. The loop and the call are
; expected to remain; only a range(i32 0, 5) return attribute is expected,
; reflecting that the result is bounded by the lane limit.
define i32 @count_not_le_max_lanes() {
; CHECK-LABEL: define range(i32 0, 5) i32 @count_not_le_max_lanes() {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 6, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[IV]], i32 4, i1 false)
; CHECK-NEXT: [[IV_NEXT]] = sub i32 [[IV]], [[X]]
; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 0
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret i32 [[X]]
;
entry:
br label %loop

loop:
%iv = phi i32 [6, %entry], [%iv.next, %loop]
%x = call i32 @llvm.experimental.get.vector.length(i32 %iv, i32 4, i1 false)
%iv.next = sub i32 %iv, %x
%ec = icmp eq i32 %iv.next, 0
br i1 %ec, label %exit, label %loop

exit:
ret i32 %x
}

; Scalable variant of @count_le_max_lanes. With vscale_range(4, 8) and a lane
; operand of 4, the effective lane limit is at least 4 * 4 = 16, so the starting
; count of 16 is always <= the lane limit. The expected output returns the
; constant 16 and the loop body no longer branches back to itself.
define i32 @count_le_max_lanes_scalable_known() vscale_range(4, 8) {
; CHECK-LABEL: define i32 @count_le_max_lanes_scalable_known(
; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: br label %[[EXIT:.*]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret i32 16
;
entry:
br label %loop

loop:
%iv = phi i32 [16, %entry], [%iv.next, %loop]
%x = call i32 @llvm.experimental.get.vector.length(i32 %iv, i32 4, i1 true)
%iv.next = sub i32 %iv, %x
%ec = icmp eq i32 %iv.next, 0
br i1 %ec, label %exit, label %loop

exit:
ret i32 %x
}

; Can't simplify because %iv isn't guaranteed <= max lanes.
; Same IR as @count_le_max_lanes_scalable_known, but this function carries no
; vscale_range attribute, so the scalable lane limit (4 * vscale) has no known
; lower bound here. The loop and the call are expected to remain, with only a
; range(i32 0, -1) return attribute added.
define i32 @count_le_max_lanes_scalable_unknown() {
; CHECK-LABEL: define range(i32 0, -1) i32 @count_le_max_lanes_scalable_unknown() {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 16, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[IV]], i32 4, i1 true)
; CHECK-NEXT: [[IV_NEXT]] = sub i32 [[IV]], [[X]]
; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 0
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret i32 [[X]]
;
entry:
br label %loop

loop:
%iv = phi i32 [16, %entry], [%iv.next, %loop]
%x = call i32 @llvm.experimental.get.vector.length(i32 %iv, i32 4, i1 true)
%iv.next = sub i32 %iv, %x
%ec = icmp eq i32 %iv.next, 0
br i1 %ec, label %exit, label %loop

exit:
ret i32 %x
}