Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions llvm/lib/Transforms/Utils/SCCPSolver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2098,6 +2098,38 @@ void SCCPInstVisitor::handleCallResult(CallBase &CB) {
return (void)mergeInValue(ValueState[II], II,
ValueLatticeElement::getRange(Result));
}
if (II->getIntrinsicID() == Intrinsic::experimental_get_vector_length) {
Value *CountArg = II->getArgOperand(0);
Value *VF = II->getArgOperand(1);
bool Scalable = cast<ConstantInt>(II->getArgOperand(2))->isOne();

// Computation happens in the larger type.
unsigned BitWidth = std::max(CountArg->getType()->getScalarSizeInBits(),
VF->getType()->getScalarSizeInBits());

ConstantRange Count = getValueState(CountArg)
.asConstantRange(CountArg->getType(), false)
.zextOrTrunc(BitWidth);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i32 @llvm.experimental.get.vector.length.i64(i64 2**33, i32 4, i1 false) will be folded to zero.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, changed it to use the larger of the two types in 67801c5. I think that matches how SelectionDAGBuilder.cpp expands the intrinsic if the target doesn't natively support it.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
.zextOrTrunc(BitWidth);
.zext(BitWidth);

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ConstantRange::zeroExtend unfortunately asserts that the new bit width is strictly larger than the old one; an equal width is rejected. So I used zextOrTrunc, which returns the input unchanged when OldBitWidth == NewBitWidth.

Should we relax the assertion? It seems to date from the early days in 2004. Allowing equal bit widths would make it more consistent with IRBuilder::CreateZExt.

ConstantRange MaxLanes = getValueState(VF)
.asConstantRange(VF->getType(), false)
.zextOrTrunc(BitWidth);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
.zextOrTrunc(BitWidth);
.zext(BitWidth);

if (Scalable)
MaxLanes =
MaxLanes.multiply(getVScaleRange(II->getFunction(), BitWidth));

// The result is always less than or equal to both Count and MaxLanes.
ConstantRange Result(
APInt::getZero(BitWidth),
APIntOps::umin(Count.getUpper(), MaxLanes.getUpper()));

// If Count <= MaxLanes, getvectorlength(Count, MaxLanes) = Count
if (Count.icmp(CmpInst::ICMP_ULE, MaxLanes))
Result = Count;

Result = Result.zextOrTrunc(II->getType()->getScalarSizeInBits());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this Trunc causes an issue in cases where:

  • Count is of a type > i32 (e.g. i64)
  • Vscale is such that Vscale * Count > 2^32
  • Count is >= 2^32
define i32 @trunc() vscale_range(4, 4) {
  %x = call i32 @llvm.experimental.get.vector.length(i64 4294967296, i32 2147483647, i1 true)
  ret i32 %x
}

after running SCCP (built from 67801c5):

define i32 @trunc() #0 {
  ret i32 0
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think in this case the result is poison, according to the definition in the LangRef:

If the result value does not fit in the result type, then the result is a poison value.

MaxLanes is 0x1FFFFFFFC and Count is 0x100000000, and because Count <= MaxLanes the result is Count. But 0x100000000 does not fit in 32 bits, so the result is poison, and I think the transform is therefore valid.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

MaxLanes * VScale should not overflow.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
Result = Result.zextOrTrunc(II->getType()->getScalarSizeInBits());
Result = Result.trunc(II->getType()->getScalarSizeInBits());

return (void)mergeInValue(ValueState[II], II,
ValueLatticeElement::getRange(Result));
}

if (ConstantRange::isIntrinsicSupported(II->getIntrinsicID())) {
// Compute result range for intrinsics supported by ConstantRange.
Expand Down
147 changes: 147 additions & 0 deletions llvm/test/Transforms/SCCP/get_vector_length-intrinsic.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt < %s -p sccp -S | FileCheck %s

; Count is the constant 3 and the lane operand is the fixed (non-scalable)
; value 4. The expected output folds the `icmp ule` of the result against 3
; to true.
define i1 @result_le_count() {
; CHECK-LABEL: define i1 @result_le_count() {
; CHECK-NEXT: ret i1 true
;
%x = call i32 @llvm.experimental.get.vector.length(i32 3, i32 4, i1 false)
%res = icmp ule i32 %x, 3
ret i1 %res
}

; Count is an unknown argument and the lane operand is the fixed value 3.
; The expected output keeps the call but folds the `icmp ule` of the result
; against 3 to true.
define i1 @result_le_max_lanes(i32 %count) {
; CHECK-LABEL: define i1 @result_le_max_lanes(
; CHECK-SAME: i32 [[COUNT:%.*]]) {
; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[COUNT]], i32 3, i1 false)
; CHECK-NEXT: ret i1 true
;
%x = call i32 @llvm.experimental.get.vector.length(i32 %count, i32 3, i1 false)
%res = icmp ule i32 %x, 3
ret i1 %res
}

; Scalable variant: the lane operand is 4 with the scalable flag set, and the
; function carries vscale_range(2, 4), so the lane limit is at most
; 4 * 4 = 16. The expected output keeps the call but folds the `icmp ule` of
; the result against 16 to true.
define i1 @result_le_max_lanes_scalable(i32 %count) vscale_range(2, 4) {
; CHECK-LABEL: define i1 @result_le_max_lanes_scalable(
; CHECK-SAME: i32 [[COUNT:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[COUNT]], i32 4, i1 true)
; CHECK-NEXT: ret i1 true
;
%x = call i32 @llvm.experimental.get.vector.length(i32 %count, i32 4, i1 true)
%res = icmp ule i32 %x, 16
ret i1 %res
}

; Loop whose induction variable starts at 4 and is decremented by the
; intrinsic result, with a fixed lane operand of 4. Count never exceeds the
; lane limit here, and the expected output collapses the loop and returns the
; constant 4.
define i32 @count_le_max_lanes() {
; CHECK-LABEL: define i32 @count_le_max_lanes() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: br label %[[EXIT:.*]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret i32 4
;
entry:
br label %loop

loop:
%iv = phi i32 [4, %entry], [%iv.next, %loop]
%x = call i32 @llvm.experimental.get.vector.length(i32 %iv, i32 4, i1 false)
%iv.next = sub i32 %iv, %x
%ec = icmp eq i32 %iv.next, 0
br i1 %ec, label %exit, label %loop

exit:
ret i32 %x
}

; Can't simplify because %iv isn't <= max lanes: %iv starts at 6 while the
; fixed lane operand is 4. The expected output keeps the loop and the call,
; and only attaches the return range [0, 5) to the function.
define i32 @count_not_le_max_lanes() {
; CHECK-LABEL: define range(i32 0, 5) i32 @count_not_le_max_lanes() {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 6, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[IV]], i32 4, i1 false)
; CHECK-NEXT: [[IV_NEXT]] = sub i32 [[IV]], [[X]]
; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 0
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret i32 [[X]]
;
entry:
br label %loop

loop:
%iv = phi i32 [6, %entry], [%iv.next, %loop]
%x = call i32 @llvm.experimental.get.vector.length(i32 %iv, i32 4, i1 false)
%iv.next = sub i32 %iv, %x
%ec = icmp eq i32 %iv.next, 0
br i1 %ec, label %exit, label %loop

exit:
ret i32 %x
}

; Scalable variant with a known vscale range: the lane operand is 4 with the
; scalable flag set and the function carries vscale_range(4, 8), so the lane
; limit is at least 4 * 4 = 16. %iv starts at 16, and the expected output
; collapses the loop and returns the constant 16.
define i32 @count_le_max_lanes_scalable_known() vscale_range(4, 8) {
; CHECK-LABEL: define i32 @count_le_max_lanes_scalable_known(
; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: br label %[[EXIT:.*]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret i32 16
;
entry:
br label %loop

loop:
%iv = phi i32 [16, %entry], [%iv.next, %loop]
%x = call i32 @llvm.experimental.get.vector.length(i32 %iv, i32 4, i1 true)
%iv.next = sub i32 %iv, %x
%ec = icmp eq i32 %iv.next, 0
br i1 %ec, label %exit, label %loop

exit:
ret i32 %x
}

; Can't simplify because %iv isn't guaranteed <= max lanes: the lane operand
; is scalable and this function has no vscale_range attribute. The expected
; output keeps the loop and the call, and only attaches the return range
; [0, -1) to the function.
define i32 @count_le_max_lanes_scalable_unknown() {
; CHECK-LABEL: define range(i32 0, -1) i32 @count_le_max_lanes_scalable_unknown() {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 16, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[IV]], i32 4, i1 true)
; CHECK-NEXT: [[IV_NEXT]] = sub i32 [[IV]], [[X]]
; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 0
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret i32 [[X]]
;
entry:
br label %loop

loop:
%iv = phi i32 [16, %entry], [%iv.next, %loop]
%x = call i32 @llvm.experimental.get.vector.length(i32 %iv, i32 4, i1 true)
%iv.next = sub i32 %iv, %x
%ec = icmp eq i32 %iv.next, 0
br i1 %ec, label %exit, label %loop

exit:
ret i32 %x
}

; Count is the i64 constant 0x100000000 (2^32), which is wider than the i32
; result type, and the lane operand is the fixed value 4. The expected output
; leaves both the call and the `icmp ule` against 3 unfolded, i.e. the range
; computation must not be done in the narrower type.
define i1 @result_le_overflow() {
; CHECK-LABEL: define i1 @result_le_overflow() {
; CHECK-NEXT: [[X:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 4294967296, i32 4, i1 false)
; CHECK-NEXT: [[RES:%.*]] = icmp ule i32 [[X]], 3
; CHECK-NEXT: ret i1 [[RES]]
;
%x = call i32 @llvm.experimental.get.vector.length(i64 u0x100000000, i32 4, i1 false)
%res = icmp ule i32 %x, 3
ret i1 %res
}