Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 8 additions & 5 deletions llvm/lib/IR/Verifier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6588,14 +6588,17 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
VectorType *AccTy = cast<VectorType>(Call.getArgOperand(0)->getType());
VectorType *VecTy = cast<VectorType>(Call.getArgOperand(1)->getType());

unsigned VecWidth = VecTy->getElementCount().getKnownMinValue();
unsigned AccWidth = AccTy->getElementCount().getKnownMinValue();
ElementCount VecWidth = VecTy->getElementCount();
ElementCount AccWidth = AccTy->getElementCount();

Check((VecWidth % AccWidth) == 0,
Check(VecWidth.hasKnownScalarFactor(AccWidth),
"Invalid vector widths for partial "
"reduction. The width of the input vector "
"must be a positive integer multiple of "
"the width of the accumulator vector.");
"must be a known integer multiple of "
"the width of the accumulator vector.", &Call);

Check(AccTy->getElementType() == VecTy->getElementType(),
"Elements type of accumulator and input type must match", &Call);
break;
}
case Intrinsic::experimental_noalias_scope_decl: {
Expand Down
106 changes: 0 additions & 106 deletions llvm/test/CodeGen/AArch64/complex-deinterleaving-cdot.ll
Original file line number Diff line number Diff line change
Expand Up @@ -913,112 +913,6 @@ middle.block: ; preds = %vector.body
ret i32 %0
}

; Deinterleaves %a and %b into real/imaginary halves, sign-extends them to i32,
; and accumulates the real product and the negated imaginary product through
; llvm.vector.partial.reduce.add. Both partial reductions fold a
; <vscale x 16 x i32> input into a <vscale x 8 x i16> accumulator, so the
; accumulator and input element types differ (i16 vs i32). The autogenerated
; checks expect the same instruction sequence under the SVE2, SVE and NOSVE
; prefixes.
define i16 @invalid_type(<vscale x 32 x i8> %a, <vscale x 32 x i8> %b) {
; CHECK-SVE2-LABEL: define i16 @invalid_type(
; CHECK-SVE2-SAME: <vscale x 32 x i8> [[A:%.*]], <vscale x 32 x i8> [[B:%.*]]) #[[ATTR0]] {
; CHECK-SVE2-NEXT: [[ENTRY:.*]]:
; CHECK-SVE2-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK-SVE2: [[VECTOR_BODY]]:
; CHECK-SVE2-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x i16> [ zeroinitializer, %[[ENTRY]] ], [ [[PARTIAL_REDUCE_SUB:%.*]], %[[VECTOR_BODY]] ]
; CHECK-SVE2-NEXT: [[A_DEINTERLEAVED:%.*]] = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.vector.deinterleave2.nxv32i8(<vscale x 32 x i8> [[A]])
; CHECK-SVE2-NEXT: [[B_DEINTERLEAVED:%.*]] = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.vector.deinterleave2.nxv32i8(<vscale x 32 x i8> [[B]])
; CHECK-SVE2-NEXT: [[A_REAL:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[A_DEINTERLEAVED]], 0
; CHECK-SVE2-NEXT: [[A_IMAG:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[A_DEINTERLEAVED]], 1
; CHECK-SVE2-NEXT: [[B_REAL:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[B_DEINTERLEAVED]], 0
; CHECK-SVE2-NEXT: [[B_IMAG:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[B_DEINTERLEAVED]], 1
; CHECK-SVE2-NEXT: [[A_REAL_EXT:%.*]] = sext <vscale x 16 x i8> [[A_REAL]] to <vscale x 16 x i32>
; CHECK-SVE2-NEXT: [[A_IMAG_EXT:%.*]] = sext <vscale x 16 x i8> [[A_IMAG]] to <vscale x 16 x i32>
; CHECK-SVE2-NEXT: [[B_REAL_EXT:%.*]] = sext <vscale x 16 x i8> [[B_REAL]] to <vscale x 16 x i32>
; CHECK-SVE2-NEXT: [[B_IMAG_EXT:%.*]] = sext <vscale x 16 x i8> [[B_IMAG]] to <vscale x 16 x i32>
; CHECK-SVE2-NEXT: [[REAL_MUL:%.*]] = mul <vscale x 16 x i32> [[B_REAL_EXT]], [[A_REAL_EXT]]
; CHECK-SVE2-NEXT: [[REAL_MUL_REDUCED:%.*]] = call <vscale x 8 x i16> @llvm.vector.partial.reduce.add.nxv8i16.nxv16i32(<vscale x 8 x i16> [[VEC_PHI]], <vscale x 16 x i32> [[REAL_MUL]])
; CHECK-SVE2-NEXT: [[IMAG_MUL:%.*]] = mul <vscale x 16 x i32> [[B_IMAG_EXT]], [[A_IMAG_EXT]]
; CHECK-SVE2-NEXT: [[IMAG_MUL_NEG:%.*]] = sub <vscale x 16 x i32> zeroinitializer, [[IMAG_MUL]]
; CHECK-SVE2-NEXT: [[PARTIAL_REDUCE_SUB]] = call <vscale x 8 x i16> @llvm.vector.partial.reduce.add.nxv8i16.nxv16i32(<vscale x 8 x i16> [[REAL_MUL_REDUCED]], <vscale x 16 x i32> [[IMAG_MUL_NEG]])
; CHECK-SVE2-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]]
; CHECK-SVE2: [[MIDDLE_BLOCK]]:
; CHECK-SVE2-NEXT: [[TMP0:%.*]] = call i16 @llvm.vector.reduce.add.nxv8i16(<vscale x 8 x i16> [[PARTIAL_REDUCE_SUB]])
; CHECK-SVE2-NEXT: ret i16 [[TMP0]]
;
; CHECK-SVE-LABEL: define i16 @invalid_type(
; CHECK-SVE-SAME: <vscale x 32 x i8> [[A:%.*]], <vscale x 32 x i8> [[B:%.*]]) #[[ATTR0]] {
; CHECK-SVE-NEXT: [[ENTRY:.*]]:
; CHECK-SVE-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK-SVE: [[VECTOR_BODY]]:
; CHECK-SVE-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x i16> [ zeroinitializer, %[[ENTRY]] ], [ [[PARTIAL_REDUCE_SUB:%.*]], %[[VECTOR_BODY]] ]
; CHECK-SVE-NEXT: [[A_DEINTERLEAVED:%.*]] = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.vector.deinterleave2.nxv32i8(<vscale x 32 x i8> [[A]])
; CHECK-SVE-NEXT: [[B_DEINTERLEAVED:%.*]] = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.vector.deinterleave2.nxv32i8(<vscale x 32 x i8> [[B]])
; CHECK-SVE-NEXT: [[A_REAL:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[A_DEINTERLEAVED]], 0
; CHECK-SVE-NEXT: [[A_IMAG:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[A_DEINTERLEAVED]], 1
; CHECK-SVE-NEXT: [[B_REAL:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[B_DEINTERLEAVED]], 0
; CHECK-SVE-NEXT: [[B_IMAG:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[B_DEINTERLEAVED]], 1
; CHECK-SVE-NEXT: [[A_REAL_EXT:%.*]] = sext <vscale x 16 x i8> [[A_REAL]] to <vscale x 16 x i32>
; CHECK-SVE-NEXT: [[A_IMAG_EXT:%.*]] = sext <vscale x 16 x i8> [[A_IMAG]] to <vscale x 16 x i32>
; CHECK-SVE-NEXT: [[B_REAL_EXT:%.*]] = sext <vscale x 16 x i8> [[B_REAL]] to <vscale x 16 x i32>
; CHECK-SVE-NEXT: [[B_IMAG_EXT:%.*]] = sext <vscale x 16 x i8> [[B_IMAG]] to <vscale x 16 x i32>
; CHECK-SVE-NEXT: [[REAL_MUL:%.*]] = mul <vscale x 16 x i32> [[B_REAL_EXT]], [[A_REAL_EXT]]
; CHECK-SVE-NEXT: [[REAL_MUL_REDUCED:%.*]] = call <vscale x 8 x i16> @llvm.vector.partial.reduce.add.nxv8i16.nxv16i32(<vscale x 8 x i16> [[VEC_PHI]], <vscale x 16 x i32> [[REAL_MUL]])
; CHECK-SVE-NEXT: [[IMAG_MUL:%.*]] = mul <vscale x 16 x i32> [[B_IMAG_EXT]], [[A_IMAG_EXT]]
; CHECK-SVE-NEXT: [[IMAG_MUL_NEG:%.*]] = sub <vscale x 16 x i32> zeroinitializer, [[IMAG_MUL]]
; CHECK-SVE-NEXT: [[PARTIAL_REDUCE_SUB]] = call <vscale x 8 x i16> @llvm.vector.partial.reduce.add.nxv8i16.nxv16i32(<vscale x 8 x i16> [[REAL_MUL_REDUCED]], <vscale x 16 x i32> [[IMAG_MUL_NEG]])
; CHECK-SVE-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]]
; CHECK-SVE: [[MIDDLE_BLOCK]]:
; CHECK-SVE-NEXT: [[TMP0:%.*]] = call i16 @llvm.vector.reduce.add.nxv8i16(<vscale x 8 x i16> [[PARTIAL_REDUCE_SUB]])
; CHECK-SVE-NEXT: ret i16 [[TMP0]]
;
; CHECK-NOSVE-LABEL: define i16 @invalid_type(
; CHECK-NOSVE-SAME: <vscale x 32 x i8> [[A:%.*]], <vscale x 32 x i8> [[B:%.*]]) {
; CHECK-NOSVE-NEXT: [[ENTRY:.*]]:
; CHECK-NOSVE-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK-NOSVE: [[VECTOR_BODY]]:
; CHECK-NOSVE-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x i16> [ zeroinitializer, %[[ENTRY]] ], [ [[PARTIAL_REDUCE_SUB:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NOSVE-NEXT: [[A_DEINTERLEAVED:%.*]] = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.vector.deinterleave2.nxv32i8(<vscale x 32 x i8> [[A]])
; CHECK-NOSVE-NEXT: [[B_DEINTERLEAVED:%.*]] = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.vector.deinterleave2.nxv32i8(<vscale x 32 x i8> [[B]])
; CHECK-NOSVE-NEXT: [[A_REAL:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[A_DEINTERLEAVED]], 0
; CHECK-NOSVE-NEXT: [[A_IMAG:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[A_DEINTERLEAVED]], 1
; CHECK-NOSVE-NEXT: [[B_REAL:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[B_DEINTERLEAVED]], 0
; CHECK-NOSVE-NEXT: [[B_IMAG:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[B_DEINTERLEAVED]], 1
; CHECK-NOSVE-NEXT: [[A_REAL_EXT:%.*]] = sext <vscale x 16 x i8> [[A_REAL]] to <vscale x 16 x i32>
; CHECK-NOSVE-NEXT: [[A_IMAG_EXT:%.*]] = sext <vscale x 16 x i8> [[A_IMAG]] to <vscale x 16 x i32>
; CHECK-NOSVE-NEXT: [[B_REAL_EXT:%.*]] = sext <vscale x 16 x i8> [[B_REAL]] to <vscale x 16 x i32>
; CHECK-NOSVE-NEXT: [[B_IMAG_EXT:%.*]] = sext <vscale x 16 x i8> [[B_IMAG]] to <vscale x 16 x i32>
; CHECK-NOSVE-NEXT: [[REAL_MUL:%.*]] = mul <vscale x 16 x i32> [[B_REAL_EXT]], [[A_REAL_EXT]]
; CHECK-NOSVE-NEXT: [[REAL_MUL_REDUCED:%.*]] = call <vscale x 8 x i16> @llvm.vector.partial.reduce.add.nxv8i16.nxv16i32(<vscale x 8 x i16> [[VEC_PHI]], <vscale x 16 x i32> [[REAL_MUL]])
; CHECK-NOSVE-NEXT: [[IMAG_MUL:%.*]] = mul <vscale x 16 x i32> [[B_IMAG_EXT]], [[A_IMAG_EXT]]
; CHECK-NOSVE-NEXT: [[IMAG_MUL_NEG:%.*]] = sub <vscale x 16 x i32> zeroinitializer, [[IMAG_MUL]]
; CHECK-NOSVE-NEXT: [[PARTIAL_REDUCE_SUB]] = call <vscale x 8 x i16> @llvm.vector.partial.reduce.add.nxv8i16.nxv16i32(<vscale x 8 x i16> [[REAL_MUL_REDUCED]], <vscale x 16 x i32> [[IMAG_MUL_NEG]])
; CHECK-NOSVE-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]]
; CHECK-NOSVE: [[MIDDLE_BLOCK]]:
; CHECK-NOSVE-NEXT: [[TMP0:%.*]] = call i16 @llvm.vector.reduce.add.nxv8i16(<vscale x 8 x i16> [[PARTIAL_REDUCE_SUB]])
; CHECK-NOSVE-NEXT: ret i16 [[TMP0]]
;
entry:
br label %vector.body

vector.body: ; preds = %vector.body, %entry
%vec.phi = phi <vscale x 8 x i16> [ zeroinitializer, %entry ], [ %partial.reduce.sub, %vector.body ]
%a.deinterleaved = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.vector.deinterleave2.v32i8(<vscale x 32 x i8> %a)
%b.deinterleaved = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.vector.deinterleave2.v32i8(<vscale x 32 x i8> %b)
%a.real = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } %a.deinterleaved, 0
%a.imag = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } %a.deinterleaved, 1
%b.real = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } %b.deinterleaved, 0
%b.imag = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } %b.deinterleaved, 1
%a.real.ext = sext <vscale x 16 x i8> %a.real to <vscale x 16 x i32>
%a.imag.ext = sext <vscale x 16 x i8> %a.imag to <vscale x 16 x i32>
%b.real.ext = sext <vscale x 16 x i8> %b.real to <vscale x 16 x i32>
%b.imag.ext = sext <vscale x 16 x i8> %b.imag to <vscale x 16 x i32>
%real.mul = mul <vscale x 16 x i32> %b.real.ext, %a.real.ext
%real.mul.reduced = call <vscale x 8 x i16> @llvm.vector.partial.reduce.add.nxv8i16.nxv16i32(<vscale x 8 x i16> %vec.phi, <vscale x 16 x i32> %real.mul)
%imag.mul = mul <vscale x 16 x i32> %b.imag.ext, %a.imag.ext
%imag.mul.neg = sub <vscale x 16 x i32> zeroinitializer, %imag.mul
%partial.reduce.sub = call <vscale x 8 x i16> @llvm.vector.partial.reduce.add.nxv8i16.nxv16i32(<vscale x 8 x i16> %real.mul.reduced, <vscale x 16 x i32> %imag.mul.neg)
br i1 true, label %middle.block, label %vector.body

middle.block: ; preds = %vector.body
%0 = call i16 @llvm.vector.reduce.add.nxv8i16(<vscale x 8 x i16> %partial.reduce.sub)
ret i16 %0
}

define i32 @not_cdotp_i8_rot0_fixed_length(<32 x i8> %a, <32 x i8> %b) {
; CHECK-SVE2-LABEL: define i32 @not_cdotp_i8_rot0_fixed_length(
; CHECK-SVE2-SAME: <32 x i8> [[A:%.*]], <32 x i8> [[B:%.*]]) #[[ATTR0]] {
Expand Down
19 changes: 19 additions & 0 deletions llvm/test/Verifier/partial-reduce.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s

define void @element_count_mismatch() {
; CHECK: Invalid vector widths for partial reduction. The width of the input vector must be a known integer multiple of the width of the accumulator vector.
call <3 x i32> @llvm.vector.partial.reduce.add(<3 x i32> poison, <8 x i32> poison)

; CHECK: Invalid vector widths for partial reduction. The width of the input vector must be a known integer multiple of the width of the accumulator vector.
call <vscale x 4 x i32> @llvm.vector.partial.reduce.add(<vscale x 4 x i32> poison, <8 x i32> poison)

; CHECK: Invalid vector widths for partial reduction. The width of the input vector must be a known integer multiple of the width of the accumulator vector.
call <4 x i32> @llvm.vector.partial.reduce.add(<4 x i32> poison, <vscale x 8 x i32> poison)
Comment on lines +10 to +11
Copy link
Member

@MacDue MacDue Oct 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a reason this case can't be allowed? Arguably, there is a known integer multiple of 2 x vscale. I believe this could be lowered (if there ever was any need for it).

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree it could be lowered, and that doing so is desirable, but that's out of scope for this PR because LLVM's practical interpretation of "known" is that this is not supported. To support it, I'd also argue for removing "known" from the LangRef, which currently states:

The second argument is a vector with a length that is a known integer multiple of the result’s type

where "known" can be interpreted as "known at compile-time". I'm just locking this down for now so that it isn't accidentally used, but we can relax this restriction in a follow-up.

Copy link
Member

@MacDue MacDue Oct 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IMO, the current LangRef is ambiguous. The requirement for the intrinsic really is just that the input is known to be an integer multiple of the accumulator, not that the value of that multiple is known at compile-time.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we clarify the wording together with tightening the verifier?

ret void
}

; Negative test: the accumulator is <4 x i32> while the input is <8 x i8>.
; The element counts are compatible (8 is a multiple of 4), so only the
; element types disagree and the element-type diagnostic is the one expected.
define void @element_type_mismatch() {
; CHECK: Elements type of accumulator and input type must match
call <4 x i32> @llvm.vector.partial.reduce.add(<4 x i32> poison, <8 x i8> poison)
ret void
}
Loading