Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions clang/include/clang/Basic/DiagnosticSemaKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -3257,8 +3257,7 @@ def err_attribute_too_few_arguments : Error<
"%0 attribute takes at least %1 argument%s1">;
def err_attribute_invalid_vector_type : Error<"invalid vector element type %0">;
def err_attribute_invalid_bitint_vector_type : Error<
"'_BitInt' %select{vector|matrix}0 element width must be %select{a power of 2|"
"at least as wide as 'CHAR_BIT'}1">;
"'_BitInt' %select{vector|matrix}0 element width must be a power of 2">;
def err_attribute_invalid_matrix_type : Error<"invalid matrix element type %0">;
def err_attribute_bad_neon_vector_size : Error<
"Neon vector size must be 64 or 128 bits">;
Expand Down
6 changes: 2 additions & 4 deletions clang/lib/AST/ASTContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4577,8 +4577,7 @@ QualType ASTContext::getVectorType(QualType vecType, unsigned NumElts,
assert(vecType->isBuiltinType() ||
(vecType->isBitIntType() &&
// Only support _BitInt elements with power of 2 NumBits (sub-byte widths
// such as 2 and 4 are allowed).
llvm::isPowerOf2_32(vecType->castAs<BitIntType>()->getNumBits()) &&
vecType->castAs<BitIntType>()->getNumBits() >= 8));
llvm::isPowerOf2_32(vecType->castAs<BitIntType>()->getNumBits())));

// Check if we've already instantiated a vector of this type.
llvm::FoldingSetNodeID ID;
Expand Down Expand Up @@ -4650,8 +4649,7 @@ QualType ASTContext::getExtVectorType(QualType vecType,
assert(vecType->isBuiltinType() || vecType->isDependentType() ||
(vecType->isBitIntType() &&
// Only support _BitInt elements with power of 2 NumBits (sub-byte widths
// such as 2 and 4 are allowed).
llvm::isPowerOf2_32(vecType->castAs<BitIntType>()->getNumBits()) &&
vecType->castAs<BitIntType>()->getNumBits() >= 8));
llvm::isPowerOf2_32(vecType->castAs<BitIntType>()->getNumBits())));

// Check if we've already instantiated a vector of this type.
llvm::FoldingSetNodeID ID;
Expand Down
4 changes: 2 additions & 2 deletions clang/lib/Sema/SemaType.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2321,9 +2321,9 @@ static bool CheckBitIntElementType(Sema &S, SourceLocation AttrLoc,
bool ForMatrixType = false) {
// Only support _BitInt elements with power of 2 NumBits (sub-byte widths
// such as 2 and 4 are allowed).
unsigned NumBits = BIT->getNumBits();
if (!llvm::isPowerOf2_32(NumBits) || NumBits < 8)
if (!llvm::isPowerOf2_32(NumBits))
return S.Diag(AttrLoc, diag::err_attribute_invalid_bitint_vector_type)
<< ForMatrixType << (NumBits < 8);
<< ForMatrixType;
return false;
}

Expand Down
35 changes: 35 additions & 0 deletions clang/test/CodeGenCXX/ext-int.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -571,6 +571,41 @@ void VectorTest(uint16_t4 first, uint16_t4 second) {
// CHECK: %[[ADD:.+]] = add <3 x i16> %[[Shuffle]], %[[Shuffle1]]
}

// Extended vector with four lanes of a sub-byte element type,
// unsigned _BitInt(4).
typedef unsigned _BitInt(4) uint4_t4 __attribute__((ext_vector_type(4)));
// Overload of VectorTest for the 4-bit element vector. It swizzles three
// lanes out of each operand and adds the results; the expectations below pin
// the per-target function signature and the shuffle/shuffle/add sequence
// operating on <3 x i4>.
void VectorTest(uint4_t4 first, uint4_t4 second) {
// LIN64: define{{.*}} void @_Z10VectorTestDv4_DU4_S0_(i32 %{{.+}}, i32 %{{.+}})
// LIN32: define{{.*}} void @_Z10VectorTestDv4_DU4_S0_(<4 x i4> %{{.+}}, <4 x i4> %{{.+}})
// WIN64: define dso_local void @"?VectorTest@@YAXT?$__vector@U?$_UBitInt@$03@__clang@@$03@__clang@@0@Z"(<4 x i4> %{{.+}}, <4 x i4> %{{.+}})
// WIN32: define dso_local void @"?VectorTest@@YAXT?$__vector@U?$_UBitInt@$03@__clang@@$03@__clang@@0@Z"(<4 x i4> inreg %{{.+}}, <4 x i4> inreg %{{.+}})
first.xzw + second.zwx;
// CHECK: %[[Shuffle:.+]] = shufflevector <4 x i4> %{{.+}}, <4 x i4> poison, <3 x i32> <i32 0, i32 2, i32 3>
// CHECK: %[[Shuffle1:.+]] = shufflevector <4 x i4> %{{.+}}, <4 x i4> poison, <3 x i32> <i32 2, i32 3, i32 0>
// CHECK: %[[ADD:.+]] = add <3 x i4> %[[Shuffle]], %[[Shuffle1]]
}

typedef unsigned _BitInt(2) uint2_t2 __attribute__((ext_vector_type(2)));
uint2_t2 TestBitIntVector2x2Alloca(uint2_t2 v1, uint2_t2 v2) {
Copy link
Contributor Author

@MrSidims MrSidims May 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The name suggests that I also had TestBitIntVector3/4/8x2Alloca tests, but they were not interesting (to me, as I was expecting to see alloca i8 -> i16 etc. there, but got an alloca of the vector type); only the 3-element test case brought some expected shuffles, but that is already tested elsewhere many times.

// LIN64: define dso_local i16 @_Z25TestBitIntVector2x2AllocaDv2_DU2_S0_(i16 %[[V1Coerce:.+]], i16 %[[V2Coerce:.+]])
// LIN64: %[[RetVal:.+]] = alloca <2 x i2>, align 2
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TBH, I was surprised to see `alloca <2 x i2>` and not `alloca i8` here.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would have also expected i8 here but I’m not a codegen expert.

// LIN64: %[[V1Addr:.+]] = alloca <2 x i2>, align 2
// LIN64: %[[V2Addr:.+]] = alloca <2 x i2>, align 2
// LIN64: %[[RetValCoerce:.+]] = alloca i16, align 2
// LIN64: call void @llvm.memcpy.p0.p0.i64(ptr align 2 %[[RetValCoerce]], ptr align 2 %[[RetVal]], i64 1, i1 false)
// LIN64: %[[Ret:.+]] = load i16, ptr %[[RetValCoerce]], align 2
// LIN64: ret i16 %[[Ret]]

// LIN32: define dso_local <2 x i2> @_Z25TestBitIntVector2x2AllocaDv2_DU2_S0_(<2 x i2> %{{.+}}, <2 x i2> %{{.+}})
// LIN32: %[[V1Addr:.+]] = alloca <2 x i2>, align 2
// LIN32: %[[V2Addr:.+]] = alloca <2 x i2>, align 2
// LIN32: ret <2 x i2> %[[Ret:.+]]

// WIN: define dso_local <2 x i2> @"?TestBitIntVector2x2Alloca@@YAT?$__vector@U?$_UBitInt@$01@__clang@@$01@__clang@@T12@0@Z"(<2 x i2>{{.*}}, <2 x i2>{{.*}})
// WIN: %[[V1:.+]] = alloca <2 x i2>, align 2
// WIN: %[[V2:.+]] = alloca <2 x i2>, align 2
// WIN: ret <2 x i2> %[[Ret:.+]]
return v1 + v2;
}

// Ensure that these types don't alias the normal int types.
void TBAATest(_BitInt(sizeof(int) * 8) ExtInt,
unsigned _BitInt(sizeof(int) * 8) ExtUInt,
Expand Down
48 changes: 45 additions & 3 deletions clang/test/CodeGenCXX/matrix-vector-bit-int.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ using i32x3 = _BitInt(32) __attribute__((ext_vector_type(3)));
using i32x3x3 = _BitInt(32) __attribute__((matrix_type(3, 3)));
using i512x3 = _BitInt(512) __attribute__((ext_vector_type(3)));
using i512x3x3 = _BitInt(512) __attribute__((matrix_type(3, 3)));
using i4x3 = _BitInt(4) __attribute__((ext_vector_type(3)));
using i4x3x3 = _BitInt(4) __attribute__((matrix_type(3, 3)));

// CHECK-LABEL: define dso_local i32 @_Z2v1Dv3_DB8_(
// CHECK-SAME: i32 [[A_COERCE:%.*]]) #[[ATTR0:[0-9]+]] {
Expand Down Expand Up @@ -68,8 +70,34 @@ i512x3 v3(i512x3 a) {
return a + a;
}

// CHECK-LABEL: define dso_local i32 @_Z2v4Dv3_DB4_(
// CHECK-SAME: i32 [[A_COERCE:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[RETVAL:%.*]] = alloca <3 x i4>, align 4
// CHECK-NEXT: [[A:%.*]] = alloca <3 x i4>, align 4
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <3 x i4>, align 4
// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca i32, align 4
// CHECK-NEXT: store i32 [[A_COERCE]], ptr [[A]], align 4
// CHECK-NEXT: [[LOADVECN:%.*]] = load <4 x i4>, ptr [[A]], align 4
// CHECK-NEXT: [[A1:%.*]] = shufflevector <4 x i4> [[LOADVECN]], <4 x i4> poison, <3 x i32> <i32 0, i32 1, i32 2>
// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i4> [[A1]], <3 x i4> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
// CHECK-NEXT: store <4 x i4> [[EXTRACTVEC]], ptr [[A_ADDR]], align 4
// CHECK-NEXT: [[LOADVECN2:%.*]] = load <4 x i4>, ptr [[A_ADDR]], align 4
// CHECK-NEXT: [[EXTRACTVEC3:%.*]] = shufflevector <4 x i4> [[LOADVECN2]], <4 x i4> poison, <3 x i32> <i32 0, i32 1, i32 2>
// CHECK-NEXT: [[LOADVECN4:%.*]] = load <4 x i4>, ptr [[A_ADDR]], align 4
// CHECK-NEXT: [[EXTRACTVEC5:%.*]] = shufflevector <4 x i4> [[LOADVECN4]], <4 x i4> poison, <3 x i32> <i32 0, i32 1, i32 2>
// CHECK-NEXT: [[ADD:%.*]] = add <3 x i4> [[EXTRACTVEC3]], [[EXTRACTVEC5]]
// CHECK-NEXT: store <3 x i4> [[ADD]], ptr [[RETVAL]], align 4
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[RETVAL_COERCE]], ptr align 4 [[RETVAL]], i64 2, i1 false)
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[RETVAL_COERCE]], align 4
// CHECK-NEXT: ret i32 [[TMP0]]
//
// Adds a 3-element extended vector of _BitInt(4) to itself; exercises
// argument/return handling and arithmetic for sub-byte vector elements.
i4x3 v4(i4x3 a) {
return a + a;
}

// CHECK-LABEL: define dso_local noundef <9 x i8> @_Z2m1u11matrix_typeILm3ELm3EDB8_E(
// CHECK-SAME: <9 x i8> noundef [[A:%.*]]) #[[ATTR3:[0-9]+]] {
// CHECK-SAME: <9 x i8> noundef [[A:%.*]]) #[[ATTR4:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca [9 x i8], align 1
// CHECK-NEXT: store <9 x i8> [[A]], ptr [[A_ADDR]], align 1
Expand All @@ -83,7 +111,7 @@ i8x3x3 m1(i8x3x3 a) {
}

// CHECK-LABEL: define dso_local noundef <9 x i32> @_Z2m2u11matrix_typeILm3ELm3EDB32_E(
// CHECK-SAME: <9 x i32> noundef [[A:%.*]]) #[[ATTR4:[0-9]+]] {
// CHECK-SAME: <9 x i32> noundef [[A:%.*]]) #[[ATTR5:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca [9 x i32], align 4
// CHECK-NEXT: store <9 x i32> [[A]], ptr [[A_ADDR]], align 4
Expand All @@ -97,7 +125,7 @@ i32x3x3 m2(i32x3x3 a) {
}

// CHECK-LABEL: define dso_local noundef <9 x i512> @_Z2m3u11matrix_typeILm3ELm3EDB512_E(
// CHECK-SAME: <9 x i512> noundef [[A:%.*]]) #[[ATTR5:[0-9]+]] {
// CHECK-SAME: <9 x i512> noundef [[A:%.*]]) #[[ATTR6:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca [9 x i512], align 8
// CHECK-NEXT: store <9 x i512> [[A]], ptr [[A_ADDR]], align 8
Expand All @@ -109,3 +137,17 @@ i32x3x3 m2(i32x3x3 a) {
i512x3x3 m3(i512x3x3 a) {
return a + a;
}

// CHECK-LABEL: define dso_local noundef <9 x i4> @_Z2m4u11matrix_typeILm3ELm3EDB4_E(
// CHECK-SAME: <9 x i4> noundef [[A:%.*]]) #[[ATTR7:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca [9 x i4], align 1
// CHECK-NEXT: store <9 x i4> [[A]], ptr [[A_ADDR]], align 1
// CHECK-NEXT: [[TMP0:%.*]] = load <9 x i4>, ptr [[A_ADDR]], align 1
// CHECK-NEXT: [[TMP1:%.*]] = load <9 x i4>, ptr [[A_ADDR]], align 1
// CHECK-NEXT: [[TMP2:%.*]] = add <9 x i4> [[TMP0]], [[TMP1]]
// CHECK-NEXT: ret <9 x i4> [[TMP2]]
//
// Adds a 3x3 matrix of _BitInt(4) to itself; exercises load/store and
// arithmetic for sub-byte matrix elements.
i4x3x3 m4(i4x3x3 a) {
return a + a;
}
12 changes: 2 additions & 10 deletions clang/test/SemaCXX/ext-int.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,17 +84,9 @@ struct is_same<T,T> {
};

// Reject vector types:
// expected-error@+1{{'_BitInt' vector element width must be at least as wide as 'CHAR_BIT'}}
typedef _BitInt(2) __attribute__((vector_size(16))) VecTy;
// expected-error@+1{{'_BitInt' vector element width must be at least as wide as 'CHAR_BIT'}}
typedef _BitInt(2) __attribute__((ext_vector_type(32))) OtherVecTy;
// expected-error@+1{{'_BitInt' vector element width must be at least as wide as 'CHAR_BIT'}}
typedef _BitInt(4) __attribute__((vector_size(16))) VecTy2;
// expected-error@+1{{'_BitInt' vector element width must be at least as wide as 'CHAR_BIT'}}
typedef _BitInt(4) __attribute__((ext_vector_type(32))) OtherVecTy2;
// expected-error@+1{{'_BitInt' vector element width must be at least as wide as 'CHAR_BIT'}}
// expected-error@+1{{'_BitInt' vector element width must be a power of 2}}
typedef _BitInt(5) __attribute__((vector_size(16))) VecTy3;
// expected-error@+1{{'_BitInt' vector element width must be at least as wide as 'CHAR_BIT'}}
// expected-error@+1{{'_BitInt' vector element width must be a power of 2}}
typedef _BitInt(5) __attribute__((ext_vector_type(32))) OtherVecTy3;
// expected-error@+1{{'_BitInt' vector element width must be a power of 2}}
typedef _BitInt(37) __attribute__((vector_size(16))) VecTy4;
Expand Down
3 changes: 1 addition & 2 deletions clang/test/SemaCXX/matrix-type.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,7 @@ void matrix_unsupported_element_type() {
}

void matrix_unsupported_bit_int() {
using m1 = _BitInt(2) __attribute__((matrix_type(4, 4))); // expected-error{{'_BitInt' matrix element width must be at least as wide as 'CHAR_BIT'}}
using m2 = _BitInt(7) __attribute__((matrix_type(4, 4))); // expected-error{{'_BitInt' matrix element width must be at least as wide as 'CHAR_BIT'}}
using m2 = _BitInt(7) __attribute__((matrix_type(4, 4))); // expected-error{{'_BitInt' matrix element width must be a power of 2}}
using m3 = _BitInt(9) __attribute__((matrix_type(4, 4))); // expected-error{{'_BitInt' matrix element width must be a power of 2}}
using m4 = _BitInt(12) __attribute__((matrix_type(4, 4))); // expected-error{{'_BitInt' matrix element width must be a power of 2}}
using m5 = _BitInt(8) __attribute__((matrix_type(4, 4)));
Expand Down
34 changes: 34 additions & 0 deletions llvm/test/CodeGen/X86/vec-2bit-int.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s

; Spills both <2 x i2> arguments to stack slots, reloads them, and returns
; their element-wise sum. The assertions below pin the x86-64 lowering of the
; sub-byte vector stores and the final add.
define dso_local <2 x i2> @foo(<2 x i2> %v1, <2 x i2> %v2) {
; CHECK-LABEL: foo:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movq %xmm1, %rax
; CHECK-NEXT: andb $3, %al
; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
; CHECK-NEXT: movq %xmm2, %rcx
; CHECK-NEXT: shlb $2, %cl
; CHECK-NEXT: orb %al, %cl
; CHECK-NEXT: andb $15, %cl
; CHECK-NEXT: movb %cl, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: movq %xmm0, %rax
; CHECK-NEXT: andb $3, %al
; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
; CHECK-NEXT: movq %xmm2, %rcx
; CHECK-NEXT: shlb $2, %cl
; CHECK-NEXT: orb %al, %cl
; CHECK-NEXT: andb $15, %cl
; CHECK-NEXT: movb %cl, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: paddq %xmm1, %xmm0
; CHECK-NEXT: retq
entry:
%v2.addr = alloca <2 x i2>, align 2
%v1.addr = alloca <2 x i2>, align 2
store <2 x i2> %v2, ptr %v2.addr, align 2
store <2 x i2> %v1, ptr %v1.addr, align 2
%0 = load <2 x i2>, ptr %v1.addr, align 2
%1 = load <2 x i2>, ptr %v2.addr, align 2
%add = add <2 x i2> %0, %1
ret <2 x i2> %add
}