Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 9 additions & 4 deletions clang/lib/AST/ASTContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2093,10 +2093,15 @@ TypeInfo ASTContext::getTypeInfoImpl(const Type *T) const {
case Type::ExtVector:
case Type::Vector: {
const auto *VT = cast<VectorType>(T);
TypeInfo EltInfo = getTypeInfo(VT->getElementType());
Width = VT->isPackedVectorBoolType(*this)
? VT->getNumElements()
: EltInfo.Width * VT->getNumElements();
QualType Elt = VT->getElementType();
uint64_t EltWidth = [&]() -> uint64_t {
Copy link
Collaborator

@shafik shafik Oct 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using an IILE (immediately invoked lambda expression) here does not seem warranted; you could just set EltWidth to 1 and then eliminate the first if.

Copy link
Contributor Author

@steffenlarsen steffenlarsen Oct 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We definitely could, but I fear it would make the intention less clear. I assume you mean something along the lines of:

uint64_t EltWidth = 1;
if (Elt.getTypePtrOrNull() && Elt.getTypePtr()->isBitIntType())
  EltWidth = Elt.getTypePtr()->castAs<BitIntType>()->getNumBits();
else if (!VT->isPackedVectorBoolType(*this))
  EltWidth = getTypeInfo(Elt).Width;

Since the default case would now be the last branch, determined by it not being the other cases and not a packed bool vector, I don't think it is very readable.

Alternatively, we could make the default case the initialization:

uint64_t EltWidth = getTypeInfo(Elt).Width;
if (Elt.getTypePtrOrNull() && Elt.getTypePtr()->isBitIntType())
  EltWidth = Elt.getTypePtr()->castAs<BitIntType>()->getNumBits();
else if (VT->isPackedVectorBoolType(*this))
  EltWidth = 1;

Then we run the risk of the compiler not being able to determine that getTypeInfo() has no side effects, leaving that initialization as a wasted operation. It should be smart enough, though, so I reckon that should be fine.

I personally prefer the current style (using an IILE), but I don't feel strongly one way or the other. If you prefer one of the above suggestions, I will make the change.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As part of a change that also fixes bit-casts, this lambda was moved to its own function on the ASTContext.

if (VT->isPackedVectorBoolType(*this))
return 1;
if (Elt.getTypePtrOrNull() && Elt.getTypePtr()->isBitIntType())
return Elt.getTypePtr()->castAs<BitIntType>()->getNumBits();
return getTypeInfo(Elt).Width;
}();
Width = EltWidth * VT->getNumElements();
// Enforce at least byte size and alignment.
Width = std::max<unsigned>(8, Width);
Align = std::max<unsigned>(8, Width);
Expand Down
28 changes: 15 additions & 13 deletions clang/test/CodeGenCXX/ext-int.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -573,7 +573,7 @@ void VectorTest(uint16_t4 first, uint16_t4 second) {

typedef unsigned _BitInt(4) uint4_t4 __attribute__((ext_vector_type(4)));
void VectorTest(uint4_t4 first, uint4_t4 second) {
// LIN64: define{{.*}} void @_Z10VectorTestDv4_DU4_S0_(i32 %{{.+}}, i32 %{{.+}})
// LIN64: define{{.*}} void @_Z10VectorTestDv4_DU4_S0_(i16 %{{.+}}, i16 %{{.+}})
// LIN32: define{{.*}} void @_Z10VectorTestDv4_DU4_S0_(<4 x i4> %{{.+}}, <4 x i4> %{{.+}})
// WIN64: define dso_local void @"?VectorTest@@YAXT?$__vector@U?$_UBitInt@$03@__clang@@$03@__clang@@0@Z"(<4 x i4> %{{.+}}, <4 x i4> %{{.+}})
// WIN32: define dso_local void @"?VectorTest@@YAXT?$__vector@U?$_UBitInt@$03@__clang@@$03@__clang@@0@Z"(<4 x i4> inreg %{{.+}}, <4 x i4> inreg %{{.+}})
Expand All @@ -585,23 +585,25 @@ void VectorTest(uint4_t4 first, uint4_t4 second) {

typedef unsigned _BitInt(2) uint2_t2 __attribute__((ext_vector_type(2)));
uint2_t2 TestBitIntVector2x2Alloca(uint2_t2 v1, uint2_t2 v2) {
// LIN64: define dso_local i16 @_Z25TestBitIntVector2x2AllocaDv2_DU2_S0_(i16 %[[V1Coerce:.+]], i16 %[[V2Coerce:.+]])
// LIN64: %[[RetVal:.+]] = alloca <2 x i2>, align 2
// LIN64: %[[V1Addr:.+]] = alloca <2 x i2>, align 2
// LIN64: %[[V2Addr:.+]] = alloca <2 x i2>, align 2
// LIN64: %[[RetValCoerce:.+]] = alloca i16, align 2
// LIN64: call void @llvm.memcpy.p0.p0.i64(ptr align 2 %[[RetValCoerce]], ptr align 2 %[[RetVal]], i64 1, i1 false)
// LIN64: %[[Ret:.+]] = load i16, ptr %[[RetValCoerce]], align 2
// LIN64: ret i16 %[[Ret]]
// LIN64: define dso_local i8 @_Z25TestBitIntVector2x2AllocaDv2_DU2_S0_(i8 %[[V1Coerce:.+]], i8 %[[V2Coerce:.+]])
// LIN64: %[[RetVal:.+]] = alloca <2 x i2>, align 1
// LIN64: %[[V1Addr:.+]] = alloca <2 x i2>, align 1
// LIN64: %[[V2Addr:.+]] = alloca <2 x i2>, align 1
// LIN64: %[[V1Val:.+]] = load <2 x i2>, ptr %[[V1Addr]], align 1
// LIN64: %[[V2Val:.+]] = load <2 x i2>, ptr %[[V2Addr]], align 1
// LIN64: %[[AddVal:.+]] = add <2 x i2> %0, %1
// LIN64: store <2 x i2> %[[AddVal]], ptr %[[RetVal]], align 1
// LIN64: %[[Ret:.+]] = load i8, ptr %[[RetVal]], align 1
// LIN64: ret i8 %[[Ret]]

// LIN32: define dso_local <2 x i2> @_Z25TestBitIntVector2x2AllocaDv2_DU2_S0_(<2 x i2> %{{.+}}, <2 x i2> %{{.+}})
// LIN32: %[[V1Addr:.+]] = alloca <2 x i2>, align 2
// LIN32: %[[V2Addr:.+]] = alloca <2 x i2>, align 2
// LIN32: %[[V1Addr:.+]] = alloca <2 x i2>, align 1
// LIN32: %[[V2Addr:.+]] = alloca <2 x i2>, align 1
// LIN32: ret <2 x i2> %[[Ret:.+]]

// WIN: define dso_local <2 x i2> @"?TestBitIntVector2x2Alloca@@YAT?$__vector@U?$_UBitInt@$01@__clang@@$01@__clang@@T12@0@Z"(<2 x i2>{{.*}}, <2 x i2>{{.*}})
// WIN: %[[V1:.+]] = alloca <2 x i2>, align 2
// WIN: %[[V2:.+]] = alloca <2 x i2>, align 2
// WIN: %[[V1:.+]] = alloca <2 x i2>, align 1
// WIN: %[[V2:.+]] = alloca <2 x i2>, align 1
// WIN: ret <2 x i2> %[[Ret:.+]]
return v1 + v2;
}
Expand Down
28 changes: 13 additions & 15 deletions clang/test/CodeGenCXX/matrix-vector-bit-int.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,27 +70,25 @@ i512x3 v3(i512x3 a) {
return a + a;
}

// CHECK-LABEL: define dso_local i32 @_Z2v4Dv3_DB4_(
// CHECK-SAME: i32 [[A_COERCE:%.*]]) #[[ATTR0]] {
// CHECK-LABEL: define dso_local i16 @_Z2v4Dv3_DB4_(
// CHECK-SAME: i16 [[A_COERCE:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[RETVAL:%.*]] = alloca <3 x i4>, align 4
// CHECK-NEXT: [[A:%.*]] = alloca <3 x i4>, align 4
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <3 x i4>, align 4
// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca i32, align 4
// CHECK-NEXT: store i32 [[A_COERCE]], ptr [[A]], align 4
// CHECK-NEXT: [[LOADVECN:%.*]] = load <4 x i4>, ptr [[A]], align 4
// CHECK-NEXT: [[RETVAL:%.*]] = alloca <3 x i4>, align 2
// CHECK-NEXT: [[A:%.*]] = alloca <3 x i4>, align 2
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <3 x i4>, align 2
// CHECK-NEXT: store i16 [[A_COERCE]], ptr [[A]], align 2
// CHECK-NEXT: [[LOADVECN:%.*]] = load <4 x i4>, ptr [[A]], align 2
// CHECK-NEXT: [[A1:%.*]] = shufflevector <4 x i4> [[LOADVECN]], <4 x i4> poison, <3 x i32> <i32 0, i32 1, i32 2>
// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i4> [[A1]], <3 x i4> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
// CHECK-NEXT: store <4 x i4> [[EXTRACTVEC]], ptr [[A_ADDR]], align 4
// CHECK-NEXT: [[LOADVECN2:%.*]] = load <4 x i4>, ptr [[A_ADDR]], align 4
// CHECK-NEXT: store <4 x i4> [[EXTRACTVEC]], ptr [[A_ADDR]], align 2
// CHECK-NEXT: [[LOADVECN2:%.*]] = load <4 x i4>, ptr [[A_ADDR]], align 2
// CHECK-NEXT: [[EXTRACTVEC3:%.*]] = shufflevector <4 x i4> [[LOADVECN2]], <4 x i4> poison, <3 x i32> <i32 0, i32 1, i32 2>
// CHECK-NEXT: [[LOADVECN4:%.*]] = load <4 x i4>, ptr [[A_ADDR]], align 4
// CHECK-NEXT: [[LOADVECN4:%.*]] = load <4 x i4>, ptr [[A_ADDR]], align 2
// CHECK-NEXT: [[EXTRACTVEC5:%.*]] = shufflevector <4 x i4> [[LOADVECN4]], <4 x i4> poison, <3 x i32> <i32 0, i32 1, i32 2>
// CHECK-NEXT: [[ADD:%.*]] = add <3 x i4> [[EXTRACTVEC3]], [[EXTRACTVEC5]]
// CHECK-NEXT: store <3 x i4> [[ADD]], ptr [[RETVAL]], align 4
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[RETVAL_COERCE]], ptr align 4 [[RETVAL]], i64 2, i1 false)
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[RETVAL_COERCE]], align 4
// CHECK-NEXT: ret i32 [[TMP0]]
// CHECK-NEXT: store <3 x i4> [[ADD]], ptr [[RETVAL]], align 2
// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[RETVAL]], align 2
// CHECK-NEXT: ret i16 [[TMP0]]
//
i4x3 v4(i4x3 a) {
return a + a;
Expand Down
30 changes: 30 additions & 0 deletions clang/test/SemaCXX/ext-int.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -293,3 +293,33 @@ void FromPaper1() {
void FromPaper2(_BitInt(8) a1, _BitInt(24) a2) {
static_assert(is_same<decltype(a1 * (_BitInt(32))a2), _BitInt(32)>::value, "");
}

// Check sub-byte integer vector size and alignment, expecting packing.
template <int Bits, int N>
using packed_vec_t = _BitInt(Bits) __attribute__((ext_vector_type(N)));
void SubByteVecPacking() {
static_assert(sizeof(packed_vec_t<2, 2>) == 1);
static_assert(sizeof(packed_vec_t<2, 3>) == 1);
static_assert(sizeof(packed_vec_t<2, 4>) == 1);
static_assert(sizeof(packed_vec_t<2, 8>) == 2);
static_assert(sizeof(packed_vec_t<2, 16>) == 4);
static_assert(sizeof(packed_vec_t<2, 32>) == 8);
static_assert(sizeof(packed_vec_t<4, 2>) == 1);
static_assert(sizeof(packed_vec_t<4, 4>) == 2);
static_assert(sizeof(packed_vec_t<4, 8>) == 4);
static_assert(sizeof(packed_vec_t<4, 16>) == 8);
static_assert(sizeof(packed_vec_t<4, 32>) == 16);

static_assert(alignof(packed_vec_t<2, 2>) == 1);
static_assert(alignof(packed_vec_t<2, 3>) == 1);
static_assert(alignof(packed_vec_t<2, 4>) == 1);
static_assert(alignof(packed_vec_t<2, 8>) == 2);
static_assert(alignof(packed_vec_t<2, 16>) == 4);
static_assert(alignof(packed_vec_t<2, 32>) == 8);
static_assert(alignof(packed_vec_t<4, 2>) == 1);
static_assert(alignof(packed_vec_t<4, 3>) == 2);
static_assert(alignof(packed_vec_t<4, 4>) == 2);
static_assert(alignof(packed_vec_t<4, 8>) == 4);
static_assert(alignof(packed_vec_t<4, 16>) == 8);
static_assert(alignof(packed_vec_t<4, 32>) == 16);
}