Skip to content

Commit 545a9df

Browse files
committed
[clang] Fix size and alignment of packed sub-byte integer vectors
When using sub-byte integer types in vectors, the data is packed into the first N bits, where N is the bit-size of the sub-byte integer type multiplied by the number of vector elements. However, clang currently reports the size as if each element were one byte wide, because the element type is considered a single byte wide when taken in isolation. This commit fixes the reported size and alignment of sub-byte vector types so that they correspond to the bit-packed layout they employ. Signed-off-by: Larsen, Steffen <[email protected]>
1 parent a7016c4 commit 545a9df

File tree

4 files changed

+67
-32
lines changed

4 files changed

+67
-32
lines changed

clang/lib/AST/ASTContext.cpp

Lines changed: 9 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -2093,10 +2093,15 @@ TypeInfo ASTContext::getTypeInfoImpl(const Type *T) const {
20932093
case Type::ExtVector:
20942094
case Type::Vector: {
20952095
const auto *VT = cast<VectorType>(T);
2096-
TypeInfo EltInfo = getTypeInfo(VT->getElementType());
2097-
Width = VT->isPackedVectorBoolType(*this)
2098-
? VT->getNumElements()
2099-
: EltInfo.Width * VT->getNumElements();
2096+
QualType Elt = VT->getElementType();
2097+
uint64_t EltWidth = [&]() -> uint64_t {
2098+
if (VT->isPackedVectorBoolType(*this))
2099+
return 1;
2100+
if (Elt.getTypePtrOrNull() && Elt.getTypePtr()->isBitIntType())
2101+
return Elt.getTypePtr()->castAs<BitIntType>()->getNumBits();
2102+
return getTypeInfo(Elt).Width;
2103+
}();
2104+
Width = EltWidth * VT->getNumElements();
21002105
// Enforce at least byte size and alignment.
21012106
Width = std::max<unsigned>(8, Width);
21022107
Align = std::max<unsigned>(8, Width);

clang/test/CodeGenCXX/ext-int.cpp

Lines changed: 15 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -573,7 +573,7 @@ void VectorTest(uint16_t4 first, uint16_t4 second) {
573573

574574
typedef unsigned _BitInt(4) uint4_t4 __attribute__((ext_vector_type(4)));
575575
void VectorTest(uint4_t4 first, uint4_t4 second) {
576-
// LIN64: define{{.*}} void @_Z10VectorTestDv4_DU4_S0_(i32 %{{.+}}, i32 %{{.+}})
576+
// LIN64: define{{.*}} void @_Z10VectorTestDv4_DU4_S0_(i16 %{{.+}}, i16 %{{.+}})
577577
// LIN32: define{{.*}} void @_Z10VectorTestDv4_DU4_S0_(<4 x i4> %{{.+}}, <4 x i4> %{{.+}})
578578
// WIN64: define dso_local void @"?VectorTest@@YAXT?$__vector@U?$_UBitInt@$03@__clang@@$03@__clang@@0@Z"(<4 x i4> %{{.+}}, <4 x i4> %{{.+}})
579579
// WIN32: define dso_local void @"?VectorTest@@YAXT?$__vector@U?$_UBitInt@$03@__clang@@$03@__clang@@0@Z"(<4 x i4> inreg %{{.+}}, <4 x i4> inreg %{{.+}})
@@ -585,23 +585,25 @@ void VectorTest(uint4_t4 first, uint4_t4 second) {
585585

586586
typedef unsigned _BitInt(2) uint2_t2 __attribute__((ext_vector_type(2)));
587587
uint2_t2 TestBitIntVector2x2Alloca(uint2_t2 v1, uint2_t2 v2) {
588-
// LIN64: define dso_local i16 @_Z25TestBitIntVector2x2AllocaDv2_DU2_S0_(i16 %[[V1Coerce:.+]], i16 %[[V2Coerce:.+]])
589-
// LIN64: %[[RetVal:.+]] = alloca <2 x i2>, align 2
590-
// LIN64: %[[V1Addr:.+]] = alloca <2 x i2>, align 2
591-
// LIN64: %[[V2Addr:.+]] = alloca <2 x i2>, align 2
592-
// LIN64: %[[RetValCoerce:.+]] = alloca i16, align 2
593-
// LIN64: call void @llvm.memcpy.p0.p0.i64(ptr align 2 %[[RetValCoerce]], ptr align 2 %[[RetVal]], i64 1, i1 false)
594-
// LIN64: %[[Ret:.+]] = load i16, ptr %[[RetValCoerce]], align 2
595-
// LIN64: ret i16 %[[Ret]]
588+
// LIN64: define dso_local i8 @_Z25TestBitIntVector2x2AllocaDv2_DU2_S0_(i8 %[[V1Coerce:.+]], i8 %[[V2Coerce:.+]])
589+
// LIN64: %[[RetVal:.+]] = alloca <2 x i2>, align 1
590+
// LIN64: %[[V1Addr:.+]] = alloca <2 x i2>, align 1
591+
// LIN64: %[[V2Addr:.+]] = alloca <2 x i2>, align 1
592+
// LIN64: %[[V1Val:.+]] = load <2 x i2>, ptr %[[V1Addr]], align 1
593+
// LIN64: %[[V2Val:.+]] = load <2 x i2>, ptr %[[V2Addr]], align 1
594+
// LIN64: %[[AddVal:.+]] = add <2 x i2> %0, %1
595+
// LIN64: store <2 x i2> %[[AddVal]], ptr %[[RetVal]], align 1
596+
// LIN64: %[[Ret:.+]] = load i8, ptr %[[RetVal]], align 1
597+
// LIN64: ret i8 %[[Ret]]
596598

597599
// LIN32: define dso_local <2 x i2> @_Z25TestBitIntVector2x2AllocaDv2_DU2_S0_(<2 x i2> %{{.+}}, <2 x i2> %{{.+}})
598-
// LIN32: %[[V1Addr:.+]] = alloca <2 x i2>, align 2
599-
// LIN32: %[[V2Addr:.+]] = alloca <2 x i2>, align 2
600+
// LIN32: %[[V1Addr:.+]] = alloca <2 x i2>, align 1
601+
// LIN32: %[[V2Addr:.+]] = alloca <2 x i2>, align 1
600602
// LIN32: ret <2 x i2> %[[Ret:.+]]
601603

602604
// WIN: define dso_local <2 x i2> @"?TestBitIntVector2x2Alloca@@YAT?$__vector@U?$_UBitInt@$01@__clang@@$01@__clang@@T12@0@Z"(<2 x i2>{{.*}}, <2 x i2>{{.*}})
603-
// WIN: %[[V1:.+]] = alloca <2 x i2>, align 2
604-
// WIN: %[[V2:.+]] = alloca <2 x i2>, align 2
605+
// WIN: %[[V1:.+]] = alloca <2 x i2>, align 1
606+
// WIN: %[[V2:.+]] = alloca <2 x i2>, align 1
605607
// WIN: ret <2 x i2> %[[Ret:.+]]
606608
return v1 + v2;
607609
}

clang/test/CodeGenCXX/matrix-vector-bit-int.cpp

Lines changed: 13 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -70,27 +70,25 @@ i512x3 v3(i512x3 a) {
7070
return a + a;
7171
}
7272

73-
// CHECK-LABEL: define dso_local i32 @_Z2v4Dv3_DB4_(
74-
// CHECK-SAME: i32 [[A_COERCE:%.*]]) #[[ATTR0]] {
73+
// CHECK-LABEL: define dso_local i16 @_Z2v4Dv3_DB4_(
74+
// CHECK-SAME: i16 [[A_COERCE:%.*]]) #[[ATTR0]] {
7575
// CHECK-NEXT: [[ENTRY:.*:]]
76-
// CHECK-NEXT: [[RETVAL:%.*]] = alloca <3 x i4>, align 4
77-
// CHECK-NEXT: [[A:%.*]] = alloca <3 x i4>, align 4
78-
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <3 x i4>, align 4
79-
// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca i32, align 4
80-
// CHECK-NEXT: store i32 [[A_COERCE]], ptr [[A]], align 4
81-
// CHECK-NEXT: [[LOADVECN:%.*]] = load <4 x i4>, ptr [[A]], align 4
76+
// CHECK-NEXT: [[RETVAL:%.*]] = alloca <3 x i4>, align 2
77+
// CHECK-NEXT: [[A:%.*]] = alloca <3 x i4>, align 2
78+
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <3 x i4>, align 2
79+
// CHECK-NEXT: store i16 [[A_COERCE]], ptr [[A]], align 2
80+
// CHECK-NEXT: [[LOADVECN:%.*]] = load <4 x i4>, ptr [[A]], align 2
8281
// CHECK-NEXT: [[A1:%.*]] = shufflevector <4 x i4> [[LOADVECN]], <4 x i4> poison, <3 x i32> <i32 0, i32 1, i32 2>
8382
// CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i4> [[A1]], <3 x i4> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
84-
// CHECK-NEXT: store <4 x i4> [[EXTRACTVEC]], ptr [[A_ADDR]], align 4
85-
// CHECK-NEXT: [[LOADVECN2:%.*]] = load <4 x i4>, ptr [[A_ADDR]], align 4
83+
// CHECK-NEXT: store <4 x i4> [[EXTRACTVEC]], ptr [[A_ADDR]], align 2
84+
// CHECK-NEXT: [[LOADVECN2:%.*]] = load <4 x i4>, ptr [[A_ADDR]], align 2
8685
// CHECK-NEXT: [[EXTRACTVEC3:%.*]] = shufflevector <4 x i4> [[LOADVECN2]], <4 x i4> poison, <3 x i32> <i32 0, i32 1, i32 2>
87-
// CHECK-NEXT: [[LOADVECN4:%.*]] = load <4 x i4>, ptr [[A_ADDR]], align 4
86+
// CHECK-NEXT: [[LOADVECN4:%.*]] = load <4 x i4>, ptr [[A_ADDR]], align 2
8887
// CHECK-NEXT: [[EXTRACTVEC5:%.*]] = shufflevector <4 x i4> [[LOADVECN4]], <4 x i4> poison, <3 x i32> <i32 0, i32 1, i32 2>
8988
// CHECK-NEXT: [[ADD:%.*]] = add <3 x i4> [[EXTRACTVEC3]], [[EXTRACTVEC5]]
90-
// CHECK-NEXT: store <3 x i4> [[ADD]], ptr [[RETVAL]], align 4
91-
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[RETVAL_COERCE]], ptr align 4 [[RETVAL]], i64 2, i1 false)
92-
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[RETVAL_COERCE]], align 4
93-
// CHECK-NEXT: ret i32 [[TMP0]]
89+
// CHECK-NEXT: store <3 x i4> [[ADD]], ptr [[RETVAL]], align 2
90+
// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[RETVAL]], align 2
91+
// CHECK-NEXT: ret i16 [[TMP0]]
9492
//
9593
i4x3 v4(i4x3 a) {
9694
return a + a;

clang/test/SemaCXX/ext-int.cpp

Lines changed: 30 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -293,3 +293,33 @@ void FromPaper1() {
293293
void FromPaper2(_BitInt(8) a1, _BitInt(24) a2) {
294294
static_assert(is_same<decltype(a1 * (_BitInt(32))a2), _BitInt(32)>::value, "");
295295
}
296+
297+
// Check sub-byte integer vector size and alignment, expecting packing.
298+
template <int Bits, int N>
299+
using packed_vec_t = _BitInt(Bits) __attribute__((ext_vector_type(N)));
300+
void SubByteVecPacking() {
301+
static_assert(sizeof(packed_vec_t<2, 2>) == 1);
302+
static_assert(sizeof(packed_vec_t<2, 3>) == 1);
303+
static_assert(sizeof(packed_vec_t<2, 4>) == 1);
304+
static_assert(sizeof(packed_vec_t<2, 8>) == 2);
305+
static_assert(sizeof(packed_vec_t<2, 16>) == 4);
306+
static_assert(sizeof(packed_vec_t<2, 32>) == 8);
307+
static_assert(sizeof(packed_vec_t<4, 2>) == 1);
308+
static_assert(sizeof(packed_vec_t<4, 4>) == 2);
309+
static_assert(sizeof(packed_vec_t<4, 8>) == 4);
310+
static_assert(sizeof(packed_vec_t<4, 16>) == 8);
311+
static_assert(sizeof(packed_vec_t<4, 32>) == 16);
312+
313+
static_assert(alignof(packed_vec_t<2, 2>) == 1);
314+
static_assert(alignof(packed_vec_t<2, 3>) == 1);
315+
static_assert(alignof(packed_vec_t<2, 4>) == 1);
316+
static_assert(alignof(packed_vec_t<2, 8>) == 2);
317+
static_assert(alignof(packed_vec_t<2, 16>) == 4);
318+
static_assert(alignof(packed_vec_t<2, 32>) == 8);
319+
static_assert(alignof(packed_vec_t<4, 2>) == 1);
320+
static_assert(alignof(packed_vec_t<4, 3>) == 2);
321+
static_assert(alignof(packed_vec_t<4, 4>) == 2);
322+
static_assert(alignof(packed_vec_t<4, 8>) == 4);
323+
static_assert(alignof(packed_vec_t<4, 16>) == 8);
324+
static_assert(alignof(packed_vec_t<4, 32>) == 16);
325+
}

0 commit comments

Comments
 (0)