diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h
index 12351e98e5a2b..7caccf2a22f0a 100644
--- a/clang/include/clang/AST/ASTContext.h
+++ b/clang/include/clang/AST/ASTContext.h
@@ -2677,6 +2677,16 @@ class ASTContext : public RefCountedBase<ASTContext> {
     return getTypeSizeInCharsIfKnown(QualType(Ty, 0));
   }
 
+  /// Return the size in bits of an element of the given vector type.
+  uint64_t getVectorElementSize(const VectorType *VTy) const {
+    QualType EltTy = VTy->getElementType();
+    if (VTy->isPackedVectorBoolType(*this))
+      return 1;
+    if (EltTy->isBitIntType())
+      return EltTy->castAs<BitIntType>()->getNumBits();
+    return getTypeSize(EltTy);
+  }
+
   /// Return the ABI-specified alignment of a (complete) type \p T, in
   /// bits.
   unsigned getTypeAlign(QualType T) const { return getTypeInfo(T).Align; }
diff --git a/clang/include/clang/AST/TypeBase.h b/clang/include/clang/AST/TypeBase.h
index 6786b2f6cbc78..283af7d97960f 100644
--- a/clang/include/clang/AST/TypeBase.h
+++ b/clang/include/clang/AST/TypeBase.h
@@ -2640,6 +2640,8 @@ class alignas(TypeAlignment) Type : public ExtQualsTypeCommonBase {
   // Extended vector type with bool element that is packed. HLSL doesn't pack
   // its bool vectors.
   bool isPackedVectorBoolType(const ASTContext &ctx) const;
+  // Vector type with _BitInt elements.
+  bool isBitIntVectorType() const;
   bool isSubscriptableVectorType() const;
   bool isMatrixType() const;               // Matrix type.
   bool isConstantMatrixType() const;       // Constant matrix type.
@@ -8681,6 +8683,11 @@ inline bool Type::isExtVectorBoolType() const {
   return cast<ExtVectorType>(CanonicalType)->getElementType()->isBooleanType();
 }
 
+inline bool Type::isBitIntVectorType() const {
+  return isVectorType() &&
+         cast<VectorType>(CanonicalType)->getElementType()->isBitIntType();
+}
+
 inline bool Type::isSubscriptableVectorType() const {
   return isVectorType() || isSveVLSBuiltinType();
 }
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index 056bfe36b2a0a..cc91960cfe01b 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -2093,10 +2093,7 @@ TypeInfo ASTContext::getTypeInfoImpl(const Type *T) const {
   case Type::ExtVector:
   case Type::Vector: {
     const auto *VT = cast<VectorType>(T);
-    TypeInfo EltInfo = getTypeInfo(VT->getElementType());
-    Width = VT->isPackedVectorBoolType(*this)
-                ? VT->getNumElements()
-                : EltInfo.Width * VT->getNumElements();
+    Width = getVectorElementSize(VT) * VT->getNumElements();
     // Enforce at least byte size and alignment.
     Width = std::max<unsigned>(8, Width);
     Align = std::max<unsigned>(8, Width);
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index b706b14945b6d..4fdea501e0b57 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -7589,8 +7589,8 @@ class APValueToBufferConverter {
     QualType EltTy = VTy->getElementType();
     unsigned NElts = VTy->getNumElements();
 
-    if (VTy->isPackedVectorBoolType(Info.Ctx)) {
-      // Special handling for OpenCL bool vectors:
+    if (VTy->isPackedVectorBoolType(Info.Ctx) || VTy->isBitIntVectorType()) {
+      // Special handling for OpenCL bool and sub-byte vectors:
       // Since these vectors are stored as packed bits, but we can't write
       // individual bits to the BitCastBuffer, we'll buffer all of the elements
       // together into an appropriately sized APInt and write them all out at
@@ -7599,18 +7599,21 @@ class APValueToBufferConverter {
      // have to worry about writing data which should have been left
      // uninitialized.
      bool BigEndian = Info.Ctx.getTargetInfo().isBigEndian();
+      uint64_t EltSize = Info.Ctx.getVectorElementSize(VTy);
 
-      llvm::APInt Res = llvm::APInt::getZero(NElts);
+      llvm::APInt Res = llvm::APInt::getZero(NElts * EltSize);
       for (unsigned I = 0; I < NElts; ++I) {
         const llvm::APSInt &EltAsInt = Val.getVectorElt(I).getInt();
-        assert(EltAsInt.isUnsigned() && EltAsInt.getBitWidth() == 1 &&
-               "bool vector element must be 1-bit unsigned integer!");
-
-        Res.insertBits(EltAsInt, BigEndian ? (NElts - I - 1) : I);
+        assert((!VTy->isPackedVectorBoolType(Info.Ctx) ||
+                (EltAsInt.isUnsigned() && EltAsInt.getBitWidth() == 1)) &&
+               "bool vector element must be 1-bit unsigned integer!");
+        uint64_t BitOffset = EltSize * (BigEndian ? (NElts - I - 1) : I);
+        Res.insertBits(EltAsInt, BitOffset);
       }
 
-      SmallVector<uint8_t, 8> Bytes(NElts / 8);
-      llvm::StoreIntToMemory(Res, &*Bytes.begin(), NElts / 8);
+      uint64_t NumBytes = NElts * EltSize / 8;
+      SmallVector<uint8_t, 8> Bytes(NumBytes);
+      llvm::StoreIntToMemory(Res, &*Bytes.begin(), NumBytes);
       Buffer.writeObject(Offset, Bytes);
     } else {
       // Iterate over each of the elements and write them out to the buffer at
@@ -7852,13 +7855,11 @@ class BufferToAPValueConverter {
   std::optional<APValue> visit(const VectorType *VTy, CharUnits Offset) {
     QualType EltTy = VTy->getElementType();
     unsigned NElts = VTy->getNumElements();
-    unsigned EltSize =
-        VTy->isPackedVectorBoolType(Info.Ctx) ? 1 : Info.Ctx.getTypeSize(EltTy);
 
     SmallVector<APValue, 4> Elts;
     Elts.reserve(NElts);
-    if (VTy->isPackedVectorBoolType(Info.Ctx)) {
-      // Special handling for OpenCL bool vectors:
+    if (VTy->isPackedVectorBoolType(Info.Ctx) || VTy->isBitIntVectorType()) {
+      // Special handling for OpenCL bool and sub-byte vectors:
       // Since these vectors are stored as packed bits, but we can't read
       // individual bits from the BitCastBuffer, we'll buffer all of the
       // elements together into an appropriately sized APInt and write them all
@@ -7867,20 +7868,22 @@ class BufferToAPValueConverter {
      // we don't have to worry about reading any padding data which didn't
      // actually need to be accessed.
      bool BigEndian = Info.Ctx.getTargetInfo().isBigEndian();
+      uint64_t EltSize = Info.Ctx.getVectorElementSize(VTy);
+      bool IsSigned = EltTy->isSignedIntegerType();
+      uint64_t NumBytes = NElts * EltSize / 8;
 
       SmallVector<uint8_t, 8> Bytes;
-      Bytes.reserve(NElts / 8);
-      if (!Buffer.readObject(Offset, CharUnits::fromQuantity(NElts / 8), Bytes))
+      Bytes.reserve(NumBytes);
+      if (!Buffer.readObject(Offset, CharUnits::fromQuantity(NumBytes), Bytes))
         return std::nullopt;
 
-      APSInt SValInt(NElts, true);
-      llvm::LoadIntFromMemory(SValInt, &*Bytes.begin(), Bytes.size());
+      APSInt SValInt(NElts * EltSize);
+      llvm::LoadIntFromMemory(SValInt, Bytes.data(), Bytes.size());
 
       for (unsigned I = 0; I < NElts; ++I) {
-        llvm::APInt Elt =
-            SValInt.extractBits(1, (BigEndian ? NElts - I - 1 : I) * EltSize);
-        Elts.emplace_back(
-            APSInt(std::move(Elt), !EltTy->isSignedIntegerType()));
+        uint64_t BitOffset = EltSize * (BigEndian ? (NElts - I - 1) : I);
+        llvm::APInt Elt = SValInt.extractBits(EltSize, BitOffset);
+        Elts.emplace_back(APSInt(std::move(Elt), !IsSigned));
       }
     } else {
       // Iterate over each of the elements and read them from the buffer at
@@ -7986,8 +7989,7 @@ static bool checkBitCastConstexprEligibilityType(SourceLocation Loc,
   if (const auto *VTy = Ty->getAs<VectorType>()) {
     QualType EltTy = VTy->getElementType();
     unsigned NElts = VTy->getNumElements();
-    unsigned EltSize =
-        VTy->isPackedVectorBoolType(Ctx) ? 1 : Ctx.getTypeSize(EltTy);
+    unsigned EltSize = Ctx.getVectorElementSize(VTy);
 
     if ((NElts * EltSize) % Ctx.getCharWidth() != 0) {
       // The vector's size in bits is not a multiple of the target's byte size,
diff --git a/clang/test/CodeGenCXX/ext-int.cpp b/clang/test/CodeGenCXX/ext-int.cpp
index a75b3701e36ef..0454363ca7f80 100644
--- a/clang/test/CodeGenCXX/ext-int.cpp
+++ b/clang/test/CodeGenCXX/ext-int.cpp
@@ -573,7 +573,7 @@ void VectorTest(uint16_t4 first, uint16_t4 second) {
 
 typedef unsigned _BitInt(4) uint4_t4 __attribute__((ext_vector_type(4)));
 void VectorTest(uint4_t4 first, uint4_t4 second) {
-  // LIN64: define{{.*}} void @_Z10VectorTestDv4_DU4_S0_(i32 %{{.+}}, i32 %{{.+}})
+  // LIN64: define{{.*}} void @_Z10VectorTestDv4_DU4_S0_(i16 %{{.+}}, i16 %{{.+}})
   // LIN32: define{{.*}} void @_Z10VectorTestDv4_DU4_S0_(<4 x i4> %{{.+}}, <4 x i4> %{{.+}})
   // WIN64: define dso_local void @"?VectorTest@@YAXT?$__vector@U?$_UBitInt@$03@__clang@@$03@__clang@@0@Z"(<4 x i4> %{{.+}}, <4 x i4> %{{.+}})
   // WIN32: define dso_local void @"?VectorTest@@YAXT?$__vector@U?$_UBitInt@$03@__clang@@$03@__clang@@0@Z"(<4 x i4> inreg %{{.+}}, <4 x i4> inreg %{{.+}})
@@ -585,23 +585,25 @@ void VectorTest(uint4_t4 first, uint4_t4 second) {
 
 typedef unsigned _BitInt(2) uint2_t2 __attribute__((ext_vector_type(2)));
 uint2_t2 TestBitIntVector2x2Alloca(uint2_t2 v1, uint2_t2 v2) {
-  // LIN64: define dso_local i16 @_Z25TestBitIntVector2x2AllocaDv2_DU2_S0_(i16 %[[V1Coerce:.+]], i16 %[[V2Coerce:.+]])
-  // LIN64: %[[RetVal:.+]] = alloca <2 x i2>, align 2
-  // LIN64: %[[V1Addr:.+]] = alloca <2 x i2>, align 2
-  // LIN64: %[[V2Addr:.+]] = alloca <2 x i2>, align 2
-  // LIN64: %[[RetValCoerce:.+]] = alloca i16, align 2
-  // LIN64: call void @llvm.memcpy.p0.p0.i64(ptr align 2 %[[RetValCoerce]], ptr align 2 %[[RetVal]], i64 1, i1 false)
-  // LIN64: %[[Ret:.+]] = load i16, ptr %[[RetValCoerce]], align 2
-  // LIN64: ret i16 %[[Ret]]
+  // LIN64: define dso_local i8 @_Z25TestBitIntVector2x2AllocaDv2_DU2_S0_(i8 %[[V1Coerce:.+]], i8 %[[V2Coerce:.+]])
+  // LIN64: %[[RetVal:.+]] = alloca <2 x i2>, align 1
+  // LIN64: %[[V1Addr:.+]] = alloca <2 x i2>, align 1
+  // LIN64: %[[V2Addr:.+]] = alloca <2 x i2>, align 1
+  // LIN64: %[[V1Val:.+]] = load <2 x i2>, ptr %[[V1Addr]], align 1
+  // LIN64: %[[V2Val:.+]] = load <2 x i2>, ptr %[[V2Addr]], align 1
+  // LIN64: %[[AddVal:.+]] = add <2 x i2> %0, %1
+  // LIN64: store <2 x i2> %[[AddVal]], ptr %[[RetVal]], align 1
+  // LIN64: %[[Ret:.+]] = load i8, ptr %[[RetVal]], align 1
+  // LIN64: ret i8 %[[Ret]]
 
   // LIN32: define dso_local <2 x i2> @_Z25TestBitIntVector2x2AllocaDv2_DU2_S0_(<2 x i2> %{{.+}}, <2 x i2> %{{.+}})
-  // LIN32: %[[V1Addr:.+]] = alloca <2 x i2>, align 2
-  // LIN32: %[[V2Addr:.+]] = alloca <2 x i2>, align 2
+  // LIN32: %[[V1Addr:.+]] = alloca <2 x i2>, align 1
+  // LIN32: %[[V2Addr:.+]] = alloca <2 x i2>, align 1
   // LIN32: ret <2 x i2> %[[Ret:.+]]
 
   // WIN: define dso_local <2 x i2> @"?TestBitIntVector2x2Alloca@@YAT?$__vector@U?$_UBitInt@$01@__clang@@$01@__clang@@T12@0@Z"(<2 x i2>{{.*}}, <2 x i2>{{.*}})
-  // WIN: %[[V1:.+]] = alloca <2 x i2>, align 2
-  // WIN: %[[V2:.+]] = alloca <2 x i2>, align 2
+  // WIN: %[[V1:.+]] = alloca <2 x i2>, align 1
+  // WIN: %[[V2:.+]] = alloca <2 x i2>, align 1
   // WIN: ret <2 x i2> %[[Ret:.+]]
   return v1 + v2;
 }
diff --git a/clang/test/CodeGenCXX/matrix-vector-bit-int.cpp b/clang/test/CodeGenCXX/matrix-vector-bit-int.cpp
index 2e7531b334ecb..98b868fcd5bc2 100644
--- a/clang/test/CodeGenCXX/matrix-vector-bit-int.cpp
+++ b/clang/test/CodeGenCXX/matrix-vector-bit-int.cpp
@@ -70,27 +70,25 @@ i512x3 v3(i512x3 a) {
   return a + a;
 }
 
-// CHECK-LABEL: define dso_local i32 @_Z2v4Dv3_DB4_(
-// CHECK-SAME: i32 [[A_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-LABEL: define dso_local i16 @_Z2v4Dv3_DB4_(
+// CHECK-SAME: i16 [[A_COERCE:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <3 x i4>, align 4
-// CHECK-NEXT:    [[A:%.*]] = alloca <3 x i4>, align 4
-// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <3 x i4>, align 4
-// CHECK-NEXT:    [[RETVAL_COERCE:%.*]] = alloca i32, align 4
-// CHECK-NEXT:    store i32 [[A_COERCE]], ptr [[A]], align 4
-// CHECK-NEXT:    [[LOADVECN:%.*]] = load <4 x i4>, ptr [[A]], align 4
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <3 x i4>, align 2
+// CHECK-NEXT:    [[A:%.*]] = alloca <3 x i4>, align 2
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <3 x i4>, align 2
+// CHECK-NEXT:    store i16 [[A_COERCE]], ptr [[A]], align 2
+// CHECK-NEXT:    [[LOADVECN:%.*]] = load <4 x i4>, ptr [[A]], align 2
 // CHECK-NEXT:    [[A1:%.*]] = shufflevector <4 x i4> [[LOADVECN]], <4 x i4> poison, <3 x i32> <i32 0, i32 1, i32 2>
 // CHECK-NEXT:    [[EXTRACTVEC:%.*]] = shufflevector <3 x i4> [[A1]], <3 x i4> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
-// CHECK-NEXT:    store <4 x i4> [[EXTRACTVEC]], ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    [[LOADVECN2:%.*]] = load <4 x i4>, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store <4 x i4> [[EXTRACTVEC]], ptr [[A_ADDR]], align 2
+// CHECK-NEXT:    [[LOADVECN2:%.*]] = load <4 x i4>, ptr [[A_ADDR]], align 2
 // CHECK-NEXT:    [[EXTRACTVEC3:%.*]] = shufflevector <4 x i4> [[LOADVECN2]], <4 x i4> poison, <3 x i32> <i32 0, i32 1, i32 2>
-// CHECK-NEXT:    [[LOADVECN4:%.*]] = load <4 x i4>, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[LOADVECN4:%.*]] = load <4 x i4>, ptr [[A_ADDR]], align 2
 // CHECK-NEXT:    [[EXTRACTVEC5:%.*]] = shufflevector <4 x i4> [[LOADVECN4]], <4 x i4> poison, <3 x i32> <i32 0, i32 1, i32 2>
 // CHECK-NEXT:    [[ADD:%.*]] = add <3 x i4> [[EXTRACTVEC3]], [[EXTRACTVEC5]]
-// CHECK-NEXT:    store <3 x i4> [[ADD]], ptr [[RETVAL]], align 4
-// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[RETVAL_COERCE]], ptr align 4 [[RETVAL]], i64 2, i1 false)
-// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[RETVAL_COERCE]], align 4
-// CHECK-NEXT:    ret i32 [[TMP0]]
+// CHECK-NEXT:    store <3 x i4> [[ADD]], ptr [[RETVAL]], align 2
+// CHECK-NEXT:    [[TMP0:%.*]] = load i16, ptr [[RETVAL]], align 2
+// CHECK-NEXT:    ret i16 [[TMP0]]
 //
 i4x3 v4(i4x3 a) {
   return a + a;
diff --git a/clang/test/SemaCXX/constexpr-builtin-bit-cast.cpp b/clang/test/SemaCXX/constexpr-builtin-bit-cast.cpp
index 7a6d7cb353158..893abf0636a5d 100644
--- a/clang/test/SemaCXX/constexpr-builtin-bit-cast.cpp
+++ b/clang/test/SemaCXX/constexpr-builtin-bit-cast.cpp
@@ -510,6 +510,56 @@ constexpr bool9 bad_short_to_bool9 = __builtin_bit_cast(bool9, static_cast<unsigned short>(0));
 
+template <unsigned Bits, unsigned N>
+using packed_vec_t = _BitInt(Bits) __attribute__((ext_vector_type(N)));
+
+static_assert(round_trip<packed_vec_t<2, 4>>(static_cast<unsigned char>(0)), "");
+static_assert(round_trip<packed_vec_t<2, 4>>(static_cast<unsigned char>(1)), "");
+static_assert(round_trip<packed_vec_t<2, 4>>(static_cast<unsigned char>(0x55)), "");
+static_assert(round_trip<packed_vec_t<2, 8>>(static_cast<unsigned short>(0)), "");
+static_assert(round_trip<packed_vec_t<2, 8>>(static_cast<unsigned short>(-1)), "");
+static_assert(round_trip<packed_vec_t<2, 8>>(static_cast<unsigned short>(0x5555)), "");
+
+static_assert(bit_cast<unsigned char>(packed_vec_t<2, 4>{1, -2, 0, -1}) == (LITTLE_END ? 0xC9 : 0x63), "");
+static_assert(bit_cast<unsigned short>(packed_vec_t<2, 8>{1, -2, 0, -1, -2, -1, 1, 0}) == (LITTLE_END ? 0x1EC9 : 0x63B4), "");
+
+static_assert(round_trip<packed_vec_t<4, 2>>(static_cast<unsigned char>(0)), "");
+static_assert(round_trip<packed_vec_t<4, 2>>(static_cast<unsigned char>(1)), "");
+static_assert(round_trip<packed_vec_t<4, 2>>(static_cast<unsigned char>(0x55)), "");
+static_assert(round_trip<packed_vec_t<4, 4>>(static_cast<unsigned short>(0)), "");
+static_assert(round_trip<packed_vec_t<4, 4>>(static_cast<unsigned short>(-1)), "");
+static_assert(round_trip<packed_vec_t<4, 4>>(static_cast<unsigned short>(0x5555)), "");
+
+static_assert(bit_cast<unsigned char>(packed_vec_t<4, 2>{-4, -7}) == (LITTLE_END ? 0x9C : 0xC9), "");
+static_assert(bit_cast<unsigned short>(packed_vec_t<4, 4>{3, -5, -1, 7}) == (LITTLE_END ? 0x7FB3 : 0x3BF7), "");
+
+// expected-error@+2 {{constexpr variable 'bad_packed_vec_2_3_to_char' must be initialized by a constant expression}}
+// expected-note@+1 {{bit_cast involving type '_BitInt(2) __attribute__((ext_vector_type(3)))' (vector of 3 '_BitInt(2)' values) is not allowed in a constant expression; element size 2 * element count 3 is not a multiple of the byte size 8}}
+constexpr unsigned char bad_packed_vec_2_3_to_char = __builtin_bit_cast(unsigned char, packed_vec_t<2, 3>{1, 0, 1});
+// expected-error@+2 {{constexpr variable 'bad_char_to_packed_vec_2_3' must be initialized by a constant expression}}
+// expected-note@+1 {{bit_cast involving type '_BitInt(2) __attribute__((ext_vector_type(3)))' (vector of 3 '_BitInt(2)' values) is not allowed in a constant expression; element size 2 * element count 3 is not a multiple of the byte size 8}}
+constexpr packed_vec_t<2, 3> bad_char_to_packed_vec_2_3 = __builtin_bit_cast(packed_vec_t<2, 3>, static_cast<unsigned char>(0));
+
+// expected-error@+2 {{constexpr variable 'bad_packed_vec_2_6_to_short' must be initialized by a constant expression}}
+// expected-note@+1 {{bit_cast involving type '_BitInt(2) __attribute__((ext_vector_type(6)))' (vector of 6 '_BitInt(2)' values) is not allowed in a constant expression; element size 2 * element count 6 is not a multiple of the byte size 8}}
+constexpr unsigned short bad_packed_vec_2_6_to_short = __builtin_bit_cast(unsigned short, packed_vec_t<2, 6>{1, 0, 1});
+// expected-error@+2 {{constexpr variable 'bad_short_to_packed_vec_2_6' must be initialized by a constant expression}}
+// expected-note@+1 {{bit_cast involving type '_BitInt(2) __attribute__((ext_vector_type(6)))' (vector of 6 '_BitInt(2)' values) is not allowed in a constant expression; element size 2 * element count 6 is not a multiple of the byte size 8}}
+constexpr packed_vec_t<2, 6> bad_short_to_packed_vec_2_6 = __builtin_bit_cast(packed_vec_t<2, 6>, static_cast<unsigned short>(0));
+
+// expected-error@+2 {{constexpr variable 'bad_packed_vec_4_3_to_short' must be initialized by a constant expression}}
+// expected-note@+1 {{bit_cast involving type '_BitInt(4) __attribute__((ext_vector_type(3)))' (vector of 3 '_BitInt(4)' values) is not allowed in a constant expression; element size 4 * element count 3 is not a multiple of the byte size 8}}
+constexpr unsigned short bad_packed_vec_4_3_to_short = __builtin_bit_cast(unsigned short, packed_vec_t<4, 3>{1, 0, 1});
+// expected-error@+2 {{constexpr variable 'bad_short_to_packed_vec_4_3' must be initialized by a constant expression}}
+// expected-note@+1 {{bit_cast involving type '_BitInt(4) __attribute__((ext_vector_type(3)))' (vector of 3 '_BitInt(4)' values) is not allowed in a constant expression; element size 4 * element count 3 is not a multiple of the byte size 8}}
+constexpr packed_vec_t<4, 3> bad_short_to_packed_vec_4_3 = __builtin_bit_cast(packed_vec_t<4, 3>, static_cast<unsigned short>(0));
+
+// expected-error@+2 {{constexpr variable 'bad_packed_vec_4_5_to_int' must be initialized by a constant expression}}
+// expected-note@+1 {{bit_cast involving type '_BitInt(4) __attribute__((ext_vector_type(5)))' (vector of 5 '_BitInt(4)' values) is not allowed in a constant expression; element size 4 * element count 5 is not a multiple of the byte size 8}}
+constexpr unsigned int bad_packed_vec_4_5_to_int = __builtin_bit_cast(unsigned int, packed_vec_t<4, 5>{1, 0, 1});
+// expected-error@+2 {{constexpr variable 'bad_int_to_packed_vec_4_5' must be initialized by a constant expression}}
+// expected-note@+1 {{bit_cast involving type '_BitInt(4) __attribute__((ext_vector_type(5)))' (vector of 5 '_BitInt(4)' values) is not allowed in a constant expression; element size 4 * element count 5 is not a multiple of the byte size 8}}
+constexpr packed_vec_t<4, 5> bad_int_to_packed_vec_4_5 = __builtin_bit_cast(packed_vec_t<4, 5>, static_cast<unsigned int>(0));
+
 }
 
 namespace test_complex {
diff --git a/clang/test/SemaCXX/ext-int.cpp b/clang/test/SemaCXX/ext-int.cpp
index 5c566dafed931..d3b72761402d0 100644
--- a/clang/test/SemaCXX/ext-int.cpp
+++ b/clang/test/SemaCXX/ext-int.cpp
@@ -293,3 +293,33 @@ void FromPaper1() {
 void FromPaper2(_BitInt(8) a1, _BitInt(24) a2) {
   static_assert(is_same<decltype(a1 + a2), _BitInt(24)>::value, "");
 }
+
+// Check sub-byte integer vector size and alignment, expecting packing.
+template <unsigned Bits, unsigned N>
+using packed_vec_t = _BitInt(Bits) __attribute__((ext_vector_type(N)));
+void SubByteVecPacking() {
+  static_assert(sizeof(packed_vec_t<2, 2>) == 1);
+  static_assert(sizeof(packed_vec_t<2, 3>) == 1);
+  static_assert(sizeof(packed_vec_t<2, 4>) == 1);
+  static_assert(sizeof(packed_vec_t<2, 8>) == 2);
+  static_assert(sizeof(packed_vec_t<2, 16>) == 4);
+  static_assert(sizeof(packed_vec_t<2, 32>) == 8);
+  static_assert(sizeof(packed_vec_t<4, 2>) == 1);
+  static_assert(sizeof(packed_vec_t<4, 4>) == 2);
+  static_assert(sizeof(packed_vec_t<4, 8>) == 4);
+  static_assert(sizeof(packed_vec_t<4, 16>) == 8);
+  static_assert(sizeof(packed_vec_t<4, 32>) == 16);
+
+  static_assert(alignof(packed_vec_t<2, 2>) == 1);
+  static_assert(alignof(packed_vec_t<2, 3>) == 1);
+  static_assert(alignof(packed_vec_t<2, 4>) == 1);
+  static_assert(alignof(packed_vec_t<2, 8>) == 2);
+  static_assert(alignof(packed_vec_t<2, 16>) == 4);
+  static_assert(alignof(packed_vec_t<2, 32>) == 8);
+  static_assert(alignof(packed_vec_t<4, 2>) == 1);
+  static_assert(alignof(packed_vec_t<4, 3>) == 2);
+  static_assert(alignof(packed_vec_t<4, 4>) == 2);
+  static_assert(alignof(packed_vec_t<4, 8>) == 4);
+  static_assert(alignof(packed_vec_t<4, 16>) == 8);
+  static_assert(alignof(packed_vec_t<4, 32>) == 16);
+}
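
Illustrative sketch, not part of the patch: assuming a little-endian target, the packed layout introduced above makes a sub-byte _BitInt vector round-trip through __builtin_bit_cast in a constant expression. The typedef name i4x2 and the expected byte 0xB3 below are hypothetical, chosen only to mirror the nibble packing exercised by the constexpr-builtin-bit-cast.cpp tests in this diff.

// Sketch only: with this patch, _BitInt(4) elements are packed two per byte.
typedef _BitInt(4) i4x2 __attribute__((ext_vector_type(2)));

// sizeof(i4x2) is now 1, so a bit_cast to unsigned char is well-formed.
// On a little-endian target, element 0 lands in the low nibble (3 == 0x3)
// and element 1 in the high nibble (-5 == 0xB), giving the byte 0xB3.
constexpr unsigned char PackedBits =
    __builtin_bit_cast(unsigned char, i4x2{3, -5});
static_assert(PackedBits == 0xB3, "");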