Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions clang/lib/AST/ASTContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2016,8 +2016,9 @@ TypeInfo ASTContext::getTypeInfoImpl(const Type *T) const {
case Type::Vector: {
const auto *VT = cast<VectorType>(T);
TypeInfo EltInfo = getTypeInfo(VT->getElementType());
Width = VT->isExtVectorBoolType() ? VT->getNumElements()
: EltInfo.Width * VT->getNumElements();
Width = (VT->isExtVectorBoolType() && !getLangOpts().HLSL)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we should add VectorType::isPackedBoolType() or something like that? Then we can refactor the code so it doesn't explicitly check for HLSL all over the place.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will investigate doing this because I also don't like all the special case checking for HLSL.

? VT->getNumElements()
: EltInfo.Width * VT->getNumElements();
// Enforce at least byte size and alignment.
Width = std::max<unsigned>(8, Width);
Align = std::max<unsigned>(8, Width);
Expand Down
32 changes: 31 additions & 1 deletion clang/lib/CodeGen/CGExpr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1986,6 +1986,10 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile,
if (const auto *ClangVecTy = Ty->getAs<VectorType>()) {
// Boolean vectors use `iN` as storage type.
if (ClangVecTy->isExtVectorBoolType()) {
if (getLangOpts().HLSL) {
llvm::Value *Value = Builder.CreateLoad(Addr, Volatile, "load_boolvec");
return EmitFromMemory(Value, Ty);
}
llvm::Type *ValTy = ConvertType(Ty);
unsigned ValNumElems =
cast<llvm::FixedVectorType>(ValTy)->getNumElements();
Expand Down Expand Up @@ -2064,6 +2068,9 @@ llvm::Value *CodeGenFunction::EmitToMemory(llvm::Value *Value, QualType Ty) {

if (Ty->isExtVectorBoolType()) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You replaced a bunch of isExtVectorBoolType() calls with isPackedVectorBoolType(). We are only making HLSL modifications at the isExtVectorBoolType() sites, but isExtVectorBoolType() doesn't mean the vector is not packed. Are the zero extension and truncation there to get them into a form that will unpack?

Copy link
Contributor Author

@spall spall Feb 12, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My intention was to replace calls to 'isExtVectorBoolType' with 'isPackedVectorBoolType' anywhere we want an HLSL boolean vector to follow the normal handling path for vectors; hopefully reviewers will verify I got this right.
Here we can't follow the normal vector path because it returns the value unchanged, and we need to convert a vector of i1s to a vector of i32s, which is why we zero-extend here. The normal "boolean vector packing" does something different.

llvm::Type *StoreTy = convertTypeForLoadStore(Ty, Value->getType());
if (getLangOpts().HLSL)
return Builder.CreateZExt(Value, StoreTy);

// Expand to the memory bit width.
unsigned MemNumElems = StoreTy->getPrimitiveSizeInBits();
// <N x i1> --> <P x i1>.
Expand All @@ -2081,6 +2088,9 @@ llvm::Value *CodeGenFunction::EmitToMemory(llvm::Value *Value, QualType Ty) {
llvm::Value *CodeGenFunction::EmitFromMemory(llvm::Value *Value, QualType Ty) {
if (Ty->isExtVectorBoolType()) {
const auto *RawIntTy = Value->getType();
if (getLangOpts().HLSL)
return Builder.CreateTrunc(Value, ConvertType(Ty), "loadedv");

// Bitcast iP --> <P x i1>.
auto *PaddedVecTy = llvm::FixedVectorType::get(
Builder.getInt1Ty(), RawIntTy->getPrimitiveSizeInBits());
Expand Down Expand Up @@ -2343,7 +2353,13 @@ RValue CodeGenFunction::EmitLoadOfExtVectorElementLValue(LValue LV) {
if (!ExprVT) {
unsigned InIdx = getAccessedFieldNo(0, Elts);
llvm::Value *Elt = llvm::ConstantInt::get(SizeTy, InIdx);
return RValue::get(Builder.CreateExtractElement(Vec, Elt));

llvm::Value *Element = Builder.CreateExtractElement(Vec, Elt);

if (getLangOpts().HLSL && LV.getType()->isBooleanType())
Element = Builder.CreateTrunc(Element, ConvertType(LV.getType()));

return RValue::get(Element);
}

// Always use shuffle vector to try to retain the original program structure
Expand All @@ -2354,6 +2370,10 @@ RValue CodeGenFunction::EmitLoadOfExtVectorElementLValue(LValue LV) {
Mask.push_back(getAccessedFieldNo(i, Elts));

Vec = Builder.CreateShuffleVector(Vec, Mask);

if (getLangOpts().HLSL && LV.getType()->isExtVectorBoolType())
Vec = EmitFromMemory(Vec, LV.getType());

return RValue::get(Vec);
}

Expand Down Expand Up @@ -2407,6 +2427,12 @@ void CodeGenFunction::EmitStoreThroughLValue(RValue Src, LValue Dst,
// Read/modify/write the vector, inserting the new element.
llvm::Value *Vec = Builder.CreateLoad(Dst.getVectorAddress(),
Dst.isVolatileQualified());
llvm::Type *OldVecTy = Vec->getType();
if (getLangOpts().HLSL && Dst.getType()->isExtVectorBoolType())

Vec =
Builder.CreateTrunc(Vec, ConvertType(Dst.getType()), "truncboolv");

auto *IRStoreTy = dyn_cast<llvm::IntegerType>(Vec->getType());
if (IRStoreTy) {
auto *IRVecTy = llvm::FixedVectorType::get(
Expand All @@ -2420,6 +2446,10 @@ void CodeGenFunction::EmitStoreThroughLValue(RValue Src, LValue Dst,
// <N x i1> --> <iN>.
Vec = Builder.CreateBitCast(Vec, IRStoreTy);
}

if (getLangOpts().HLSL && Dst.getType()->isExtVectorBoolType())
Vec = Builder.CreateZExt(Vec, OldVecTy);

Builder.CreateStore(Vec, Dst.getVectorAddress(),
Dst.isVolatileQualified());
return;
Expand Down
9 changes: 9 additions & 0 deletions clang/lib/CodeGen/CGExprConstant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1984,6 +1984,15 @@ llvm::Constant *ConstantEmitter::emitForMemory(CodeGenModule &CGM,
return Res;
}

// In HLSL bool vectors are stored in memory as a vector of i32
if (destType->isExtVectorBoolType() && CGM.getContext().getLangOpts().HLSL) {
llvm::Type *boolVecTy = CGM.getTypes().ConvertTypeForMem(destType);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need special handling for non-hlsl ext-bool-vector types?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't believe so. This code is necessary for HLSL because the constant 'C' is a <2 x i1>, but HLSL needs it to be a <2 x i32>, hence the zero extension. For non-HLSL vectors the expected form is <2 x i1>, so nothing needs to be done.
https://hlsl.godbolt.org/z/nsb6jd1vn

llvm::Constant *Res = llvm::ConstantFoldCastOperand(
llvm::Instruction::ZExt, C, boolVecTy, CGM.getDataLayout());
assert(Res && "Constant folding must succeed");
return Res;
}

if (destType->isBitIntType()) {
ConstantAggregateBuilder Builder(CGM);
llvm::Type *LoadStoreTy = CGM.getTypes().convertTypeForLoadStore(destType);
Expand Down
6 changes: 6 additions & 0 deletions clang/lib/CodeGen/CodeGenTypes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,12 @@ llvm::Type *CodeGenTypes::ConvertTypeForMem(QualType T) {
// Check for the boolean vector case.
if (T->isExtVectorBoolType()) {
auto *FixedVT = cast<llvm::FixedVectorType>(R);

if (Context.getLangOpts().HLSL) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This could use the hasPackedBoolVectors accessor I suggested elsewhere.

llvm::Type *IRElemTy = ConvertTypeForMem(Context.BoolTy);
return llvm::FixedVectorType::get(IRElemTy, FixedVT->getNumElements());
}

// Pad to at least one byte.
uint64_t BytePadded = std::max<uint64_t>(FixedVT->getNumElements(), 8);
return llvm::IntegerType::get(FixedVT->getContext(), BytePadded);
Expand Down
2 changes: 1 addition & 1 deletion clang/lib/Sema/SemaExprMember.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1697,7 +1697,7 @@ static ExprResult LookupMemberExpr(Sema &S, LookupResult &R,
QualType(), false);
}

if (BaseType->isExtVectorBoolType()) {
if (BaseType->isExtVectorBoolType() && !S.Context.getLangOpts().HLSL) {
// We disallow element access for ext_vector_type bool. There is no way to
// materialize a reference to a vector element as a pointer (each element is
// one bit in the vector).
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,28 +91,26 @@ void l4_to_i2() {

// CHECK-LABEL: i2_to_b2
// CHECK: [[l2:%.*]] = alloca <2 x i32>
// CHECK: [[b2:%.*]] = alloca i8
// CHECK: [[b2:%.*]] = alloca <2 x i32>
// CHECK: store <2 x i32> splat (i32 8), ptr [[i2]]
// CHECK: [[veci2:%.*]] = load <2 x i32>, ptr [[i2]]
// CHECK: [[vecb2:%.*]] = icmp ne <2 x i32> [[veci2]], zeroinitializer
// CHECK: [[vecb8:%.*]] = shufflevector <2 x i1> [[vecb2]], <2 x i1> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
// CHECK: [[i8:%.*]] = bitcast <8 x i1> [[vecb8]] to i8
// CHECK: store i8 [[i8]], ptr [[b2]]
// CHECK: [[vecb8:%.*]] = zext <2 x i1> [[vecb2]] to <2 x i32>
// CHECK: store <2 x i32> [[vecb8]], ptr [[b2]]
// Test case: initializes a 2-element int vector from the scalar 8 and then
// initializes a 2-element bool vector from it, exercising the int-vector to
// bool-vector conversion matched by the CHECK lines above.
void i2_to_b2() {
vector<int, 2> i2 = 8;
vector<bool, 2> b2 = i2;
}

// CHECK-LABEL: d4_to_b2
// CHECK: [[d4:%.*]] = alloca <4 x double>
// CHECK: [[b2:%.*]] = alloca i8
// CHECK: [[b2:%.*]] = alloca <2 x i32>
// CHECK: store <4 x double> splat (double 9.000000e+00), ptr [[d4]]
// CHECK: [[vecd4:%.*]] = load <4 x double>, ptr [[d4]]
// CHECK: [[vecb4:%.*]] = fcmp reassoc nnan ninf nsz arcp afn une <4 x double> [[vecd4]], zeroinitializer
// CHECK: [[vecd2:%.*]] = shufflevector <4 x i1> [[vecb4]], <4 x i1> poison, <2 x i32> <i32 0, i32 1>
// CHECK: [[vecb8:%.*]] = shufflevector <2 x i1> [[vecd2]], <2 x i1> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
// CHECK: [[i8:%.*]] = bitcast <8 x i1> [[vecb8]] to i8
// CHECK: store i8 [[i8]], ptr [[b2]]
// CHECK: [[vecb8:%.*]] = zext <2 x i1> [[vecd2]] to <2 x i32>
// CHECK: store <2 x i32> [[vecb8]], ptr [[b2]]
void d4_to_b2() {
vector<double,4> d4 = 9.0;
vector<bool, 2> b2 = d4;
Expand Down
52 changes: 52 additions & 0 deletions clang/test/CodeGenHLSL/BoolVector.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s

// CHECK: %struct.S = type { <2 x i32>, float }
// CHECK: [[ConstS:@.*]] = private unnamed_addr constant %struct.S { <2 x i32> splat (i32 1), float 1.000000e+00 }, align 8
// Aggregate holding a bool2 followed by a float. The CHECK lines above expect
// the bool2 member to appear as <2 x i32> in the IR struct type.
struct S {
bool2 bv;
float f;
};

// CHECK-LABEL: define noundef i1 {{.*}}fn1{{.*}}
// CHECK: [[B:%.*]] = alloca <2 x i32>, align 8
// CHECK-NEXT: store <2 x i32> splat (i32 1), ptr [[B]], align 8
// CHECK-NEXT: [[BoolVec:%.*]] = load <2 x i32>, ptr [[B]], align 8
// CHECK-NEXT: [[L:%.*]] = trunc <2 x i32> [[BoolVec:%.*]] to <2 x i1>
// CHECK-NEXT: [[VecExt:%.*]] = extractelement <2 x i1> [[L]], i32 0
// CHECK-NEXT: ret i1 [[VecExt]]
// Test case: builds a local bool2 from {true, true} and returns element 0.
// The CHECK lines above expect the local to be a <2 x i32> alloca whose loaded
// value is truncated to <2 x i1> before the extractelement.
bool fn1() {
bool2 B = {true,true};
return B[0];
}

// CHECK-LABEL: define noundef <2 x i1> {{.*}}fn2{{.*}}
// CHECK: [[VAddr:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[A:%.*]] = alloca <2 x i32>, align 8
// CHECK-NEXT: [[StoreV:%.*]] = zext i1 {{.*}} to i32
// CHECK-NEXT: store i32 [[StoreV]], ptr [[VAddr]], align 4
// CHECK-NEXT: [[L:%.*]] = load i32, ptr [[VAddr]], align 4
// CHECK-NEXT: [[LoadV:%.*]] = trunc i32 [[L]] to i1
// CHECK-NEXT: [[Vec:%.*]] = insertelement <2 x i1> poison, i1 [[LoadV]], i32 0
// CHECK-NEXT: [[Vec1:%.*]] = insertelement <2 x i1> [[Vec]], i1 true, i32 1
// CHECK-NEXT: [[Z:%.*]] = zext <2 x i1> [[Vec1]] to <2 x i32>
// CHECK-NEXT: store <2 x i32> [[Z]], ptr [[A]], align 8
// CHECK-NEXT: [[LoadBV:%.*]] = load <2 x i32>, ptr [[A]], align 8
// CHECK-NEXT: [[LoadV2:%.*]] = trunc <2 x i32> [[LoadBV]] to <2 x i1>
// CHECK-NEXT: ret <2 x i1> [[LoadV2]]
// Test case: builds a bool2 from the parameter V and the literal true, then
// returns the whole vector. The CHECK lines above expect a zext to <2 x i32>
// before the store and a trunc back to <2 x i1> after the load.
bool2 fn2(bool V) {
bool2 A = {V,true};
return A;
}

// CHECK-LABEL: define noundef i1 {{.*}}fn3{{.*}}
// CHECK: [[s:%.*]] = alloca %struct.S, align 8
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[s]], ptr align 8 [[ConstS]], i32 16, i1 false)
// CHECK-NEXT: [[BV:%.*]] = getelementptr inbounds nuw %struct.S, ptr [[s]], i32 0, i32 0
// CHECK-NEXT: [[LBV:%.*]] = load <2 x i32>, ptr [[BV]], align 8
// CHECK-NEXT: [[LV:%.*]] = trunc <2 x i32> [[LBV]] to <2 x i1>
// CHECK-NEXT: [[VX:%.*]] = extractelement <2 x i1> [[LV]], i32 0
// CHECK-NEXT: ret i1 [[VX]]
// Test case: initializes an S from constants and returns element 0 of its
// bool2 member. The CHECK lines above expect the struct to be copied from a
// private constant and the member to be loaded as <2 x i32>, then truncated
// to <2 x i1> before the extractelement.
bool fn3() {
S s = {{true,true}, 1.0};
return s.bv[0];
}
79 changes: 77 additions & 2 deletions clang/test/CodeGenHLSL/builtins/ScalarSwizzles.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,23 @@ int2 ToTwoInts(int V){
}

// CHECK-LABEL: ToFourFloats
// [[splat:%.*]] = insertelement <1 x float> poison, float {{.*}}, i64 0
// [[vec4:%.*]] = shufflevector <1 x float> [[splat]], <1 x float> poison, <4 x i32> zeroinitializer
// CHECK: [[splat:%.*]] = insertelement <1 x float> poison, float {{.*}}, i64 0
// CHECK: [[vec4:%.*]] = shufflevector <1 x float> [[splat]], <1 x float> poison, <4 x i32> zeroinitializer
// CHECK: ret <4 x float> [[vec4]]
// Test case: applies the .rrrr swizzle to a scalar float to produce a float4.
float4 ToFourFloats(float V){
return V.rrrr;
}

// CHECK-LABEL: ToFourBools
// CHECK: {{%.*}} = zext i1 {{.*}} to i32
// CHECK: [[splat:%.*]] = insertelement <1 x i32> poison, i32 {{.*}}, i64 0
// CHECK-NEXT: [[vec4:%.*]] = shufflevector <1 x i32> [[splat]], <1 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT: [[vec2Ret:%.*]] = trunc <4 x i32> [[vec4]] to <4 x i1>
// CHECK-NEXT: ret <4 x i1> [[vec2Ret]]
// Test case: applies the .rrrr swizzle to a scalar bool to produce a bool4.
// The CHECK lines above expect the splat and shuffle to operate on i32
// elements, with a trunc to <4 x i1> just before the return.
bool4 ToFourBools(bool V) {
return V.rrrr;
}

// CHECK-LABEL: FillOne
// CHECK: [[vec1Ptr:%.*]] = alloca <1 x i32>, align 4
// CHECK: store <1 x i32> splat (i32 1), ptr [[vec1Ptr]], align 4
Expand Down Expand Up @@ -93,6 +103,17 @@ vector<float, 1> FillOneHalfFloat(){
return .5f.r;
}

// CHECK-LABEL: FillTrue
// CHECK: [[Tmp:%.*]] = alloca <1 x i32>, align 1
// CHECK-NEXT: store <1 x i1> splat (i1 true), ptr [[Tmp]], align 1
// CHECK-NEXT: [[Vec1:%.*]] = load <1 x i32>, ptr [[Tmp]], align 1
// CHECK-NEXT: [[Vec2:%.*]] = shufflevector <1 x i32> [[Vec1]], <1 x i32> poison, <2 x i32> zeroinitializer
// CHECK-NEXT: [[Vec2Ret:%.*]] = trunc <2 x i32> [[Vec2]] to <2 x i1>
// CHECK-NEXT: ret <2 x i1> [[Vec2Ret]]
// Test case: applies the .xx swizzle to the literal true to produce a bool2.
// The CHECK lines above expect a <1 x i32> temporary that is loaded, shuffled
// to <2 x i32>, and truncated to <2 x i1> for the return.
bool2 FillTrue() {
return true.xx;
}

// The initial codegen for this case is correct but a bit odd. The IR optimizer
// cleans this up very nicely.

Expand All @@ -110,6 +131,24 @@ float2 HowManyFloats(float V) {
return V.rr.rr;
}

// CHECK-LABEL: HowManyBools
// CHECK: [[VAddr:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[Vec2Ptr:%.*]] = alloca <2 x i32>, align 1
// CHECK-NEXT: [[Tmp:%.*]] = zext i1 {{.*}} to i32
// CHECK-NEXT: store i32 [[Tmp]], ptr [[VAddr]], align 4
// CHECK-NEXT: [[VVal:%.*]] = load i32, ptr [[VAddr]], align 4
// CHECK-NEXT: [[Splat:%.*]] = insertelement <1 x i32> poison, i32 [[VVal]], i64 0
// CHECK-NEXT: [[Vec2:%.*]] = shufflevector <1 x i32> [[Splat]], <1 x i32> poison, <2 x i32> zeroinitializer
// CHECK-NEXT: [[Trunc:%.*]] = trunc <2 x i32> [[Vec2]] to <2 x i1>
// CHECK-NEXT: store <2 x i1> [[Trunc]], ptr [[Vec2Ptr]], align 1
// CHECK-NEXT: [[V2:%.*]] = load <2 x i32>, ptr [[Vec2Ptr]], align 1
// CHECK-NEXT: [[V3:%.*]] = shufflevector <2 x i32> [[V2]], <2 x i32> poison, <2 x i32> zeroinitializer
// CHECK-NEXT: [[LV1:%.*]] = trunc <2 x i32> [[V3]] to <2 x i1>
// CHECK-NEXT: ret <2 x i1> [[LV1]]
// Test case: applies the .rr swizzle twice in a row to a scalar bool. The
// CHECK lines above expect two shufflevector steps on i32 element vectors,
// each followed by a trunc to <2 x i1>.
bool2 HowManyBools(bool V) {
return V.rr.rr;
}

// This codegen is gnarly because `1.l` is a double, so this creates double
// vectors that need to be truncated down to floats. The optimizer cleans this
// up nicely too.
Expand Down Expand Up @@ -166,3 +205,39 @@ int AssignInt(int V){
X.x = V.x + V.x;
return X;
}

// CHECK-LABEL: AssignBool
// CHECK: [[VAddr:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[XAddr:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[Zext:%.*]] = zext i1 %V to i32
// CHECK-NEXT: store i32 [[Zext]], ptr [[VAddr]], align 4
// CHECK-NEXT: [[X:%.*]] = load i32, ptr [[VAddr]], align 4
// CHECK-NEXT: [[Splat:%.*]] = insertelement <1 x i32> poison, i32 [[X]], i64 0
// CHECK-NEXT: [[Y:%.*]] = extractelement <1 x i32> [[Splat]], i32 0
// CHECK-NEXT: [[Z:%.*]] = trunc i32 [[Y]] to i1
// CHECK-NEXT: [[A:%.*]] = zext i1 [[Z]] to i32
// CHECK-NEXT: store i32 [[A]], ptr [[XAddr]], align 4
// CHECK-NEXT: [[B:%.*]] = load i32, ptr [[VAddr]], align 4
// CHECK-NEXT: [[Splat2:%.*]] = insertelement <1 x i32> poison, i32 [[B]], i64 0
// CHECK-NEXT: [[C:%.*]] = extractelement <1 x i32> [[Splat2]], i32 0
// CHECK-NEXT: [[D:%.*]] = trunc i32 [[C]] to i1
// CHECK-NEXT: br i1 [[D]], label %lor.end, label %lor.rhs

// CHECK: lor.rhs:
// CHECK-NEXT: [[E:%.*]] = load i32, ptr [[VAddr]], align 4
// CHECK-NEXT: [[Splat3:%.*]] = insertelement <1 x i32> poison, i32 [[E]], i64 0
// CHECK-NEXT: [[F:%.*]] = extractelement <1 x i32> [[Splat3]], i32 0
// CHECK-NEXT: [[G:%.*]] = trunc i32 [[F]] to i1
// CHECK-NEXT: br label %lor.end

// CHECK: lor.end:
// CHECK-NEXT: [[H:%.*]] = phi i1 [ true, %entry ], [ [[G]], %lor.rhs ]
// CHECK-NEXT: store i1 [[H]], ptr [[XAddr]], align 4
// CHECK-NEXT: [[I:%.*]] = load i32, ptr [[XAddr]], align 4
// CHECK-NEXT: [[LoadV:%.*]] = trunc i32 [[I]] to i1
// CHECK-NEXT: ret i1 [[LoadV]]
// Test case: reads a scalar bool through the .x swizzle, then assigns through
// X.x the result of a logical-or of two such reads, and returns X. The CHECK
// lines above expect the || to lower to a branch into lor.rhs / lor.end with
// a phi selecting the result.
bool AssignBool(bool V) {
bool X = V.x;
X.x = V.x || V.x;
return X;
}