Skip to content

Commit 7a0ccdd

Browse files
committed
addressing pr comments + more needed tests
1 parent 3d22ed0 commit 7a0ccdd

File tree

3 files changed

+139
-29
lines changed

3 files changed

+139
-29
lines changed

clang/lib/CodeGen/CGExpr.cpp

Lines changed: 35 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -2064,7 +2064,8 @@ llvm::Value *CodeGenFunction::EmitToMemory(llvm::Value *Value, QualType Ty) {
20642064

20652065
if (Ty->isExtVectorBoolType()) {
20662066
llvm::Type *StoreTy = convertTypeForLoadStore(Ty, Value->getType());
2067-
if (getLangOpts().HLSL)
2067+
if (StoreTy->isVectorTy() && StoreTy->getScalarSizeInBits() >
2068+
Value->getType()->getScalarSizeInBits())
20682069
return Builder.CreateZExt(Value, StoreTy);
20692070

20702071
// Expand to the memory bit width.
@@ -2082,10 +2083,8 @@ llvm::Value *CodeGenFunction::EmitToMemory(llvm::Value *Value, QualType Ty) {
20822083
/// by convertTypeForLoadStore) to its primary IR type (as returned
20832084
/// by ConvertType).
20842085
llvm::Value *CodeGenFunction::EmitFromMemory(llvm::Value *Value, QualType Ty) {
2085-
if (Ty->isExtVectorBoolType()) {
2086+
if (Ty->isPackedVectorBoolType(getContext())) {
20862087
const auto *RawIntTy = Value->getType();
2087-
if (getLangOpts().HLSL)
2088-
return Builder.CreateTrunc(Value, ConvertType(Ty), "loadedv");
20892088

20902089
// Bitcast iP --> <P x i1>.
20912090
auto *PaddedVecTy = llvm::FixedVectorType::get(
@@ -2097,10 +2096,10 @@ llvm::Value *CodeGenFunction::EmitFromMemory(llvm::Value *Value, QualType Ty) {
20972096
return emitBoolVecConversion(V, ValNumElems, "extractvec");
20982097
}
20992098

2100-
if (hasBooleanRepresentation(Ty) || Ty->isBitIntType()) {
2101-
llvm::Type *ResTy = ConvertType(Ty);
2099+
llvm::Type *ResTy = ConvertType(Ty);
2100+
if (hasBooleanRepresentation(Ty) || Ty->isBitIntType() ||
2101+
Ty->isExtVectorBoolType())
21022102
return Builder.CreateTrunc(Value, ResTy, "loadedv");
2103-
}
21042103

21052104
return Value;
21062105
}
@@ -2353,8 +2352,10 @@ RValue CodeGenFunction::EmitLoadOfExtVectorElementLValue(LValue LV) {
23532352

23542353
llvm::Value *Element = Builder.CreateExtractElement(Vec, Elt);
23552354

2356-
if (getLangOpts().HLSL && LV.getType()->isBooleanType())
2357-
Element = Builder.CreateTrunc(Element, ConvertType(LV.getType()));
2355+
llvm::Type *LVTy = ConvertType(LV.getType());
2356+
if (Element->getType()->getPrimitiveSizeInBits() >
2357+
LVTy->getPrimitiveSizeInBits())
2358+
Element = Builder.CreateTrunc(Element, LVTy);
23582359

23592360
return RValue::get(Element);
23602361
}
@@ -2368,8 +2369,8 @@ RValue CodeGenFunction::EmitLoadOfExtVectorElementLValue(LValue LV) {
23682369

23692370
Vec = Builder.CreateShuffleVector(Vec, Mask);
23702371

2371-
if (getLangOpts().HLSL && LV.getType()->isExtVectorBoolType())
2372-
Vec = EmitFromMemory(Vec, LV.getType());
2372+
if (LV.getType()->isExtVectorBoolType())
2373+
Vec = Builder.CreateTrunc(Vec, ConvertType(LV.getType()), "truncv");
23732374

23742375
return RValue::get(Vec);
23752376
}
@@ -2424,11 +2425,12 @@ void CodeGenFunction::EmitStoreThroughLValue(RValue Src, LValue Dst,
24242425
// Read/modify/write the vector, inserting the new element.
24252426
llvm::Value *Vec = Builder.CreateLoad(Dst.getVectorAddress(),
24262427
Dst.isVolatileQualified());
2427-
llvm::Type *OldVecTy = Vec->getType();
2428-
if (getLangOpts().HLSL && Dst.getType()->isExtVectorBoolType())
2428+
llvm::Type *VecTy = Vec->getType();
2429+
llvm::Value *SrcVal = Src.getScalarVal();
24292430

2430-
Vec =
2431-
Builder.CreateTrunc(Vec, ConvertType(Dst.getType()), "truncboolv");
2431+
if (SrcVal->getType()->getPrimitiveSizeInBits() <
2432+
VecTy->getScalarSizeInBits())
2433+
SrcVal = Builder.CreateZExt(SrcVal, VecTy->getScalarType());
24322434

24332435
auto *IRStoreTy = dyn_cast<llvm::IntegerType>(Vec->getType());
24342436
if (IRStoreTy) {
@@ -2437,16 +2439,13 @@ void CodeGenFunction::EmitStoreThroughLValue(RValue Src, LValue Dst,
24372439
Vec = Builder.CreateBitCast(Vec, IRVecTy);
24382440
// iN --> <N x i1>.
24392441
}
2440-
Vec = Builder.CreateInsertElement(Vec, Src.getScalarVal(),
2441-
Dst.getVectorIdx(), "vecins");
2442+
Vec = Builder.CreateInsertElement(Vec, SrcVal, Dst.getVectorIdx(),
2443+
"vecins");
24422444
if (IRStoreTy) {
24432445
// <N x i1> --> <iN>.
24442446
Vec = Builder.CreateBitCast(Vec, IRStoreTy);
24452447
}
24462448

2447-
if (getLangOpts().HLSL && Dst.getType()->isExtVectorBoolType())
2448-
Vec = Builder.CreateZExt(Vec, OldVecTy);
2449-
24502449
Builder.CreateStore(Vec, Dst.getVectorAddress(),
24512450
Dst.isVolatileQualified());
24522451
return;
@@ -2643,14 +2642,13 @@ void CodeGenFunction::EmitStoreThroughExtVectorComponentLValue(RValue Src,
26432642
// This access turns into a read/modify/write of the vector. Load the input
26442643
// value now.
26452644
llvm::Value *Vec = Builder.CreateLoad(DstAddr, Dst.isVolatileQualified());
2645+
llvm::Type *VecTy = Vec->getType();
26462646
const llvm::Constant *Elts = Dst.getExtVectorElts();
26472647

2648-
llvm::Value *SrcVal = Src.getScalarVal();
2649-
26502648
if (const VectorType *VTy = Dst.getType()->getAs<VectorType>()) {
26512649
unsigned NumSrcElts = VTy->getNumElements();
26522650
unsigned NumDstElts =
2653-
cast<llvm::FixedVectorType>(Vec->getType())->getNumElements();
2651+
cast<llvm::FixedVectorType>(VecTy)->getNumElements();
26542652
if (NumDstElts == NumSrcElts) {
26552653
// Use shuffle vector if the src and destination are the same number of
26562654
// elements and restore the vector mask since it is on the side it will be
@@ -2659,6 +2657,10 @@ void CodeGenFunction::EmitStoreThroughExtVectorComponentLValue(RValue Src,
26592657
for (unsigned i = 0; i != NumSrcElts; ++i)
26602658
Mask[getAccessedFieldNo(i, Elts)] = i;
26612659

2660+
llvm::Value *SrcVal = Src.getScalarVal();
2661+
if (VecTy->getScalarSizeInBits() > SrcVal->getType()->getScalarSizeInBits())
2662+
SrcVal = Builder.CreateZExt(SrcVal, VecTy);
2663+
26622664
Vec = Builder.CreateShuffleVector(SrcVal, Mask);
26632665
} else if (NumDstElts > NumSrcElts) {
26642666
// Extended the source vector to the same length and then shuffle it
@@ -2669,7 +2671,7 @@ void CodeGenFunction::EmitStoreThroughExtVectorComponentLValue(RValue Src,
26692671
for (unsigned i = 0; i != NumSrcElts; ++i)
26702672
ExtMask.push_back(i);
26712673
ExtMask.resize(NumDstElts, -1);
2672-
llvm::Value *ExtSrcVal = Builder.CreateShuffleVector(SrcVal, ExtMask);
2674+
llvm::Value *ExtSrcVal = Builder.CreateShuffleVector(Src.getScalarVal(), ExtMask);
26732675
// build identity
26742676
SmallVector<int, 4> Mask;
26752677
for (unsigned i = 0; i != NumDstElts; ++i)
@@ -2694,6 +2696,11 @@ void CodeGenFunction::EmitStoreThroughExtVectorComponentLValue(RValue Src,
26942696
// be updating one element.
26952697
unsigned InIdx = getAccessedFieldNo(0, Elts);
26962698
llvm::Value *Elt = llvm::ConstantInt::get(SizeTy, InIdx);
2699+
2700+
llvm::Value *SrcVal = Src.getScalarVal();
2701+
if (VecTy->getScalarSizeInBits() > SrcVal->getType()->getScalarSizeInBits())
2702+
SrcVal = Builder.CreateZExt(SrcVal, VecTy->getScalarType());
2703+
26972704
Vec = Builder.CreateInsertElement(Vec, SrcVal, Elt);
26982705
}
26992706

@@ -4718,12 +4725,11 @@ EmitExtVectorElementExpr(const ExtVectorElementExpr *E) {
47184725

47194726
// Store the vector to memory (because LValue wants an address).
47204727
Address VecMem = CreateMemTemp(E->getBase()->getType());
4721-
// To be consistent need to zero extend an hlsl boolean vector to store it
4722-
// back to memory
4728+
// Need to zero extend an HLSL boolean vector to store it back to memory.
47234729
QualType Ty = E->getBase()->getType();
4724-
if (Ty->isExtVectorBoolType() && getLangOpts().HLSL)
4725-
Vec =
4726-
Builder.CreateZExt(Vec, convertTypeForLoadStore(Ty, Vec->getType()));
4730+
llvm::Type *LTy = convertTypeForLoadStore(Ty, Vec->getType());
4731+
if (LTy->getScalarSizeInBits() > Vec->getType()->getScalarSizeInBits())
4732+
Vec = Builder.CreateZExt(Vec, LTy);
47274733
Builder.CreateStore(Vec, VecMem);
47284734
Base = MakeAddrLValue(VecMem, Ty, AlignmentSource::Decl);
47294735
}

clang/test/CodeGenHLSL/BoolVector.hlsl

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,3 +65,47 @@ bool fn4() {
6565
bool2 Arr[2] = {{true,true}, {false,false}};
6666
return Arr[0][1];
6767
}
68+
69+
// CHECK-LABEL: define void {{.*}}fn5{{.*}}
70+
// CHECK: [[Arr:%.*]] = alloca <2 x i32>, align 8
71+
// CHECK-NEXT: store <2 x i32> splat (i32 1), ptr [[Arr]], align 8
72+
// CHECK-NEXT: [[L:%.*]] = load <2 x i32>, ptr [[Arr]], align 8
73+
// CHECK-NEXT: [[V:%.*]] = insertelement <2 x i32> [[L]], i32 0, i32 1
74+
// CHECK-NEXT: store <2 x i32> [[V]], ptr [[Arr]], align 8
75+
// CHECK-NEXT: ret void
76+
void fn5() {
77+
bool2 Arr = {true,true};
78+
Arr[1] = false;
79+
}
80+
81+
// CHECK-LABEL: define void {{.*}}fn6{{.*}}
82+
// CHECK: [[V:%.*]] = alloca i32, align 4
83+
// CHECK-NEXT: [[S:%.*]] = alloca %struct.S, align 8
84+
// CHECK-NEXT: store i32 0, ptr [[V]], align 4
85+
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[S]], ptr align 8 {{.*}}, i32 16, i1 false)
86+
// CHECK-NEXT: [[Y:%.*]] = load i32, ptr [[V]], align 4
87+
// CHECK-NEXT: [[LV:%.*]] = trunc i32 [[Y]] to i1
88+
// CHECK-NEXT: [[BV:%.*]] = getelementptr inbounds nuw %struct.S, ptr [[S]], i32 0, i32 0
89+
// CHECK-NEXT: [[X:%.*]] = load <2 x i32>, ptr [[BV]], align 8
90+
// CHECK-NEXT: [[Z:%.*]] = zext i1 [[LV]] to i32
91+
// CHECK-NEXT: [[VI:%.*]] = insertelement <2 x i32> [[X]], i32 [[Z]], i32 1
92+
// CHECK-NEXT: store <2 x i32> [[VI]], ptr [[BV]], align 8
93+
// CHECK-NEXT: ret void
94+
void fn6() {
95+
bool V = false;
96+
S s = {{true,true}, 1.0};
97+
s.bv[1] = V;
98+
}
99+
100+
// CHECK-LABEL: define void {{.*}}fn7{{.*}}
101+
// CHECK: [[Arr:%.*]] = alloca [2 x <2 x i32>], align 8
102+
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[Arr]], ptr align 8 {{.*}}, i32 16, i1 false)
103+
// CHECK-NEXT: [[Idx:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[Arr]], i32 0, i32 0
104+
// CHECK-NEXT: [[X:%.*]] = load <2 x i32>, ptr [[Idx]], align 8
105+
// CHECK-NEXT: [[VI:%.*]] = insertelement <2 x i32> [[X]], i32 0, i32 1
106+
// CHECK-NEXT: store <2 x i32> [[VI]], ptr [[Idx]], align 8
107+
// CHECK-NEXT: ret void
108+
void fn7() {
109+
bool2 Arr[2] = {{true,true}, {false,false}};
110+
Arr[0][1] = false;
111+
}

clang/test/CodeGenHLSL/builtins/ScalarSwizzles.hlsl

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -242,3 +242,63 @@ bool AssignBool(bool V) {
242242
X.x = V.x || V.x;
243243
return X;
244244
}
245+
246+
// CHECK-LABEL: AssignBool2
247+
// CHECK: [[VAddr:%.*]] = alloca i32, align 4
248+
// CHECK-NEXT: [[X:%.*]] = alloca <2 x i32>, align 8
249+
// CHECK-NEXT: [[Tmp:%.*]] = alloca <1 x i32>, align 4
250+
// CHECK-NEXT: [[SV:%.*]] = zext i1 %V to i32
251+
// CHECK-NEXT: store i32 [[SV]], ptr [[VAddr]], align 4
252+
// CHECK-NEXT: store <1 x i32> splat (i32 1), ptr [[Tmp]], align 4
253+
// CHECK-NEXT: [[Y:%.*]] = load <1 x i32>, ptr [[Tmp]], align 4
254+
// CHECK-NEXT: [[Z:%.*]] = shufflevector <1 x i32> [[Y]], <1 x i32> poison, <2 x i32> zeroinitializer
255+
// CHECK-NEXT: [[LV:%.*]] = trunc <2 x i32> [[Z]] to <2 x i1>
256+
// CHECK-NEXT: [[A:%.*]] = zext <2 x i1> [[LV]] to <2 x i32>
257+
// CHECK-NEXT: store <2 x i32> [[A]], ptr [[X]], align 8
258+
// CHECK-NEXT: [[B:%.*]] = load i32, ptr [[VAddr]], align 4
259+
// CHECK-NEXT: [[LV1:%.*]] = trunc i32 [[B]] to i1
260+
// CHECK-NEXT: [[C:%.*]] = load <2 x i32>, ptr [[X]], align 8
261+
// CHECK-NEXT: [[D:%.*]] = zext i1 [[LV1]] to i32
262+
// CHECK-NEXT: [[E:%.*]] = insertelement <2 x i32> [[C]], i32 [[D]], i32 1
263+
// CHECK-NEXT: store <2 x i32> [[E]], ptr [[X]], align 8
264+
// CHECK-NEXT: ret void
265+
void AssignBool2(bool V) {
266+
bool2 X = true.xx;
267+
X.y = V;
268+
}
269+
270+
// CHECK-LABEL: AssignBool3
271+
// CHECK: [[VAddr:%.*]] = alloca <2 x i32>, align 8
272+
// CHECK-NEXT: [[X:%.*]] = alloca <2 x i32>, align 8
273+
// CHECK-NEXT: [[Y:%.*]] = zext <2 x i1> %V to <2 x i32>
274+
// CHECK-NEXT: store <2 x i32> [[Y]], ptr [[VAddr]], align 8
275+
// CHECK-NEXT: store <2 x i32> splat (i32 1), ptr [[X]], align 8
276+
// CHECK-NEXT: [[Z:%.*]] = load <2 x i32>, ptr [[VAddr]], align 8
277+
// CHECK-NEXT: [[LV:%.*]] = trunc <2 x i32> [[Z]] to <2 x i1>
278+
// CHECK-NEXT: [[A:%.*]] = load <2 x i32>, ptr [[X]], align 8
279+
// CHECK-NEXT: [[B:%.*]] = zext <2 x i1> [[LV]] to <2 x i32>
280+
// CHECK-NEXT: [[C:%.*]] = shufflevector <2 x i32> [[B]], <2 x i32> poison, <2 x i32> <i32 0, i32 1>
281+
// CHECK-NEXT: store <2 x i32> [[C]], ptr [[X]], align 8
282+
// CHECK-NEXT: ret void
283+
void AssignBool3(bool2 V) {
284+
bool2 X = {true,true};
285+
X.xy = V;
286+
}
287+
288+
// CHECK-LABEL: AccessBools
289+
// CHECK: [[X:%.*]] = alloca <4 x i32>, align 16
290+
// CHECK-NEXT: [[Tmp:%.*]] = alloca <1 x i32>, align 4
291+
// CHECK-NEXT: store <1 x i32> splat (i32 1), ptr [[Tmp]], align 4
292+
// CHECK-NEXT: [[Y:%.*]] = load <1 x i32>, ptr [[Tmp]], align 4
293+
// CHECK-NEXT: [[Z:%.*]] = shufflevector <1 x i32> [[Y]], <1 x i32> poison, <4 x i32> zeroinitializer
294+
// CHECK-NEXT: [[LV:%.*]] = trunc <4 x i32> [[Z]] to <4 x i1>
295+
// CHECK-NEXT: [[A:%.*]] = zext <4 x i1> [[LV]] to <4 x i32>
296+
// CHECK-NEXT: store <4 x i32> [[A]], ptr [[X]], align 16
297+
// CHECK-NEXT: [[B:%.*]] = load <4 x i32>, ptr [[X]], align 16
298+
// CHECK-NEXT: [[C:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> poison, <2 x i32> <i32 2, i32 3>
299+
// CHECK-NEXT: [[LV1:%.*]] = trunc <2 x i32> [[C]] to <2 x i1>
300+
// CHECK-NEXT: ret <2 x i1> [[LV1]]
301+
bool2 AccessBools() {
302+
bool4 X = true.xxxx;
303+
return X.zw;
304+
}

0 commit comments

Comments
 (0)