Skip to content

Commit 34ae69d

Browse files
committed
zext at top of function instead of in each case
update tests + new test
1 parent 9b93c9a commit 34ae69d

File tree

2 files changed

+31
-16
lines changed

2 files changed

+31
-16
lines changed

clang/lib/CodeGen/CGExpr.cpp

Lines changed: 9 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2694,14 +2694,20 @@ void CodeGenFunction::EmitStoreThroughBitfieldLValue(RValue Src, LValue Dst,
26942694

26952695
void CodeGenFunction::EmitStoreThroughExtVectorComponentLValue(RValue Src,
26962696
LValue Dst) {
2697+
llvm::Value *SrcVal = Src.getScalarVal();
2698+
Address DstAddr = Dst.getExtVectorAddress();
2699+
if (DstAddr.getElementType()->getScalarSizeInBits() >
2700+
SrcVal->getType()->getScalarSizeInBits())
2701+
SrcVal = Builder.CreateZExt(
2702+
SrcVal, convertTypeForLoadStore(Dst.getType(), SrcVal->getType()));
2703+
26972704
// HLSL allows storing to scalar values through ExtVector component LValues.
26982705
// To support this we need to handle the case where the destination address is
26992706
// a scalar.
2700-
Address DstAddr = Dst.getExtVectorAddress();
27012707
if (!DstAddr.getElementType()->isVectorTy()) {
27022708
assert(!Dst.getType()->isVectorType() &&
27032709
"this should only occur for non-vector l-values");
2704-
Builder.CreateStore(Src.getScalarVal(), DstAddr, Dst.isVolatileQualified());
2710+
Builder.CreateStore(SrcVal, DstAddr, Dst.isVolatileQualified());
27052711
return;
27062712
}
27072713

@@ -2722,11 +2728,6 @@ void CodeGenFunction::EmitStoreThroughExtVectorComponentLValue(RValue Src,
27222728
for (unsigned i = 0; i != NumSrcElts; ++i)
27232729
Mask[getAccessedFieldNo(i, Elts)] = i;
27242730

2725-
llvm::Value *SrcVal = Src.getScalarVal();
2726-
if (VecTy->getScalarSizeInBits() >
2727-
SrcVal->getType()->getScalarSizeInBits())
2728-
SrcVal = Builder.CreateZExt(SrcVal, VecTy);
2729-
27302731
Vec = Builder.CreateShuffleVector(SrcVal, Mask);
27312732
} else if (NumDstElts > NumSrcElts) {
27322733
// Extend the source vector to the same length and then shuffle it
@@ -2737,8 +2738,7 @@ void CodeGenFunction::EmitStoreThroughExtVectorComponentLValue(RValue Src,
27372738
for (unsigned i = 0; i != NumSrcElts; ++i)
27382739
ExtMask.push_back(i);
27392740
ExtMask.resize(NumDstElts, -1);
2740-
llvm::Value *ExtSrcVal =
2741-
Builder.CreateShuffleVector(Src.getScalarVal(), ExtMask);
2741+
llvm::Value *ExtSrcVal = Builder.CreateShuffleVector(SrcVal, ExtMask);
27422742
// build identity
27432743
SmallVector<int, 4> Mask;
27442744
for (unsigned i = 0; i != NumDstElts; ++i)
@@ -2764,10 +2764,6 @@ void CodeGenFunction::EmitStoreThroughExtVectorComponentLValue(RValue Src,
27642764
unsigned InIdx = getAccessedFieldNo(0, Elts);
27652765
llvm::Value *Elt = llvm::ConstantInt::get(SizeTy, InIdx);
27662766

2767-
llvm::Value *SrcVal = Src.getScalarVal();
2768-
if (VecTy->getScalarSizeInBits() > SrcVal->getType()->getScalarSizeInBits())
2769-
SrcVal = Builder.CreateZExt(SrcVal, VecTy->getScalarType());
2770-
27712767
Vec = Builder.CreateInsertElement(Vec, SrcVal, Elt);
27722768
}
27732769

clang/test/CodeGenHLSL/builtins/ScalarSwizzles.hlsl

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -233,7 +233,8 @@ int AssignInt(int V){
233233

234234
// CHECK: lor.end:
235235
// CHECK-NEXT: [[H:%.*]] = phi i1 [ true, %entry ], [ [[G]], %lor.rhs ]
236-
// CHECK-NEXT: store i1 [[H]], ptr [[XAddr]], align 4
236+
// CHECK-NEXT: [[J:%.*]] = zext i1 [[H]] to i32
237+
// CHECK-NEXT: store i32 [[J]], ptr [[XAddr]], align 4
237238
// CHECK-NEXT: [[I:%.*]] = load i32, ptr [[XAddr]], align 4
238239
// CHECK-NEXT: [[LoadV:%.*]] = trunc i32 [[I]] to i1
239240
// CHECK-NEXT: ret i1 [[LoadV]]
@@ -257,8 +258,8 @@ bool AssignBool(bool V) {
257258
// CHECK-NEXT: store <2 x i32> [[A]], ptr [[X]], align 8
258259
// CHECK-NEXT: [[B:%.*]] = load i32, ptr [[VAddr]], align 4
259260
// CHECK-NEXT: [[LV1:%.*]] = trunc i32 [[B]] to i1
260-
// CHECK-NEXT: [[C:%.*]] = load <2 x i32>, ptr [[X]], align 8
261261
// CHECK-NEXT: [[D:%.*]] = zext i1 [[LV1]] to i32
262+
// CHECK-NEXT: [[C:%.*]] = load <2 x i32>, ptr [[X]], align 8
262263
// CHECK-NEXT: [[E:%.*]] = insertelement <2 x i32> [[C]], i32 [[D]], i32 1
263264
// CHECK-NEXT: store <2 x i32> [[E]], ptr [[X]], align 8
264265
// CHECK-NEXT: ret void
@@ -275,8 +276,8 @@ void AssignBool2(bool V) {
275276
// CHECK-NEXT: store <2 x i32> splat (i32 1), ptr [[X]], align 8
276277
// CHECK-NEXT: [[Z:%.*]] = load <2 x i32>, ptr [[VAddr]], align 8
277278
// CHECK-NEXT: [[LV:%.*]] = trunc <2 x i32> [[Z]] to <2 x i1>
278-
// CHECK-NEXT: [[A:%.*]] = load <2 x i32>, ptr [[X]], align 8
279279
// CHECK-NEXT: [[B:%.*]] = zext <2 x i1> [[LV]] to <2 x i32>
280+
// CHECK-NEXT: [[A:%.*]] = load <2 x i32>, ptr [[X]], align 8
280281
// CHECK-NEXT: [[C:%.*]] = shufflevector <2 x i32> [[B]], <2 x i32> poison, <2 x i32> <i32 0, i32 1>
281282
// CHECK-NEXT: store <2 x i32> [[C]], ptr [[X]], align 8
282283
// CHECK-NEXT: ret void
@@ -302,3 +303,21 @@ bool2 AccessBools() {
302303
bool4 X = true.xxxx;
303304
return X.zw;
304305
}
306+
307+
// CHECK-LABEL: define void {{.*}}BoolSizeMismatch{{.*}}
308+
// CHECK: [[B:%.*]] = alloca <4 x i32>, align 16
309+
// CHECK-NEXT: [[Tmp:%.*]] = alloca <1 x i32>, align 4
310+
// CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[B]], align 16
311+
// CHECK-NEXT: store <1 x i32> zeroinitializer, ptr [[Tmp]], align 4
312+
// CHECK-NEXT: [[L0:%.*]] = load <1 x i32>, ptr [[Tmp]], align 4
313+
// CHECK-NEXT: [[L1:%.*]] = shufflevector <1 x i32> [[L0]], <1 x i32> poison, <3 x i32> zeroinitializer
314+
// CHECK-NEXT: [[TruncV:%.*]] = trunc <3 x i32> [[L1]] to <3 x i1>
315+
// CHECK-NEXT: [[L2:%.*]] = zext <3 x i1> [[TruncV]] to <3 x i32>
316+
// CHECK-NEXT: [[L3:%.*]] = load <4 x i32>, ptr [[B]], align 16
317+
// CHECK-NEXT: [[L4:%.*]] = shufflevector <3 x i32> [[L2]], <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
318+
// CHECK-NEXT: [[L5:%.*]] = shufflevector <4 x i32> [[L3]], <4 x i32> [[L4]], <4 x i32> <i32 4, i32 5, i32 6, i32 3>
319+
// CHECK-NEXT: store <4 x i32> [[L5]], ptr [[B]], align 16
320+
void BoolSizeMismatch() {
321+
bool4 B = {true,true,true,true};
322+
B.xyz = false.xxx;
323+
}

0 commit comments

Comments
 (0)