Skip to content

Commit f4819b8

Browse files
committed
Self Review.
1 parent c5650ed commit f4819b8

File tree

9 files changed

+100
-91
lines changed

9 files changed

+100
-91
lines changed

clang/lib/CodeGen/CGExpr.cpp

Lines changed: 0 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -6360,30 +6360,6 @@ LValue CodeGenFunction::EmitPseudoObjectLValue(const PseudoObjectExpr *E) {
63606360
return emitPseudoObjectExpr(*this, E, true, AggValueSlot::ignored()).LV;
63616361
}
63626362

6363-
// Emits the load for a single flattened access described by GEP.
//
// GEP.first is the address that is loaded from. GEP.second is an optional
// element index: when it is null the loaded value is returned as is, and when
// it is non-null the value at that index is extracted from the loaded value
// and returned instead.
llvm::Value *
6364-
CodeGenFunction::PerformLoad(std::pair<Address, llvm::Value *> &GEP) {
6365-
Address GEPAddress = GEP.first;
6366-
llvm::Value *Idx = GEP.second;
6367-
llvm::Value *V = Builder.CreateLoad(GEPAddress, "load");
6368-
if (Idx) { // an index is present: also extract that element from the load
6369-
return Builder.CreateExtractElement(V, Idx, "vec.load");
6370-
}
6371-
return V;
6372-
}
6373-
6374-
// Emits the write of Val for a single flattened access described by GEP.
//
// GEP.first is the destination address and GEP.second is an optional element
// index.
//  - Index is null: Val is stored to the address and the result of
//    CreateStore is returned.
//  - Index is non-null: the current value is loaded from the address, Val is
//    inserted at that index, and the resulting insertelement value is
//    returned.
//
// NOTE(review): the indexed branch only builds the insertelement; no store of
// the updated value back to GEPAddress is emitted in this function. Confirm
// whether callers are expected to perform that store themselves.
llvm::Value *
6375-
CodeGenFunction::PerformStore(std::pair<Address, llvm::Value *> &GEP,
6376-
llvm::Value *Val) {
6377-
Address GEPAddress = GEP.first;
6378-
llvm::Value *Idx = GEP.second;
6379-
if (Idx) {
6380-
// Read-modify step: fetch the current value so Val can be inserted into it.
llvm::Value *V = Builder.CreateLoad(GEPAddress, "load.for.insert");
6381-
return Builder.CreateInsertElement(V, Val, Idx);
6382-
} else {
6383-
return Builder.CreateStore(Val, GEPAddress);
6384-
}
6385-
}
6386-
63876363
void CodeGenFunction::FlattenAccessAndType(
63886364
Address Val, QualType SrcTy, SmallVector<llvm::Value *, 4> &IdxList,
63896365
SmallVector<std::pair<Address, llvm::Value *>, 16> &GEPList,

clang/lib/CodeGen/CGExprAgg.cpp

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -498,6 +498,9 @@ static void EmitHLSLScalarFlatCast(CodeGenFunction &CGF, Address DestVal,
498498
// Flatten our destination
499499
SmallVector<QualType> DestTypes; // Flattened type
500500
SmallVector<llvm::Value *, 4> IdxList;
501+
IdxList.push_back(
502+
llvm::ConstantInt::get(llvm::IntegerType::get(CGF.getLLVMContext(), 32),
503+
0)); // because an Address is a pointer
501504
SmallVector<std::pair<Address, llvm::Value *>, 16> StoreGEPList;
502505
// ^^ Flattened accesses to DestVal we want to store into
503506
CGF.FlattenAccessAndType(DestVal, DestTy, IdxList, StoreGEPList, DestTypes);
@@ -513,7 +516,15 @@ static void EmitHLSLScalarFlatCast(CodeGenFunction &CGF, Address DestVal,
513516
CGF.Builder.CreateExtractElement(SrcVal, i, "vec.load");
514517
llvm::Value *Cast =
515518
CGF.EmitScalarConversion(Load, SrcTy, DestTypes[i], Loc);
516-
CGF.PerformStore(StoreGEPList[i], Cast);
519+
520+
// store back
521+
llvm::Value *Idx = StoreGEPList[i].second;
522+
if (Idx) {
523+
llvm::Value *V =
524+
CGF.Builder.CreateLoad(StoreGEPList[i].first, "load.for.insert");
525+
Cast = CGF.Builder.CreateInsertElement(V, Cast, Idx);
526+
}
527+
CGF.Builder.CreateStore(Cast, StoreGEPList[i].first);
517528
}
518529
return;
519530
}
@@ -527,6 +538,9 @@ static void EmitHLSLAggregateFlatCast(CodeGenFunction &CGF, Address DestVal,
527538
// Flatten our destination
528539
SmallVector<QualType> DestTypes; // Flattened type
529540
SmallVector<llvm::Value *, 4> IdxList;
541+
IdxList.push_back(
542+
llvm::ConstantInt::get(llvm::IntegerType::get(CGF.getLLVMContext(), 32),
543+
0)); // Because an Address is a pointer
530544
SmallVector<std::pair<Address, llvm::Value *>, 16> StoreGEPList;
531545
// ^^ Flattened accesses to DestVal we want to store into
532546
CGF.FlattenAccessAndType(DestVal, DestTy, IdxList, StoreGEPList, DestTypes);
@@ -535,6 +549,9 @@ static void EmitHLSLAggregateFlatCast(CodeGenFunction &CGF, Address DestVal,
535549
SmallVector<std::pair<Address, llvm::Value *>, 16> LoadGEPList;
536550
// ^^ Flattened accesses to SrcVal we want to load from
537551
IdxList.clear();
552+
IdxList.push_back(
553+
llvm::ConstantInt::get(llvm::IntegerType::get(CGF.getLLVMContext(), 32),
554+
0)); // Because an Address is a pointer
538555
CGF.FlattenAccessAndType(SrcVal, SrcTy, IdxList, LoadGEPList, SrcTypes);
539556

540557
assert(StoreGEPList.size() <= LoadGEPList.size() &&
@@ -543,10 +560,21 @@ static void EmitHLSLAggregateFlatCast(CodeGenFunction &CGF, Address DestVal,
543560
// apply casts to what we load from LoadGEPList
544561
// and store result in Dest
545562
for (unsigned i = 0; i < StoreGEPList.size(); i++) {
546-
llvm::Value *Load = CGF.PerformLoad(LoadGEPList[i]);
563+
llvm::Value *Idx = LoadGEPList[i].second;
564+
llvm::Value *Load = CGF.Builder.CreateLoad(LoadGEPList[i].first, "load");
565+
Load =
566+
Idx ? CGF.Builder.CreateExtractElement(Load, Idx, "vec.extract") : Load;
547567
llvm::Value *Cast =
548568
CGF.EmitScalarConversion(Load, SrcTypes[i], DestTypes[i], Loc);
549-
CGF.PerformStore(StoreGEPList[i], Cast);
569+
570+
// store back
571+
Idx = StoreGEPList[i].second;
572+
if (Idx) {
573+
llvm::Value *V =
574+
CGF.Builder.CreateLoad(StoreGEPList[i].first, "load.for.insert");
575+
Cast = CGF.Builder.CreateInsertElement(V, Cast, Idx);
576+
}
577+
CGF.Builder.CreateStore(Cast, StoreGEPList[i].first);
550578
}
551579
}
552580

clang/lib/CodeGen/CGExprScalar.cpp

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2267,6 +2267,9 @@ static Value *EmitHLSLAggregateFlatCast(CodeGenFunction &CGF, Address RHSVal,
22672267
QualType RHSTy, QualType LHSTy,
22682268
SourceLocation Loc) {
22692269
SmallVector<llvm::Value *, 4> IdxList;
2270+
IdxList.push_back(
2271+
llvm::ConstantInt::get(llvm::IntegerType::get(CGF.getLLVMContext(), 32),
2272+
0)); // because an Address is a pointer
22702273
SmallVector<std::pair<Address, llvm::Value *>, 16> LoadGEPList;
22712274
SmallVector<QualType> SrcTypes; // Flattened type
22722275
CGF.FlattenAccessAndType(RHSVal, RHSTy, IdxList, LoadGEPList, SrcTypes);
@@ -2277,7 +2280,10 @@ static Value *EmitHLSLAggregateFlatCast(CodeGenFunction &CGF, Address RHSVal,
22772280
CGF.Builder.CreateLoad(CGF.CreateIRTemp(LHSTy, "flatcast.tmp"));
22782281
// write to V.
22792282
for (unsigned i = 0; i < VecTy->getNumElements(); i++) {
2280-
llvm::Value *Load = CGF.PerformLoad(LoadGEPList[i]);
2283+
llvm::Value *Load = CGF.Builder.CreateLoad(LoadGEPList[i].first, "load");
2284+
llvm::Value *Idx = LoadGEPList[i].second;
2285+
Load = Idx ? CGF.Builder.CreateExtractElement(Load, Idx, "vec.extract")
2286+
: Load;
22812287
llvm::Value *Cast = CGF.EmitScalarConversion(
22822288
Load, SrcTypes[i], VecTy->getElementType(), Loc);
22832289
V = CGF.Builder.CreateInsertElement(V, Cast, i);
@@ -2288,8 +2294,11 @@ static Value *EmitHLSLAggregateFlatCast(CodeGenFunction &CGF, Address RHSVal,
22882294
assert(LHSTy->isBuiltinType() &&
22892295
"Destination type must be a vector or builtin type.");
22902296
// TODO: assert that LoadGEPList and SrcTypes are non-empty before element 0 is used below.
2291-
return CGF.EmitScalarConversion(CGF.PerformLoad(LoadGEPList[0]), LHSTy,
2292-
SrcTypes[0], Loc);
2297+
llvm::Value *Load = CGF.Builder.CreateLoad(LoadGEPList[0].first, "load");
2298+
llvm::Value *Idx = LoadGEPList[0].second;
2299+
Load =
2300+
Idx ? CGF.Builder.CreateExtractElement(Load, Idx, "vec.extract") : Load;
2301+
return CGF.EmitScalarConversion(Load, LHSTy, SrcTypes[0], Loc);
22932302
}
22942303

22952304
// VisitCastExpr - Emit code for an explicit or implicit cast. Implicit casts

clang/lib/CodeGen/CodeGenFunction.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4359,9 +4359,6 @@ class CodeGenFunction : public CodeGenTypeCache {
43594359
AggValueSlot slot = AggValueSlot::ignored());
43604360
LValue EmitPseudoObjectLValue(const PseudoObjectExpr *e);
43614361

4362-
llvm::Value *PerformLoad(std::pair<Address, llvm::Value *> &GEP);
4363-
llvm::Value *PerformStore(std::pair<Address, llvm::Value *> &GEP,
4364-
llvm::Value *Val);
43654362
void FlattenAccessAndType(
43664363
Address Val, QualType SrcTy, SmallVector<llvm::Value *, 4> &IdxList,
43674364
SmallVector<std::pair<Address, llvm::Value *>, 16> &GEPList,

clang/lib/Sema/SemaCast.cpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2772,10 +2772,8 @@ void CastOperation::CheckCXXCStyleCast(bool FunctionalStyle,
27722772
CheckedConversionKind CCK = FunctionalStyle
27732773
? CheckedConversionKind::FunctionalCast
27742774
: CheckedConversionKind::CStyleCast;
2775-
// todo what else should i be doing lvalue to rvalue cast for?
2776-
// why dont they do it for records below?
27772775
// This case should not trigger on regular vector splat
2778-
// Or vector cast or vector truncation.
2776+
// vector cast, vector truncation, or special HLSL splat cases.
27792777
QualType SrcTy = SrcExpr.get()->getType();
27802778
if (Self.getLangOpts().HLSL &&
27812779
Self.HLSL().CanPerformAggregateCast(SrcExpr.get(), DestType)) {

clang/lib/Sema/SemaHLSL.cpp

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2412,34 +2412,26 @@ bool SemaHLSL::CheckCompatibleParameterABI(FunctionDecl *New,
24122412
return HadError;
24132413
}
24142414

2415-
// Follows PerformScalarCast
2415+
// Generally follows PerformScalarCast, with cases reordered for
2416+
// clarity of what types are supported
24162417
bool SemaHLSL::CanPerformScalarCast(QualType SrcTy, QualType DestTy) {
24172418

24182419
if (SemaRef.getASTContext().hasSameUnqualifiedType(SrcTy, DestTy))
24192420
return true;
24202421

24212422
switch (Type::ScalarTypeKind SrcKind = SrcTy->getScalarTypeKind()) {
2422-
case Type::STK_MemberPointer:
2423-
case Type::STK_CPointer:
2424-
case Type::STK_BlockPointer:
2425-
case Type::STK_ObjCObjectPointer:
2426-
llvm_unreachable("HLSL doesn't support pointers.");
2427-
2428-
case Type::STK_FixedPoint:
2429-
llvm_unreachable("HLSL doesn't support fixed point types.");
2430-
24312423
case Type::STK_Bool: // casting from bool is like casting from an integer
24322424
case Type::STK_Integral:
24332425
switch (DestTy->getScalarTypeKind()) {
2426+
case Type::STK_Bool:
2427+
case Type::STK_Integral:
2428+
case Type::STK_Floating:
2429+
return true;
24342430
case Type::STK_CPointer:
24352431
case Type::STK_ObjCObjectPointer:
24362432
case Type::STK_BlockPointer:
24372433
case Type::STK_MemberPointer:
24382434
llvm_unreachable("HLSL doesn't support pointers.");
2439-
case Type::STK_Bool:
2440-
case Type::STK_Integral:
2441-
case Type::STK_Floating:
2442-
return true;
24432435
case Type::STK_IntegralComplex:
24442436
case Type::STK_FloatingComplex:
24452437
llvm_unreachable("HLSL doesn't support complex types.");
@@ -2467,6 +2459,15 @@ bool SemaHLSL::CanPerformScalarCast(QualType SrcTy, QualType DestTy) {
24672459
}
24682460
llvm_unreachable("Should have returned before this");
24692461

2462+
case Type::STK_MemberPointer:
2463+
case Type::STK_CPointer:
2464+
case Type::STK_BlockPointer:
2465+
case Type::STK_ObjCObjectPointer:
2466+
llvm_unreachable("HLSL doesn't support pointers.");
2467+
2468+
case Type::STK_FixedPoint:
2469+
llvm_unreachable("HLSL doesn't support fixed point types.");
2470+
24702471
case Type::STK_FloatingComplex:
24712472
case Type::STK_IntegralComplex:
24722473
llvm_unreachable("HLSL doesn't support complex types.");

clang/test/CodeGenHLSL/BasicFeatures/ArrayFlatCast.hlsl

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@
88
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 {{.*}}, i32 8, i1 false)
99
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B]], ptr align 4 {{.*}}, i32 4, i1 false)
1010
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[A]], i32 8, i1 false)
11-
// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [1 x i32], ptr [[B]], i32 0
12-
// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds [2 x i32], ptr [[Tmp]], i32 0
13-
// CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds [2 x i32], ptr [[Tmp]], i32 1
11+
// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [1 x i32], ptr [[B]], i32 0, i32 0
12+
// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds [2 x i32], ptr [[Tmp]], i32 0, i32 0
13+
// CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds [2 x i32], ptr [[Tmp]], i32 0, i32 1
1414
// CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G2]], align 4
1515
// CHECK-NEXT: store i32 [[L]], ptr [[G1]], align 4
1616
export void call1() {
@@ -27,8 +27,8 @@ export void call1() {
2727
// CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 4 [[A]], i8 0, i32 4, i1 false)
2828
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B]], ptr align 4 {{.*}}, i32 4, i1 false)
2929
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[A]], i32 4, i1 false)
30-
// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [1 x float], ptr [[B]], i32 0
31-
// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds [1 x i32], ptr [[Tmp]], i32 0
30+
// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [1 x float], ptr [[B]], i32 0, i32 0
31+
// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds [1 x i32], ptr [[Tmp]], i32 0, i32 0
3232
// CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G2]], align 4
3333
// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[L]] to float
3434
// CHECK-NEXT: store float [[C]], ptr [[G1]], align 4
@@ -45,7 +45,7 @@ export void call2() {
4545
// CHECK-NEXT: store <1 x float> splat (float 0x3FF3333340000000), ptr [[A]], align 4
4646
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B]], ptr align 4 {{.*}}, i32 4, i1 false)
4747
// CHECK-NEXT: [[C:%.*]] = load <1 x float>, ptr [[A]], align 4
48-
// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [1 x i32], ptr [[B]], i32 0
48+
// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [1 x i32], ptr [[B]], i32 0, i32 0
4949
// CHECK-NEXT: [[V:%.*]] = extractelement <1 x float> [[C]], i64 0
5050
// CHECK-NEXT: [[C:%.*]] = fptosi float [[V]] to i32
5151
// CHECK-NEXT: store i32 [[C]], ptr [[G1]], align 4
@@ -63,9 +63,9 @@ export void call3() {
6363
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[A]], ptr align 8 {{.*}}, i32 8, i1 false)
6464
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B]], ptr align 4 {{.*}}, i32 8, i1 false)
6565
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[Tmp]], ptr align 8 [[A]], i32 8, i1 false)
66-
// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [2 x i32], ptr [[B]], i32 0
67-
// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds [2 x i32], ptr [[B]], i32 1
68-
// CHECK-NEXT: [[VG:%.*]] = getelementptr inbounds [1 x <2 x float>], ptr [[Tmp]], i32 0
66+
// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [2 x i32], ptr [[B]], i32 0, i32 0
67+
// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds [2 x i32], ptr [[B]], i32 0, i32 1
68+
// CHECK-NEXT: [[VG:%.*]] = getelementptr inbounds [1 x <2 x float>], ptr [[Tmp]], i32 0, i32 0
6969
// CHECK-NEXT: [[L:%.*]] = load <2 x float>, ptr [[VG]], align 8
7070
// CHECK-NEXT: [[VL:%.*]] = extractelement <2 x float> [[L]], i32 0
7171
// CHECK-NEXT: [[C:%.*]] = fptosi float [[VL]] to i32
@@ -88,10 +88,10 @@ export void call5() {
8888
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 {{.*}}, i32 8, i1 false)
8989
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B]], ptr align 4 {{.*}}, i32 8, i1 false)
9090
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[A]], i32 8, i1 false)
91-
// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [2 x i32], ptr [[B]], i32 0
92-
// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds [2 x i32], ptr [[B]], i32 1
93-
// CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds [2 x [1 x i32]], ptr [[Tmp]], i32 0, i32 0
94-
// CHECK-NEXT: [[G4:%.*]] = getelementptr inbounds [2 x [1 x i32]], ptr [[Tmp]], i32 1, i32 0
91+
// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [2 x i32], ptr [[B]], i32 0, i32 0
92+
// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds [2 x i32], ptr [[B]], i32 0, i32 1
93+
// CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds [2 x [1 x i32]], ptr [[Tmp]], i32 0, i32 0, i32 0
94+
// CHECK-NEXT: [[G4:%.*]] = getelementptr inbounds [2 x [1 x i32]], ptr [[Tmp]], i32 0, i32 1, i32 0
9595
// CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G3]], align 4
9696
// CHECK-NEXT: store i32 [[L]], ptr [[G1]], align 4
9797
// CHECK-NEXT: [[L4:%.*]] = load i32, ptr [[G4]], align 4
@@ -115,9 +115,9 @@ struct S {
115115
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[s]], ptr align 4 {{.*}}, i32 8, i1 false)
116116
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 {{.*}}, i32 4, i1 false)
117117
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[s]], i32 8, i1 false)
118-
// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [1 x i32], ptr [[A]], i32 0
119-
// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.S, ptr [[Tmp]], i32 0
120-
// CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds %struct.S, ptr [[Tmp]], i32 1
118+
// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [1 x i32], ptr [[A]], i32 0, i32 0
119+
// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.S, ptr [[Tmp]], i32 0, i32 0
120+
// CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds %struct.S, ptr [[Tmp]], i32 0, i32 1
121121
// CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G2]], align 4
122122
// CHECK-NEXT: store i32 [[L]], ptr [[G1]], align 4
123123
export void call7() {

0 commit comments

Comments
 (0)