Self Review.

spall · spall · commit f4819b802120 · 2024-12-09T23:16:02.000Z
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
@@ -6360,30 +6360,6 @@ LValue CodeGenFunction::EmitPseudoObjectLValue(const PseudoObjectExpr *E) {
   return emitPseudoObjectExpr(*this, E, true, AggValueSlot::ignored()).LV;
 }
 
-llvm::Value *
-CodeGenFunction::PerformLoad(std::pair<Address, llvm::Value *> &GEP) {
-  Address GEPAddress = GEP.first;
-  llvm::Value *Idx = GEP.second;
-  llvm::Value *V = Builder.CreateLoad(GEPAddress, "load");
-  if (Idx) { // loading from a vector so perform an extract as well
-    return Builder.CreateExtractElement(V, Idx, "vec.load");
-  }
-  return V;
-}
-
-llvm::Value *
-CodeGenFunction::PerformStore(std::pair<Address, llvm::Value *> &GEP,
-                              llvm::Value *Val) {
-  Address GEPAddress = GEP.first;
-  llvm::Value *Idx = GEP.second;
-  if (Idx) {
-    llvm::Value *V = Builder.CreateLoad(GEPAddress, "load.for.insert");
-    return Builder.CreateInsertElement(V, Val, Idx);
-  } else {
-    return Builder.CreateStore(Val, GEPAddress);
-  }
-}
-
 void CodeGenFunction::FlattenAccessAndType(
     Address Val, QualType SrcTy, SmallVector<llvm::Value *, 4> &IdxList,
     SmallVector<std::pair<Address, llvm::Value *>, 16> &GEPList,
diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp
@@ -498,6 +498,9 @@ static void EmitHLSLScalarFlatCast(CodeGenFunction &CGF, Address DestVal,
   // Flatten our destination
   SmallVector<QualType> DestTypes; // Flattened type
   SmallVector<llvm::Value *, 4> IdxList;
+  IdxList.push_back(
+      llvm::ConstantInt::get(llvm::IntegerType::get(CGF.getLLVMContext(), 32),
+                             0)); // because an Address is a pointer
   SmallVector<std::pair<Address, llvm::Value *>, 16> StoreGEPList;
   // ^^ Flattened accesses to DestVal we want to store into
   CGF.FlattenAccessAndType(DestVal, DestTy, IdxList, StoreGEPList, DestTypes);
@@ -513,7 +516,15 @@ static void EmitHLSLScalarFlatCast(CodeGenFunction &CGF, Address DestVal,
           CGF.Builder.CreateExtractElement(SrcVal, i, "vec.load");
       llvm::Value *Cast =
           CGF.EmitScalarConversion(Load, SrcTy, DestTypes[i], Loc);
-      CGF.PerformStore(StoreGEPList[i], Cast);
+
+      // store back
+      llvm::Value *Idx = StoreGEPList[i].second;
+      if (Idx) {
+        llvm::Value *V =
+            CGF.Builder.CreateLoad(StoreGEPList[i].first, "load.for.insert");
+        Cast = CGF.Builder.CreateInsertElement(V, Cast, Idx);
+      }
+      CGF.Builder.CreateStore(Cast, StoreGEPList[i].first);
     }
     return;
   }
@@ -527,6 +538,9 @@ static void EmitHLSLAggregateFlatCast(CodeGenFunction &CGF, Address DestVal,
   // Flatten our destination
   SmallVector<QualType> DestTypes; // Flattened type
   SmallVector<llvm::Value *, 4> IdxList;
+  IdxList.push_back(
+      llvm::ConstantInt::get(llvm::IntegerType::get(CGF.getLLVMContext(), 32),
+                             0)); // Because an Address is a pointer
   SmallVector<std::pair<Address, llvm::Value *>, 16> StoreGEPList;
   // ^^ Flattened accesses to DestVal we want to store into
   CGF.FlattenAccessAndType(DestVal, DestTy, IdxList, StoreGEPList, DestTypes);
@@ -535,6 +549,9 @@ static void EmitHLSLAggregateFlatCast(CodeGenFunction &CGF, Address DestVal,
   SmallVector<std::pair<Address, llvm::Value *>, 16> LoadGEPList;
   // ^^ Flattened accesses to SrcVal we want to load from
   IdxList.clear();
+  IdxList.push_back(
+      llvm::ConstantInt::get(llvm::IntegerType::get(CGF.getLLVMContext(), 32),
+                             0)); // Because an Address is a pointer
   CGF.FlattenAccessAndType(SrcVal, SrcTy, IdxList, LoadGEPList, SrcTypes);
 
   assert(StoreGEPList.size() <= LoadGEPList.size() &&
@@ -543,10 +560,21 @@ static void EmitHLSLAggregateFlatCast(CodeGenFunction &CGF, Address DestVal,
   // apply casts to what we load from LoadGEPList
   // and store result in Dest
   for (unsigned i = 0; i < StoreGEPList.size(); i++) {
-    llvm::Value *Load = CGF.PerformLoad(LoadGEPList[i]);
+    llvm::Value *Idx = LoadGEPList[i].second;
+    llvm::Value *Load = CGF.Builder.CreateLoad(LoadGEPList[i].first, "load");
+    Load =
+        Idx ? CGF.Builder.CreateExtractElement(Load, Idx, "vec.extract") : Load;
     llvm::Value *Cast =
         CGF.EmitScalarConversion(Load, SrcTypes[i], DestTypes[i], Loc);
-    CGF.PerformStore(StoreGEPList[i], Cast);
+
+    // store back
+    Idx = StoreGEPList[i].second;
+    if (Idx) {
+      llvm::Value *V =
+          CGF.Builder.CreateLoad(StoreGEPList[i].first, "load.for.insert");
+      Cast = CGF.Builder.CreateInsertElement(V, Cast, Idx);
+    }
+    CGF.Builder.CreateStore(Cast, StoreGEPList[i].first);
   }
 }
 
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -2267,6 +2267,9 @@ static Value *EmitHLSLAggregateFlatCast(CodeGenFunction &CGF, Address RHSVal,
                                         QualType RHSTy, QualType LHSTy,
                                         SourceLocation Loc) {
   SmallVector<llvm::Value *, 4> IdxList;
+  IdxList.push_back(
+      llvm::ConstantInt::get(llvm::IntegerType::get(CGF.getLLVMContext(), 32),
+                             0)); // because an Address is a pointer
   SmallVector<std::pair<Address, llvm::Value *>, 16> LoadGEPList;
   SmallVector<QualType> SrcTypes; // Flattened type
   CGF.FlattenAccessAndType(RHSVal, RHSTy, IdxList, LoadGEPList, SrcTypes);
@@ -2277,7 +2280,10 @@ static Value *EmitHLSLAggregateFlatCast(CodeGenFunction &CGF, Address RHSVal,
         CGF.Builder.CreateLoad(CGF.CreateIRTemp(LHSTy, "flatcast.tmp"));
     // write to V.
     for (unsigned i = 0; i < VecTy->getNumElements(); i++) {
-      llvm::Value *Load = CGF.PerformLoad(LoadGEPList[i]);
+      llvm::Value *Load = CGF.Builder.CreateLoad(LoadGEPList[i].first, "load");
+      llvm::Value *Idx = LoadGEPList[i].second;
+      Load = Idx ? CGF.Builder.CreateExtractElement(Load, Idx, "vec.extract")
+                 : Load;
       llvm::Value *Cast = CGF.EmitScalarConversion(
           Load, SrcTypes[i], VecTy->getElementType(), Loc);
       V = CGF.Builder.CreateInsertElement(V, Cast, i);
@@ -2288,8 +2294,11 @@ static Value *EmitHLSLAggregateFlatCast(CodeGenFunction &CGF, Address RHSVal,
   assert(LHSTy->isBuiltinType() &&
          "Destination type must be a vector or builtin type.");
   // TODO add asserts about things being long enough
-  return CGF.EmitScalarConversion(CGF.PerformLoad(LoadGEPList[0]), LHSTy,
-                                  SrcTypes[0], Loc);
+  llvm::Value *Load = CGF.Builder.CreateLoad(LoadGEPList[0].first, "load");
+  llvm::Value *Idx = LoadGEPList[0].second;
+  Load =
+      Idx ? CGF.Builder.CreateExtractElement(Load, Idx, "vec.extract") : Load;
+  return CGF.EmitScalarConversion(Load, LHSTy, SrcTypes[0], Loc);
 }
 
 // VisitCastExpr - Emit code for an explicit or implicit cast.  Implicit casts
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
@@ -4359,9 +4359,6 @@ class CodeGenFunction : public CodeGenTypeCache {
                                 AggValueSlot slot = AggValueSlot::ignored());
   LValue EmitPseudoObjectLValue(const PseudoObjectExpr *e);
 
-  llvm::Value *PerformLoad(std::pair<Address, llvm::Value *> &GEP);
-  llvm::Value *PerformStore(std::pair<Address, llvm::Value *> &GEP,
-                            llvm::Value *Val);
   void FlattenAccessAndType(
       Address Val, QualType SrcTy, SmallVector<llvm::Value *, 4> &IdxList,
       SmallVector<std::pair<Address, llvm::Value *>, 16> &GEPList,
diff --git a/clang/lib/Sema/SemaCast.cpp b/clang/lib/Sema/SemaCast.cpp
@@ -2772,10 +2772,8 @@ void CastOperation::CheckCXXCStyleCast(bool FunctionalStyle,
   CheckedConversionKind CCK = FunctionalStyle
                                   ? CheckedConversionKind::FunctionalCast
                                   : CheckedConversionKind::CStyleCast;
-  // todo what else should i be doing lvalue to rvalue cast for?
-  // why dont they do it for records below?
   // This case should not trigger on regular vector splat
-  // Or vector cast or vector truncation.
+  // vector cast, vector truncation, or special hlsl splat cases
   QualType SrcTy = SrcExpr.get()->getType();
   if (Self.getLangOpts().HLSL &&
       Self.HLSL().CanPerformAggregateCast(SrcExpr.get(), DestType)) {
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
@@ -2412,34 +2412,26 @@ bool SemaHLSL::CheckCompatibleParameterABI(FunctionDecl *New,
   return HadError;
 }
 
-// Follows PerformScalarCast
+// Generally follows PerformScalarCast, with cases reordered for
+// clarity of what types are supported
 bool SemaHLSL::CanPerformScalarCast(QualType SrcTy, QualType DestTy) {
 
   if (SemaRef.getASTContext().hasSameUnqualifiedType(SrcTy, DestTy))
     return true;
 
   switch (Type::ScalarTypeKind SrcKind = SrcTy->getScalarTypeKind()) {
-  case Type::STK_MemberPointer:
-  case Type::STK_CPointer:
-  case Type::STK_BlockPointer:
-  case Type::STK_ObjCObjectPointer:
-    llvm_unreachable("HLSL doesn't support pointers.");
-
-  case Type::STK_FixedPoint:
-    llvm_unreachable("HLSL doesn't support fixed point types.");
-
   case Type::STK_Bool: // casting from bool is like casting from an integer
   case Type::STK_Integral:
     switch (DestTy->getScalarTypeKind()) {
+    case Type::STK_Bool:
+    case Type::STK_Integral:
+    case Type::STK_Floating:
+      return true;
     case Type::STK_CPointer:
     case Type::STK_ObjCObjectPointer:
     case Type::STK_BlockPointer:
     case Type::STK_MemberPointer:
       llvm_unreachable("HLSL doesn't support pointers.");
-    case Type::STK_Bool:
-    case Type::STK_Integral:
-    case Type::STK_Floating:
-      return true;
     case Type::STK_IntegralComplex:
     case Type::STK_FloatingComplex:
       llvm_unreachable("HLSL doesn't support complex types.");
@@ -2467,6 +2459,15 @@ bool SemaHLSL::CanPerformScalarCast(QualType SrcTy, QualType DestTy) {
     }
     llvm_unreachable("Should have returned before this");
 
+  case Type::STK_MemberPointer:
+  case Type::STK_CPointer:
+  case Type::STK_BlockPointer:
+  case Type::STK_ObjCObjectPointer:
+    llvm_unreachable("HLSL doesn't support pointers.");
+
+  case Type::STK_FixedPoint:
+    llvm_unreachable("HLSL doesn't support fixed point types.");
+
   case Type::STK_FloatingComplex:
   case Type::STK_IntegralComplex:
     llvm_unreachable("HLSL doesn't support complex types.");
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/ArrayFlatCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/ArrayFlatCast.hlsl
@@ -8,9 +8,9 @@
 // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 {{.*}}, i32 8, i1 false)
 // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B]], ptr align 4 {{.*}}, i32 4, i1 false)
 // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[A]], i32 8, i1 false)
-// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [1 x i32], ptr [[B]], i32 0
-// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds [2 x i32], ptr [[Tmp]], i32 0
-// CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds [2 x i32], ptr [[Tmp]], i32 1
+// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [1 x i32], ptr [[B]], i32 0, i32 0
+// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds [2 x i32], ptr [[Tmp]], i32 0, i32 0
+// CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds [2 x i32], ptr [[Tmp]], i32 0, i32 1
 // CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G2]], align 4
 // CHECK-NEXT: store i32 [[L]], ptr [[G1]], align 4
 export void call1() {
@@ -27,8 +27,8 @@ export void call1() {
 // CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 4 [[A]], i8 0, i32 4, i1 false)
 // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B]], ptr align 4 {{.*}}, i32 4, i1 false)
 // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[A]], i32 4, i1 false)
-// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [1 x float], ptr [[B]], i32 0
-// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds [1 x i32], ptr [[Tmp]], i32 0
+// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [1 x float], ptr [[B]], i32 0, i32 0
+// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds [1 x i32], ptr [[Tmp]], i32 0, i32 0
 // CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G2]], align 4
 // CHECK-NEXT: [[C:%.*]] = sitofp i32 [[L]] to float
 // CHECK-NEXT: store float [[C]], ptr [[G1]], align 4
@@ -45,7 +45,7 @@ export void call2() {
 // CHECK-NEXT: store <1 x float> splat (float 0x3FF3333340000000), ptr [[A]], align 4
 // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B]], ptr align 4 {{.*}}, i32 4, i1 false)
 // CHECK-NEXT: [[C:%.*]] = load <1 x float>, ptr [[A]], align 4
-// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [1 x i32], ptr [[B]], i32 0
+// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [1 x i32], ptr [[B]], i32 0, i32 0
 // CHECK-NEXT: [[V:%.*]] = extractelement <1 x float> [[C]], i64 0
 // CHECK-NEXT: [[C:%.*]] = fptosi float [[V]] to i32
 // CHECK-NEXT: store i32 [[C]], ptr [[G1]], align 4
@@ -63,9 +63,9 @@ export void call3() {
 // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[A]], ptr align 8 {{.*}}, i32 8, i1 false)
 // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B]], ptr align 4 {{.*}}, i32 8, i1 false)
 // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[Tmp]], ptr align 8 [[A]], i32 8, i1 false)
-// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [2 x i32], ptr [[B]], i32 0
-// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds [2 x i32], ptr [[B]], i32 1
-// CHECK-NEXT: [[VG:%.*]] = getelementptr inbounds [1 x <2 x float>], ptr [[Tmp]], i32 0
+// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [2 x i32], ptr [[B]], i32 0, i32 0
+// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds [2 x i32], ptr [[B]], i32 0, i32 1
+// CHECK-NEXT: [[VG:%.*]] = getelementptr inbounds [1 x <2 x float>], ptr [[Tmp]], i32 0, i32 0
 // CHECK-NEXT: [[L:%.*]] = load <2 x float>, ptr [[VG]], align 8
 // CHECK-NEXT: [[VL:%.*]] = extractelement <2 x float> [[L]], i32 0
 // CHECK-NEXT: [[C:%.*]] = fptosi float [[VL]] to i32
@@ -88,10 +88,10 @@ export void call5() {
 // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 {{.*}}, i32 8, i1 false)
 // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B]], ptr align 4 {{.*}}, i32 8, i1 false)
 // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[A]], i32 8, i1 false)
-// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [2 x i32], ptr [[B]], i32 0
-// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds [2 x i32], ptr [[B]], i32 1
-// CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds [2 x [1 x i32]], ptr [[Tmp]], i32 0, i32 0
-// CHECK-NEXT: [[G4:%.*]] = getelementptr inbounds [2 x [1 x i32]], ptr [[Tmp]], i32 1, i32 0
+// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [2 x i32], ptr [[B]], i32 0, i32 0
+// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds [2 x i32], ptr [[B]], i32 0, i32 1
+// CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds [2 x [1 x i32]], ptr [[Tmp]], i32 0, i32 0, i32 0
+// CHECK-NEXT: [[G4:%.*]] = getelementptr inbounds [2 x [1 x i32]], ptr [[Tmp]], i32 0, i32 1, i32 0
 // CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G3]], align 4
 // CHECK-NEXT: store i32 [[L]], ptr [[G1]], align 4
 // CHECK-NEXT: [[L4:%.*]] = load i32, ptr [[G4]], align 4
@@ -115,9 +115,9 @@ struct S {
 // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[s]], ptr align 4 {{.*}}, i32 8, i1 false)
 // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 {{.*}}, i32 4, i1 false)
 // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[s]], i32 8, i1 false)
-// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [1 x i32], ptr [[A]], i32 0
-// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.S, ptr [[Tmp]], i32 0
-// CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds %struct.S, ptr [[Tmp]], i32 1
+// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [1 x i32], ptr [[A]], i32 0, i32 0
+// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.S, ptr [[Tmp]], i32 0, i32 0
+// CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds %struct.S, ptr [[Tmp]], i32 0, i32 1
 // CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G2]], align 4
 // CHECK-NEXT: store i32 [[L]], ptr [[G1]], align 4
 export void call7() {
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/StructFlatCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/StructFlatCast.hlsl
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/VectorFlatCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/VectorFlatCast.hlsl