Skip to content

Commit 5c5d7c5

Browse files
committed
address pr comments
1 parent 9df363d commit 5c5d7c5

File tree

6 files changed

+87
-28
lines changed

6 files changed

+87
-28
lines changed

clang/include/clang/Basic/DiagnosticSemaKinds.td

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4358,7 +4358,7 @@ def warn_unknown_sanitizer_ignored : Warning<
43584358

43594359
def warn_impcast_matrix_scalar : Warning<
43604360
"implicit conversion turns matrix to scalar: %0 to %1">,
4361-
InGroup<MatrixConversion>, DefaultIgnore;
4361+
InGroup<MatrixConversion>;
43624362
def warn_impcast_vector_scalar : Warning<
43634363
"implicit conversion turns vector to scalar: %0 to %1">,
43644364
InGroup<Conversion>, DefaultIgnore;

clang/lib/CodeGen/CGExprScalar.cpp

Lines changed: 40 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -2426,17 +2426,26 @@ static Value *EmitHLSLElementwiseCast(CodeGenFunction &CGF, LValue SrcVal,
24262426
assert(LoadList.size() >= MatTy->getNumElementsFlattened() &&
24272427
"Flattened type on RHS must have the same number or more elements "
24282428
"than vector on LHS.");
2429+
24292430
llvm::Value *V =
24302431
CGF.Builder.CreateLoad(CGF.CreateIRTemp(DestTy, "flatcast.tmp"));
24312432
// write to V.
2432-
for (unsigned I = 0, E = MatTy->getNumElementsFlattened(); I < E; I++) {
2433-
RValue RVal = CGF.EmitLoadOfLValue(LoadList[I], Loc);
2434-
assert(RVal.isScalar() &&
2435-
"All flattened source values should be scalars.");
2436-
llvm::Value *Cast =
2437-
CGF.EmitScalarConversion(RVal.getScalarVal(), LoadList[I].getType(),
2438-
MatTy->getElementType(), Loc);
2439-
V = CGF.Builder.CreateInsertElement(V, Cast, I);
2433+
unsigned NumCols = MatTy->getNumColumns();
2434+
unsigned NumRows = MatTy->getNumRows();
2435+
unsigned ColOffset = NumCols;
2436+
if (auto *SrcMatTy = SrcVal.getType()->getAs<ConstantMatrixType>())
2437+
ColOffset = SrcMatTy->getNumColumns();
2438+
for (unsigned R = 0; R < NumRows; R++) {
2439+
for (unsigned C = 0; C < NumCols; C++) {
2440+
unsigned I = R * ColOffset + C;
2441+
RValue RVal = CGF.EmitLoadOfLValue(LoadList[I], Loc);
2442+
assert(RVal.isScalar() &&
2443+
"All flattened source values should be scalars.");
2444+
llvm::Value *Cast =
2445+
CGF.EmitScalarConversion(RVal.getScalarVal(), LoadList[I].getType(),
2446+
MatTy->getElementType(), Loc);
2447+
V = CGF.Builder.CreateInsertElement(V, Cast, I);
2448+
}
24402449
}
24412450
return V;
24422451
}
@@ -2978,9 +2987,17 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
29782987
Value *Mat = Visit(E);
29792988
if (auto *MatTy = DestTy->getAs<ConstantMatrixType>()) {
29802989
SmallVector<int> Mask;
2981-
unsigned NumElts = MatTy->getNumElementsFlattened();
2982-
for (unsigned I = 0; I != NumElts; ++I)
2983-
Mask.push_back(I);
2990+
unsigned NumCols = MatTy->getNumColumns();
2991+
unsigned NumRows = MatTy->getNumRows();
2992+
unsigned ColOffset = NumCols;
2993+
if (auto *SrcMatTy = E->getType()->getAs<ConstantMatrixType>())
2994+
ColOffset = SrcMatTy->getNumColumns();
2995+
for (unsigned R = 0; R < NumRows; R++) {
2996+
for (unsigned C = 0; C < NumCols; C++) {
2997+
unsigned I = R * ColOffset + C;
2998+
Mask.push_back(I);
2999+
}
3000+
}
29843001

29853002
return Builder.CreateShuffleVector(Mat, Mask, "trunc");
29863003
}
@@ -2991,11 +3008,20 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
29913008
RValue RV = CGF.EmitAnyExpr(E);
29923009
SourceLocation Loc = CE->getExprLoc();
29933010

2994-
assert(RV.isAggregate() && "Not a valid HLSL Elementwise Cast.");
2995-
// RHS is an aggregate
2996-
LValue SrcVal = CGF.MakeAddrLValue(RV.getAggregateAddress(), E->getType());
3011+
Address SrcAddr = Address::invalid();
3012+
3013+
if (RV.isAggregate()) {
3014+
SrcAddr = RV.getAggregateAddress();
3015+
} else {
3016+
SrcAddr = CGF.CreateMemTemp(E->getType(), "hlsl.ewcast.src");
3017+
LValue TmpLV = CGF.MakeAddrLValue(SrcAddr, E->getType());
3018+
CGF.EmitStoreThroughLValue(RV, TmpLV);
3019+
}
3020+
3021+
LValue SrcVal = CGF.MakeAddrLValue(SrcAddr, E->getType());
29973022
return EmitHLSLElementwiseCast(CGF, SrcVal, DestTy, Loc);
29983023
}
3024+
29993025
} // end of switch
30003026

30013027
llvm_unreachable("unknown scalar cast");

clang/lib/Sema/SemaOverload.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2114,7 +2114,7 @@ static bool IsMatrixConversion(Sema &S, QualType FromType, QualType ToType,
21142114
ImplicitConversionKind &ICK,
21152115
ImplicitConversionKind &ElConv, Expr *From,
21162116
bool InOverloadResolution, bool CStyle) {
2117-
// The non HLSL Matrix conversion rules are not clear.
2117+
// Implicit conversions for matrices are an HLSL feature not present in C/C++.
21182118
if (!S.getLangOpts().HLSL)
21192119
return false;
21202120

clang/test/CodeGenHLSL/BasicFeatures/MatrixElementTypeCast.hlsl

Lines changed: 33 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -184,3 +184,36 @@ struct Derived : BFields {
184184
void call4(Derived D) {
185185
int2x2 A = (int2x2)D;
186186
}
187+
188+
// CHECK-LABEL: define hidden noundef nofpclass(nan inf) <4 x float> @_Z5call5Dv4_f(
189+
// CHECK-SAME: <4 x float> noundef nofpclass(nan inf) [[M:%.*]]) #[[ATTR0]] {
190+
// CHECK-NEXT: [[ENTRY:.*:]]
191+
// CHECK-NEXT: [[M_ADDR:%.*]] = alloca <4 x float>, align 16
192+
// CHECK-NEXT: [[M2:%.*]] = alloca [4 x float], align 4
193+
// CHECK-NEXT: [[HLSL_EWCAST_SRC:%.*]] = alloca <4 x float>, align 16
194+
// CHECK-NEXT: [[FLATCAST_TMP:%.*]] = alloca <4 x float>, align 4
195+
// CHECK-NEXT: store <4 x float> [[M]], ptr [[M_ADDR]], align 16
196+
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[M_ADDR]], align 16
197+
// CHECK-NEXT: store <4 x float> [[TMP0]], ptr [[HLSL_EWCAST_SRC]], align 16
198+
// CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr inbounds <4 x float>, ptr [[HLSL_EWCAST_SRC]], i32 0
199+
// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[FLATCAST_TMP]], align 4
200+
// CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[VECTOR_GEP]], align 16
201+
// CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
202+
// CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP1]], float [[VECEXT]], i64 0
203+
// CHECK-NEXT: [[TMP4:%.*]] = load <4 x float>, ptr [[VECTOR_GEP]], align 16
204+
// CHECK-NEXT: [[VECEXT1:%.*]] = extractelement <4 x float> [[TMP4]], i32 1
205+
// CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> [[TMP3]], float [[VECEXT1]], i64 1
206+
// CHECK-NEXT: [[TMP6:%.*]] = load <4 x float>, ptr [[VECTOR_GEP]], align 16
207+
// CHECK-NEXT: [[VECEXT2:%.*]] = extractelement <4 x float> [[TMP6]], i32 2
208+
// CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[TMP5]], float [[VECEXT2]], i64 2
209+
// CHECK-NEXT: [[TMP8:%.*]] = load <4 x float>, ptr [[VECTOR_GEP]], align 16
210+
// CHECK-NEXT: [[VECEXT3:%.*]] = extractelement <4 x float> [[TMP8]], i32 3
211+
// CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x float> [[TMP7]], float [[VECEXT3]], i64 3
212+
// CHECK-NEXT: store <4 x float> [[TMP9]], ptr [[M2]], align 4
213+
// CHECK-NEXT: [[TMP10:%.*]] = load <4 x float>, ptr [[M2]], align 4
214+
// CHECK-NEXT: ret <4 x float> [[TMP10]]
215+
//
216+
float2x2 call5(float4 v) {
217+
float2x2 m = (float2x2)v;
218+
return m;
219+
}

clang/test/CodeGenHLSL/BasicFeatures/MatrixExplicitTruncation.hlsl

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@
2525
// CHECK-NEXT: [[I43:%.*]] = alloca [12 x i32], align 4
2626
// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
2727
// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
28-
// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
28+
// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 4, i32 5, i32 6, i32 8, i32 9, i32 10, i32 12, i32 13, i32 14>
2929
// CHECK-NEXT: store <12 x i32> [[TRUNC]], ptr [[I43]], align 4
3030
// CHECK-NEXT: [[TMP1:%.*]] = load <12 x i32>, ptr [[I43]], align 4
3131
// CHECK-NEXT: ret <12 x i32> [[TMP1]]
@@ -42,7 +42,7 @@
4242
// CHECK-NEXT: [[I33:%.*]] = alloca [9 x i32], align 4
4343
// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
4444
// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
45-
// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
45+
// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <9 x i32> <i32 0, i32 1, i32 2, i32 4, i32 5, i32 6, i32 8, i32 9, i32 10>
4646
// CHECK-NEXT: store <9 x i32> [[TRUNC]], ptr [[I33]], align 4
4747
// CHECK-NEXT: [[TMP1:%.*]] = load <9 x i32>, ptr [[I33]], align 4
4848
// CHECK-NEXT: ret <9 x i32> [[TMP1]]
@@ -59,7 +59,7 @@
5959
// CHECK-NEXT: [[I32:%.*]] = alloca [6 x i32], align 4
6060
// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
6161
// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
62-
// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
62+
// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <6 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9>
6363
// CHECK-NEXT: store <6 x i32> [[TRUNC]], ptr [[I32]], align 4
6464
// CHECK-NEXT: [[TMP1:%.*]] = load <6 x i32>, ptr [[I32]], align 4
6565
// CHECK-NEXT: ret <6 x i32> [[TMP1]]
@@ -76,7 +76,7 @@
7676
// CHECK-NEXT: [[I23:%.*]] = alloca [6 x i32], align 4
7777
// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
7878
// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
79-
// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
79+
// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 4, i32 5, i32 6>
8080
// CHECK-NEXT: store <6 x i32> [[TRUNC]], ptr [[I23]], align 4
8181
// CHECK-NEXT: [[TMP1:%.*]] = load <6 x i32>, ptr [[I23]], align 4
8282
// CHECK-NEXT: ret <6 x i32> [[TMP1]]
@@ -93,7 +93,7 @@
9393
// CHECK-NEXT: [[I22:%.*]] = alloca [4 x i32], align 4
9494
// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
9595
// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
96-
// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
96+
// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
9797
// CHECK-NEXT: store <4 x i32> [[TRUNC]], ptr [[I22]], align 4
9898
// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[I22]], align 4
9999
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
@@ -110,7 +110,7 @@
110110
// CHECK-NEXT: [[I21:%.*]] = alloca [2 x i32], align 4
111111
// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
112112
// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
113-
// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <2 x i32> <i32 0, i32 1>
113+
// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <2 x i32> <i32 0, i32 4>
114114
// CHECK-NEXT: store <2 x i32> [[TRUNC]], ptr [[I21]], align 4
115115
// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[I21]], align 4
116116
// CHECK-NEXT: ret <2 x i32> [[TMP1]]

clang/test/CodeGenHLSL/BasicFeatures/MatrixImplicitTruncation.hlsl

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@
2525
// CHECK-NEXT: [[I43:%.*]] = alloca [12 x i32], align 4
2626
// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
2727
// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
28-
// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
28+
// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 4, i32 5, i32 6, i32 8, i32 9, i32 10, i32 12, i32 13, i32 14>
2929
// CHECK-NEXT: store <12 x i32> [[TRUNC]], ptr [[I43]], align 4
3030
// CHECK-NEXT: [[TMP1:%.*]] = load <12 x i32>, ptr [[I43]], align 4
3131
// CHECK-NEXT: ret <12 x i32> [[TMP1]]
@@ -42,7 +42,7 @@
4242
// CHECK-NEXT: [[I33:%.*]] = alloca [9 x i32], align 4
4343
// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
4444
// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
45-
// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
45+
// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <9 x i32> <i32 0, i32 1, i32 2, i32 4, i32 5, i32 6, i32 8, i32 9, i32 10>
4646
// CHECK-NEXT: store <9 x i32> [[TRUNC]], ptr [[I33]], align 4
4747
// CHECK-NEXT: [[TMP1:%.*]] = load <9 x i32>, ptr [[I33]], align 4
4848
// CHECK-NEXT: ret <9 x i32> [[TMP1]]
@@ -59,7 +59,7 @@
5959
// CHECK-NEXT: [[I32:%.*]] = alloca [6 x i32], align 4
6060
// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
6161
// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
62-
// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
62+
// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <6 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9>
6363
// CHECK-NEXT: store <6 x i32> [[TRUNC]], ptr [[I32]], align 4
6464
// CHECK-NEXT: [[TMP1:%.*]] = load <6 x i32>, ptr [[I32]], align 4
6565
// CHECK-NEXT: ret <6 x i32> [[TMP1]]
@@ -76,7 +76,7 @@
7676
// CHECK-NEXT: [[I23:%.*]] = alloca [6 x i32], align 4
7777
// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
7878
// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
79-
// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
79+
// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 4, i32 5, i32 6>
8080
// CHECK-NEXT: store <6 x i32> [[TRUNC]], ptr [[I23]], align 4
8181
// CHECK-NEXT: [[TMP1:%.*]] = load <6 x i32>, ptr [[I23]], align 4
8282
// CHECK-NEXT: ret <6 x i32> [[TMP1]]
@@ -93,7 +93,7 @@
9393
// CHECK-NEXT: [[I22:%.*]] = alloca [4 x i32], align 4
9494
// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
9595
// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
96-
// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
96+
// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
9797
// CHECK-NEXT: store <4 x i32> [[TRUNC]], ptr [[I22]], align 4
9898
// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[I22]], align 4
9999
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
@@ -110,7 +110,7 @@
110110
// CHECK-NEXT: [[I21:%.*]] = alloca [2 x i32], align 4
111111
// CHECK-NEXT: store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
112112
// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
113-
// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <2 x i32> <i32 0, i32 1>
113+
// CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <2 x i32> <i32 0, i32 4>
114114
// CHECK-NEXT: store <2 x i32> [[TRUNC]], ptr [[I21]], align 4
115115
// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[I21]], align 4
116116
// CHECK-NEXT: ret <2 x i32> [[TMP1]]

0 commit comments

Comments
 (0)