Splat function argument (microsoft#6747)

s-perron · web-flow · commit 74ba8452dfa6 · 2024-07-19T16:13:47.000Z
When a scalar variable is passed as the argument to an inout vector parameter, the scalar is supposed to be splatted. After returning from the function, we need to extract the first element from the parameter to store back into the scalar. Fixes microsoft#6568
diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.cpp b/tools/clang/lib/SPIRV/SpirvEmitter.cpp
@@ -1359,6 +1359,15 @@ SpirvInstruction *SpirvEmitter::castToType(SpirvInstruction *value,
                                            QualType fromType, QualType toType,
                                            SourceLocation srcLoc,
                                            SourceRange range) {
+  uint32_t fromSize = 0;
+  uint32_t toSize = 0;
+  assert(isVectorType(fromType, nullptr, &fromSize) ==
+             isVectorType(toType, nullptr, &toSize) &&
+         fromSize == toSize);
+  // Avoid unused variable warning in release builds
+  (void)(fromSize);
+  (void)(toSize);
+
   if (isFloatOrVecMatOfFloatType(toType))
     return castToFloat(value, fromType, toType, srcLoc, range);
 
@@ -2929,8 +2938,8 @@ SpirvInstruction *SpirvEmitter::getBaseOfMemberFunction(
 SpirvInstruction *SpirvEmitter::processCall(const CallExpr *callExpr) {
   const FunctionDecl *callee = getCalleeDefinition(callExpr);
 
-  // Note that we always want the defintion because Stmts/Exprs in the
-  // function body references the parameters in the definition.
+  // Note that we always want the definition because Stmts/Exprs in the
+  // function body reference the parameters in the definition.
   if (!callee) {
     emitError("found undefined function", callExpr->getExprLoc());
     return nullptr;
@@ -3031,7 +3040,7 @@ SpirvInstruction *SpirvEmitter::processCall(const CallExpr *callExpr) {
     const uint32_t argIndex = i + isOperatorOverloading;
 
     // We want the argument variable here so that we can write back to it
-    // later. We will do the OpLoad of this argument manually. So ingore
+    // later. We will do the OpLoad of this argument manually. So ignore
     // the LValueToRValue implicit cast here.
     auto *arg = callExpr->getArg(argIndex)->IgnoreParenLValueCasts();
     const auto *param = callee->getParamDecl(i);
@@ -3112,9 +3121,16 @@ SpirvInstruction *SpirvEmitter::processCall(const CallExpr *callExpr) {
       // has returned.
       if (canActAsOutParmVar(param) &&
           !paramTypeMatchesArgType(paramType, arg->getType())) {
-        if (const auto *refType = paramType->getAs<ReferenceType>())
-          rhsVal = castToType(rhsVal, arg->getType(), refType->getPointeeType(),
-                              arg->getLocStart(), rhsRange);
+        if (const auto *refType = paramType->getAs<ReferenceType>()) {
+          QualType toType = refType->getPointeeType();
+          if (isScalarType(rhsVal->getAstResultType())) {
+            rhsVal =
+                splatScalarToGenerate(toType, rhsVal, SpirvLayoutRule::Void);
+          } else {
+            rhsVal = castToType(rhsVal, rhsVal->getAstResultType(), toType,
+                                arg->getLocStart(), rhsRange);
+          }
+        }
       }
 
       // Initialize the temporary variables using the contents of the arguments
@@ -3164,9 +3180,18 @@ SpirvInstruction *SpirvEmitter::processCall(const CallExpr *callExpr) {
       // mismatch, we need to first cast 'value' to the type of 'arg' because
       // the AST will not include a cast node.
       if (!paramTypeMatchesArgType(paramType, arg->getType())) {
-        if (const auto *refType = paramType->getAs<ReferenceType>())
-          value = castToType(value, refType->getPointeeType(), arg->getType(),
-                             arg->getLocStart());
+        if (const auto *refType = paramType->getAs<ReferenceType>()) {
+          QualType elementType;
+          QualType fromType = refType->getPointeeType();
+          if (isVectorType(fromType, &elementType) &&
+              isScalarType(arg->getType())) {
+            value = spvBuilder.createCompositeExtract(
+                elementType, value, {0}, value->getSourceLocation());
+            fromType = elementType;
+          }
+          value =
+              castToType(value, fromType, arg->getType(), arg->getLocStart());
+        }
       }
 
       processAssignment(arg, value, false, args[index]);
@@ -14930,7 +14955,7 @@ SpirvEmitter::splatScalarToGenerate(QualType type, SpirvInstruction *scalar,
   SourceLocation sourceLocation = scalar->getSourceLocation();
 
   if (isScalarType(type)) {
-    // If the type if bool with a non-void layout rule, then it should be
+    // If the type is bool with a non-void layout rule, then it should be
     // treated as a uint.
     assert(layoutRule == SpirvLayoutRule::Void &&
            "If the layout type is not void, then we should cast to an int when "
diff --git a/tools/clang/test/CodeGenSPIRV/fn.param.inout.type-mismatch.hlsl b/tools/clang/test/CodeGenSPIRV/fn.param.inout.type-mismatch.hlsl
@@ -3,6 +3,12 @@ void foo(const half3 input, out half3 output) {
   output = input;
 }
 
+void bar( inout float3 p) // Test helper with an inout float3 parameter; main() calls it with a scalar float argument.
+{
+  p += float3(1,1,1); // Adds 1 to every component of p.
+}
+
+
 float4 main() : SV_Target0 {
   float3 output;
 // CHECK:       %param_var_input = OpVariable %_ptr_Function_v3half Function
@@ -17,7 +23,17 @@ float4 main() : SV_Target0 {
 // CHECK-NEXT: [[outputFloat3_0:%[0-9]+]] = OpFConvert %v3float [[outputHalf3_0]]
 // CHECK-NEXT:                         OpStore %output [[outputFloat3_0]]
 
-// CHECK-NEXT: [[outputFloat3_1:%[0-9]+]] = OpLoad %v3float %output
+// CHECK:      [[f:%[0-9]+]] = OpLoad %float %f
+// CHECK-NEXT: [[splat:%[0-9]+]] = OpCompositeConstruct %v3float [[f]] [[f]] [[f]]
+// CHECK-NEXT:      OpStore %param_var_p [[splat]]
+// CHECK-NEXT: OpFunctionCall %void %bar %param_var_p
+// CHECK-NEXT: [[ret:%[0-9]+]] = OpLoad %v3float %param_var_p
+// CHECK-NEXT: [[ext:%[0-9]+]] = OpCompositeExtract %float [[ret]] 0
+// CHECK-NEXT:      OpStore %f [[ext]]
+   float f = 0;
+   bar(f);
+
+// CHECK: [[outputFloat3_1:%[0-9]+]] = OpLoad %v3float %output
 // CHECK-NEXT: OpCompositeExtract %float [[outputFloat3_2:%[0-9]+]] 0
 // CHECK-NEXT: OpCompositeExtract %float [[outputFloat3_3:%[0-9]+]] 1
 // CHECK-NEXT: OpCompositeExtract %float [[outputFloat3_4:%[0-9]+]] 2