Commit da0a214

Author: Tim Corringham

Changes from review comments
Move the unrolling of UnpackHalf2x16 from the SPIR-V codegen to the Clang IR codegen, and replace the spv_legacyf16tof32 intrinsic with an spv_unpackhalf2x16 intrinsic. This greatly simplifies the SPIR-V codegen at the expense of slightly complicating the Clang IR codegen.
1 parent: d8706d6
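As an illustration of the new Clang-side unrolling, the IR emitted for a vector f16tof32 call on the SPIR-V target should look roughly as follows. This is a hedged sketch: the value names are invented, and the mangled intrinsic name is an assumption based on the usual llvm.spv.* overload naming.

; Illustrative IR for an HLSL f16tof32(uint2) call on the SPIR-V target.
; Each i32 lane is unpacked to <2 x float> via spv_unpackhalf2x16, and
; element 0 (the low 16 bits, matching f16tof32 semantics) is kept.
%e0 = extractelement <2 x i32> %v, i64 0
%u0 = call <2 x float> @llvm.spv.unpackhalf2x16.v2f32(i32 %e0)
%f0 = extractelement <2 x float> %u0, i64 0
%e1 = extractelement <2 x i32> %v, i64 1
%u1 = call <2 x float> @llvm.spv.unpackhalf2x16.v2f32(i32 %e1)
%f1 = extractelement <2 x float> %u1, i64 0
%t0 = insertelement <2 x float> poison, float %f0, i64 0
%r  = insertelement <2 x float> %t0, float %f1, i64 1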

File tree

4 files changed (+53, −105 lines):
clang/lib/CodeGen/CGHLSLBuiltins.cpp
clang/lib/CodeGen/CGHLSLRuntime.h
llvm/include/llvm/IR/IntrinsicsSPIRV.td
llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp

clang/lib/CodeGen/CGHLSLBuiltins.cpp

Lines changed: 50 additions & 14 deletions
@@ -160,6 +160,55 @@ static Value *handleHlslSplitdouble(const CallExpr *E, CodeGenFunction *CGF) {
   return LastInst;
 }
 
+static Value *handleElementwiseF16ToF32(CodeGenFunction &CGF, const CallExpr *E) {
+  Value *Op0 = CGF.EmitScalarExpr(E->getArg(0));
+  QualType Op0Ty = E->getArg(0)->getType();
+  llvm::Type *ResType = CGF.FloatTy;
+  uint64_t NumElements = 0;
+  if (Op0->getType()->isVectorTy()) {
+    NumElements =
+        E->getArg(0)->getType()->castAs<clang::VectorType>()->getNumElements();
+    ResType =
+        llvm::VectorType::get(ResType, ElementCount::getFixed(NumElements));
+  }
+  if (!Op0Ty->hasUnsignedIntegerRepresentation())
+    llvm_unreachable(
+        "f16tof32 operand must have an unsigned int representation");
+
+  if (CGF.CGM.getTriple().isDXIL())
+    return CGF.Builder.CreateIntrinsic(
+        ResType, Intrinsic::dx_legacyf16tof32,
+        ArrayRef<Value *>{Op0}, nullptr, "hlsl.f16tof32");
+
+  if (CGF.CGM.getTriple().isSPIRV()) {
+    // We use the SPIRV UnpackHalf2x16 operation to avoid the need for the
+    // Int16 and Float16 capabilities
+    auto UnpackType = llvm::VectorType::get(CGF.FloatTy, ElementCount::getFixed(2));
+    if (NumElements == 0) {
+      // a scalar input - simply extract the first element of the unpacked vector
+      Value *Unpack = CGF.Builder.CreateIntrinsic(
+          UnpackType, Intrinsic::spv_unpackhalf2x16,
+          ArrayRef<Value *>{Op0});
+      return CGF.Builder.CreateExtractElement(Unpack, (uint64_t)0);
+    } else {
+      // a vector input - build a congruent output vector by iterating through
+      // the input vector calling unpackhalf2x16 for each element
+      Value *Result = PoisonValue::get(ResType);
+      for (uint64_t i = 0; i < NumElements; i++) {
+        Value *InVal = CGF.Builder.CreateExtractElement(Op0, i);
+        Value *Unpack = CGF.Builder.CreateIntrinsic(UnpackType,
+            Intrinsic::spv_unpackhalf2x16, ArrayRef<Value *>{InVal});
+        Value *Res = CGF.Builder.CreateExtractElement(Unpack, (uint64_t)0);
+        Result = CGF.Builder.CreateInsertElement(Result, Res, i);
+      }
+      return Result;
+    }
+  }
+
+  llvm_unreachable(
+      "Intrinsic F16ToF32 not supported by target architecture");
+}
+
 static Value *emitBufferStride(CodeGenFunction *CGF, const Expr *HandleExpr,
                                LValue &Stride) {
   // Figure out the stride of the buffer elements from the handle type.
@@ -561,20 +610,7 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
                                 ArrayRef<Value *>{X}, nullptr, "hlsl.degrees");
   }
   case Builtin::BI__builtin_hlsl_elementwise_f16tof32: {
-    Value *Op0 = EmitScalarExpr(E->getArg(0));
-    llvm::Type *Xty = Op0->getType();
-    llvm::Type *retType = llvm::Type::getFloatTy(this->getLLVMContext());
-    if (Xty->isVectorTy()) {
-      auto *XVecTy = E->getArg(0)->getType()->castAs<VectorType>();
-      retType = llvm::VectorType::get(
-          retType, ElementCount::getFixed(XVecTy->getNumElements()));
-    }
-    if (!E->getArg(0)->getType()->hasUnsignedIntegerRepresentation())
-      llvm_unreachable(
-          "f16tof32 operand must have an unsigned int representation");
-    return Builder.CreateIntrinsic(
-        retType, CGM.getHLSLRuntime().getLegacyF16ToF32Intrinsic(),
-        ArrayRef<Value *>{Op0}, nullptr, "hlsl.f16tof32");
+    return handleElementwiseF16ToF32(*this, E);
   }
   case Builtin::BI__builtin_hlsl_elementwise_frac: {
     Value *Op0 = EmitScalarExpr(E->getArg(0));
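For the scalar path handled by the new helper, the expected IR reduces to a single unpack plus extract, roughly as below (same assumptions as the sketch under the commit message: names are illustrative, the mangled intrinsic name is assumed).

; Illustrative IR for a scalar f16tof32(uint) call on the SPIR-V target.
%u = call <2 x float> @llvm.spv.unpackhalf2x16.v2f32(i32 %x)
%f = extractelement <2 x float> %u, i64 0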

clang/lib/CodeGen/CGHLSLRuntime.h

Lines changed: 0 additions & 1 deletion
@@ -96,7 +96,6 @@ class CGHLSLRuntime {
                                    flattened_thread_id_in_group)
   GENERATE_HLSL_INTRINSIC_FUNCTION(IsInf, isinf)
   GENERATE_HLSL_INTRINSIC_FUNCTION(IsNaN, isnan)
-  GENERATE_HLSL_INTRINSIC_FUNCTION(LegacyF16ToF32, legacyf16tof32)
   GENERATE_HLSL_INTRINSIC_FUNCTION(Lerp, lerp)
   GENERATE_HLSL_INTRINSIC_FUNCTION(Normalize, normalize)
   GENERATE_HLSL_INTRINSIC_FUNCTION(Rsqrt, rsqrt)

llvm/include/llvm/IR/IntrinsicsSPIRV.td

Lines changed: 1 addition & 2 deletions
@@ -199,7 +199,6 @@ def int_spv_resource_nonuniformindex
     : DefaultAttrsIntrinsic<[llvm_anyptr_ty], [generic_ptr_ty],
                             [IntrNoMem, NoUndef<RetIndex>]>;
 
-def int_spv_legacyf16tof32 : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_float_ty>],
-    [llvm_anyint_ty], [IntrNoMem]>;
+def int_spv_unpackhalf2x16 : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [llvm_i32_ty], [IntrNoMem]>;
 
 }
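Under this definition the intrinsic is overloaded only on its float return type, and the Clang codegen above always instantiates it at <2 x float>. The declaration it should produce at the IR level looks roughly like the sketch below; the .v2f32 suffix is an assumption based on standard intrinsic name mangling.

; Assumed textual form of the instantiated intrinsic: one i32 holding two
; packed half values in, a <2 x float> with both unpacked values out.
declare <2 x float> @llvm.spv.unpackhalf2x16.v2f32(i32)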

llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp

Lines changed: 2 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -212,9 +212,6 @@ class SPIRVInstructionSelector : public InstructionSelector {
   bool selectOpIsNan(Register ResVReg, const SPIRVType *ResType,
                      MachineInstr &I) const;
 
-  bool selectF16ToF32(Register ResVReg, const SPIRVType *ResType,
-                      MachineInstr &I) const;
-
   template <bool Signed>
   bool selectDot4AddPacked(Register ResVReg, const SPIRVType *ResType,
                            MachineInstr &I) const;
@@ -3475,8 +3472,8 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
   case Intrinsic::spv_resource_nonuniformindex: {
     return selectResourceNonUniformIndex(ResVReg, ResType, I);
   }
-  case Intrinsic::spv_legacyf16tof32: {
-    return selectF16ToF32(ResVReg, ResType, I);
+  case Intrinsic::spv_unpackhalf2x16: {
+    return selectExtInst(ResVReg, ResType, I, GL::UnpackHalf2x16);
   }
 
   default: {
@@ -3751,89 +3748,6 @@ bool SPIRVInstructionSelector::selectResourceNonUniformIndex(
   return true;
 }
 
-bool SPIRVInstructionSelector::selectF16ToF32(Register ResVReg,
-                                              const SPIRVType *ResType,
-                                              MachineInstr &I) const {
-  assert(I.getNumOperands() == 3);
-  assert(I.getOperand(0).isReg());
-  assert(I.getOperand(2).isReg());
-  Register SrcReg = I.getOperand(2).getReg();
-  const SPIRVType *SrcRegType = GR.getSPIRVTypeForVReg(SrcReg);
-  LLT SrcType = MRI->getType(SrcReg);
-  SPIRVType *SrcEltType = GR.getScalarOrVectorComponentType(SrcRegType);
-  SPIRVType *ResEltType = GR.getScalarOrVectorComponentType(ResType);
-  const TargetRegisterClass *SrcRegClass = GR.getRegClass(SrcEltType);
-  const TargetRegisterClass *ResRegClass = GR.getRegClass(ResEltType);
-  MachineIRBuilder MIRBuilder(I);
-  const SPIRVType *Vec2ResType =
-      GR.getOrCreateSPIRVVectorType(ResEltType, 2, MIRBuilder, false);
-  const TargetRegisterClass *Vec2RegClass = GR.getRegClass(Vec2ResType);
-
-  bool Result = true;
-  MachineBasicBlock &BB = *I.getParent();
-  if (SrcType.isVector()) {
-    // We have a vector of uints to convert elementwise
-    uint64_t ResultSize = GR.getScalarOrVectorComponentCount(ResType);
-    SmallVector<Register> ComponentRegisters;
-    for (uint64_t Idx = 0; Idx < ResultSize; Idx++) {
-      Register EltReg = MRI->createVirtualRegister(SrcRegClass);
-      Register FReg = MRI->createVirtualRegister(ResRegClass);
-      Register Vec2Reg = MRI->createVirtualRegister(Vec2RegClass);
-
-      Result =
-          BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpCompositeExtract))
-              .addDef(EltReg)
-              .addUse(GR.getSPIRVTypeID(SrcEltType))
-              .addUse(SrcReg)
-              .addImm(Idx)
-              .constrainAllUses(TII, TRI, RBI);
-
-      Result &=
-          BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpExtInst))
-              .addDef(Vec2Reg)
-              .addUse(GR.getSPIRVTypeID(Vec2ResType))
-              .addImm(
-                  static_cast<uint32_t>(SPIRV::InstructionSet::GLSL_std_450))
-              .addImm(GL::UnpackHalf2x16)
-              .addUse(EltReg)
-              .constrainAllUses(TII, TRI, RBI);
-
-      Result &=
-          BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpCompositeExtract))
-              .addDef(FReg)
-              .addUse(GR.getSPIRVTypeID(ResEltType))
-              .addUse(Vec2Reg)
-              .addImm(0)
-              .constrainAllUses(TII, TRI, RBI);
-
-      ComponentRegisters.emplace_back(FReg);
-    }
-
-    MachineInstrBuilder MIB =
-        BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpCompositeConstruct))
-            .addDef(ResVReg)
-            .addUse(GR.getSPIRVTypeID(ResType));
-
-    for (Register ComponentReg : ComponentRegisters)
-      MIB.addUse(ComponentReg);
-    return Result && MIB.constrainAllUses(TII, TRI, RBI);
-
-  } else if (SrcType.isScalar()) {
-    // just a scalar uint to convert
-    Register Vec2Reg = MRI->createVirtualRegister(Vec2RegClass);
-    Result &= selectExtInst(Vec2Reg, Vec2ResType, I, GL::UnpackHalf2x16);
-    Result &=
-        BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpCompositeExtract))
-            .addDef(ResVReg)
-            .addUse(GR.getSPIRVTypeID(ResType))
-            .addUse(Vec2Reg)
-            .addImm(0)
-            .constrainAllUses(TII, TRI, RBI);
-    return Result;
-  }
-  return false;
-}
-
 void SPIRVInstructionSelector::decorateUsesAsNonUniform(
     Register &NonUniformReg) const {
   llvm::SmallVector<Register> WorkList = {NonUniformReg};
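With the unrolling moved into Clang, selection of the intrinsic collapses to the generic selectExtInst path. A hedged sketch of the mapping, with the resulting SPIR-V shown in comments (result IDs are illustrative, and the mangled intrinsic name is assumed as before):

; Input to instruction selection, one unrolled element at a time:
%unpack = call <2 x float> @llvm.spv.unpackhalf2x16.v2f32(i32 %packed)
; selectExtInst should emit roughly the following SPIR-V instruction:
;   %unpack = OpExtInst %v2float %glsl_std_450 UnpackHalf2x16 %packed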
