Commit da0a214

Author: Tim Corringham

Changes from review comments
Move the unrolling of UnpackHalf2x16 from the SPIR-V codegen to the Clang IR codegen, and replace the spv_legacyf16tof32 intrinsic with an spv_unpackhalf2x16 intrinsic. This greatly simplifies the SPIR-V codegen at the expense of slightly complicating the Clang IR codegen.
1 parent: d8706d6
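As an illustration of the new Clang-side unrolling, the IR emitted for a vector f16tof32 call on the SPIR-V target should look roughly as follows. This is a hedged sketch: the value names are invented, and the mangled intrinsic name is an assumption based on the usual llvm.spv.* overload naming.

; Illustrative IR for an HLSL f16tof32(uint2) call on the SPIR-V target.
; Each i32 lane is unpacked to <2 x float> via spv_unpackhalf2x16, and
; element 0 (the low 16 bits, matching f16tof32 semantics) is kept.
%e0 = extractelement <2 x i32> %v, i64 0
%u0 = call <2 x float> @llvm.spv.unpackhalf2x16.v2f32(i32 %e0)
%f0 = extractelement <2 x float> %u0, i64 0
%e1 = extractelement <2 x i32> %v, i64 1
%u1 = call <2 x float> @llvm.spv.unpackhalf2x16.v2f32(i32 %e1)
%f1 = extractelement <2 x float> %u1, i64 0
%t0 = insertelement <2 x float> poison, float %f0, i64 0
%r  = insertelement <2 x float> %t0, float %f1, i64 1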

File tree

4 files changed (+53, −105 lines):
clang/lib/CodeGen/CGHLSLBuiltins.cpp
clang/lib/CodeGen/CGHLSLRuntime.h
llvm/include/llvm/IR/IntrinsicsSPIRV.td
llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp

clang/lib/CodeGen/CGHLSLBuiltins.cpp

Lines changed: 50 additions & 14 deletions
@@ -160,6 +160,55 @@ static Value *handleHlslSplitdouble(const CallExpr *E, CodeGenFunction *CGF) {
   return LastInst;
 }
 
+static Value *handleElementwiseF16ToF32(CodeGenFunction &CGF, const CallExpr *E) {
+  Value *Op0 = CGF.EmitScalarExpr(E->getArg(0));
+  QualType Op0Ty = E->getArg(0)->getType();
+  llvm::Type *ResType = CGF.FloatTy;
+  uint64_t NumElements = 0;
+  if (Op0->getType()->isVectorTy()) {
+    NumElements =
+        E->getArg(0)->getType()->castAs<clang::VectorType>()->getNumElements();
+    ResType =
+        llvm::VectorType::get(ResType, ElementCount::getFixed(NumElements));
+  }
+  if (!Op0Ty->hasUnsignedIntegerRepresentation())
+    llvm_unreachable(
+        "f16tof32 operand must have an unsigned int representation");
+
+  if (CGF.CGM.getTriple().isDXIL())
+    return CGF.Builder.CreateIntrinsic(
+        ResType, Intrinsic::dx_legacyf16tof32,
+        ArrayRef<Value *>{Op0}, nullptr, "hlsl.f16tof32");
+
+  if (CGF.CGM.getTriple().isSPIRV()) {
+    // We use the SPIRV UnpackHalf2x16 operation to avoid the need for the
+    // Int16 and Float16 capabilities
+    auto UnpackType = llvm::VectorType::get(CGF.FloatTy, ElementCount::getFixed(2));
+    if (NumElements == 0) {
+      // a scalar input - simply extract the first element of the unpacked vector
+      Value *Unpack = CGF.Builder.CreateIntrinsic(
+          UnpackType, Intrinsic::spv_unpackhalf2x16,
+          ArrayRef<Value *>{Op0});
+      return CGF.Builder.CreateExtractElement(Unpack, (uint64_t)0);
+    } else {
+      // a vector input - build a congruent output vector by iterating through
+      // the input vector calling unpackhalf2x16 for each element
+      Value *Result = PoisonValue::get(ResType);
+      for (uint64_t i = 0; i < NumElements; i++) {
+        Value *InVal = CGF.Builder.CreateExtractElement(Op0, i);
+        Value *Unpack = CGF.Builder.CreateIntrinsic(UnpackType,
+            Intrinsic::spv_unpackhalf2x16, ArrayRef<Value *>{InVal});
+        Value *Res = CGF.Builder.CreateExtractElement(Unpack, (uint64_t)0);
+        Result = CGF.Builder.CreateInsertElement(Result, Res, i);
+      }
+      return Result;
+    }
+  }
+
+  llvm_unreachable(
+      "Intrinsic F16ToF32 not supported by target architecture");
+}
+
 static Value *emitBufferStride(CodeGenFunction *CGF, const Expr *HandleExpr,
                                LValue &Stride) {
   // Figure out the stride of the buffer elements from the handle type.
@@ -561,20 +610,7 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
                                 ArrayRef<Value *>{X}, nullptr, "hlsl.degrees");
   }
   case Builtin::BI__builtin_hlsl_elementwise_f16tof32: {
-    Value *Op0 = EmitScalarExpr(E->getArg(0));
-    llvm::Type *Xty = Op0->getType();
-    llvm::Type *retType = llvm::Type::getFloatTy(this->getLLVMContext());
-    if (Xty->isVectorTy()) {
-      auto *XVecTy = E->getArg(0)->getType()->castAs<VectorType>();
-      retType = llvm::VectorType::get(
-          retType, ElementCount::getFixed(XVecTy->getNumElements()));
-    }
-    if (!E->getArg(0)->getType()->hasUnsignedIntegerRepresentation())
-      llvm_unreachable(
-          "f16tof32 operand must have an unsigned int representation");
-    return Builder.CreateIntrinsic(
-        retType, CGM.getHLSLRuntime().getLegacyF16ToF32Intrinsic(),
-        ArrayRef<Value *>{Op0}, nullptr, "hlsl.f16tof32");
+    return handleElementwiseF16ToF32(*this, E);
   }
   case Builtin::BI__builtin_hlsl_elementwise_frac: {
     Value *Op0 = EmitScalarExpr(E->getArg(0));
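For the scalar path handled by the new helper, the expected IR reduces to a single unpack plus extract, roughly as below (same assumptions as the sketch under the commit message: names are illustrative, the mangled intrinsic name is assumed).

; Illustrative IR for a scalar f16tof32(uint) call on the SPIR-V target.
%u = call <2 x float> @llvm.spv.unpackhalf2x16.v2f32(i32 %x)
%f = extractelement <2 x float> %u, i64 0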

clang/lib/CodeGen/CGHLSLRuntime.h

Lines changed: 0 additions & 1 deletion
@@ -96,7 +96,6 @@ class CGHLSLRuntime {
                                    flattened_thread_id_in_group)
   GENERATE_HLSL_INTRINSIC_FUNCTION(IsInf, isinf)
   GENERATE_HLSL_INTRINSIC_FUNCTION(IsNaN, isnan)
-  GENERATE_HLSL_INTRINSIC_FUNCTION(LegacyF16ToF32, legacyf16tof32)
   GENERATE_HLSL_INTRINSIC_FUNCTION(Lerp, lerp)
   GENERATE_HLSL_INTRINSIC_FUNCTION(Normalize, normalize)
   GENERATE_HLSL_INTRINSIC_FUNCTION(Rsqrt, rsqrt)

llvm/include/llvm/IR/IntrinsicsSPIRV.td

Lines changed: 1 addition & 2 deletions
@@ -199,7 +199,6 @@ def int_spv_resource_nonuniformindex
     : DefaultAttrsIntrinsic<[llvm_anyptr_ty], [generic_ptr_ty],
                             [IntrNoMem, NoUndef<RetIndex>]>;
 
-def int_spv_legacyf16tof32 : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_float_ty>],
-    [llvm_anyint_ty], [IntrNoMem]>;
+def int_spv_unpackhalf2x16 : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [llvm_i32_ty], [IntrNoMem]>;
 
 }
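Under this definition the intrinsic is overloaded only on its float return type, and the Clang codegen above always instantiates it at <2 x float>. The declaration it should produce at the IR level looks roughly like the sketch below; the .v2f32 suffix is an assumption based on standard intrinsic name mangling.

; Assumed textual form of the instantiated intrinsic: one i32 holding two
; packed half values in, a <2 x float> with both unpacked values out.
declare <2 x float> @llvm.spv.unpackhalf2x16.v2f32(i32)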

llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp

Lines changed: 2 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -212,9 +212,6 @@ class SPIRVInstructionSelector : public InstructionSelector {
   bool selectOpIsNan(Register ResVReg, const SPIRVType *ResType,
                      MachineInstr &I) const;
 
-  bool selectF16ToF32(Register ResVReg, const SPIRVType *ResType,
-                      MachineInstr &I) const;
-
   template <bool Signed>
   bool selectDot4AddPacked(Register ResVReg, const SPIRVType *ResType,
                            MachineInstr &I) const;
@@ -3475,8 +3472,8 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
   case Intrinsic::spv_resource_nonuniformindex: {
     return selectResourceNonUniformIndex(ResVReg, ResType, I);
   }
-  case Intrinsic::spv_legacyf16tof32: {
-    return selectF16ToF32(ResVReg, ResType, I);
+  case Intrinsic::spv_unpackhalf2x16: {
+    return selectExtInst(ResVReg, ResType, I, GL::UnpackHalf2x16);
   }
 
   default: {
@@ -3751,89 +3748,6 @@ bool SPIRVInstructionSelector::selectResourceNonUniformIndex(
   return true;
 }
 
-bool SPIRVInstructionSelector::selectF16ToF32(Register ResVReg,
-                                              const SPIRVType *ResType,
-                                              MachineInstr &I) const {
-  assert(I.getNumOperands() == 3);
-  assert(I.getOperand(0).isReg());
-  assert(I.getOperand(2).isReg());
-  Register SrcReg = I.getOperand(2).getReg();
-  const SPIRVType *SrcRegType = GR.getSPIRVTypeForVReg(SrcReg);
-  LLT SrcType = MRI->getType(SrcReg);
-  SPIRVType *SrcEltType = GR.getScalarOrVectorComponentType(SrcRegType);
-  SPIRVType *ResEltType = GR.getScalarOrVectorComponentType(ResType);
-  const TargetRegisterClass *SrcRegClass = GR.getRegClass(SrcEltType);
-  const TargetRegisterClass *ResRegClass = GR.getRegClass(ResEltType);
-  MachineIRBuilder MIRBuilder(I);
-  const SPIRVType *Vec2ResType =
-      GR.getOrCreateSPIRVVectorType(ResEltType, 2, MIRBuilder, false);
-  const TargetRegisterClass *Vec2RegClass = GR.getRegClass(Vec2ResType);
-
-  bool Result = true;
-  MachineBasicBlock &BB = *I.getParent();
-  if (SrcType.isVector()) {
-    // We have a vector of uints to convert elementwise
-    uint64_t ResultSize = GR.getScalarOrVectorComponentCount(ResType);
-    SmallVector<Register> ComponentRegisters;
-    for (uint64_t Idx = 0; Idx < ResultSize; Idx++) {
-      Register EltReg = MRI->createVirtualRegister(SrcRegClass);
-      Register FReg = MRI->createVirtualRegister(ResRegClass);
-      Register Vec2Reg = MRI->createVirtualRegister(Vec2RegClass);
-
-      Result =
-          BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpCompositeExtract))
-              .addDef(EltReg)
-              .addUse(GR.getSPIRVTypeID(SrcEltType))
-              .addUse(SrcReg)
-              .addImm(Idx)
-              .constrainAllUses(TII, TRI, RBI);
-
-      Result &=
-          BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpExtInst))
-              .addDef(Vec2Reg)
-              .addUse(GR.getSPIRVTypeID(Vec2ResType))
-              .addImm(
-                  static_cast<uint32_t>(SPIRV::InstructionSet::GLSL_std_450))
-              .addImm(GL::UnpackHalf2x16)
-              .addUse(EltReg)
-              .constrainAllUses(TII, TRI, RBI);
-
-      Result &=
-          BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpCompositeExtract))
-              .addDef(FReg)
-              .addUse(GR.getSPIRVTypeID(ResEltType))
-              .addUse(Vec2Reg)
-              .addImm(0)
-              .constrainAllUses(TII, TRI, RBI);
-
-      ComponentRegisters.emplace_back(FReg);
-    }
-
-    MachineInstrBuilder MIB =
-        BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpCompositeConstruct))
-            .addDef(ResVReg)
-            .addUse(GR.getSPIRVTypeID(ResType));
-
-    for (Register ComponentReg : ComponentRegisters)
-      MIB.addUse(ComponentReg);
-    return Result && MIB.constrainAllUses(TII, TRI, RBI);
-
-  } else if (SrcType.isScalar()) {
-    // just a scalar uint to convert
-    Register Vec2Reg = MRI->createVirtualRegister(Vec2RegClass);
-    Result &= selectExtInst(Vec2Reg, Vec2ResType, I, GL::UnpackHalf2x16);
-    Result &=
-        BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpCompositeExtract))
-            .addDef(ResVReg)
-            .addUse(GR.getSPIRVTypeID(ResType))
-            .addUse(Vec2Reg)
-            .addImm(0)
-            .constrainAllUses(TII, TRI, RBI);
-    return Result;
-  }
-  return false;
-}
-
 void SPIRVInstructionSelector::decorateUsesAsNonUniform(
     Register &NonUniformReg) const {
   llvm::SmallVector<Register> WorkList = {NonUniformReg};
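With the unrolling moved into Clang, selection of the intrinsic collapses to the generic selectExtInst path. A hedged sketch of the mapping, with the resulting SPIR-V shown in comments (result IDs are illustrative, and the mangled intrinsic name is assumed as before):

; Input to instruction selection, one unrolled element at a time:
%unpack = call <2 x float> @llvm.spv.unpackhalf2x16.v2f32(i32 %packed)
; selectExtInst should emit roughly the following SPIR-V instruction:
;   %unpack = OpExtInst %v2float %glsl_std_450 UnpackHalf2x16 %packed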
