Skip to content

Commit 414b07f

Browse files
committed
[HLSL][SPIRV][DXIL] Implement dot4add_u8packed intrinsic
- create a clang built-in in Builtins.td - link dot4add_u8packed in hlsl_intrinsics.h - add lowering to the SPIR-V backend through expansion of the operation in SPIRVInstructionSelector.cpp, as OpUDot is unavailable before SPIR-V 1.6 - add lowering to the SPIR-V backend using OpUDot when targeting SPIR-V 1.6 or later, or when SPV_KHR_integer_dot_product is enabled - add the dot4add_u8packed intrinsic to IntrinsicsDirectX.td and map it to the DXIL.td op Dot4AddU8Packed - add tests for HLSL intrinsic lowering to the dx/spv intrinsic in dot4add_u8packed.hlsl - add tests for sema checks in dot4add_u8packed-errors.hlsl - add a test of SPIR-V lowering in SPIRV/dot4add_u8packed.ll - add a test of DXIL lowering in DirectX/dot4add_u8packed.ll
1 parent 3cdac06 commit 414b07f

File tree

12 files changed

+169
-8
lines changed

12 files changed

+169
-8
lines changed

clang/include/clang/Basic/Builtins.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4798,6 +4798,12 @@ def HLSLDot4AddI8Packed : LangBuiltin<"HLSL_LANG"> {
47984798
let Prototype = "int(unsigned int, unsigned int, int)";
47994799
}
48004800

4801+
def HLSLDot4AddU8Packed : LangBuiltin<"HLSL_LANG"> {
4802+
let Spellings = ["__builtin_hlsl_dot4add_u8packed"];
4803+
let Attributes = [NoThrow, Const];
4804+
let Prototype = "unsigned int(unsigned int, unsigned int, unsigned int)";
4805+
}
4806+
48014807
def HLSLFrac : LangBuiltin<"HLSL_LANG"> {
48024808
let Spellings = ["__builtin_hlsl_elementwise_frac"];
48034809
let Attributes = [NoThrow, Const];

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18866,6 +18866,16 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
1886618866
/*ReturnType=*/C->getType(), ID, ArrayRef<Value *>{A, B, C}, nullptr,
1886718867
"hlsl.dot4add.i8packed");
1886818868
}
18869+
case Builtin::BI__builtin_hlsl_dot4add_u8packed: {
18870+
Value *A = EmitScalarExpr(E->getArg(0));
18871+
Value *B = EmitScalarExpr(E->getArg(1));
18872+
Value *C = EmitScalarExpr(E->getArg(2));
18873+
18874+
Intrinsic::ID ID = CGM.getHLSLRuntime().getDot4AddU8PackedIntrinsic();
18875+
return Builder.CreateIntrinsic(
18876+
/*ReturnType=*/C->getType(), ID, ArrayRef<Value *>{A, B, C}, nullptr,
18877+
"hlsl.dot4add.u8packed");
18878+
}
1886918879
case Builtin::BI__builtin_hlsl_lerp: {
1887018880
Value *X = EmitScalarExpr(E->getArg(0));
1887118881
Value *Y = EmitScalarExpr(E->getArg(1));

clang/lib/CodeGen/CGHLSLRuntime.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ class CGHLSLRuntime {
9090
GENERATE_HLSL_INTRINSIC_FUNCTION(SDot, sdot)
9191
GENERATE_HLSL_INTRINSIC_FUNCTION(UDot, udot)
9292
GENERATE_HLSL_INTRINSIC_FUNCTION(Dot4AddI8Packed, dot4add_i8packed)
93+
GENERATE_HLSL_INTRINSIC_FUNCTION(Dot4AddU8Packed, dot4add_u8packed)
9394
GENERATE_HLSL_INTRINSIC_FUNCTION(WaveIsFirstLane, wave_is_first_lane)
9495
GENERATE_HLSL_INTRINSIC_FUNCTION(WaveReadLaneAt, wave_readlane)
9596

clang/lib/Headers/hlsl/hlsl_intrinsics.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -942,7 +942,13 @@ uint64_t dot(uint64_t4, uint64_t4);
942942

943943
_HLSL_AVAILABILITY(shadermodel, 6.4)
944944
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot4add_i8packed)
945-
int dot4add_i8packed(unsigned int, unsigned int, int);
945+
int dot4add_i8packed(uint, uint, int);
946+
947+
/// \fn uint dot4add_u8packed(uint A, uint B, uint C)
948+
949+
_HLSL_AVAILABILITY(shadermodel, 6.4)
950+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot4add_u8packed)
951+
uint dot4add_u8packed(uint, uint, uint);
946952

947953
//===----------------------------------------------------------------------===//
948954
// exp builtins
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
2+
// RUN: %clang_cc1 -finclude-default-header -triple \
3+
// RUN: dxil-pc-shadermodel6.3-compute %s -emit-llvm -disable-llvm-passes -o - | \
4+
// RUN: FileCheck %s -DTARGET=dx
5+
// RUN: %clang_cc1 -finclude-default-header -triple \
6+
// RUN: spirv-pc-vulkan-compute %s -emit-llvm -disable-llvm-passes -o - | \
7+
// RUN: FileCheck %s -DTARGET=spv
8+
9+
// Test basic lowering to runtime function call.
10+
11+
// CHECK-LABEL: test
12+
uint test(uint a, uint b, uint c) {
13+
// CHECK: %[[RET:.*]] = call [[TY:i32]] @llvm.[[TARGET]].dot4add.u8packed([[TY]] %[[#]], [[TY]] %[[#]], [[TY]] %[[#]])
14+
// CHECK: ret [[TY]] %[[RET]]
15+
return dot4add_u8packed(a, b, c);
16+
}
17+
18+
// CHECK: declare [[TY]] @llvm.[[TARGET]].dot4add.u8packed([[TY]], [[TY]], [[TY]])
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -emit-llvm-only -disable-llvm-passes -verify
2+
3+
int test_too_few_arg0() {
4+
return __builtin_hlsl_dot4add_u8packed();
5+
// expected-error@-1 {{too few arguments to function call, expected 3, have 0}}
6+
}
7+
8+
int test_too_few_arg1(int p0) {
9+
return __builtin_hlsl_dot4add_u8packed(p0);
10+
// expected-error@-1 {{too few arguments to function call, expected 3, have 1}}
11+
}
12+
13+
int test_too_few_arg2(uint p0) {
14+
return __builtin_hlsl_dot4add_u8packed(p0, p0);
15+
// expected-error@-1 {{too few arguments to function call, expected 3, have 2}}
16+
}
17+
18+
int test_too_many_arg(uint p0) {
19+
return __builtin_hlsl_dot4add_u8packed(p0, p0, p0, p0);
20+
// expected-error@-1 {{too many arguments to function call, expected 3, have 4}}
21+
}
22+
23+
struct S { float f; };
24+
25+
int test_expr_struct_type_check(S p0, uint p1) {
26+
return __builtin_hlsl_dot4add_u8packed(p1, p1, p0);
27+
// expected-error@-1 {{no viable conversion from 'S' to 'unsigned int'}}
28+
}

llvm/include/llvm/IR/IntrinsicsDirectX.td

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,8 @@ def int_dx_udot :
6969
DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
7070
[llvm_anyint_ty, LLVMScalarOrSameVectorWidth<0, LLVMVectorElementType<0>>],
7171
[IntrNoMem, Commutative] >;
72-
def int_dx_dot4add_i8packed : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
72+
def int_dx_dot4add_i8packed : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
73+
def int_dx_dot4add_u8packed : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
7374

7475
def int_dx_frac : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
7576
def int_dx_degrees : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty], [IntrNoMem]>;

llvm/include/llvm/IR/IntrinsicsSPIRV.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ let TargetPrefix = "spv" in {
8484
[llvm_anyint_ty, LLVMScalarOrSameVectorWidth<0, LLVMVectorElementType<0>>],
8585
[IntrNoMem, Commutative] >;
8686
def int_spv_dot4add_i8packed : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
87+
def int_spv_dot4add_u8packed : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
8788
def int_spv_wave_is_first_lane : DefaultAttrsIntrinsic<[llvm_i1_ty], [], [IntrConvergent]>;
8889
def int_spv_wave_readlane : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>, llvm_i32_ty], [IntrConvergent, IntrNoMem]>;
8990
def int_spv_sign : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_any_ty], [IntrNoMem]>;

llvm/lib/Target/DirectX/DXIL.td

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -798,6 +798,16 @@ def Dot4AddI8Packed : DXILOp<163, dot4AddPacked> {
798798
let stages = [Stages<DXIL1_0, [all_stages]>];
799799
}
800800

801+
def Dot4AddU8Packed : DXILOp<164, dot4AddPacked> {
802+
let Doc = "unsigned dot product of 4 x i8 vectors packed into i32, with "
803+
"accumulate to i32";
804+
let LLVMIntrinsic = int_dx_dot4add_u8packed;
805+
let arguments = [Int32Ty, Int32Ty, Int32Ty];
806+
let result = Int32Ty;
807+
let attributes = [Attributes<DXIL1_0, [ReadNone]>];
808+
let stages = [Stages<DXIL1_0, [all_stages]>];
809+
}
810+
801811
def AnnotateHandle : DXILOp<216, annotateHandle> {
802812
let Doc = "annotate handle with resource properties";
803813
let arguments = [HandleTy, ResPropsTy];

llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1743,7 +1743,7 @@ bool SPIRVInstructionSelector::selectDot4AddPackedExpansion(
17431743
assert(I.getOperand(4).isReg());
17441744
MachineBasicBlock &BB = *I.getParent();
17451745

1746-
bool Result = false;
1746+
bool Result = true;
17471747

17481748
// Acc = C
17491749
Register Acc = I.getOperand(4).getReg();
@@ -1755,7 +1755,7 @@ bool SPIRVInstructionSelector::selectDot4AddPackedExpansion(
17551755
for (unsigned i = 0; i < 4; i++) {
17561756
// A[i]
17571757
Register AElt = MRI->createVirtualRegister(&SPIRV::IDRegClass);
1758-
Result |= BuildMI(BB, I, I.getDebugLoc(), TII.get(ExtractOp))
1758+
Result &= BuildMI(BB, I, I.getDebugLoc(), TII.get(ExtractOp))
17591759
.addDef(AElt)
17601760
.addUse(GR.getSPIRVTypeID(ResType))
17611761
.addUse(I.getOperand(2).getReg())
@@ -1765,7 +1765,7 @@ bool SPIRVInstructionSelector::selectDot4AddPackedExpansion(
17651765

17661766
// B[i]
17671767
Register BElt = MRI->createVirtualRegister(&SPIRV::IDRegClass);
1768-
Result |= BuildMI(BB, I, I.getDebugLoc(), TII.get(ExtractOp))
1768+
Result &= BuildMI(BB, I, I.getDebugLoc(), TII.get(ExtractOp))
17691769
.addDef(BElt)
17701770
.addUse(GR.getSPIRVTypeID(ResType))
17711771
.addUse(I.getOperand(3).getReg())
@@ -1775,7 +1775,7 @@ bool SPIRVInstructionSelector::selectDot4AddPackedExpansion(
17751775

17761776
// A[i] * B[i]
17771777
Register Mul = MRI->createVirtualRegister(&SPIRV::IDRegClass);
1778-
Result |= BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpIMulS))
1778+
Result &= BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpIMulS))
17791779
.addDef(Mul)
17801780
.addUse(GR.getSPIRVTypeID(ResType))
17811781
.addUse(AElt)
@@ -1784,7 +1784,7 @@ bool SPIRVInstructionSelector::selectDot4AddPackedExpansion(
17841784

17851785
// Discard 24 highest-bits so that stored i32 register is i8 equivalent
17861786
Register MaskMul = MRI->createVirtualRegister(&SPIRV::IDRegClass);
1787-
Result |= BuildMI(BB, I, I.getDebugLoc(), TII.get(ExtractOp))
1787+
Result &= BuildMI(BB, I, I.getDebugLoc(), TII.get(ExtractOp))
17881788
.addDef(MaskMul)
17891789
.addUse(GR.getSPIRVTypeID(ResType))
17901790
.addUse(Mul)
@@ -1795,7 +1795,7 @@ bool SPIRVInstructionSelector::selectDot4AddPackedExpansion(
17951795
// Acc = Acc + A[i] * B[i]
17961796
Register Sum =
17971797
i < 3 ? MRI->createVirtualRegister(&SPIRV::IDRegClass) : ResVReg;
1798-
Result |= BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpIAddS))
1798+
Result &= BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpIAddS))
17991799
.addDef(Sum)
18001800
.addUse(GR.getSPIRVTypeID(ResType))
18011801
.addUse(Acc)
@@ -2646,6 +2646,11 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
26462646
STI.isAtLeastSPIRVVer(VersionTuple(1, 6)))
26472647
return selectDot4AddPacked<true>(ResVReg, ResType, I);
26482648
return selectDot4AddPackedExpansion<true>(ResVReg, ResType, I);
2649+
case Intrinsic::spv_dot4add_u8packed:
2650+
if (STI.canUseExtension(SPIRV::Extension::SPV_KHR_integer_dot_product) ||
2651+
STI.isAtLeastSPIRVVer(VersionTuple(1, 6)))
2652+
return selectDot4AddPacked<false>(ResVReg, ResType, I);
2653+
return selectDot4AddPackedExpansion<false>(ResVReg, ResType, I);
26492654
case Intrinsic::spv_all:
26502655
return selectAll(ResVReg, ResType, I);
26512656
case Intrinsic::spv_any:

0 commit comments

Comments
 (0)