Skip to content

Commit 3cdac06

Browse files
authored
[HLSL][SPIRV][DXIL] Implement dot4add_i8packed intrinsic (#113623)
- create a clang built-in in Builtins.td - link dot4add_i8packed in hlsl_intrinsics.h - add lowering to the SPIR-V backend in SPIRVInstructionSelector.cpp through expansion of the operation, as OpSDot is not available before SPIR-V 1.6 - add lowering to the SPIR-V backend using OpSDot in applicable SPIR-V versions or if SPV_KHR_integer_dot_product is enabled - add the dot4add_i8packed intrinsic to IntrinsicsDirectX.td and map it to the DXIL op Dot4AddI8Packed in DXIL.td - add tests for HLSL intrinsic lowering to the dx/spv intrinsics in dot4add_i8packed.hlsl - add tests for sema checks in dot4add_i8packed-errors.hlsl - add a test of SPIR-V lowering in SPIRV/dot4add_i8packed.ll - add a test of DXIL lowering in DirectX/dot4add_i8packed.ll Resolves #99220
1 parent 04aaa35 commit 3cdac06

File tree

18 files changed

+347
-27
lines changed

18 files changed

+347
-27
lines changed

clang/include/clang/Basic/Builtins.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4792,6 +4792,12 @@ def HLSLDotProduct : LangBuiltin<"HLSL_LANG"> {
47924792
let Prototype = "void(...)";
47934793
}
47944794

4795+
def HLSLDot4AddI8Packed : LangBuiltin<"HLSL_LANG"> {
4796+
let Spellings = ["__builtin_hlsl_dot4add_i8packed"];
4797+
let Attributes = [NoThrow, Const];
4798+
let Prototype = "int(unsigned int, unsigned int, int)";
4799+
}
4800+
47954801
def HLSLFrac : LangBuiltin<"HLSL_LANG"> {
47964802
let Spellings = ["__builtin_hlsl_elementwise_frac"];
47974803
let Attributes = [NoThrow, Const];

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18855,7 +18855,17 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
1885518855
/*ReturnType=*/T0->getScalarType(),
1885618856
getDotProductIntrinsic(CGM.getHLSLRuntime(), VecTy0->getElementType()),
1885718857
ArrayRef<Value *>{Op0, Op1}, nullptr, "hlsl.dot");
18858-
} break;
18858+
}
18859+
case Builtin::BI__builtin_hlsl_dot4add_i8packed: {
18860+
Value *A = EmitScalarExpr(E->getArg(0));
18861+
Value *B = EmitScalarExpr(E->getArg(1));
18862+
Value *C = EmitScalarExpr(E->getArg(2));
18863+
18864+
Intrinsic::ID ID = CGM.getHLSLRuntime().getDot4AddI8PackedIntrinsic();
18865+
return Builder.CreateIntrinsic(
18866+
/*ReturnType=*/C->getType(), ID, ArrayRef<Value *>{A, B, C}, nullptr,
18867+
"hlsl.dot4add.i8packed");
18868+
}
1885918869
case Builtin::BI__builtin_hlsl_lerp: {
1886018870
Value *X = EmitScalarExpr(E->getArg(0));
1886118871
Value *Y = EmitScalarExpr(E->getArg(1));

clang/lib/CodeGen/CGHLSLRuntime.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ class CGHLSLRuntime {
8989
GENERATE_HLSL_INTRINSIC_FUNCTION(FDot, fdot)
9090
GENERATE_HLSL_INTRINSIC_FUNCTION(SDot, sdot)
9191
GENERATE_HLSL_INTRINSIC_FUNCTION(UDot, udot)
92+
GENERATE_HLSL_INTRINSIC_FUNCTION(Dot4AddI8Packed, dot4add_i8packed)
9293
GENERATE_HLSL_INTRINSIC_FUNCTION(WaveIsFirstLane, wave_is_first_lane)
9394
GENERATE_HLSL_INTRINSIC_FUNCTION(WaveReadLaneAt, wave_readlane)
9495

clang/lib/Headers/hlsl/hlsl_intrinsics.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -934,6 +934,16 @@ uint64_t dot(uint64_t3, uint64_t3);
934934
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot)
935935
uint64_t dot(uint64_t4, uint64_t4);
936936

937+
//===----------------------------------------------------------------------===//
938+
// dot4add builtins
939+
//===----------------------------------------------------------------------===//
940+
941+
/// \fn int dot4add_i8packed(uint A, uint B, int C)
942+
943+
_HLSL_AVAILABILITY(shadermodel, 6.4)
944+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_dot4add_i8packed)
945+
int dot4add_i8packed(unsigned int, unsigned int, int);
946+
937947
//===----------------------------------------------------------------------===//
938948
// exp builtins
939949
//===----------------------------------------------------------------------===//
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
// RUN: %clang_cc1 -finclude-default-header -triple \
2+
// RUN: dxil-pc-shadermodel6.3-compute %s -emit-llvm -disable-llvm-passes -o - | \
3+
// RUN: FileCheck %s -DTARGET=dx
4+
// RUN: %clang_cc1 -finclude-default-header -triple \
5+
// RUN: spirv-pc-vulkan-compute %s -emit-llvm -disable-llvm-passes -o - | \
6+
// RUN: FileCheck %s -DTARGET=spv
7+
8+
// Test basic lowering to runtime function call.
9+
10+
// CHECK-LABEL: test
11+
int test(uint a, uint b, int c) {
12+
// CHECK: %[[RET:.*]] = call [[TY:i32]] @llvm.[[TARGET]].dot4add.i8packed([[TY]] %[[#]], [[TY]] %[[#]], [[TY]] %[[#]])
13+
// CHECK: ret [[TY]] %[[RET]]
14+
return dot4add_i8packed(a, b, c);
15+
}
16+
17+
// CHECK: declare [[TY]] @llvm.[[TARGET]].dot4add.i8packed([[TY]], [[TY]], [[TY]])
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -emit-llvm-only -disable-llvm-passes -verify
2+
3+
int test_too_few_arg0() {
4+
return __builtin_hlsl_dot4add_i8packed();
5+
// expected-error@-1 {{too few arguments to function call, expected 3, have 0}}
6+
}
7+
8+
int test_too_few_arg1(int p0) {
9+
return __builtin_hlsl_dot4add_i8packed(p0);
10+
// expected-error@-1 {{too few arguments to function call, expected 3, have 1}}
11+
}
12+
13+
int test_too_few_arg2(int p0) {
14+
return __builtin_hlsl_dot4add_i8packed(p0, p0);
15+
// expected-error@-1 {{too few arguments to function call, expected 3, have 2}}
16+
}
17+
18+
int test_too_many_arg(int p0) {
19+
return __builtin_hlsl_dot4add_i8packed(p0, p0, p0, p0);
20+
// expected-error@-1 {{too many arguments to function call, expected 3, have 4}}
21+
}
22+
23+
struct S { float f; };
24+
25+
int test_expr_struct_type_check(S p0, int p1) {
26+
return __builtin_hlsl_dot4add_i8packed(p0, p1, p1);
27+
// expected-error@-1 {{no viable conversion from 'S' to 'unsigned int'}}
28+
}

llvm/docs/SPIRVUsage.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,8 @@ list of supported SPIR-V extensions, sorted alphabetically by their extension na
179179
- Provides additional information to a compiler, similar to the llvm.assume and llvm.expect intrinsics.
180180
* - ``SPV_KHR_float_controls``
181181
- Provides new execution modes to control floating-point computations by overriding an implementation’s default behavior for rounding modes, denormals, signed zero, and infinities.
182+
* - ``SPV_KHR_integer_dot_product``
183+
- Adds instructions for dot product operations on integer vectors with optional accumulation. Integer vectors include 4-component vectors of 8-bit integers and 4-component vectors of 8-bit integers packed into 32-bit integers.
182184
* - ``SPV_KHR_linkonce_odr``
183185
- Allows the use of the LinkOnceODR linkage type, which lets a function or global variable be merged with other functions or global variables of the same name when linkage occurs.
184186
* - ``SPV_KHR_no_integer_wrap_decoration``

llvm/include/llvm/IR/IntrinsicsDirectX.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ def int_dx_udot :
6969
DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
7070
[llvm_anyint_ty, LLVMScalarOrSameVectorWidth<0, LLVMVectorElementType<0>>],
7171
[IntrNoMem, Commutative] >;
72+
def int_dx_dot4add_i8packed : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
7273

7374
def int_dx_frac : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
7475
def int_dx_degrees : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty], [IntrNoMem]>;

llvm/include/llvm/IR/IntrinsicsSPIRV.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ let TargetPrefix = "spv" in {
8383
DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
8484
[llvm_anyint_ty, LLVMScalarOrSameVectorWidth<0, LLVMVectorElementType<0>>],
8585
[IntrNoMem, Commutative] >;
86+
def int_spv_dot4add_i8packed : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
8687
def int_spv_wave_is_first_lane : DefaultAttrsIntrinsic<[llvm_i1_ty], [], [IntrConvergent]>;
8788
def int_spv_wave_readlane : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>, llvm_i32_ty], [IntrConvergent, IntrNoMem]>;
8889
def int_spv_sign : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_any_ty], [IntrNoMem]>;

llvm/lib/Target/DirectX/DXIL.td

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -788,6 +788,16 @@ def SplitDouble : DXILOp<102, splitDouble> {
788788
let attributes = [Attributes<DXIL1_0, [ReadNone]>];
789789
}
790790

791+
def Dot4AddI8Packed : DXILOp<163, dot4AddPacked> {
792+
let Doc = "signed dot product of 4 x i8 vectors packed into i32, with "
793+
"accumulate to i32";
794+
let LLVMIntrinsic = int_dx_dot4add_i8packed;
795+
let arguments = [Int32Ty, Int32Ty, Int32Ty];
796+
let result = Int32Ty;
797+
let attributes = [Attributes<DXIL1_0, [ReadNone]>];
798+
let stages = [Stages<DXIL1_0, [all_stages]>];
799+
}
800+
791801
def AnnotateHandle : DXILOp<216, annotateHandle> {
792802
let Doc = "annotate handle with resource properties";
793803
let arguments = [HandleTy, ResPropsTy];

0 commit comments

Comments
 (0)