Skip to content

Commit 09a8c3b

Browse files
authored
Merge pull request #5 from jimblandy/fx-dot4add-packed-return-type
Fix the return types of `dot4add_i8packed` and `dot4add_u8packed`.
2 parents c75bb05 + f895647 commit 09a8c3b

File tree

3 files changed

+39
-5
lines changed

3 files changed

+39
-5
lines changed

tools/clang/lib/Sema/SemaHLSL.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6216,8 +6216,8 @@ bool HLSLExternalSource::MatchArguments(
62166216
(iArg != retArgIdx && retTypeIdx == pIntrinsicArg->uComponentTypeId);
62176217
// For literal args which don't affect the return type, find the concrete type.
62186218
// For literal args that affect the return type,
6219-
// TryEvalIntrinsic in CGHLSLMS.cpp will take care of cases
6220-
// where all argumentss are literal.
6219+
// TryEvalIntrinsic in CGHLSLMSFinishCodeGen.cpp will take care of
6220+
// cases where all arguments are literal.
62216221
// CombineBasicTypes will cover the remaining cases.
62226222
if (!affectRetType) {
62236223
TypeInfoEltKind =
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
// RUN: %dxc /enable-16bit-types /T cs_6_8 %s | FileCheck %s
2+
3+
// Compiling this HLSL would fail this assertion in TranslateDot4AddPacked:
4+
//
5+
// DXASSERT(
6+
// !accTy->isVectorTy() && accTy->isIntegerTy(32),
7+
// "otherwise, unexpected vector support in high level intrinsic template");
8+
//
9+
// The bug was fixed by changing the declarations of dot4add_i8packed and
10+
// dot4add_u8packed in utils/hct/gen_intrin_main.txt to simply write
11+
// out their argument and return types, rather than using the $typeN
12+
// reference syntax.
13+
14+
// CHECK: call i32 @dx.op.dot4AddPacked.i32{{.*}}Dot4AddI8Packed(acc,a,b)
15+
// CHECK: call i32 @dx.op.dot4AddPacked.i32{{.*}}Dot4AddU8Packed(acc,a,b)
16+
// CHECK: call float @dx.op.dot2AddHalf.f32{{.*}}Dot2AddHalf(acc,ax,ay,bx,by)
17+
18+
RWByteAddressBuffer buf;
19+
20+
[numthreads(1, 1, 1)]
21+
void main()
22+
{
23+
int a = dot4add_i8packed(0, 0, 0);
24+
int b = dot4add_i8packed(0, 0, a);
25+
buf.Store<int>(0, b);
26+
27+
uint c = dot4add_u8packed(0, 0, 0);
28+
uint d = dot4add_u8packed(0, 0, c);
29+
buf.Store<uint>(4, d);
30+
31+
float e = dot2add(half2(0,0), half2(0,0), 1.0);
32+
float f = dot2add(half2(0,0), half2(0,0), e);
33+
buf.Store<float>(8, f);
34+
}

utils/hct/gen_intrin_main.txt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -336,9 +336,9 @@ float<4,3> [[rn]] ObjectToWorld4x3();
336336
float<4,3> [[rn]] WorldToObject4x3();
337337

338338
// Packed dot products with accumulate:
339-
$type3 [[rn]] dot4add_u8packed(in uint a, in $type1 b, in uint c);
340-
$type3 [[rn]] dot4add_i8packed(in uint a, in $type1 b, in int c);
341-
$type3 [[rn]] dot2add(in float16_t<2> a, in $type1 b, in float c);
339+
uint [[rn]] dot4add_u8packed(in uint a, in $type1 b, in uint c);
340+
int [[rn]] dot4add_i8packed(in uint a, in $type1 b, in int c);
341+
float [[rn]] dot2add(in float16_t<2> a, in $type1 b, in float c);
342342

343343
// Unpacking intrinsics
344344
int16_t<4> [[rn]] unpack_s8s16(in p32i8 pk);

0 commit comments

Comments
 (0)