Skip to content

Commit 1e194fd

Browse files
committed
Implement AddUint64 HLSL codegen and sema
1 parent 95d993a commit 1e194fd

File tree

7 files changed

+239
-1
lines changed

7 files changed

+239
-1
lines changed

clang/include/clang/Basic/Builtins.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4753,6 +4753,12 @@ def GetDeviceSideMangledName : LangBuiltin<"CUDA_LANG"> {
47534753
}
47544754

47554755
// HLSL
4756+
// Builtin spelled __builtin_hlsl_adduint64. The prototype is the generic
// "void(...)", so the argument count, argument types and the call's result
// type are handled by the adduint64 case in
// SemaHLSL::CheckBuiltinFunctionCall rather than by this declaration.
def HLSLAddUint64: LangBuiltin<"HLSL_LANG"> {
4757+
let Spellings = ["__builtin_hlsl_adduint64"];
4758+
let Attributes = [NoThrow, Const];
4759+
let Prototype = "void(...)";
4760+
}
4761+
47564762
def HLSLResourceGetPointer : LangBuiltin<"HLSL_LANG"> {
47574763
let Spellings = ["__builtin_hlsl_resource_getpointer"];
47584764
let Attributes = [NoThrow];

clang/include/clang/Basic/DiagnosticSemaKinds.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10623,7 +10623,7 @@ def err_second_argument_to_cwsc_not_pointer : Error<
1062310623
"second argument to __builtin_call_with_static_chain must be of pointer type">;
1062410624

1062510625
def err_vector_incorrect_num_elements : Error<
10626-
"%select{too many|too few}0 elements in vector %select{initialization|operand}3 (expected %1 elements, have %2)">;
10626+
"%select{too many|too few|incorrect number of}0 elements in vector %select{initialization|operand}3 (expected %1 elements, have %2)">;
1062710627
def err_altivec_empty_initializer : Error<"expected initializer">;
1062810628

1062910629
def err_invalid_neon_type_code : Error<

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19105,6 +19105,51 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
1910519105
return nullptr;
1910619106

1910719107
switch (BuiltinID) {
19108+
// AddUint64: each consecutive (low, high) pair of 32-bit elements in the two
// vector operands is treated as one 64-bit value. The pairs are added
// element-pair by element-pair: the low words are summed with
// uadd_with_overflow and the resulting carry is added into the sum of the
// high words. The result has the same vector type as the first operand.
case Builtin::BI__builtin_hlsl_adduint64: {
19109+
Value *OpA = EmitScalarExpr(E->getArg(0));
19110+
Value *OpB = EmitScalarExpr(E->getArg(1));
19111+
assert(E->getArg(0)->getType()->hasIntegerRepresentation() &&
19112+
E->getArg(1)->getType()->hasIntegerRepresentation() &&
19113+
"AddUint64 operands must have an integer representation");
19114+
assert(((E->getArg(0)->getType()->castAs<VectorType>()->getNumElements() ==
19115+
2 &&
19116+
E->getArg(1)->getType()->castAs<VectorType>()->getNumElements() ==
19117+
2) ||
19118+
(E->getArg(0)->getType()->castAs<VectorType>()->getNumElements() ==
19119+
4 &&
19120+
E->getArg(1)->getType()->castAs<VectorType>()->getNumElements() ==
19121+
4)) &&
19122+
"input vectors must have 2 or 4 elements each");
19123+
19124+
// Start from poison: the loop below writes elements 2*i and 2*i+1 for every
// pair, so with 2 or 4 elements (asserted above) every lane is overwritten.
llvm::Value *Result = PoisonValue::get(OpA->getType());
19125+
uint64_t NumElements =
19126+
E->getArg(0)->getType()->castAs<VectorType>()->getNumElements();
19127+
// One iteration per (low, high) pair.
for (uint64_t i = 0; i < NumElements / 2; ++i) {
19128+
19129+
// Obtain low and high words of inputs A and B
19130+
llvm::Value *LowA = Builder.CreateExtractElement(OpA, 2 * i + 0);
19131+
llvm::Value *HighA = Builder.CreateExtractElement(OpA, 2 * i + 1);
19132+
llvm::Value *LowB = Builder.CreateExtractElement(OpB, 2 * i + 0);
19133+
llvm::Value *HighB = Builder.CreateExtractElement(OpB, 2 * i + 1);
19134+
19135+
// Use a uadd_with_overflow to compute the sum of low words and obtain a
19136+
// carry value
19137+
llvm::Value *Carry;
19138+
llvm::Value *LowSum = EmitOverflowIntrinsic(
19139+
*this, llvm::Intrinsic::uadd_with_overflow, LowA, LowB, Carry);
// Widen the carry to the high word's type so it can be added to it.
19140+
llvm::Value *ZExtCarry = Builder.CreateZExt(Carry, HighA->getType());
19141+
19142+
// Sum the high words and the carry
19143+
llvm::Value *HighSum = Builder.CreateAdd(HighA, HighB);
19144+
llvm::Value *HighSumPlusCarry = Builder.CreateAdd(HighSum, ZExtCarry);
19145+
19146+
// Insert the low and high word sums into the result vector
19147+
Result = Builder.CreateInsertElement(Result, LowSum, 2 * i + 0);
19148+
Result = Builder.CreateInsertElement(Result, HighSumPlusCarry, 2 * i + 1,
19149+
"hlsl.AddUint64");
19150+
}
19151+
return Result;
19152+
}
1910819153
case Builtin::BI__builtin_hlsl_resource_getpointer: {
1910919154
Value *HandleOp = EmitScalarExpr(E->getArg(0));
1911019155
Value *IndexOp = EmitScalarExpr(E->getArg(1));

clang/lib/Headers/hlsl/hlsl_intrinsics.h

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,27 @@ _HLSL_BUILTIN_ALIAS(__builtin_elementwise_acos)
138138
float4 acos(float4);
139139

140140
//===----------------------------------------------------------------------===//
141+
// AddUint64 builtins
142+
//===----------------------------------------------------------------------===//
143+
144+
/// \fn T AddUint64(T a, T b)
145+
/// \brief Implements unsigned 64-bit integer addition using pairs of unsigned
146+
/// 32-bit integers.
147+
/// \param a [in] The first unsigned 32-bit integer pair(s)
148+
/// \param b [in] The second unsigned 32-bit integer pair(s)
149+
///
150+
/// This function takes one or two pairs (low, high) of unsigned 32-bit integer
151+
/// values and returns pairs (low, high) of unsigned 32-bit integer
152+
/// values representing the result of unsigned 64-bit integer addition.
153+
154+
_HLSL_AVAILABILITY(shadermodel, 6.0)
155+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_adduint64)
156+
uint32_t2 AddUint64(uint32_t2, uint32_t2);
157+
_HLSL_AVAILABILITY(shadermodel, 6.0)
158+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_adduint64)
159+
uint32_t4 AddUint64(uint32_t4, uint32_t4);
160+
161+
//===----------------------------------------------------------------------===//
141162
// all builtins
142163
//===----------------------------------------------------------------------===//
143164

clang/lib/Sema/SemaHLSL.cpp

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "clang/AST/Type.h"
2222
#include "clang/AST/TypeLoc.h"
2323
#include "clang/Basic/Builtins.h"
24+
#include "clang/Basic/DiagnosticParse.h"
2425
#include "clang/Basic/DiagnosticSema.h"
2526
#include "clang/Basic/IdentifierTable.h"
2627
#include "clang/Basic/LLVM.h"
@@ -2023,6 +2024,18 @@ static bool CheckAllArgsHaveFloatRepresentation(Sema *S, CallExpr *TheCall) {
20232024
checkAllFloatTypes);
20242025
}
20252026

2027+
// Runs CheckAllArgTypesAreCorrect over the call's arguments with a predicate
// that looks at each argument's scalar type (the element type for vectors,
// the type itself otherwise). The result of that helper is returned
// unchanged; the adduint64 case in CheckBuiltinFunctionCall treats a true
// result as a failed check.
// NOTE(review): UnsignedIntTy is presumably the expected type named in the
// helper's diagnostic -- confirm against CheckAllArgTypesAreCorrect.
static bool CheckUnsignedIntegerRepresentation(Sema *S, CallExpr *TheCall) {
2028+
// Predicate is inverted: it returns true when the type is NOT acceptable,
// i.e. when the scalar/element type is not an unsigned integer type.
auto checkUnsignedInteger = [](clang::QualType PassedType) -> bool {
2029+
clang::QualType BaseType =
2030+
PassedType->isVectorType()
2031+
? PassedType->getAs<clang::VectorType>()->getElementType()
2032+
: PassedType;
2033+
return !BaseType->isUnsignedIntegerType();
2034+
};
2035+
return CheckAllArgTypesAreCorrect(S, TheCall, S->Context.UnsignedIntTy,
2036+
checkUnsignedInteger);
2037+
}
2038+
20262039
static bool CheckFloatOrHalfRepresentations(Sema *S, CallExpr *TheCall) {
20272040
auto checkFloatorHalf = [](clang::QualType PassedType) -> bool {
20282041
clang::QualType BaseType =
@@ -2214,6 +2227,42 @@ static bool CheckResourceHandle(
22142227
// returning an ExprError
22152228
bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
22162229
switch (BuiltinID) {
2230+
// __builtin_hlsl_adduint64: requires exactly two arguments that pass
// CheckVectorElementCallArgs and CheckUnsignedIntegerRepresentation, and that
// are vectors of 2 or 4 elements. On success the call's type is set to the
// type of the first argument. Returning true reports a failed check.
case Builtin::BI__builtin_hlsl_adduint64: {
2231+
if (SemaRef.checkArgCount(TheCall, 2))
2232+
return true;
2233+
if (CheckVectorElementCallArgs(&SemaRef, TheCall))
2234+
return true;
2235+
if (CheckUnsignedIntegerRepresentation(&SemaRef, TheCall))
2236+
return true;
2237+
2238+
// CheckVectorElementCallArgs(...) guarantees both args are the same type.
2239+
assert(TheCall->getArg(0)->getType() == TheCall->getArg(1)->getType() &&
2240+
"Both args must be of the same type");
2241+
2242+
// ensure both args are vectors
// (only arg 0 is inspected; both args have the same type, as asserted above)
2243+
auto *VTy = TheCall->getArg(0)->getType()->getAs<VectorType>();
2244+
if (!VTy) {
2245+
// Diagnostic arguments, in order: 2 selects "incorrect number of",
// "2 or 4" is the expected element count, 1 is the element count reported
// for a non-vector argument, and the final 1 selects "operand".
SemaRef.Diag(TheCall->getBeginLoc(),
2246+
diag::err_vector_incorrect_num_elements)
2247+
<< 2 << "2 or 4" << 1 << /*operand*/ 1;
2248+
return true;
2249+
}
2250+
2251+
// ensure both args have 2 elements, or both args have 4 elements
2252+
int NumElementsArg1 = VTy->getNumElements();
2253+
if (NumElementsArg1 != 2 && NumElementsArg1 != 4) {
2254+
// Same diagnostic as above, reporting the actual element count.
SemaRef.Diag(TheCall->getBeginLoc(),
2255+
diag::err_vector_incorrect_num_elements)
2256+
<< 2 << "2 or 4" << NumElementsArg1 << /*operand*/ 1;
2257+
return true;
2258+
}
2259+
2260+
ExprResult A = TheCall->getArg(0);
2261+
QualType ArgTyA = A.get()->getType();
2262+
// return type is the same as the input type
2263+
TheCall->setType(ArgTyA);
2264+
break;
2265+
}
22172266
case Builtin::BI__builtin_hlsl_resource_getpointer: {
22182267
if (SemaRef.checkArgCount(TheCall, 2) ||
22192268
CheckResourceHandle(&SemaRef, TheCall, 0) ||
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
2+
// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library %s \
3+
// RUN: -emit-llvm -disable-llvm-passes -o - | \
4+
// RUN: FileCheck %s --check-prefixes=CHECK
5+
6+
7+
// CHECK-LABEL: define noundef <2 x i32> @_Z20test_AddUint64_uint2Dv2_jS_(
8+
// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] {
9+
// CHECK-NEXT: [[ENTRY:.*:]]
10+
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x i32>, align 8
11+
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x i32>, align 8
12+
// CHECK-NEXT: store <2 x i32> [[A]], ptr [[A_ADDR]], align 8
13+
// CHECK-NEXT: store <2 x i32> [[B]], ptr [[B_ADDR]], align 8
14+
// CHECK-NEXT: [[A_LOAD:%.*]] = load <2 x i32>, ptr [[A_ADDR]], align 8
15+
// CHECK-NEXT: [[B_LOAD:%.*]] = load <2 x i32>, ptr [[B_ADDR]], align 8
16+
// CHECK-NEXT: [[LowA:%.*]] = extractelement <2 x i32> [[A_LOAD]], i64 0
17+
// CHECK-NEXT: [[HighA:%.*]] = extractelement <2 x i32> [[A_LOAD]], i64 1
18+
// CHECK-NEXT: [[LowB:%.*]] = extractelement <2 x i32> [[B_LOAD]], i64 0
19+
// CHECK-NEXT: [[HighB:%.*]] = extractelement <2 x i32> [[B_LOAD]], i64 1
20+
// CHECK-NEXT: [[UAddc:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[LowA]], i32 [[LowB]])
21+
// CHECK-NEXT: [[Carry:%.*]] = extractvalue { i32, i1 } [[UAddc]], 1
22+
// CHECK-NEXT: [[LowSum:%.*]] = extractvalue { i32, i1 } [[UAddc]], 0
23+
// CHECK-NEXT: [[CarryZExt:%.*]] = zext i1 [[Carry]] to i32
24+
// CHECK-NEXT: [[HighSum:%.*]] = add i32 [[HighA]], [[HighB]]
25+
// CHECK-NEXT: [[HighSumPlusCarry:%.*]] = add i32 [[HighSum]], [[CarryZExt]]
26+
// CHECK-NEXT: [[HLSL_ADDUINT64_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[LowSum]], i64 0
27+
// CHECK-NEXT: [[HLSL_ADDUINT64:%.*]] = insertelement <2 x i32> [[HLSL_ADDUINT64_UPTO0]], i32 [[HighSumPlusCarry]], i64 1
28+
// CHECK-NEXT: ret <2 x i32> [[HLSL_ADDUINT64]]
29+
//
30+
uint2 test_AddUint64_uint2(uint2 a, uint2 b) {
31+
return AddUint64(a, b);
32+
}
33+
34+
// CHECK-LABEL: define noundef <4 x i32> @_Z20test_AddUint64_uint4Dv4_jS_(
35+
// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
36+
// CHECK-NEXT: [[ENTRY:.*:]]
37+
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <4 x i32>, align 16
38+
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <4 x i32>, align 16
39+
// CHECK-NEXT: store <4 x i32> [[A]], ptr [[A_ADDR]], align 16
40+
// CHECK-NEXT: store <4 x i32> [[B]], ptr [[B_ADDR]], align 16
41+
// CHECK-NEXT: [[A_LOAD:%.*]] = load <4 x i32>, ptr [[A_ADDR]], align 16
42+
// CHECK-NEXT: [[B_LOAD:%.*]] = load <4 x i32>, ptr [[B_ADDR]], align 16
43+
// CHECK-NEXT: [[LowA:%.*]] = extractelement <4 x i32> [[A_LOAD]], i64 0
44+
// CHECK-NEXT: [[HighA:%.*]] = extractelement <4 x i32> [[A_LOAD]], i64 1
45+
// CHECK-NEXT: [[LowB:%.*]] = extractelement <4 x i32> [[B_LOAD]], i64 0
46+
// CHECK-NEXT: [[HighB:%.*]] = extractelement <4 x i32> [[B_LOAD]], i64 1
47+
// CHECK-NEXT: [[UAddc:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[LowA]], i32 [[LowB]])
48+
// CHECK-NEXT: [[Carry:%.*]] = extractvalue { i32, i1 } [[UAddc]], 1
49+
// CHECK-NEXT: [[LowSum:%.*]] = extractvalue { i32, i1 } [[UAddc]], 0
50+
// CHECK-NEXT: [[CarryZExt:%.*]] = zext i1 [[Carry]] to i32
51+
// CHECK-NEXT: [[HighSum:%.*]] = add i32 [[HighA]], [[HighB]]
52+
// CHECK-NEXT: [[HighSumPlusCarry:%.*]] = add i32 [[HighSum]], [[CarryZExt]]
53+
// CHECK-NEXT: [[HLSL_ADDUINT64_UPTO0:%.*]] = insertelement <4 x i32> poison, i32 [[LowSum]], i64 0
54+
// CHECK-NEXT: [[HLSL_ADDUINT64_UPTO1:%.*]] = insertelement <4 x i32> [[HLSL_ADDUINT64_UPTO0]], i32 [[HighSumPlusCarry]], i64 1
55+
// CHECK-NEXT: [[LowA1:%.*]] = extractelement <4 x i32> [[A_LOAD]], i64 2
56+
// CHECK-NEXT: [[HighA1:%.*]] = extractelement <4 x i32> [[A_LOAD]], i64 3
57+
// CHECK-NEXT: [[LowB1:%.*]] = extractelement <4 x i32> [[B_LOAD]], i64 2
58+
// CHECK-NEXT: [[HighB1:%.*]] = extractelement <4 x i32> [[B_LOAD]], i64 3
59+
// CHECK-NEXT: [[UAddc1:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[LowA1]], i32 [[LowB1]])
60+
// CHECK-NEXT: [[Carry1:%.*]] = extractvalue { i32, i1 } [[UAddc1]], 1
61+
// CHECK-NEXT: [[LowSum1:%.*]] = extractvalue { i32, i1 } [[UAddc1]], 0
62+
// CHECK-NEXT: [[CarryZExt1:%.*]] = zext i1 [[Carry1]] to i32
63+
// CHECK-NEXT: [[HighSum1:%.*]] = add i32 [[HighA1]], [[HighB1]]
64+
// CHECK-NEXT: [[HighSumPlusCarry1:%.*]] = add i32 [[HighSum1]], [[CarryZExt1]]
65+
// CHECK-NEXT: [[HLSL_ADDUINT64_UPTO2:%.*]] = insertelement <4 x i32> [[HLSL_ADDUINT64_UPTO1]], i32 [[LowSum1]], i64 2
66+
// CHECK-NEXT: [[HLSL_ADDUINT64:%.*]] = insertelement <4 x i32> [[HLSL_ADDUINT64_UPTO2]], i32 [[HighSumPlusCarry1]], i64 3
67+
// CHECK-NEXT: ret <4 x i32> [[HLSL_ADDUINT64]]
68+
//
69+
uint4 test_AddUint64_uint4(uint4 a, uint4 b) {
70+
return AddUint64(a, b);
71+
}
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify
2+
3+
uint2 test_too_few_arg() {
4+
return __builtin_hlsl_adduint64();
5+
// expected-error@-1 {{too few arguments to function call, expected 2, have 0}}
6+
}
7+
8+
uint4 test_too_many_arg(uint4 a) {
9+
return __builtin_hlsl_adduint64(a, a, a);
10+
// expected-error@-1 {{too many arguments to function call, expected 2, have 3}}
11+
}
12+
13+
uint2 test_mismatched_arg_types(uint2 a, uint4 b) {
14+
return __builtin_hlsl_adduint64(a, b);
15+
// expected-error@-1 {{all arguments to '__builtin_hlsl_adduint64' must have the same type}}
16+
}
17+
18+
uint2 test_too_many_arg_elements(uint3 a, uint3 b) {
19+
return __builtin_hlsl_adduint64(a, b);
20+
// expected-error@-1 {{incorrect number of elements in vector operand (expected 2 or 4 elements, have 3)}}
21+
}
22+
23+
uint4 test_too_few_arg_elements(uint3 a, uint3 b) {
24+
return __builtin_hlsl_adduint64(a, b);
25+
// expected-error@-1 {{incorrect number of elements in vector operand (expected 2 or 4 elements, have 3)}}
26+
}
27+
28+
uint2 test_scalar_arg_type(uint a) {
29+
return __builtin_hlsl_adduint64(a, a);
30+
// expected-error@-1 {{incorrect number of elements in vector operand (expected 2 or 4 elements, have 1)}}
31+
}
32+
33+
uint2 test_signed_integer_args(int2 a, int2 b) {
34+
return __builtin_hlsl_adduint64(a, b);
35+
// expected-error@-1 {{passing 'int2' (aka 'vector<int, 2>') to parameter of incompatible type '__attribute__((__vector_size__(2 * sizeof(unsigned int)))) unsigned int' (vector of 2 'unsigned int' values)}}
36+
}
37+
38+
struct S {
39+
uint2 a;
40+
};
41+
42+
uint2 test_incorrect_arg_type(S a) {
43+
return __builtin_hlsl_adduint64(a, a);
44+
// expected-error@-1 {{passing 'S' to parameter of incompatible type 'unsigned int'}}
45+
}
46+

0 commit comments

Comments
 (0)