Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions clang/include/clang/Basic/Builtins.td
Original file line number Diff line number Diff line change
Expand Up @@ -4753,6 +4753,12 @@ def GetDeviceSideMangledName : LangBuiltin<"CUDA_LANG"> {
}

// HLSL
def HLSLAddUint64: LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_adduint64"];
let Attributes = [NoThrow, Const];
let Prototype = "void(...)";
}

def HLSLResourceGetPointer : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_resource_getpointer"];
let Attributes = [NoThrow];
Expand Down
5 changes: 5 additions & 0 deletions clang/include/clang/Basic/DiagnosticSemaKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -10657,6 +10657,11 @@ def err_vector_incorrect_num_elements : Error<
"%select{too many|too few}0 elements in vector %select{initialization|operand}3 (expected %1 elements, have %2)">;
def err_altivec_empty_initializer : Error<"expected initializer">;

def err_vector_incorrect_bit_count : Error<
"incorrect number of bits in vector operand (expected %select{|a multiple of}0 %1 bits, have %2)">;
def err_integer_incorrect_bit_count : Error<
"incorrect number of bits in integer (expected %0 bits, have %1)">;

def err_invalid_neon_type_code : Error<
"incompatible constant for this __builtin_neon function">;
def err_argument_invalid_range : Error<
Expand Down
64 changes: 64 additions & 0 deletions clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19445,6 +19445,70 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
return nullptr;

switch (BuiltinID) {
case Builtin::BI__builtin_hlsl_adduint64: {
Value *OpA = EmitScalarExpr(E->getArg(0));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Forwarding: #125319 (comment)

Value *OpB = EmitScalarExpr(E->getArg(1));
assert(E->getArg(0)->getType()->hasIntegerRepresentation() &&
E->getArg(1)->getType()->hasIntegerRepresentation() &&
"AddUint64 operands must have an integer representation");
assert(((E->getArg(0)->getType()->castAs<VectorType>()->getNumElements() ==
2 &&
E->getArg(1)->getType()->castAs<VectorType>()->getNumElements() ==
2) ||
(E->getArg(0)->getType()->castAs<VectorType>()->getNumElements() ==
4 &&
E->getArg(1)->getType()->castAs<VectorType>()->getNumElements() ==
4)) &&
"input vectors must have 2 or 4 elements each");

uint64_t NumElements =
E->getArg(0)->getType()->castAs<VectorType>()->getNumElements();

llvm::Value *Result = PoisonValue::get(OpA->getType());
llvm::Value *LowA;
llvm::Value *HighA;
llvm::Value *LowB;
llvm::Value *HighB;

// Obtain low and high words of inputs A and B
if (NumElements == 2) {
LowA = Builder.CreateExtractElement(OpA, (uint64_t)0, "LowA");
HighA = Builder.CreateExtractElement(OpA, (uint64_t)1, "HighA");
LowB = Builder.CreateExtractElement(OpB, (uint64_t)0, "LowB");
HighB = Builder.CreateExtractElement(OpB, (uint64_t)1, "HighB");
} else {
LowA = Builder.CreateShuffleVector(OpA, ArrayRef<int>{0, 2}, "LowA");
HighA = Builder.CreateShuffleVector(OpA, ArrayRef<int>{1, 3}, "HighA");
LowB = Builder.CreateShuffleVector(OpB, ArrayRef<int>{0, 2}, "LowB");
HighB = Builder.CreateShuffleVector(OpB, ArrayRef<int>{1, 3}, "HighB");
}

// Use an uadd_with_overflow to compute the sum of low words and obtain a
// carry value
llvm::Value *Carry;
llvm::Value *LowSum = EmitOverflowIntrinsic(
*this, llvm::Intrinsic::uadd_with_overflow, LowA, LowB, Carry);
llvm::Value *ZExtCarry =
Builder.CreateZExt(Carry, HighA->getType(), "CarryZExt");

// Sum the high words and the carry
llvm::Value *HighSum = Builder.CreateAdd(HighA, HighB, "HighSum");
llvm::Value *HighSumPlusCarry =
Builder.CreateAdd(HighSum, ZExtCarry, "HighSumPlusCarry");

// Insert the low and high word sums into the result vector
if (NumElements == 2) {
Result = Builder.CreateInsertElement(Result, LowSum, (uint64_t)0,
"hlsl.AddUint64.upto0");
Result = Builder.CreateInsertElement(Result, HighSumPlusCarry,
(uint64_t)1, "hlsl.AddUint64");
} else { /* NumElements == 4 */
Result = Builder.CreateShuffleVector(LowSum, HighSumPlusCarry,
ArrayRef<int>{0, 2, 1, 3},
"hlsl.AddUint64");
}
return Result;
}
case Builtin::BI__builtin_hlsl_resource_getpointer: {
Value *HandleOp = EmitScalarExpr(E->getArg(0));
Value *IndexOp = EmitScalarExpr(E->getArg(1));
Expand Down
21 changes: 21 additions & 0 deletions clang/lib/Headers/hlsl/hlsl_intrinsics.h
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,27 @@ _HLSL_BUILTIN_ALIAS(__builtin_elementwise_acos)
float4 acos(float4);

//===----------------------------------------------------------------------===//
// AddUint64 builtins
//===----------------------------------------------------------------------===//

/// \fn T AddUint64(T a, T b)
/// \brief Implements unsigned 64-bit integer addition using pairs of unsigned
/// 32-bit integers.
/// \param x [in] The first unsigned 32-bit integer pair(s)
/// \param y [in] The second unsigned 32-bit integer pair(s)
///
/// This function takes one or two pairs (low, high) of unsigned 32-bit integer
/// values and returns pairs (low, high) of unsigned 32-bit integer
/// values representing the result of unsigned 64-bit integer addition.

_HLSL_AVAILABILITY(shadermodel, 6.0)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_adduint64)
uint32_t2 AddUint64(uint32_t2, uint32_t2);
_HLSL_AVAILABILITY(shadermodel, 6.0)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_adduint64)
uint32_t4 AddUint64(uint32_t4, uint32_t4);

// //===----------------------------------------------------------------------===//
// all builtins
//===----------------------------------------------------------------------===//

Expand Down
58 changes: 58 additions & 0 deletions clang/lib/Sema/SemaHLSL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2038,6 +2038,18 @@ static bool CheckAllArgsHaveFloatRepresentation(Sema *S, CallExpr *TheCall) {
checkAllFloatTypes);
}

static bool CheckUnsignedIntRepresentations(Sema *S, CallExpr *TheCall) {
auto checkUnsignedInteger = [](clang::QualType PassedType) -> bool {
clang::QualType BaseType =
PassedType->isVectorType()
? PassedType->getAs<clang::VectorType>()->getElementType()
: PassedType;
return !BaseType->isUnsignedIntegerType();
};
return CheckAllArgTypesAreCorrect(S, TheCall, S->Context.UnsignedIntTy,
checkUnsignedInteger);
}

static bool CheckFloatOrHalfRepresentations(Sema *S, CallExpr *TheCall) {
auto checkFloatorHalf = [](clang::QualType PassedType) -> bool {
clang::QualType BaseType =
Expand Down Expand Up @@ -2229,6 +2241,52 @@ static bool CheckResourceHandle(
// returning an ExprError
bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
switch (BuiltinID) {
case Builtin::BI__builtin_hlsl_adduint64: {
if (SemaRef.checkArgCount(TheCall, 2))
return true;
if (CheckVectorElementCallArgs(&SemaRef, TheCall))
return true;
if (CheckUnsignedIntRepresentations(&SemaRef, TheCall))
return true;

// CheckVectorElementCallArgs(...) guarantees both args are the same type.
assert(TheCall->getArg(0)->getType() == TheCall->getArg(1)->getType() &&
"Both args must be of the same type");

// ensure both args are vectors
auto *VTy = TheCall->getArg(0)->getType()->getAs<VectorType>();
if (!VTy) {
SemaRef.Diag(TheCall->getBeginLoc(), diag::err_vec_builtin_non_vector)
<< TheCall->getDirectCallee() << /*all*/ 1;
return true;
}

// ensure arg integers are 32-bits
uint64_t ElementBitCount = getASTContext()
.getTypeSizeInChars(VTy->getElementType())
.getQuantity() *
8;
if (ElementBitCount != 32) {
SemaRef.Diag(TheCall->getBeginLoc(),
diag::err_integer_incorrect_bit_count)
<< 32 << ElementBitCount;
return true;
}

// ensure both args are vectors of total bit size of a multiple of 64
int NumElementsArg = VTy->getNumElements();
if (NumElementsArg != 2 && NumElementsArg != 4) {
SemaRef.Diag(TheCall->getBeginLoc(), diag::err_vector_incorrect_bit_count)
<< 1 /*a multiple of*/ << 64 << NumElementsArg * ElementBitCount;
return true;
}

ExprResult A = TheCall->getArg(0);
QualType ArgTyA = A.get()->getType();
// return type is the same as the input type
TheCall->setType(ArgTyA);
break;
}
case Builtin::BI__builtin_hlsl_resource_getpointer: {
if (SemaRef.checkArgCount(TheCall, 2) ||
CheckResourceHandle(&SemaRef, TheCall, 0) ||
Expand Down
58 changes: 58 additions & 0 deletions clang/test/CodeGenHLSL/builtins/AddUint64.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library %s \
// RUN: -emit-llvm -disable-llvm-passes -o - | \
// RUN: FileCheck %s --check-prefixes=CHECK


// CHECK-LABEL: define noundef <2 x i32> @_Z20test_AddUint64_uint2Dv2_jS_(
// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x i32>, align 8
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x i32>, align 8
// CHECK-NEXT: store <2 x i32> [[A]], ptr [[A_ADDR]], align 8
// CHECK-NEXT: store <2 x i32> [[B]], ptr [[B_ADDR]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[A_ADDR]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[B_ADDR]], align 8
// CHECK-NEXT: [[LOWA:%.*]] = extractelement <2 x i32> [[TMP0]], i64 0
// CHECK-NEXT: [[HIGHA:%.*]] = extractelement <2 x i32> [[TMP0]], i64 1
// CHECK-NEXT: [[LOWB:%.*]] = extractelement <2 x i32> [[TMP1]], i64 0
// CHECK-NEXT: [[HIGHB:%.*]] = extractelement <2 x i32> [[TMP1]], i64 1
// CHECK-NEXT: [[TMP2:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[LOWA]], i32 [[LOWB]])
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1
// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP2]], 0
// CHECK-NEXT: [[CARRYZEXT:%.*]] = zext i1 [[TMP3]] to i32
// CHECK-NEXT: [[HIGHSUM:%.*]] = add i32 [[HIGHA]], [[HIGHB]]
// CHECK-NEXT: [[HIGHSUMPLUSCARRY:%.*]] = add i32 [[HIGHSUM]], [[CARRYZEXT]]
// CHECK-NEXT: [[HLSL_ADDUINT64_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[TMP4]], i64 0
// CHECK-NEXT: [[HLSL_ADDUINT64:%.*]] = insertelement <2 x i32> [[HLSL_ADDUINT64_UPTO0]], i32 [[HIGHSUMPLUSCARRY]], i64 1
// CHECK-NEXT: ret <2 x i32> [[HLSL_ADDUINT64]]
//
uint2 test_AddUint64_uint2(uint2 a, uint2 b) {
return AddUint64(a, b);
}

// CHECK-LABEL: define noundef <4 x i32> @_Z20test_AddUint64_uint4Dv4_jS_(
// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <4 x i32>, align 16
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <4 x i32>, align 16
// CHECK-NEXT: store <4 x i32> [[A]], ptr [[A_ADDR]], align 16
// CHECK-NEXT: store <4 x i32> [[B]], ptr [[B_ADDR]], align 16
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[A_ADDR]], align 16
// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[B_ADDR]], align 16
// CHECK-NEXT: [[LOWA:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <2 x i32> <i32 0, i32 2>
// CHECK-NEXT: [[HIGHA:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <2 x i32> <i32 1, i32 3>
// CHECK-NEXT: [[LOWB:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <2 x i32> <i32 0, i32 2>
// CHECK-NEXT: [[HIGHB:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <2 x i32> <i32 1, i32 3>
// CHECK-NEXT: [[TMP2:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> [[LOWA]], <2 x i32> [[LOWB]])
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <2 x i32>, <2 x i1> } [[TMP2]], 1
// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <2 x i32>, <2 x i1> } [[TMP2]], 0
// CHECK-NEXT: [[CARRYZEXT:%.*]] = zext <2 x i1> [[TMP3]] to <2 x i32>
// CHECK-NEXT: [[HIGHSUM:%.*]] = add <2 x i32> [[HIGHA]], [[HIGHB]]
// CHECK-NEXT: [[HIGHSUMPLUSCARRY:%.*]] = add <2 x i32> [[HIGHSUM]], [[CARRYZEXT]]
// CHECK-NEXT: [[HLSL_ADDUINT64:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[HIGHSUMPLUSCARRY]], <4 x i32> <i32 0, i32 2, i32 1, i32 3>
// CHECK-NEXT: ret <4 x i32> [[HLSL_ADDUINT64]]
//
uint4 test_AddUint64_uint4(uint4 a, uint4 b) {
return AddUint64(a, b);
}
46 changes: 46 additions & 0 deletions clang/test/SemaHLSL/BuiltIns/AddUint64-errors.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify

uint2 test_too_few_arg() {
return __builtin_hlsl_adduint64();
// expected-error@-1 {{too few arguments to function call, expected 2, have 0}}
}

uint4 test_too_many_arg(uint4 a) {
return __builtin_hlsl_adduint64(a, a, a);
// expected-error@-1 {{too many arguments to function call, expected 2, have 3}}
}

uint2 test_mismatched_arg_types(uint2 a, uint4 b) {
return __builtin_hlsl_adduint64(a, b);
// expected-error@-1 {{all arguments to '__builtin_hlsl_adduint64' must have the same type}}
}

uint2 test_bad_num_arg_elements(uint3 a, uint3 b) {
return __builtin_hlsl_adduint64(a, b);
// expected-error@-1 {{incorrect number of bits in vector operand (expected a multiple of 64 bits, have 96)}}
}

uint2 test_scalar_arg_type(uint a) {
return __builtin_hlsl_adduint64(a, a);
// expected-error@-1 {{all arguments to '__builtin_hlsl_adduint64' must be vectors}}
}

uint2 test_uint64_args(uint16_t2 a) {
return __builtin_hlsl_adduint64(a, a);
// expected-error@-1 {{incorrect number of bits in integer (expected 32 bits, have 16)}}
}

uint2 test_signed_integer_args(int2 a, int2 b) {
return __builtin_hlsl_adduint64(a, b);
// expected-error@-1 {{passing 'int2' (aka 'vector<int, 2>') to parameter of incompatible type '__attribute__((__vector_size__(2 * sizeof(unsigned int)))) unsigned int' (vector of 2 'unsigned int' values)}}
}

struct S {
uint2 a;
};

uint2 test_incorrect_arg_type(S a) {
return __builtin_hlsl_adduint64(a, a);
// expected-error@-1 {{passing 'S' to parameter of incompatible type 'unsigned int'}}
}

13 changes: 13 additions & 0 deletions llvm/lib/Target/DirectX/DXIL.td
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ def HandleTy : DXILOpParamType;
def ResBindTy : DXILOpParamType;
def ResPropsTy : DXILOpParamType;
def SplitDoubleTy : DXILOpParamType;
def BinaryWithCarryTy : DXILOpParamType;

class DXILOpClass;

Expand Down Expand Up @@ -738,6 +739,18 @@ def UMin : DXILOp<40, binary> {
let attributes = [Attributes<DXIL1_0, [ReadNone]>];
}

def UAddc : DXILOp<44, binaryWithCarryOrBorrow > {
let Doc = "Unsigned 32-bit integer arithmetic add with carry. uaddc(a,b) = (a+b, a+b overflowed ? 1 : 0)";
// TODO: This `let intrinsics = ...` line may be uncommented when
// https://github.com/llvm/llvm-project/issues/113192 is fixed
// let intrinsics = [IntrinSelect<int_uadd_with_overflow>];
let arguments = [OverloadTy, OverloadTy];
let result = BinaryWithCarryTy;
let overloads = [Overloads<DXIL1_0, [Int32Ty]>];
let stages = [Stages<DXIL1_0, [all_stages]>];
let attributes = [Attributes<DXIL1_0, [ReadNone]>];
}

def FMad : DXILOp<46, tertiary> {
let Doc = "Floating point arithmetic multiply/add operation. fmad(m,a,b) = m "
"* a + b.";
Expand Down
14 changes: 14 additions & 0 deletions llvm/lib/Target/DirectX/DXILOpBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,14 @@ static StructType *getSplitDoubleType(LLVMContext &Context) {
return StructType::create({Int32Ty, Int32Ty}, "dx.types.splitdouble");
}

static StructType *getBinaryWithCarryType(LLVMContext &Context) {
if (auto *ST = StructType::getTypeByName(Context, "dx.types.i32c"))
return ST;
Type *Int32Ty = Type::getInt32Ty(Context);
Type *Int1Ty = Type::getInt1Ty(Context);
return StructType::create({Int32Ty, Int1Ty}, "dx.types.i32c");
}

static Type *getTypeFromOpParamType(OpParamType Kind, LLVMContext &Ctx,
Type *OverloadTy) {
switch (Kind) {
Expand Down Expand Up @@ -273,6 +281,8 @@ static Type *getTypeFromOpParamType(OpParamType Kind, LLVMContext &Ctx,
return getResPropsType(Ctx);
case OpParamType::SplitDoubleTy:
return getSplitDoubleType(Ctx);
case OpParamType::BinaryWithCarryTy:
return getBinaryWithCarryType(Ctx);
}
llvm_unreachable("Invalid parameter kind");
return nullptr;
Expand Down Expand Up @@ -539,6 +549,10 @@ StructType *DXILOpBuilder::getSplitDoubleType(LLVMContext &Context) {
return ::getSplitDoubleType(Context);
}

StructType *DXILOpBuilder::getBinaryWithCarryType(LLVMContext &Context) {
return ::getBinaryWithCarryType(Context);
}

StructType *DXILOpBuilder::getHandleType() {
return ::getHandleType(IRB.getContext());
}
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/DirectX/DXILOpBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@ class DXILOpBuilder {
/// Get the `%dx.types.splitdouble` type.
StructType *getSplitDoubleType(LLVMContext &Context);

/// Get the `%dx.types.i32c` type.
StructType *getBinaryWithCarryType(LLVMContext &Context);

/// Get the `%dx.types.Handle` type.
StructType *getHandleType();

Expand Down
Loading
Loading