Skip to content

Commit 19595b1

Browse files
addressing PR comments
1 parent 0e3e887 commit 19595b1

File tree

17 files changed

+393
-279
lines changed

17 files changed

+393
-279
lines changed

clang/include/clang/Basic/Builtins.td

Lines changed: 0 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -4871,18 +4871,6 @@ def HLSLRadians : LangBuiltin<"HLSL_LANG"> {
48714871
let Prototype = "void(...)";
48724872
}
48734873

4874-
def HLSLRadians : LangBuiltin<"HLSL_LANG"> {
4875-
let Spellings = ["__builtin_hlsl_elementwise_radians"];
4876-
let Attributes = [NoThrow, Const];
4877-
let Prototype = "void(...)";
4878-
}
4879-
4880-
def HLSLRadians : LangBuiltin<"HLSL_LANG"> {
4881-
let Spellings = ["__builtin_hlsl_elementwise_radians"];
4882-
let Attributes = [NoThrow, Const];
4883-
let Prototype = "void(...)";
4884-
}
4885-
48864874
def HLSLSplitDouble: LangBuiltin<"HLSL_LANG"> {
48874875
let Spellings = ["__builtin_hlsl_splitdouble"];
48884876
let Attributes = [NoThrow, Const];

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 107 additions & 58 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@
1717
#include "CGObjCRuntime.h"
1818
#include "CGOpenCLRuntime.h"
1919
#include "CGRecordLayout.h"
20+
#include "CGValue.h"
2021
#include "CodeGenFunction.h"
2122
#include "CodeGenModule.h"
2223
#include "ConstantEmitter.h"
@@ -27,21 +28,19 @@
2728
#include "clang/AST/Decl.h"
2829
#include "clang/AST/OSLog.h"
2930
#include "clang/AST/OperationKinds.h"
31+
#include "clang/AST/Type.h"
3032
#include "clang/Basic/TargetBuiltins.h"
3133
#include "clang/Basic/TargetInfo.h"
3234
#include "clang/Basic/TargetOptions.h"
3335
#include "clang/CodeGen/CGFunctionInfo.h"
3436
#include "clang/Frontend/FrontendDiagnostic.h"
3537
#include "llvm/ADT/APFloat.h"
3638
#include "llvm/ADT/APInt.h"
37-
#include "llvm/ADT/ArrayRef.h"
3839
#include "llvm/ADT/FloatingPointMode.h"
3940
#include "llvm/ADT/SmallPtrSet.h"
40-
#include "llvm/ADT/SmallVector.h"
4141
#include "llvm/ADT/StringExtras.h"
4242
#include "llvm/Analysis/ValueTracking.h"
4343
#include "llvm/IR/DataLayout.h"
44-
#include "llvm/IR/DerivedTypes.h"
4544
#include "llvm/IR/InlineAsm.h"
4645
#include "llvm/IR/Intrinsics.h"
4746
#include "llvm/IR/IntrinsicsAArch64.h"
@@ -70,6 +69,7 @@
7069
#include "llvm/TargetParser/X86TargetParser.h"
7170
#include <optional>
7271
#include <sstream>
72+
#include <utility>
7373

7474
using namespace clang;
7575
using namespace CodeGen;
@@ -18955,92 +18955,141 @@ case Builtin::BI__builtin_hlsl_elementwise_isinf: {
1895518955
CGM.getHLSLRuntime().getRadiansIntrinsic(), ArrayRef<Value *>{Op0},
1895618956
nullptr, "hlsl.radians");
1895718957
}
18958-
// This should only be called when targeting DXIL
1895918958
case Builtin::BI__builtin_hlsl_splitdouble: {
1896018959

1896118960
assert((E->getArg(0)->getType()->hasFloatingRepresentation() &&
1896218961
E->getArg(1)->getType()->hasUnsignedIntegerRepresentation() &&
1896318962
E->getArg(2)->getType()->hasUnsignedIntegerRepresentation()) &&
1896418963
"asuint operands types mismatch");
1896518964
Value *Op0 = EmitScalarExpr(E->getArg(0));
18966-
const HLSLOutArgExpr *OutArg1 = dyn_cast<HLSLOutArgExpr>(E->getArg(1));
18967-
const HLSLOutArgExpr *OutArg2 = dyn_cast<HLSLOutArgExpr>(E->getArg(2));
18965+
const auto *OutArg1 = dyn_cast<HLSLOutArgExpr>(E->getArg(1));
18966+
const auto *OutArg2 = dyn_cast<HLSLOutArgExpr>(E->getArg(2));
1896818967

1896918968
CallArgList Args;
18970-
auto [Op1BaseLValue, Op1TmpLValue] =
18971-
EmitHLSLOutArgExpr(OutArg1, Args, OutArg1->getType());
18972-
auto [Op2BaseLValue, Op2TmpLValue] =
18973-
EmitHLSLOutArgExpr(OutArg2, Args, OutArg2->getType());
18969+
LValue Op1TmpLValue = EmitHLSLOutArgExpr(OutArg1, Args, OutArg1->getType());
18970+
LValue Op2TmpLValue = EmitHLSLOutArgExpr(OutArg2, Args, OutArg2->getType());
1897418971

18975-
if (CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil) {
18972+
if (getTarget().getCXXABI().areArgsDestroyedLeftToRightInCallee())
18973+
Args.reverseWritebacks();
1897618974

18977-
llvm::StructType *retType = llvm::StructType::get(Int32Ty, Int32Ty);
18975+
auto EmitVectorCode =
18976+
[](Value *Op, CGBuilderTy *Builder,
18977+
FixedVectorType *DestTy) -> std::pair<Value *, Value *> {
18978+
Value *bitcast = Builder->CreateBitCast(Op, DestTy);
1897818979

18979-
if (Op0->getType()->isVectorTy()) {
18980-
auto *Op0VecTy = E->getArg(0)->getType()->getAs<VectorType>();
18980+
SmallVector<int> LowbitsIndex;
18981+
SmallVector<int> HighbitsIndex;
1898118982

18982-
llvm::VectorType *i32VecTy = llvm::VectorType::get(
18983-
Int32Ty, ElementCount::getFixed(Op0VecTy->getNumElements()));
18984-
retType = llvm::StructType::get(i32VecTy, i32VecTy);
18983+
for (unsigned int Idx = 0; Idx < DestTy->getNumElements(); Idx += 2) {
18984+
LowbitsIndex.push_back(Idx);
18985+
HighbitsIndex.push_back(Idx + 1);
1898518986
}
1898618987

18987-
CallInst *CI =
18988-
Builder.CreateIntrinsic(retType, Intrinsic::dx_splitdouble, {Op0},
18989-
nullptr, "hlsl.splitdouble");
18990-
18991-
Value *arg0 = Builder.CreateExtractValue(CI, 0);
18992-
Value *arg1 = Builder.CreateExtractValue(CI, 1);
18988+
Value *Arg0 = Builder->CreateShuffleVector(bitcast, LowbitsIndex);
18989+
Value *Arg1 = Builder->CreateShuffleVector(bitcast, HighbitsIndex);
1899318990

18994-
Builder.CreateStore(arg0, Op1TmpLValue.getAddress());
18995-
auto *s = Builder.CreateStore(arg1, Op2TmpLValue.getAddress());
18991+
return std::make_pair(Arg0, Arg1);
18992+
};
1899618993

18997-
EmitWritebacks(*this, Args);
18998-
return s;
18999-
}
18994+
Value *LastInst = nullptr;
1900018995

18996+
if (CGM.getTarget().getTriple().isDXIL()) {
1900118997

19002-
if(!Op0->getType()->isVectorTy()){
19003-
FixedVectorType *destTy = FixedVectorType::get(Int32Ty, 2);
19004-
Value *bitcast = Builder.CreateBitCast(Op0, destTy);
18998+
llvm::Type *RetElementTy = Int32Ty;
18999+
if (auto *Op0VecTy = E->getArg(0)->getType()->getAs<VectorType>())
19000+
RetElementTy = llvm::VectorType::get(
19001+
Int32Ty, ElementCount::getFixed(Op0VecTy->getNumElements()));
19002+
auto *RetTy = llvm::StructType::get(RetElementTy, RetElementTy);
1900519003

19006-
Value *arg0 = Builder.CreateExtractElement(bitcast, 0.0);
19007-
Value *arg1 = Builder.CreateExtractElement(bitcast, 1.0);
19004+
CallInst *CI = Builder.CreateIntrinsic(
19005+
RetTy, Intrinsic::dx_splitdouble, {Op0}, nullptr, "hlsl.splitdouble");
1900819006

19009-
Builder.CreateStore(arg0, Op1TmpLValue.getAddress());
19010-
auto *s = Builder.CreateStore(arg1, Op2TmpLValue.getAddress());
19007+
Value *Arg0 = Builder.CreateExtractValue(CI, 0);
19008+
Value *Arg1 = Builder.CreateExtractValue(CI, 1);
1901119009

19012-
EmitWritebacks(*this, Args);
19013-
return s;
19014-
}
19015-
19016-
auto *Op0VecTy = E->getArg(0)->getType()->getAs<VectorType>();
19010+
Builder.CreateStore(Arg0, Op1TmpLValue.getAddress());
19011+
LastInst = Builder.CreateStore(Arg1, Op2TmpLValue.getAddress());
1901719012

19018-
int numElements = Op0VecTy -> getNumElements() * 2;
19013+
} else {
1901919014

19020-
FixedVectorType *destTy = FixedVectorType::get(Int32Ty, numElements);
19021-
19022-
Value *bitcast = Builder.CreateBitCast(Op0, destTy);
19015+
assert(!CGM.getTarget().getTriple().isDXIL() &&
19016+
"For non-DXIL targets we generate the instructions");
1902319017

19024-
SmallVector<int> lowbitsIndex;
19025-
SmallVector<int> highbitsIndex;
19018+
if (!Op0->getType()->isVectorTy()) {
19019+
FixedVectorType *DestTy = FixedVectorType::get(Int32Ty, 2);
19020+
Value *Bitcast = Builder.CreateBitCast(Op0, DestTy);
1902619021

19027-
for(int idx = 0; idx < numElements; idx += 2){
19028-
lowbitsIndex.push_back(idx);
19029-
}
19022+
Value *Arg0 = Builder.CreateExtractElement(Bitcast, 0.0);
19023+
Value *Arg1 = Builder.CreateExtractElement(Bitcast, 1.0);
1903019024

19031-
for(int idx = 1; idx < numElements; idx += 2){
19032-
highbitsIndex.push_back(idx);
19033-
}
19025+
Builder.CreateStore(Arg0, Op1TmpLValue.getAddress());
19026+
LastInst = Builder.CreateStore(Arg1, Op2TmpLValue.getAddress());
19027+
} else {
1903419028

19035-
Value *arg0 = Builder.CreateShuffleVector(bitcast, lowbitsIndex);
19036-
Value *arg1 = Builder.CreateShuffleVector(bitcast, highbitsIndex);
19029+
const auto *TargTy = E->getArg(0)->getType()->getAs<VectorType>();
19030+
19031+
int NumElements = TargTy->getNumElements();
19032+
19033+
FixedVectorType *DestTy = FixedVectorType::get(Int32Ty, 4);
19034+
if (NumElements == 1) {
19035+
FixedVectorType *DestTy = FixedVectorType::get(Int32Ty, 2);
19036+
Value *Bitcast = Builder.CreateBitCast(Op0, DestTy);
19037+
19038+
Value *Arg0 = Builder.CreateExtractElement(Bitcast, 0.0);
19039+
Value *Arg1 = Builder.CreateExtractElement(Bitcast, 1.0);
19040+
19041+
Builder.CreateStore(Arg0, Op1TmpLValue.getAddress());
19042+
LastInst = Builder.CreateStore(Arg1, Op2TmpLValue.getAddress());
19043+
} else if (NumElements == 2) {
19044+
auto [LowBits, HighBits] = EmitVectorCode(Op0, &Builder, DestTy);
19045+
19046+
Builder.CreateStore(LowBits, Op1TmpLValue.getAddress());
19047+
LastInst = Builder.CreateStore(HighBits, Op2TmpLValue.getAddress());
19048+
} else {
19049+
19050+
SmallVector<std::pair<Value *, Value *>> EmitedValuePairs;
19051+
19052+
for (int It = 0; It < NumElements; It += 2) {
19053+
// Due to existing restrictions in SPIR-V and splitdouble,
19054+
// all shufflevector operations should return vectors of
19055+
// the same size, up to 4. This introduces an edge case
19056+
// when we get odd-sized vectors, which require
19057+
// an additional dummy value that is masked out in a later
19058+
// stage of this code.
19059+
auto Shuff = Builder.CreateShuffleVector(
19060+
Op0, {It, ((It + 1) % NumElements)});
19061+
std::pair<Value *, Value *> ValuePair =
19062+
EmitVectorCode(Shuff, &Builder, DestTy);
19063+
EmitedValuePairs.push_back(ValuePair);
19064+
}
1903719065

19038-
Builder.CreateStore(arg0, Op1TmpLValue.getAddress());
19039-
auto *s = Builder.CreateStore(arg1, Op2TmpLValue.getAddress());
19066+
SmallVector<int> Index;
19067+
auto InitSize = NumElements > 4 ? 4 : NumElements;
19068+
for (int It = 0; It < InitSize; It++)
19069+
Index.push_back(It);
19070+
19071+
auto arg0 = Builder.CreateShuffleVector(
19072+
EmitedValuePairs[0].first, EmitedValuePairs[1].first, Index);
19073+
auto arg1 = Builder.CreateShuffleVector(
19074+
EmitedValuePairs[0].second, EmitedValuePairs[1].second, Index);
19075+
19076+
for (size_t It = 2; It < EmitedValuePairs.size(); It++) {
19077+
int CurIndexSize = Index.size();
19078+
Index.insert(Index.end(), {CurIndexSize + 1,
19079+
((CurIndexSize + 2) % NumElements)});
19080+
arg0 = Builder.CreateShuffleVector(arg0, EmitedValuePairs[It].first,
19081+
Index);
19082+
arg1 = Builder.CreateShuffleVector(
19083+
arg1, EmitedValuePairs[It].second, Index);
19084+
}
1904019085

19086+
Builder.CreateStore(arg0, Op1TmpLValue.getAddress());
19087+
LastInst = Builder.CreateStore(arg1, Op2TmpLValue.getAddress());
19088+
}
19089+
}
19090+
}
1904119091
EmitWritebacks(*this, Args);
19042-
return s;
19043-
19092+
return LastInst;
1904419093
}
1904519094
}
1904619095
return nullptr;

0 commit comments

Comments
 (0)