Skip to content

Commit 4163d82

Browse files
addressing PR comments
1 parent dc978af commit 4163d82

File tree

18 files changed

+384
-283
lines changed

18 files changed

+384
-283
lines changed

clang/include/clang/Basic/Builtins.td

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4871,20 +4871,8 @@ def HLSLRadians : LangBuiltin<"HLSL_LANG"> {
48714871
let Prototype = "void(...)";
48724872
}
48734873

4874-
def HLSLRadians : LangBuiltin<"HLSL_LANG"> {
4875-
let Spellings = ["__builtin_hlsl_elementwise_radians"];
4876-
let Attributes = [NoThrow, Const];
4877-
let Prototype = "void(...)";
4878-
}
4879-
4880-
def HLSLRadians : LangBuiltin<"HLSL_LANG"> {
4881-
let Spellings = ["__builtin_hlsl_elementwise_radians"];
4882-
let Attributes = [NoThrow, Const];
4883-
let Prototype = "void(...)";
4884-
}
4885-
48864874
def HLSLSplitDouble: LangBuiltin<"HLSL_LANG"> {
4887-
let Spellings = ["__builtin_hlsl_splitdouble"];
4875+
let Spellings = ["__builtin_hlsl_elementwise_splitdouble"];
48884876
let Attributes = [NoThrow, Const];
48894877
let Prototype = "void(...)";
48904878
}

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 157 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include "CGObjCRuntime.h"
1818
#include "CGOpenCLRuntime.h"
1919
#include "CGRecordLayout.h"
20+
#include "CGValue.h"
2021
#include "CodeGenFunction.h"
2122
#include "CodeGenModule.h"
2223
#include "ConstantEmitter.h"
@@ -25,23 +26,22 @@
2526
#include "clang/AST/ASTContext.h"
2627
#include "clang/AST/Attr.h"
2728
#include "clang/AST/Decl.h"
29+
#include "clang/AST/Expr.h"
2830
#include "clang/AST/OSLog.h"
2931
#include "clang/AST/OperationKinds.h"
32+
#include "clang/AST/Type.h"
3033
#include "clang/Basic/TargetBuiltins.h"
3134
#include "clang/Basic/TargetInfo.h"
3235
#include "clang/Basic/TargetOptions.h"
3336
#include "clang/CodeGen/CGFunctionInfo.h"
3437
#include "clang/Frontend/FrontendDiagnostic.h"
3538
#include "llvm/ADT/APFloat.h"
3639
#include "llvm/ADT/APInt.h"
37-
#include "llvm/ADT/ArrayRef.h"
3840
#include "llvm/ADT/FloatingPointMode.h"
3941
#include "llvm/ADT/SmallPtrSet.h"
40-
#include "llvm/ADT/SmallVector.h"
4142
#include "llvm/ADT/StringExtras.h"
4243
#include "llvm/Analysis/ValueTracking.h"
4344
#include "llvm/IR/DataLayout.h"
44-
#include "llvm/IR/DerivedTypes.h"
4545
#include "llvm/IR/InlineAsm.h"
4646
#include "llvm/IR/Intrinsics.h"
4747
#include "llvm/IR/IntrinsicsAArch64.h"
@@ -70,6 +70,7 @@
7070
#include "llvm/TargetParser/X86TargetParser.h"
7171
#include <optional>
7272
#include <sstream>
73+
#include <utility>
7374

7475
using namespace clang;
7576
using namespace CodeGen;
@@ -98,6 +99,157 @@ static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size,
9899
I->addAnnotationMetadata("auto-init");
99100
}
100101

102+
static Value *handleHlslSplitdouble(const CallExpr *E, CodeGenFunction *CGF) {
103+
Value *Op0 = CGF->EmitScalarExpr(E->getArg(0));
104+
const auto *OutArg1 = dyn_cast<HLSLOutArgExpr>(E->getArg(1));
105+
const auto *OutArg2 = dyn_cast<HLSLOutArgExpr>(E->getArg(2));
106+
107+
CallArgList Args;
108+
LValue Op1TmpLValue =
109+
CGF->EmitHLSLOutArgExpr(OutArg1, Args, OutArg1->getType());
110+
LValue Op2TmpLValue =
111+
CGF->EmitHLSLOutArgExpr(OutArg2, Args, OutArg2->getType());
112+
113+
if (CGF->getTarget().getCXXABI().areArgsDestroyedLeftToRightInCallee())
114+
Args.reverseWritebacks();
115+
116+
auto EmitVectorCode =
117+
[](Value *Op, CGBuilderTy *Builder,
118+
FixedVectorType *DestTy) -> std::pair<Value *, Value *> {
119+
Value *bitcast = Builder->CreateBitCast(Op, DestTy);
120+
121+
SmallVector<int> LowbitsIndex;
122+
SmallVector<int> HighbitsIndex;
123+
124+
for (unsigned int Idx = 0; Idx < DestTy->getNumElements(); Idx += 2) {
125+
LowbitsIndex.push_back(Idx);
126+
HighbitsIndex.push_back(Idx + 1);
127+
}
128+
129+
Value *Arg0 = Builder->CreateShuffleVector(bitcast, LowbitsIndex);
130+
Value *Arg1 = Builder->CreateShuffleVector(bitcast, HighbitsIndex);
131+
132+
return std::make_pair(Arg0, Arg1);
133+
};
134+
135+
Value *LastInst = nullptr;
136+
137+
if (CGF->CGM.getTarget().getTriple().isDXIL()) {
138+
139+
llvm::Type *RetElementTy = CGF->Int32Ty;
140+
if (auto *Op0VecTy = E->getArg(0)->getType()->getAs<clang::VectorType>())
141+
RetElementTy = llvm::VectorType::get(
142+
CGF->Int32Ty, ElementCount::getFixed(Op0VecTy->getNumElements()));
143+
auto *RetTy = llvm::StructType::get(RetElementTy, RetElementTy);
144+
145+
CallInst *CI = CGF->Builder.CreateIntrinsic(
146+
RetTy, Intrinsic::dx_splitdouble, {Op0}, nullptr, "hlsl.splitdouble");
147+
148+
Value *Arg0 = CGF->Builder.CreateExtractValue(CI, 0);
149+
Value *Arg1 = CGF->Builder.CreateExtractValue(CI, 1);
150+
151+
CGF->Builder.CreateStore(Arg0, Op1TmpLValue.getAddress());
152+
LastInst = CGF->Builder.CreateStore(Arg1, Op2TmpLValue.getAddress());
153+
154+
} else {
155+
156+
assert(!CGF->CGM.getTarget().getTriple().isDXIL() &&
157+
"For non-DXIL targets we generate the instructions");
158+
159+
if (!Op0->getType()->isVectorTy()) {
160+
FixedVectorType *DestTy = FixedVectorType::get(CGF->Int32Ty, 2);
161+
Value *Bitcast = CGF->Builder.CreateBitCast(Op0, DestTy);
162+
163+
Value *Arg0 = CGF->Builder.CreateExtractElement(Bitcast, 0.0);
164+
Value *Arg1 = CGF->Builder.CreateExtractElement(Bitcast, 1.0);
165+
166+
CGF->Builder.CreateStore(Arg0, Op1TmpLValue.getAddress());
167+
LastInst = CGF->Builder.CreateStore(Arg1, Op2TmpLValue.getAddress());
168+
} else {
169+
170+
const auto *TargTy = E->getArg(0)->getType()->getAs<clang::VectorType>();
171+
172+
int NumElements = TargTy->getNumElements();
173+
174+
FixedVectorType *DestTy = FixedVectorType::get(CGF->Int32Ty, 4);
175+
if (NumElements == 1) {
176+
FixedVectorType *DestTy = FixedVectorType::get(CGF->Int32Ty, 2);
177+
Value *Bitcast = CGF->Builder.CreateBitCast(Op0, DestTy);
178+
179+
Value *Arg0 = CGF->Builder.CreateExtractElement(Bitcast, 0.0);
180+
Value *Arg1 = CGF->Builder.CreateExtractElement(Bitcast, 1.0);
181+
182+
CGF->Builder.CreateStore(Arg0, Op1TmpLValue.getAddress());
183+
LastInst = CGF->Builder.CreateStore(Arg1, Op2TmpLValue.getAddress());
184+
} else if (NumElements == 2) {
185+
auto [LowBits, HighBits] = EmitVectorCode(Op0, &CGF->Builder, DestTy);
186+
187+
CGF->Builder.CreateStore(LowBits, Op1TmpLValue.getAddress());
188+
LastInst =
189+
CGF->Builder.CreateStore(HighBits, Op2TmpLValue.getAddress());
190+
} else {
191+
192+
SmallVector<std::pair<Value *, Value *>> EmitedValuePairs;
193+
194+
int isOdd = NumElements % 2;
195+
int NumEvenElements = NumElements - isOdd;
196+
197+
for (int It = 0; It < NumEvenElements; It += 2) {
198+
auto Shuff = CGF->Builder.CreateShuffleVector(Op0, {It, It + 1});
199+
std::pair<Value *, Value *> ValuePair =
200+
EmitVectorCode(Shuff, &CGF->Builder, DestTy);
201+
EmitedValuePairs.push_back(ValuePair);
202+
}
203+
204+
if (isOdd == 1) {
205+
FixedVectorType *DestTy = FixedVectorType::get(CGF->Int32Ty, 2);
206+
auto Shuff = CGF->Builder.CreateShuffleVector(Op0, {NumEvenElements});
207+
std::pair<Value *, Value *> ValuePair =
208+
EmitVectorCode(Shuff, &CGF->Builder, DestTy);
209+
EmitedValuePairs.push_back(ValuePair);
210+
}
211+
212+
SmallVector<int> Index = {0, 1};
213+
214+
auto arg0 = EmitedValuePairs[0].first;
215+
auto arg1 = EmitedValuePairs[0].second;
216+
217+
auto EvenSizedPairs = EmitedValuePairs.size() - isOdd;
218+
219+
for (int It = 1; It < EvenSizedPairs; It++) {
220+
int CurIndexSize = Index.size();
221+
Index.insert(Index.end(), {CurIndexSize, CurIndexSize + 1});
222+
arg0 = CGF->Builder.CreateShuffleVector(
223+
arg0, EmitedValuePairs[It].first, Index);
224+
arg1 = CGF->Builder.CreateShuffleVector(
225+
arg1, EmitedValuePairs[It].second, Index);
226+
}
227+
228+
if (isOdd == 1) {
229+
int CurIndexSize = Index.size();
230+
231+
auto extendedLowerBits = CGF->Builder.CreateShuffleVector(
232+
EmitedValuePairs[EvenSizedPairs].first, {0, 0});
233+
234+
auto extendedHighBits = CGF->Builder.CreateShuffleVector(
235+
EmitedValuePairs[EvenSizedPairs].second, {0, 0});
236+
Index.insert(Index.end(), {CurIndexSize});
237+
238+
arg0 =
239+
CGF->Builder.CreateShuffleVector(arg0, extendedLowerBits, Index);
240+
arg1 =
241+
CGF->Builder.CreateShuffleVector(arg1, extendedHighBits, Index);
242+
}
243+
244+
CGF->Builder.CreateStore(arg0, Op1TmpLValue.getAddress());
245+
LastInst = CGF->Builder.CreateStore(arg1, Op2TmpLValue.getAddress());
246+
}
247+
}
248+
}
249+
CGF->EmitWritebacks(*CGF, Args);
250+
return LastInst;
251+
}
252+
101253
/// getBuiltinLibFunction - Given a builtin id for a function like
102254
/// "__builtin_fabsf", return a Function* for "fabsf".
103255
llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
@@ -18955,92 +19107,13 @@ case Builtin::BI__builtin_hlsl_elementwise_isinf: {
1895519107
CGM.getHLSLRuntime().getRadiansIntrinsic(), ArrayRef<Value *>{Op0},
1895619108
nullptr, "hlsl.radians");
1895719109
}
18958-
// This should only be called when targeting DXIL
18959-
case Builtin::BI__builtin_hlsl_splitdouble: {
19110+
case Builtin::BI__builtin_hlsl_elementwise_splitdouble: {
1896019111

1896119112
assert((E->getArg(0)->getType()->hasFloatingRepresentation() &&
1896219113
E->getArg(1)->getType()->hasUnsignedIntegerRepresentation() &&
1896319114
E->getArg(2)->getType()->hasUnsignedIntegerRepresentation()) &&
1896419115
"asuint operands types mismatch");
18965-
Value *Op0 = EmitScalarExpr(E->getArg(0));
18966-
const HLSLOutArgExpr *OutArg1 = dyn_cast<HLSLOutArgExpr>(E->getArg(1));
18967-
const HLSLOutArgExpr *OutArg2 = dyn_cast<HLSLOutArgExpr>(E->getArg(2));
18968-
18969-
CallArgList Args;
18970-
auto [Op1BaseLValue, Op1TmpLValue] =
18971-
EmitHLSLOutArgExpr(OutArg1, Args, OutArg1->getType());
18972-
auto [Op2BaseLValue, Op2TmpLValue] =
18973-
EmitHLSLOutArgExpr(OutArg2, Args, OutArg2->getType());
18974-
18975-
if (CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil) {
18976-
18977-
llvm::StructType *retType = llvm::StructType::get(Int32Ty, Int32Ty);
18978-
18979-
if (Op0->getType()->isVectorTy()) {
18980-
auto *Op0VecTy = E->getArg(0)->getType()->getAs<VectorType>();
18981-
18982-
llvm::VectorType *i32VecTy = llvm::VectorType::get(
18983-
Int32Ty, ElementCount::getFixed(Op0VecTy->getNumElements()));
18984-
retType = llvm::StructType::get(i32VecTy, i32VecTy);
18985-
}
18986-
18987-
CallInst *CI =
18988-
Builder.CreateIntrinsic(retType, Intrinsic::dx_splitdouble, {Op0},
18989-
nullptr, "hlsl.splitdouble");
18990-
18991-
Value *arg0 = Builder.CreateExtractValue(CI, 0);
18992-
Value *arg1 = Builder.CreateExtractValue(CI, 1);
18993-
18994-
Builder.CreateStore(arg0, Op1TmpLValue.getAddress());
18995-
auto *s = Builder.CreateStore(arg1, Op2TmpLValue.getAddress());
18996-
18997-
EmitWritebacks(*this, Args);
18998-
return s;
18999-
}
19000-
19001-
19002-
if(!Op0->getType()->isVectorTy()){
19003-
FixedVectorType *destTy = FixedVectorType::get(Int32Ty, 2);
19004-
Value *bitcast = Builder.CreateBitCast(Op0, destTy);
19005-
19006-
Value *arg0 = Builder.CreateExtractElement(bitcast, 0.0);
19007-
Value *arg1 = Builder.CreateExtractElement(bitcast, 1.0);
19008-
19009-
Builder.CreateStore(arg0, Op1TmpLValue.getAddress());
19010-
auto *s = Builder.CreateStore(arg1, Op2TmpLValue.getAddress());
19011-
19012-
EmitWritebacks(*this, Args);
19013-
return s;
19014-
}
19015-
19016-
auto *Op0VecTy = E->getArg(0)->getType()->getAs<VectorType>();
19017-
19018-
int numElements = Op0VecTy -> getNumElements() * 2;
19019-
19020-
FixedVectorType *destTy = FixedVectorType::get(Int32Ty, numElements);
19021-
19022-
Value *bitcast = Builder.CreateBitCast(Op0, destTy);
19023-
19024-
SmallVector<int> lowbitsIndex;
19025-
SmallVector<int> highbitsIndex;
19026-
19027-
for(int idx = 0; idx < numElements; idx += 2){
19028-
lowbitsIndex.push_back(idx);
19029-
}
19030-
19031-
for(int idx = 1; idx < numElements; idx += 2){
19032-
highbitsIndex.push_back(idx);
19033-
}
19034-
19035-
Value *arg0 = Builder.CreateShuffleVector(bitcast, lowbitsIndex);
19036-
Value *arg1 = Builder.CreateShuffleVector(bitcast, highbitsIndex);
19037-
19038-
Builder.CreateStore(arg0, Op1TmpLValue.getAddress());
19039-
auto *s = Builder.CreateStore(arg1, Op2TmpLValue.getAddress());
19040-
19041-
EmitWritebacks(*this, Args);
19042-
return s;
19043-
19116+
return handleHlslSplitdouble(E, this);
1904419117
}
1904519118
}
1904619119
return nullptr;

clang/lib/CodeGen/CGCall.cpp

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
#include "llvm/IR/IntrinsicInst.h"
4141
#include "llvm/IR/Intrinsics.h"
4242
#include "llvm/IR/Type.h"
43+
#include "llvm/Support/Path.h"
4344
#include "llvm/Transforms/Utils/Local.h"
4445
#include <optional>
4546
using namespace clang;
@@ -4197,7 +4198,7 @@ static void emitWriteback(CodeGenFunction &CGF,
41974198
// Release the old value.
41984199
CGF.EmitARCRelease(oldValue, srcLV.isARCPreciseLifetime());
41994200

4200-
// Otherwise, we can just do a normal lvalue store.
4201+
// Otherwise, we can just do a normal lvalue store.
42014202
} else {
42024203
CGF.EmitStoreThroughLValue(RValue::get(value), srcLV);
42034204
}
@@ -4207,12 +4208,6 @@ static void emitWriteback(CodeGenFunction &CGF,
42074208
CGF.EmitBlock(contBB);
42084209
}
42094210

4210-
static void emitWritebacks(CodeGenFunction &CGF,
4211-
const CallArgList &args) {
4212-
for (const auto &I : args.writebacks())
4213-
emitWriteback(CGF, I);
4214-
}
4215-
42164211
static void deactivateArgCleanupsBeforeCall(CodeGenFunction &CGF,
42174212
const CallArgList &CallArgs) {
42184213
ArrayRef<CallArgList::CallArgCleanup> Cleanups =
@@ -4683,7 +4678,8 @@ void CallArg::copyInto(CodeGenFunction &CGF, Address Addr) const {
46834678

46844679
void CodeGenFunction::EmitWritebacks(CodeGenFunction &CGF,
46854680
const CallArgList &args) {
4686-
emitWritebacks(CGF, args);
4681+
for (const auto &I : args.writebacks())
4682+
emitWriteback(CGF, I);
46874683
}
46884684

46894685
void CodeGenFunction::EmitCallArg(CallArgList &args, const Expr *E,
@@ -5898,7 +5894,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
58985894
// Emit any call-associated writebacks immediately. Arguably this
58995895
// should happen after any return-value munging.
59005896
if (CallArgs.hasWritebacks())
5901-
emitWritebacks(*this, CallArgs);
5897+
CodeGenFunction::EmitWritebacks(*this, CallArgs);
59025898

59035899
// The stack cleanup for inalloca arguments has to run out of the normal
59045900
// lexical order, so deactivate it and run it manually here.

clang/lib/CodeGen/CGExpr.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5478,9 +5478,8 @@ CodeGenFunction::EmitHLSLOutArgLValues(const HLSLOutArgExpr *E, QualType Ty) {
54785478
return std::make_pair(BaseLV, TempLV);
54795479
}
54805480

5481-
std::pair<LValue, LValue>
5482-
CodeGenFunction::EmitHLSLOutArgExpr(const HLSLOutArgExpr *E, CallArgList &Args,
5483-
QualType Ty) {
5481+
LValue CodeGenFunction::EmitHLSLOutArgExpr(const HLSLOutArgExpr *E,
5482+
CallArgList &Args, QualType Ty) {
54845483

54855484
auto [BaseLV, TempLV] = EmitHLSLOutArgLValues(E, Ty);
54865485

@@ -5495,7 +5494,7 @@ CodeGenFunction::EmitHLSLOutArgExpr(const HLSLOutArgExpr *E, CallArgList &Args,
54955494
Args.addWriteback(BaseLV, TmpAddr, nullptr, E->getWritebackCast(),
54965495
LifetimeSize);
54975496
Args.add(RValue::get(TmpAddr, *this), Ty);
5498-
return std::make_pair(BaseLV, TempLV);
5497+
return TempLV;
54995498
}
55005499

55015500
LValue

0 commit comments

Comments
 (0)