|
17 | 17 | #include "CGObjCRuntime.h" |
18 | 18 | #include "CGOpenCLRuntime.h" |
19 | 19 | #include "CGRecordLayout.h" |
| 20 | +#include "CGValue.h" |
20 | 21 | #include "CodeGenFunction.h" |
21 | 22 | #include "CodeGenModule.h" |
22 | 23 | #include "ConstantEmitter.h" |
|
25 | 26 | #include "clang/AST/ASTContext.h" |
26 | 27 | #include "clang/AST/Attr.h" |
27 | 28 | #include "clang/AST/Decl.h" |
| 29 | +#include "clang/AST/Expr.h" |
28 | 30 | #include "clang/AST/OSLog.h" |
29 | 31 | #include "clang/AST/OperationKinds.h" |
| 32 | +#include "clang/AST/Type.h" |
30 | 33 | #include "clang/Basic/TargetBuiltins.h" |
31 | 34 | #include "clang/Basic/TargetInfo.h" |
32 | 35 | #include "clang/Basic/TargetOptions.h" |
33 | 36 | #include "clang/CodeGen/CGFunctionInfo.h" |
34 | 37 | #include "clang/Frontend/FrontendDiagnostic.h" |
35 | 38 | #include "llvm/ADT/APFloat.h" |
36 | 39 | #include "llvm/ADT/APInt.h" |
37 | | -#include "llvm/ADT/ArrayRef.h" |
38 | 40 | #include "llvm/ADT/FloatingPointMode.h" |
39 | 41 | #include "llvm/ADT/SmallPtrSet.h" |
40 | | -#include "llvm/ADT/SmallVector.h" |
41 | 42 | #include "llvm/ADT/StringExtras.h" |
42 | 43 | #include "llvm/Analysis/ValueTracking.h" |
43 | 44 | #include "llvm/IR/DataLayout.h" |
44 | | -#include "llvm/IR/DerivedTypes.h" |
45 | 45 | #include "llvm/IR/InlineAsm.h" |
46 | 46 | #include "llvm/IR/Intrinsics.h" |
47 | 47 | #include "llvm/IR/IntrinsicsAArch64.h" |
|
70 | 70 | #include "llvm/TargetParser/X86TargetParser.h" |
71 | 71 | #include <optional> |
72 | 72 | #include <sstream> |
| 73 | +#include <utility> |
73 | 74 |
|
74 | 75 | using namespace clang; |
75 | 76 | using namespace CodeGen; |
@@ -98,6 +99,157 @@ static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size, |
98 | 99 | I->addAnnotationMetadata("auto-init"); |
99 | 100 | } |
100 | 101 |
|
| 102 | +static Value *handleHlslSplitdouble(const CallExpr *E, CodeGenFunction *CGF) { |
| 103 | + Value *Op0 = CGF->EmitScalarExpr(E->getArg(0)); |
| 104 | + const auto *OutArg1 = dyn_cast<HLSLOutArgExpr>(E->getArg(1)); |
| 105 | + const auto *OutArg2 = dyn_cast<HLSLOutArgExpr>(E->getArg(2)); |
| 106 | + |
| 107 | + CallArgList Args; |
| 108 | + LValue Op1TmpLValue = |
| 109 | + CGF->EmitHLSLOutArgExpr(OutArg1, Args, OutArg1->getType()); |
| 110 | + LValue Op2TmpLValue = |
| 111 | + CGF->EmitHLSLOutArgExpr(OutArg2, Args, OutArg2->getType()); |
| 112 | + |
| 113 | + if (CGF->getTarget().getCXXABI().areArgsDestroyedLeftToRightInCallee()) |
| 114 | + Args.reverseWritebacks(); |
| 115 | + |
| 116 | + auto EmitVectorCode = |
| 117 | + [](Value *Op, CGBuilderTy *Builder, |
| 118 | + FixedVectorType *DestTy) -> std::pair<Value *, Value *> { |
| 119 | + Value *bitcast = Builder->CreateBitCast(Op, DestTy); |
| 120 | + |
| 121 | + SmallVector<int> LowbitsIndex; |
| 122 | + SmallVector<int> HighbitsIndex; |
| 123 | + |
| 124 | + for (unsigned int Idx = 0; Idx < DestTy->getNumElements(); Idx += 2) { |
| 125 | + LowbitsIndex.push_back(Idx); |
| 126 | + HighbitsIndex.push_back(Idx + 1); |
| 127 | + } |
| 128 | + |
| 129 | + Value *Arg0 = Builder->CreateShuffleVector(bitcast, LowbitsIndex); |
| 130 | + Value *Arg1 = Builder->CreateShuffleVector(bitcast, HighbitsIndex); |
| 131 | + |
| 132 | + return std::make_pair(Arg0, Arg1); |
| 133 | + }; |
| 134 | + |
| 135 | + Value *LastInst = nullptr; |
| 136 | + |
| 137 | + if (CGF->CGM.getTarget().getTriple().isDXIL()) { |
| 138 | + |
| 139 | + llvm::Type *RetElementTy = CGF->Int32Ty; |
| 140 | + if (auto *Op0VecTy = E->getArg(0)->getType()->getAs<clang::VectorType>()) |
| 141 | + RetElementTy = llvm::VectorType::get( |
| 142 | + CGF->Int32Ty, ElementCount::getFixed(Op0VecTy->getNumElements())); |
| 143 | + auto *RetTy = llvm::StructType::get(RetElementTy, RetElementTy); |
| 144 | + |
| 145 | + CallInst *CI = CGF->Builder.CreateIntrinsic( |
| 146 | + RetTy, Intrinsic::dx_splitdouble, {Op0}, nullptr, "hlsl.splitdouble"); |
| 147 | + |
| 148 | + Value *Arg0 = CGF->Builder.CreateExtractValue(CI, 0); |
| 149 | + Value *Arg1 = CGF->Builder.CreateExtractValue(CI, 1); |
| 150 | + |
| 151 | + CGF->Builder.CreateStore(Arg0, Op1TmpLValue.getAddress()); |
| 152 | + LastInst = CGF->Builder.CreateStore(Arg1, Op2TmpLValue.getAddress()); |
| 153 | + |
| 154 | + } else { |
| 155 | + |
| 156 | + assert(!CGF->CGM.getTarget().getTriple().isDXIL() && |
| 157 | + "For non-DXIL targets we generate the instructions"); |
| 158 | + |
| 159 | + if (!Op0->getType()->isVectorTy()) { |
| 160 | + FixedVectorType *DestTy = FixedVectorType::get(CGF->Int32Ty, 2); |
| 161 | + Value *Bitcast = CGF->Builder.CreateBitCast(Op0, DestTy); |
| 162 | + |
| 163 | + Value *Arg0 = CGF->Builder.CreateExtractElement(Bitcast, 0.0); |
| 164 | + Value *Arg1 = CGF->Builder.CreateExtractElement(Bitcast, 1.0); |
| 165 | + |
| 166 | + CGF->Builder.CreateStore(Arg0, Op1TmpLValue.getAddress()); |
| 167 | + LastInst = CGF->Builder.CreateStore(Arg1, Op2TmpLValue.getAddress()); |
| 168 | + } else { |
| 169 | + |
| 170 | + const auto *TargTy = E->getArg(0)->getType()->getAs<clang::VectorType>(); |
| 171 | + |
| 172 | + int NumElements = TargTy->getNumElements(); |
| 173 | + |
| 174 | + FixedVectorType *DestTy = FixedVectorType::get(CGF->Int32Ty, 4); |
| 175 | + if (NumElements == 1) { |
| 176 | + FixedVectorType *DestTy = FixedVectorType::get(CGF->Int32Ty, 2); |
| 177 | + Value *Bitcast = CGF->Builder.CreateBitCast(Op0, DestTy); |
| 178 | + |
| 179 | + Value *Arg0 = CGF->Builder.CreateExtractElement(Bitcast, 0.0); |
| 180 | + Value *Arg1 = CGF->Builder.CreateExtractElement(Bitcast, 1.0); |
| 181 | + |
| 182 | + CGF->Builder.CreateStore(Arg0, Op1TmpLValue.getAddress()); |
| 183 | + LastInst = CGF->Builder.CreateStore(Arg1, Op2TmpLValue.getAddress()); |
| 184 | + } else if (NumElements == 2) { |
| 185 | + auto [LowBits, HighBits] = EmitVectorCode(Op0, &CGF->Builder, DestTy); |
| 186 | + |
| 187 | + CGF->Builder.CreateStore(LowBits, Op1TmpLValue.getAddress()); |
| 188 | + LastInst = |
| 189 | + CGF->Builder.CreateStore(HighBits, Op2TmpLValue.getAddress()); |
| 190 | + } else { |
| 191 | + |
| 192 | + SmallVector<std::pair<Value *, Value *>> EmitedValuePairs; |
| 193 | + |
| 194 | + int isOdd = NumElements % 2; |
| 195 | + int NumEvenElements = NumElements - isOdd; |
| 196 | + |
| 197 | + for (int It = 0; It < NumEvenElements; It += 2) { |
| 198 | + auto Shuff = CGF->Builder.CreateShuffleVector(Op0, {It, It + 1}); |
| 199 | + std::pair<Value *, Value *> ValuePair = |
| 200 | + EmitVectorCode(Shuff, &CGF->Builder, DestTy); |
| 201 | + EmitedValuePairs.push_back(ValuePair); |
| 202 | + } |
| 203 | + |
| 204 | + if (isOdd == 1) { |
| 205 | + FixedVectorType *DestTy = FixedVectorType::get(CGF->Int32Ty, 2); |
| 206 | + auto Shuff = CGF->Builder.CreateShuffleVector(Op0, {NumEvenElements}); |
| 207 | + std::pair<Value *, Value *> ValuePair = |
| 208 | + EmitVectorCode(Shuff, &CGF->Builder, DestTy); |
| 209 | + EmitedValuePairs.push_back(ValuePair); |
| 210 | + } |
| 211 | + |
| 212 | + SmallVector<int> Index = {0, 1}; |
| 213 | + |
| 214 | + auto arg0 = EmitedValuePairs[0].first; |
| 215 | + auto arg1 = EmitedValuePairs[0].second; |
| 216 | + |
| 217 | + auto EvenSizedPairs = EmitedValuePairs.size() - isOdd; |
| 218 | + |
| 219 | + for (int It = 1; It < EvenSizedPairs; It++) { |
| 220 | + int CurIndexSize = Index.size(); |
| 221 | + Index.insert(Index.end(), {CurIndexSize, CurIndexSize + 1}); |
| 222 | + arg0 = CGF->Builder.CreateShuffleVector( |
| 223 | + arg0, EmitedValuePairs[It].first, Index); |
| 224 | + arg1 = CGF->Builder.CreateShuffleVector( |
| 225 | + arg1, EmitedValuePairs[It].second, Index); |
| 226 | + } |
| 227 | + |
| 228 | + if (isOdd == 1) { |
| 229 | + int CurIndexSize = Index.size(); |
| 230 | + |
| 231 | + auto extendedLowerBits = CGF->Builder.CreateShuffleVector( |
| 232 | + EmitedValuePairs[EvenSizedPairs].first, {0, 0}); |
| 233 | + |
| 234 | + auto extendedHighBits = CGF->Builder.CreateShuffleVector( |
| 235 | + EmitedValuePairs[EvenSizedPairs].second, {0, 0}); |
| 236 | + Index.insert(Index.end(), {CurIndexSize}); |
| 237 | + |
| 238 | + arg0 = |
| 239 | + CGF->Builder.CreateShuffleVector(arg0, extendedLowerBits, Index); |
| 240 | + arg1 = |
| 241 | + CGF->Builder.CreateShuffleVector(arg1, extendedHighBits, Index); |
| 242 | + } |
| 243 | + |
| 244 | + CGF->Builder.CreateStore(arg0, Op1TmpLValue.getAddress()); |
| 245 | + LastInst = CGF->Builder.CreateStore(arg1, Op2TmpLValue.getAddress()); |
| 246 | + } |
| 247 | + } |
| 248 | + } |
| 249 | + CGF->EmitWritebacks(*CGF, Args); |
| 250 | + return LastInst; |
| 251 | +} |
| 252 | + |
101 | 253 | /// getBuiltinLibFunction - Given a builtin id for a function like |
102 | 254 | /// "__builtin_fabsf", return a Function* for "fabsf". |
103 | 255 | llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD, |
@@ -18955,92 +19107,13 @@ case Builtin::BI__builtin_hlsl_elementwise_isinf: { |
18955 | 19107 | CGM.getHLSLRuntime().getRadiansIntrinsic(), ArrayRef<Value *>{Op0}, |
18956 | 19108 | nullptr, "hlsl.radians"); |
18957 | 19109 | } |
18958 | | - // This should only be called when targeting DXIL |
18959 | | - case Builtin::BI__builtin_hlsl_splitdouble: { |
| 19110 | + case Builtin::BI__builtin_hlsl_elementwise_splitdouble: { |
18960 | 19111 |
|
18961 | 19112 | assert((E->getArg(0)->getType()->hasFloatingRepresentation() && |
18962 | 19113 | E->getArg(1)->getType()->hasUnsignedIntegerRepresentation() && |
18963 | 19114 | E->getArg(2)->getType()->hasUnsignedIntegerRepresentation()) && |
18964 | 19115 | "asuint operands types mismatch"); |
18965 | | - Value *Op0 = EmitScalarExpr(E->getArg(0)); |
18966 | | - const HLSLOutArgExpr *OutArg1 = dyn_cast<HLSLOutArgExpr>(E->getArg(1)); |
18967 | | - const HLSLOutArgExpr *OutArg2 = dyn_cast<HLSLOutArgExpr>(E->getArg(2)); |
18968 | | - |
18969 | | - CallArgList Args; |
18970 | | - auto [Op1BaseLValue, Op1TmpLValue] = |
18971 | | - EmitHLSLOutArgExpr(OutArg1, Args, OutArg1->getType()); |
18972 | | - auto [Op2BaseLValue, Op2TmpLValue] = |
18973 | | - EmitHLSLOutArgExpr(OutArg2, Args, OutArg2->getType()); |
18974 | | - |
18975 | | - if (CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil) { |
18976 | | - |
18977 | | - llvm::StructType *retType = llvm::StructType::get(Int32Ty, Int32Ty); |
18978 | | - |
18979 | | - if (Op0->getType()->isVectorTy()) { |
18980 | | - auto *Op0VecTy = E->getArg(0)->getType()->getAs<VectorType>(); |
18981 | | - |
18982 | | - llvm::VectorType *i32VecTy = llvm::VectorType::get( |
18983 | | - Int32Ty, ElementCount::getFixed(Op0VecTy->getNumElements())); |
18984 | | - retType = llvm::StructType::get(i32VecTy, i32VecTy); |
18985 | | - } |
18986 | | - |
18987 | | - CallInst *CI = |
18988 | | - Builder.CreateIntrinsic(retType, Intrinsic::dx_splitdouble, {Op0}, |
18989 | | - nullptr, "hlsl.splitdouble"); |
18990 | | - |
18991 | | - Value *arg0 = Builder.CreateExtractValue(CI, 0); |
18992 | | - Value *arg1 = Builder.CreateExtractValue(CI, 1); |
18993 | | - |
18994 | | - Builder.CreateStore(arg0, Op1TmpLValue.getAddress()); |
18995 | | - auto *s = Builder.CreateStore(arg1, Op2TmpLValue.getAddress()); |
18996 | | - |
18997 | | - EmitWritebacks(*this, Args); |
18998 | | - return s; |
18999 | | - } |
19000 | | - |
19001 | | - |
19002 | | - if(!Op0->getType()->isVectorTy()){ |
19003 | | - FixedVectorType *destTy = FixedVectorType::get(Int32Ty, 2); |
19004 | | - Value *bitcast = Builder.CreateBitCast(Op0, destTy); |
19005 | | - |
19006 | | - Value *arg0 = Builder.CreateExtractElement(bitcast, 0.0); |
19007 | | - Value *arg1 = Builder.CreateExtractElement(bitcast, 1.0); |
19008 | | - |
19009 | | - Builder.CreateStore(arg0, Op1TmpLValue.getAddress()); |
19010 | | - auto *s = Builder.CreateStore(arg1, Op2TmpLValue.getAddress()); |
19011 | | - |
19012 | | - EmitWritebacks(*this, Args); |
19013 | | - return s; |
19014 | | - } |
19015 | | - |
19016 | | - auto *Op0VecTy = E->getArg(0)->getType()->getAs<VectorType>(); |
19017 | | - |
19018 | | - int numElements = Op0VecTy -> getNumElements() * 2; |
19019 | | - |
19020 | | - FixedVectorType *destTy = FixedVectorType::get(Int32Ty, numElements); |
19021 | | - |
19022 | | - Value *bitcast = Builder.CreateBitCast(Op0, destTy); |
19023 | | - |
19024 | | - SmallVector<int> lowbitsIndex; |
19025 | | - SmallVector<int> highbitsIndex; |
19026 | | - |
19027 | | - for(int idx = 0; idx < numElements; idx += 2){ |
19028 | | - lowbitsIndex.push_back(idx); |
19029 | | - } |
19030 | | - |
19031 | | - for(int idx = 1; idx < numElements; idx += 2){ |
19032 | | - highbitsIndex.push_back(idx); |
19033 | | - } |
19034 | | - |
19035 | | - Value *arg0 = Builder.CreateShuffleVector(bitcast, lowbitsIndex); |
19036 | | - Value *arg1 = Builder.CreateShuffleVector(bitcast, highbitsIndex); |
19037 | | - |
19038 | | - Builder.CreateStore(arg0, Op1TmpLValue.getAddress()); |
19039 | | - auto *s = Builder.CreateStore(arg1, Op2TmpLValue.getAddress()); |
19040 | | - |
19041 | | - EmitWritebacks(*this, Args); |
19042 | | - return s; |
19043 | | - |
| 19116 | + return handleHlslSplitdouble(E, this); |
19044 | 19117 | } |
19045 | 19118 | } |
19046 | 19119 | return nullptr; |
|
0 commit comments