Skip to content

Commit 25ade5b

Browse files
author
iclsrc
committed
Merge from 'sycl' to 'sycl-web'
2 parents 97d30eb + bbf4d5d commit 25ade5b

File tree

64 files changed

+1875
-2872
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

64 files changed

+1875
-2872
lines changed

clang/lib/Driver/ToolChains/SYCL.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1088,7 +1088,8 @@ SYCLToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
10881088
bool Unsupported = false;
10891089
for (OptSpecifier UnsupportedOpt : getUnsupportedOpts()) {
10901090
if (Opt.matches(UnsupportedOpt)) {
1091-
if (A->getValues().size() == 1) {
1091+
if (Opt.getID() == options::OPT_fsanitize_EQ &&
1092+
A->getValues().size() == 1) {
10921093
std::string SanitizeVal = A->getValue();
10931094
if (SanitizeVal == "address") {
10941095
if (IsNewDAL)
@@ -1246,6 +1247,11 @@ void SYCLToolChain::AddImpliedTargetArgs(const llvm::Triple &Triple,
12461247
if (Arg *A = Args.getLastArg(options::OPT_O_Group))
12471248
if (A->getOption().matches(options::OPT_O0))
12481249
BeArgs.push_back("-cl-opt-disable");
1250+
// In precise floating-point mode we pass the OpenCL flag forcing division to
1251+
// be correctly rounded.
1252+
if (Arg *A = Args.getLastArg(options::OPT_ffp_model_EQ))
1253+
if (StringRef{A->getValue()}.equals("precise"))
1254+
BeArgs.push_back("-cl-fp32-correctly-rounded-divide-sqrt");
12491255
StringRef RegAllocModeOptName = "-ftarget-register-alloc-mode=";
12501256
if (Arg *A = Args.getLastArg(options::OPT_ftarget_register_alloc_mode_EQ)) {
12511257
StringRef RegAllocModeVal = A->getValue(0);

clang/test/Driver/sycl-offload-aot.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,18 @@
220220
// RUN: | FileCheck -check-prefix=CHK-TOOLS-IMPLIED-OPTS-GEN %s
221221
// CHK-TOOLS-IMPLIED-OPTS-GEN: ocloc{{.*}} "-options" "-g -cl-opt-disable" "-DFOO1" "-DFOO2"
222222

223+
// RUN: %clang -### -target x86_64-unknown-linux-gnu -fsycl -fsycl-targets=spir64_fpga-unknown-unknown -ffp-model=precise -Xsycl-target-backend "-DFOO1 -DFOO2" %s 2>&1 \
224+
// RUN: | FileCheck -check-prefix=CHK-TOOLS-IMPLIED-ROUNDING-FPGA %s
225+
// CHK-TOOLS-IMPLIED-ROUNDING-FPGA: opencl-aot{{.*}} "--bo=-cl-fp32-correctly-rounded-divide-sqrt" "-DFOO1" "-DFOO2"
226+
227+
// RUN: %clang -### -target x86_64-unknown-linux-gnu -fsycl -fsycl-targets=spir64_x86_64-unknown-unknown -ffp-model=precise -Xsycl-target-backend "-DFOO1 -DFOO2" %s 2>&1 \
228+
// RUN: | FileCheck -check-prefix=CHK-TOOLS-IMPLIED-ROUNDING-CPU %s
229+
// CHK-TOOLS-IMPLIED-ROUNDING-CPU: opencl-aot{{.*}} "--bo=-cl-fp32-correctly-rounded-divide-sqrt" "-DFOO1" "-DFOO2"
230+
231+
// RUN: %clang -### -target x86_64-unknown-linux-gnu -fsycl -fsycl-targets=spir64_gen-unknown-unknown -ffp-model=precise -Xsycl-target-backend "-DFOO1 -DFOO2" %s 2>&1 \
232+
// RUN: | FileCheck -check-prefix=CHK-TOOLS-IMPLIED-ROUNDING-GEN %s
233+
// CHK-TOOLS-IMPLIED-ROUNDING-GEN: ocloc{{.*}} "-options" "-cl-fp32-correctly-rounded-divide-sqrt" "-DFOO1" "-DFOO2"
234+
223235
/// Check -Xsycl-target-linker option passing
224236
// RUN: %clang -### -target x86_64-unknown-linux-gnu -fsycl -fsycl-targets=spir64_fpga-unknown-unknown -Xshardware -Xsycl-target-linker "-DFOO1 -DFOO2" %s 2>&1 \
225237
// RUN: | FileCheck -check-prefix=CHK-TOOLS-FPGA-OPTS2 %s

clang/test/Driver/sycl-offload.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -424,6 +424,11 @@
424424
// CHK-TOOLS-IMPLIED-OPTS-O0-NOT: clang-offload-wrapper{{.*}} "-compile-opts={{.*}}-cl-opt-disable"
425425
// CHK-TOOLS-IMPLIED-OPTS-O2-NOT: clang-offload-wrapper{{.*}} "-compile-opts={{.*}}-cl-opt-disable"
426426

427+
/// Check for implied options (-ffp-model=precise)
428+
// RUN: %clang -### -target x86_64-unknown-linux-gnu -fsycl -fsycl-targets=spir64 -ffp-model=precise %s 2>&1 \
429+
// RUN: | FileCheck -check-prefix=CHK-TOOLS-IMPLIED-ROUNDING %s
430+
// CHK-TOOLS-IMPLIED-ROUNDING: clang-offload-wrapper{{.*}} "-compile-opts={{.*}}-cl-fp32-correctly-rounded-divide-sqrt
431+
427432
// RUN: %clang -### -target x86_64-unknown-linux-gnu -fsycl -fsycl-targets=spir64-unknown-unknown -Xsycl-target-linker "-DFOO1 -DFOO2" %s 2>&1 \
428433
// RUN: | FileCheck -check-prefix=CHK-TOOLS-OPTS2 %s
429434
// CHK-TOOLS-OPTS2: clang-offload-wrapper{{.*}} "-link-opts=-DFOO1 -DFOO2"

clang/test/Driver/sycl-unsupported.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,12 @@
3535
// RUN: -check-prefixes=UNSUPPORTED_OPT_DIAG,UNSUPPORTED_OPT
3636
// RUN: %clangxx -fsycl -forder-file-instrumentation -### %s 2>&1 \
3737
// RUN: | FileCheck %s -DARCH=spir64 -DOPT=-forder-file-instrumentation
38+
// Check to make sure our '-fsanitize=address' exception isn't triggered by a
39+
// different option
40+
// RUN: %clangxx -fsycl -fprofile-instr-generate=address -### %s 2>&1 \
41+
// RUN: | FileCheck %s -DARCH=spir64 -DOPT=-fprofile-instr-generate=address \
42+
// RUN: -DOPT_CC1=-fprofile-instrument=clang \
43+
// RUN: -check-prefixes=UNSUPPORTED_OPT_DIAG,UNSUPPORTED_OPT
3844

3945
// CHECK: ignoring '[[OPT]]' option as it is not currently supported for target '[[ARCH]]{{.*}}' [-Woption-ignored]
4046
// CHECK-NOT: clang{{.*}} "-fsycl-is-device"{{.*}} "[[OPT]]{{.*}}"

llvm/include/llvm/SYCLLowerIR/GlobalOffset.h

Lines changed: 28 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include "llvm/IR/Module.h"
1313
#include "llvm/IR/PassManager.h"
1414
#include "llvm/SYCLLowerIR/TargetHelpers.h"
15+
#include "llvm/Transforms/Utils/Cloning.h"
1516

1617
namespace llvm {
1718

@@ -38,41 +39,38 @@ class GlobalOffsetPass : public PassInfoMixin<GlobalOffsetPass> {
3839
/// `Func` belongs, contains both the original function and its clone with the
3940
/// signature extended with the implicit offset parameter and `_with_offset`
4041
/// appended to the name.
41-
/// An alloca of 3 zeros (corresponding to offsets in x, y and z) is added to
42-
/// the original kernel, in order to keep the interface of kernel's call
43-
/// graph unified, regardless of the fact if the global offset has been used.
4442
///
4543
/// \param Func Kernel to be processed.
4644
void processKernelEntryPoint(Function *Func);
4745

48-
/// This function adds an implicit parameter to the function containing a
49-
/// call instruction to the implicit offset intrinsic or another function
50-
/// (which eventually calls the instrinsic). If the call instruction is to
51-
/// the implicit offset intrinsic, then the intrinisic is replaced with the
52-
/// parameter that was added.
46+
/// For a function containing a call instruction to the implicit offset
47+
/// intrinsic, or another function which eventually calls the intrinsic,
48+
/// this function clones the function and adds an implicit parameter to the
49+
/// clone.
50+
/// If the call instruction is to the implicit offset intrinsic then the
51+
/// intrinsic inside the cloned function is replaced with the parameter that
52+
/// was added.
5353
///
54-
/// Once the function, say `F`, containing a call to `Callee` has the
55-
/// implicit parameter added, callers of `F` are processed by recursively
56-
/// calling this function, passing `F` to `CalleeWithImplicitParam`.
57-
///
58-
/// Since the cloning of entry points may alter the users of a function, the
59-
/// cloning must be done as early as possible, as to ensure that no users are
60-
/// added to previous callees in the call-tree.
54+
/// Once the clone of a function, say `F`, containing a call to `Callee`
55+
/// has the implicit parameter added, callers of `F` are processed by
56+
/// getting cloned and their clones are processed by recursively calling the
57+
/// clone of `F`, passing `F` to `CalleeWithImplicitParam`.
6158
///
6259
/// \param Callee is the function (to which this transformation has already
6360
/// been applied), or to the implicit offset intrinsic.
6461
///
6562
/// \param CalleeWithImplicitParam indicates whether Callee is to the
6663
/// implicit intrinsic (when `nullptr`) or to another function (not
67-
/// `nullptr`) - this is used to know whether calls to it needs to have the
68-
/// implicit parameter added to it or replaced with the implicit parameter.
64+
/// `nullptr`) - this is used to know whether calls to it inside clones need
65+
/// to have the implicit parameter added to it or be replaced with the
66+
/// implicit parameter.
6967
void addImplicitParameterToCallers(Module &M, Value *Callee,
7068
Function *CalleeWithImplicitParam);
7169

72-
/// For a given function `Func` extend signature to contain an implicit
73-
/// offset argument.
70+
/// For a given function `Func` create a clone and extend its signature to
71+
/// contain an implicit offset argument.
7472
///
75-
/// \param Func A function to add offset to.
73+
/// \param Func A function to clone and add the offset argument to.
7674
///
7775
/// \param ImplicitArgumentType Architecture-dependent type of the implicit
7876
/// argument holding the global offset.
@@ -81,13 +79,15 @@ class GlobalOffsetPass : public PassInfoMixin<GlobalOffsetPass> {
8179
/// keep it intact and create a clone of it with `_with_offset` appended to
8280
/// the name.
8381
///
84-
/// \returns A pair of new function with the offset argument added and a
82+
/// \param IsKernel Indicates whether Func is a kernel entry point.
83+
///
84+
/// \returns A pair of the new function with the offset argument added and a
8585
/// pointer to the implicit argument (either a func argument or a bitcast
8686
/// turning it to the correct type).
8787
std::pair<Function *, Value *>
8888
addOffsetArgumentToFunction(Module &M, Function *Func,
8989
Type *ImplicitArgumentType = nullptr,
90-
bool KeepOriginal = false);
90+
bool KeepOriginal = false, bool IsKernel = false);
9191

9292
/// Create a mapping of kernel entry points to their metadata nodes. While
9393
/// iterating over kernels make sure that a given kernel entry point has no
@@ -102,8 +102,12 @@ class GlobalOffsetPass : public PassInfoMixin<GlobalOffsetPass> {
102102
SmallVectorImpl<KernelPayload> &KernelPayloads);
103103

104104
private:
105-
/// Keep track of which functions have been processed to avoid processing
106-
/// twice.
105+
/// Keep track of all cloned offset functions to avoid processing them.
106+
llvm::SmallPtrSet<Function *, 8> Clones;
107+
/// Save clone mappings to obtain pointers to CallInsts during processing.
108+
llvm::ValueToValueMapTy GlobalVMap;
109+
/// Keep track of which non-offset functions have been processed to avoid
110+
/// processing twice.
107111
llvm::DenseMap<Function *, Value *> ProcessedFunctions;
108112
/// Keep a map of all entry point functions with metadata.
109113
llvm::DenseMap<Function *, MDNode *> EntryPointMetadata;

llvm/lib/SYCLLowerIR/GlobalOffset.cpp

Lines changed: 55 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -83,34 +83,7 @@ PreservedAnalyses GlobalOffsetPass::run(Module &M, ModuleAnalysisManager &) {
8383
if (!ImplicitOffsetIntrinsic || ImplicitOffsetIntrinsic->use_empty())
8484
return PreservedAnalyses::all();
8585

86-
if (!EnableGlobalOffset) {
87-
SmallVector<CallInst *, 4> Worklist;
88-
SmallVector<LoadInst *, 4> LI;
89-
SmallVector<Instruction *, 4> PtrUses;
90-
91-
// Collect all GEPs and Loads from the intrinsic's CallInsts
92-
for (Value *V : ImplicitOffsetIntrinsic->users()) {
93-
Worklist.push_back(cast<CallInst>(V));
94-
for (Value *V2 : V->users())
95-
getLoads(cast<Instruction>(V2), PtrUses, LI);
96-
}
97-
98-
// Replace each use of a collected Load with a Constant 0
99-
for (LoadInst *L : LI)
100-
L->replaceAllUsesWith(ConstantInt::get(L->getType(), 0));
101-
102-
// Remove all collected Loads and GEPs from the kernel.
103-
// PtrUses is returned by `getLoads` in topological order.
104-
// Walk it backwards so we don't violate users.
105-
for (auto *I : reverse(PtrUses))
106-
I->eraseFromParent();
107-
108-
// Remove all collected CallInsts from the kernel.
109-
for (CallInst *CI : Worklist) {
110-
auto *I = cast<Instruction>(CI);
111-
I->eraseFromParent();
112-
}
113-
} else {
86+
if (EnableGlobalOffset) {
11487
// For AMD allocas and pointers have to be to CONSTANT_PRIVATE (5), NVVM is
11588
// happy with ADDRESS_SPACE_GENERIC (0).
11689
TargetAS = AT == ArchType::Cuda ? 0 : 5;
@@ -133,6 +106,32 @@ PreservedAnalyses GlobalOffsetPass::run(Module &M, ModuleAnalysisManager &) {
133106
// Add implicit parameters to all direct and indirect users of the offset
134107
addImplicitParameterToCallers(M, ImplicitOffsetIntrinsic, nullptr);
135108
}
109+
SmallVector<CallInst *, 4> Worklist;
110+
SmallVector<LoadInst *, 4> Loads;
111+
SmallVector<Instruction *, 4> PtrUses;
112+
113+
// Collect all GEPs and Loads from the intrinsic's CallInsts
114+
for (Value *V : ImplicitOffsetIntrinsic->users()) {
115+
Worklist.push_back(cast<CallInst>(V));
116+
for (Value *V2 : V->users())
117+
getLoads(cast<Instruction>(V2), PtrUses, Loads);
118+
}
119+
120+
// Replace each use of a collected Load with a Constant 0
121+
for (LoadInst *L : Loads)
122+
L->replaceAllUsesWith(ConstantInt::get(L->getType(), 0));
123+
124+
// Remove all collected Loads and GEPs from the kernel.
125+
// PtrUses is returned by `getLoads` in topological order.
126+
// Walk it backwards so we don't violate users.
127+
for (auto *I : reverse(PtrUses))
128+
I->eraseFromParent();
129+
130+
// Remove all collected CallInsts from the kernel.
131+
for (CallInst *CI : Worklist) {
132+
auto *I = cast<Instruction>(CI);
133+
I->eraseFromParent();
134+
}
136135

137136
// Assert that all uses of `ImplicitOffsetIntrinsic` are removed and delete
138137
// it.
@@ -161,7 +160,8 @@ void GlobalOffsetPass::processKernelEntryPoint(Function *Func) {
161160

162161
auto *NewFunc = addOffsetArgumentToFunction(
163162
M, Func, KernelImplicitArgumentType->getPointerTo(),
164-
/*KeepOriginal=*/true)
163+
/*KeepOriginal=*/true,
164+
/*IsKernel=*/true)
165165
.first;
166166
Argument *NewArgument = std::prev(NewFunc->arg_end());
167167
// Pass byval to the kernel for NVIDIA, AMD's calling convention disallows
@@ -177,62 +177,43 @@ void GlobalOffsetPass::processKernelEntryPoint(Function *Func) {
177177
FuncMetadata->getOperand(1),
178178
FuncMetadata->getOperand(2)};
179179
KernelMetadata->addOperand(MDNode::get(Ctx, NewMetadata));
180-
181-
// Create alloca of zeros for the implicit offset in the original func.
182-
BasicBlock *EntryBlock = &Func->getEntryBlock();
183-
IRBuilder<> Builder(EntryBlock, EntryBlock->getFirstInsertionPt());
184-
Type *ImplicitOffsetType =
185-
ArrayType::get(Type::getInt32Ty(M.getContext()), 3);
186-
AllocaInst *ImplicitOffset =
187-
Builder.CreateAlloca(ImplicitOffsetType, TargetAS);
188-
uint64_t AllocByteSize =
189-
ImplicitOffset->getAllocationSizeInBits(M.getDataLayout()).value() / 8;
190-
CallInst *MemsetCall =
191-
Builder.CreateMemSet(ImplicitOffset, Builder.getInt8(0), AllocByteSize,
192-
ImplicitOffset->getAlign());
193-
MemsetCall->addParamAttr(0, Attribute::NonNull);
194-
MemsetCall->addDereferenceableParamAttr(0, AllocByteSize);
195-
ProcessedFunctions[Func] = Builder.CreateConstInBoundsGEP2_32(
196-
ImplicitOffsetType, ImplicitOffset, 0, 0);
197180
}
198181

199182
void GlobalOffsetPass::addImplicitParameterToCallers(
200183
Module &M, Value *Callee, Function *CalleeWithImplicitParam) {
201-
202-
// Make sure that all entry point callers are processed.
203184
SmallVector<User *, 8> Users{Callee->users()};
204-
for (User *U : Users) {
205-
auto *Call = dyn_cast<CallInst>(U);
206-
if (!Call)
207-
continue;
208185

209-
Function *Caller = Call->getFunction();
210-
if (EntryPointMetadata.count(Caller) != 0) {
211-
processKernelEntryPoint(Caller);
212-
}
213-
}
214-
215-
// User collection may have changed, so we reinitialize it.
216-
Users = SmallVector<User *, 8>{Callee->users()};
217186
for (User *U : Users) {
218187
auto *CallToOld = dyn_cast<CallInst>(U);
219188
if (!CallToOld)
220189
return;
221190

222191
auto *Caller = CallToOld->getFunction();
223192

224-
// Determine if `Caller` needs processed or if this is another callsite
225-
// from an already-processed function.
226-
Function *NewFunc;
193+
// Only original function uses are considered.
194+
// Clones are processed through a global VMap.
195+
if (Clones.contains(Caller))
196+
continue;
197+
198+
// Kernel entry points need additional processing and change Metadata.
199+
if (EntryPointMetadata.count(Caller) != 0)
200+
processKernelEntryPoint(Caller);
201+
202+
// Determine if `Caller` needs to be processed or if this is another
203+
// callsite from a non-offset function or an already-processed function.
227204
Value *ImplicitOffset = ProcessedFunctions[Caller];
228205
bool AlreadyProcessed = ImplicitOffset != nullptr;
206+
207+
Function *NewFunc;
229208
if (AlreadyProcessed) {
230209
NewFunc = Caller;
231210
} else {
232211
std::tie(NewFunc, ImplicitOffset) =
233-
addOffsetArgumentToFunction(M, Caller);
212+
addOffsetArgumentToFunction(M, Caller,
213+
/*KernelImplicitArgumentType*/ nullptr,
214+
/*KeepOriginal=*/true);
234215
}
235-
216+
CallToOld = cast<CallInst>(GlobalVMap[CallToOld]);
236217
if (!CalleeWithImplicitParam) {
237218
// Replace intrinsic call with parameter.
238219
CallToOld->replaceAllUsesWith(ImplicitOffset);
@@ -269,15 +250,12 @@ void GlobalOffsetPass::addImplicitParameterToCallers(
269250

270251
// Process callers of the old function.
271252
addImplicitParameterToCallers(M, Caller, NewFunc);
272-
273-
// Now that the old function is dead, delete it.
274-
Caller->dropAllReferences();
275-
Caller->eraseFromParent();
276253
}
277254
}
278255

279256
std::pair<Function *, Value *> GlobalOffsetPass::addOffsetArgumentToFunction(
280-
Module &M, Function *Func, Type *ImplicitArgumentType, bool KeepOriginal) {
257+
Module &M, Function *Func, Type *ImplicitArgumentType, bool KeepOriginal,
258+
bool IsKernel) {
281259
FunctionType *FuncTy = Func->getFunctionType();
282260
const AttributeList &FuncAttrs = Func->getAttributes();
283261
ImplicitArgumentType =
@@ -316,23 +294,22 @@ std::pair<Function *, Value *> GlobalOffsetPass::addOffsetArgumentToFunction(
316294
// TODO: Are there better naming alternatives that allow for unmangling?
317295
NewFunc->setName(Func->getName() + "_with_offset");
318296

319-
ValueToValueMapTy VMap;
320297
for (Function::arg_iterator FuncArg = Func->arg_begin(),
321298
FuncEnd = Func->arg_end(),
322299
NewFuncArg = NewFunc->arg_begin();
323300
FuncArg != FuncEnd; ++FuncArg, ++NewFuncArg) {
324-
VMap[FuncArg] = NewFuncArg;
301+
GlobalVMap[FuncArg] = NewFuncArg;
325302
}
326303

327304
SmallVector<ReturnInst *, 8> Returns;
328-
CloneFunctionInto(NewFunc, Func, VMap,
305+
CloneFunctionInto(NewFunc, Func, GlobalVMap,
329306
CloneFunctionChangeType::GlobalChanges, Returns);
330307
// In order to keep the signatures of functions called by the kernel
331308
// unified, the pass has to copy global offset to an array allocated in
332309
// addrspace(3). This is done as kernels can't allocate and fill the
333-
// array in constant address space, which would be required for the case
334-
// with no global offset.
335-
if (AT == ArchType::AMDHSA) {
310+
// array in constant address space.
311+
// No longer required, but left in place due to deprecation.
312+
if (IsKernel && AT == ArchType::AMDHSA) {
336313
BasicBlock *EntryBlock = &NewFunc->getEntryBlock();
337314
IRBuilder<> Builder(EntryBlock, EntryBlock->getFirstInsertionPt());
338315
Type *ImplicitOffsetType =
@@ -399,8 +376,8 @@ std::pair<Function *, Value *> GlobalOffsetPass::addOffsetArgumentToFunction(
399376
Type::getInt32Ty(M.getContext())->getPointerTo(TargetAS));
400377
}
401378

402-
ProcessedFunctions[NewFunc] = ImplicitOffset;
403-
379+
ProcessedFunctions[Func] = ImplicitOffset;
380+
Clones.insert(NewFunc);
404381
// Return the new function and the offset argument.
405382
return {NewFunc, ImplicitOffset};
406383
}

0 commit comments

Comments
 (0)