Skip to content

Commit 44d9e6a

Browse files
author
iclsrc
committed
Merge from 'sycl' to 'sycl-web' (20 commits)
2 parents e1a1617 + 3892fe6 commit 44d9e6a

File tree

67 files changed

+2366
-980
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

67 files changed

+2366
-980
lines changed

.github/workflows/sycl_precommit.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,13 @@ jobs:
2727
container:
2828
image: ghcr.io/intel/llvm/sycl_ubuntu2004_nightly:no-drivers
2929
steps:
30+
- name: 'PR commits + 1'
31+
run: echo "PR_FETCH_DEPTH=$(( ${{ github.event.pull_request.commits }} + 1 ))" >> "${GITHUB_ENV}"
3032
- uses: actions/checkout@v2
3133
with:
3234
ref: ${{ github.event.pull_request.head.sha }}
3335
persist-credentials: false
34-
fetch-depth: 2
36+
fetch-depth: ${{ env.PR_FETCH_DEPTH }}
3537
- name: Run clang-format
3638
uses: ./devops/actions/clang-format
3739

clang/include/clang/Basic/Attr.td

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2388,6 +2388,18 @@ def SYCLIntelFPGANofusion : StmtAttr {
23882388
let Documentation = [SYCLIntelFPGANofusionAttrDocs];
23892389
}
23902390

2391+
def SYCLIntelFPGAMaxReinvocationDelay : StmtAttr {
2392+
let Spellings = [CXX11<"intel", "max_reinvocation_delay">];
2393+
let Subjects = SubjectList<[ForStmt, CXXForRangeStmt, WhileStmt, DoStmt],
2394+
ErrorDiag, "'for', 'while', and 'do' statements">;
2395+
let Args = [ExprArgument<"NExpr">];
2396+
let LangOpts = [SYCLIsDevice, SilentlyIgnoreSYCLIsHost];
2397+
let IsStmtDependent = 1;
2398+
let Documentation = [SYCLIntelFPGAMaxReinvocationDelayAttrDocs];
2399+
}
2400+
def : MutualExclusions<[SYCLIntelFPGADisableLoopPipelining,
2401+
SYCLIntelFPGAMaxReinvocationDelay]>;
2402+
23912403
def IntelFPGALocalNonConstVar : SubsetSubject<Var,
23922404
[{S->hasLocalStorage() &&
23932405
S->getKind() != Decl::ImplicitParam &&

clang/include/clang/Basic/AttrDocs.td

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3318,7 +3318,7 @@ disables pipelining of the loop or function data path, causing the loop
33183318
or function to be executed serially. Cannot be used on the same loop or
33193319
function, or in conjunction with ``max_interleaving``,
33203320
``speculated_iterations``, ``max_concurrency``, ``initiation_interval``,
3321-
or ``ivdep``.
3321+
``ivdep``, or ``max_reinvocation_delay``.
33223322

33233323
.. code-block:: c++
33243324

@@ -3451,6 +3451,31 @@ loop should not be fused with any adjacent loop.
34513451
}];
34523452
}
34533453

3454+
def SYCLIntelFPGAMaxReinvocationDelayAttrDocs : Documentation {
3455+
let Category = DocCatVariable;
3456+
let Heading = "intel::max_reinvocation_delay";
3457+
let Content = [{
3458+
This attribute applies to a loop. Specifies the maximum number of cycles allowed
3459+
on the delay between the launch of the last iteration of a loop invocation and
3460+
the launch of the first iteration of a new loop invocation. Parameter N is
3461+
mandatory, and is a positive integer. Cannot be used on the same loop in
3462+
conjunction with disable_loop_pipelining.
3463+
3464+
.. code-block:: c++
3465+
3466+
void foo() {
3467+
int var = 0;
3468+
[[intel::max_reinvocation_delay(1)]]
3469+
for (int i = 0; sycl::log10((float)(x)) < 10; i++) var++;
3470+
}
3471+
3472+
template<int N>
3473+
void bar() {
3474+
[[intel::max_reinvocation_delay(N)]] for(;;) { }
3475+
}
3476+
}];
3477+
}
3478+
34543479
def SYCLIntelLoopFuseDocs : Documentation {
34553480
let Category = DocCatFunction;
34563481
let Heading = "loop_fuse, loop_fuse_independent";

clang/include/clang/Basic/DiagnosticDriverKinds.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -345,6 +345,8 @@ def warn_drv_sycl_offload_target_duplicate : Warning<
345345
def warn_drv_sycl_target_missing : Warning<
346346
"linked binaries do not contain expected '%0' target; found targets: '%1'">,
347347
InGroup<SyclTarget>;
348+
def err_drv_multiple_target_with_forced_target : Error<
349+
"multiple target usage with '%0' is not supported with '%1'">;
348350
def err_drv_failed_to_deduce_target_from_arch : Error<
349351
"failed to deduce triple for target architecture '%0'; specify the triple "
350352
"using '-fopenmp-targets' and '-Xopenmp-target' instead.">;

clang/include/clang/Basic/DiagnosticSemaKinds.td

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7221,9 +7221,11 @@ def warn_format_nonliteral : Warning<
72217221
InGroup<FormatNonLiteral>, DefaultIgnore;
72227222

72237223
def err_sycl_device_global_incorrect_scope : Error<
7224-
"'device_global' variables must be static or declared at namespace scope">;
7224+
"'device_global' variable must be a static data member or declared in global or namespace scope">;
72257225
def err_sycl_device_global_not_publicly_accessible: Error<
7226-
"'device_global' member variable %0 is not publicly accessible from namespace scope">;
7226+
"'device_global' member variable %0 should be publicly accessible from namespace scope">;
7227+
def err_sycl_device_global_array : Error<
7228+
"'device_global' array is not allowed">;
72277229

72287230
def err_unexpected_interface : Error<
72297231
"unexpected interface name %0: expected expression">;

clang/include/clang/Driver/Options.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2860,6 +2860,10 @@ def fsycl_link_targets_EQ : CommaJoined<["-"], "fsycl-link-targets=">,
28602860
Flags<[NoXarchOption, CC1Option, CoreOption, Deprecated]>,
28612861
HelpText<"Specify comma-separated list of triples SYCL offloading targets "
28622862
"to produce linked device images (deprecated)">;
2863+
def fsycl_force_target_EQ : Joined<["-"], "fsycl-force-target=">,
2864+
Flags<[NoXarchOption, CoreOption]>,
2865+
HelpText<"Force the usage of the given triple when extracting device code "
2866+
"from any given objects on the command line">;
28632867
def fsycl_device_code_split_EQ : Joined<["-"], "fsycl-device-code-split=">,
28642868
Flags<[CC1Option, CoreOption]>, HelpText<"Perform SYCL device code split: per_kernel (device code module is "
28652869
"created for each SYCL kernel) | per_source (device code module is created for each source (translation unit)) | off (no device code split). | auto (use heuristic to select the best way of splitting device code). "

clang/include/clang/Sema/Sema.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2296,6 +2296,9 @@ class Sema final {
22962296
Expr *E);
22972297
SYCLIntelFPGALoopCoalesceAttr *
22982298
BuildSYCLIntelFPGALoopCoalesceAttr(const AttributeCommonInfo &CI, Expr *E);
2299+
SYCLIntelFPGAMaxReinvocationDelayAttr *
2300+
BuildSYCLIntelFPGAMaxReinvocationDelayAttr(const AttributeCommonInfo &CI,
2301+
Expr *E);
22992302

23002303
bool CheckQualifiedFunctionForTypeId(QualType T, SourceLocation Loc);
23012304

clang/lib/CodeGen/CGLoopInfo.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -611,6 +611,15 @@ MDNode *LoopInfo::createMetadata(
611611
llvm::Type::getInt32Ty(Ctx), VC.second))};
612612
LoopProperties.push_back(MDNode::get(Ctx, Vals));
613613
}
614+
615+
if (Attrs.SYCLMaxReinvocationDelayNCycles) {
616+
Metadata *Vals[] = {
617+
MDString::get(Ctx, "llvm.loop.intel.max_reinvocation_delay.count"),
618+
ConstantAsMetadata::get(
619+
ConstantInt::get(llvm::Type::getInt32Ty(Ctx),
620+
*Attrs.SYCLMaxReinvocationDelayNCycles))};
621+
LoopProperties.push_back(MDNode::get(Ctx, Vals));
622+
}
614623

615624
LoopProperties.insert(LoopProperties.end(), AdditionalLoopProperties.begin(),
616625
AdditionalLoopProperties.end());
@@ -645,6 +654,7 @@ void LoopAttributes::clear() {
645654
SYCLMaxInterleavingNInvocations.reset();
646655
SYCLSpeculatedIterationsNIterations.reset();
647656
SYCLIntelFPGAVariantCount.clear();
657+
SYCLMaxReinvocationDelayNCycles.reset();
648658
UnrollCount = 0;
649659
UnrollAndJamCount = 0;
650660
VectorizeEnable = LoopAttributes::Unspecified;
@@ -681,6 +691,7 @@ LoopInfo::LoopInfo(BasicBlock *Header, const LoopAttributes &Attrs,
681691
!Attrs.SYCLMaxInterleavingNInvocations &&
682692
!Attrs.SYCLSpeculatedIterationsNIterations &&
683693
Attrs.SYCLIntelFPGAVariantCount.empty() && Attrs.UnrollCount == 0 &&
694+
!Attrs.SYCLMaxReinvocationDelayNCycles &&
684695
Attrs.UnrollAndJamCount == 0 && !Attrs.PipelineDisabled &&
685696
Attrs.PipelineInitiationInterval == 0 &&
686697
Attrs.VectorizePredicateEnable == LoopAttributes::Unspecified &&
@@ -1012,6 +1023,9 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx,
10121023
// emitted
10131024
// For attribute nofusion:
10141025
// 'llvm.loop.fusion.disable' metadata will be emitted
1026+
// For attribute max_reinvocation_delay:
1027+
// n - 'llvm.loop.intel.max_reinvocation_delay.count, i32 n' metadata will be
1028+
// emitted
10151029
for (const auto *A : Attrs) {
10161030
if (const auto *IntelFPGAIVDep = dyn_cast<SYCLIntelFPGAIVDepAttr>(A))
10171031
addSYCLIVDepInfo(Header->getContext(), IntelFPGAIVDep->getSafelenValue(),
@@ -1076,6 +1090,14 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx,
10761090

10771091
if (isa<SYCLIntelFPGANofusionAttr>(A))
10781092
setSYCLNofusionEnable();
1093+
1094+
if (const auto *IntelFPGAMaxReinvocationDelay =
1095+
dyn_cast<SYCLIntelFPGAMaxReinvocationDelayAttr>(A)) {
1096+
const auto *CE = cast<ConstantExpr>(
1097+
IntelFPGAMaxReinvocationDelay->getNExpr());
1098+
llvm::APSInt ArgVal = CE->getResultAsAPSInt();
1099+
setSYCLMaxReinvocationDelayNCycles(ArgVal.getSExtValue());
1100+
}
10791101
}
10801102

10811103
setMustProgress(MustProgress);

clang/lib/CodeGen/CGLoopInfo.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,9 @@ struct LoopAttributes {
134134
/// Value for llvm.loop.intel.speculated.iterations.count metadata.
135135
llvm::Optional<unsigned> SYCLSpeculatedIterationsNIterations;
136136

137+
// Value for llvm.loop.intel.max_reinvocation_delay metadata.
138+
llvm::Optional<unsigned> SYCLMaxReinvocationDelayNCycles;
139+
137140
/// llvm.unroll.
138141
unsigned UnrollCount;
139142

@@ -410,6 +413,11 @@ class LoopInfoStack {
410413
/// Set no progress for the next loop pushed.
411414
void setMustProgress(bool P) { StagedAttrs.MustProgress = P; }
412415

416+
/// Set value of max reinvocation delay for the next loop pushed.
417+
void setSYCLMaxReinvocationDelayNCycles(unsigned C) {
418+
StagedAttrs.SYCLMaxReinvocationDelayNCycles = C;
419+
}
420+
413421
private:
414422
/// Returns true if there is LoopInfo on the stack.
415423
bool hasInfo() const { return !Active.empty(); }

clang/lib/Driver/Driver.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -785,6 +785,8 @@ static bool addSYCLDefaultTriple(Compilation &C,
785785
/// Returns true if a triple is added to SYCLTriples, false otherwise
786786
if (!C.getDriver().isSYCLDefaultTripleImplied())
787787
return false;
788+
if (C.getInputArgs().hasArg(options::OPT_fsycl_force_target_EQ))
789+
return false;
788790
for (const auto &SYCLTriple : SYCLTriples) {
789791
if (SYCLTriple.getSubArch() == llvm::Triple::NoSubArch &&
790792
SYCLTriple.isSPIR())
@@ -1057,6 +1059,14 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
10571059
C.getInputArgs().getLastArg(options::OPT_fsycl_device_code_split_EQ),
10581060
{"per_kernel", "per_source", "auto", "off"});
10591061

1062+
Arg *SYCLForceTarget =
1063+
getArgRequiringSYCLRuntime(options::OPT_fsycl_force_target_EQ);
1064+
if (SYCLForceTarget) {
1065+
StringRef Val(SYCLForceTarget->getValue());
1066+
llvm::Triple TT(MakeSYCLDeviceTriple(Val));
1067+
if (!isValidSYCLTriple(TT))
1068+
Diag(clang::diag::err_drv_invalid_sycl_target) << Val;
1069+
}
10601070
bool HasSYCLTargetsOption = SYCLTargets || SYCLLinkTargets || SYCLAddTargets;
10611071
llvm::StringMap<StringRef> FoundNormalizedTriples;
10621072
llvm::SmallVector<llvm::Triple, 4> UniqueSYCLTriplesVec;
@@ -1066,6 +1076,15 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
10661076
Arg *SYCLTargetsValues = SYCLTargets ? SYCLTargets : SYCLLinkTargets;
10671077
if (SYCLTargetsValues) {
10681078
if (SYCLTargetsValues->getNumValues()) {
1079+
1080+
// Multiple targets are currently not supported when using
1081+
// -fsycl-force-target as the bundler does not allow for multiple
1082+
// outputs of the same target.
1083+
if (SYCLForceTarget && SYCLTargetsValues->getNumValues() > 1)
1084+
Diag(clang::diag::err_drv_multiple_target_with_forced_target)
1085+
<< SYCLTargetsValues->getAsString(C.getInputArgs())
1086+
<< SYCLForceTarget->getAsString(C.getInputArgs());
1087+
10691088
for (StringRef Val : SYCLTargetsValues->getValues()) {
10701089
llvm::Triple TT(MakeSYCLDeviceTriple(Val));
10711090
if (!isValidSYCLTriple(TT)) {

0 commit comments

Comments
 (0)