2828#include " tsar/Analysis/Clang/ASTDependenceAnalysis.h"
2929#include " tsar/Analysis/Clang/CanonicalLoop.h"
3030#include " tsar/Analysis/Clang/LoopMatcher.h"
31+ #include " tsar/Analysis/Clang/PerfectLoop.h"
3132#include " tsar/Analysis/Passes.h"
3233#include " tsar/Analysis/Parallel/Passes.h"
3334#include " tsar/Analysis/Parallel/ParallelLoop.h"
@@ -53,7 +54,7 @@ class ClangDVMHSMParallelization : public ClangSMParallelization {
5354 initializeClangDVMHSMParallelizationPass (*PassRegistry::getPassRegistry ());
5455 }
5556private:
56- bool exploitParallelism (const Loop &IR, const clang::ForStmt &AST,
57+ bool exploitParallelism (const DFLoop &IR, const clang::ForStmt &AST,
5758 const ClangSMParallelProvider &Provider,
5859 tsar::ClangDependenceAnalyzer &ASTDepInfo,
5960 TransformationContext &TfmCtx) override ;
@@ -108,28 +109,39 @@ void addVarList(const ClangDependenceAnalyzer::ReductionVarListT &VarInfoList,
108109 ParallelFor.push_back (' )' );
109110 }
110111}
112+
113+ unsigned getPerfectNestSize (const DFLoop &DFL,
114+ const PerfectLoopInfo &PerfectInfo,
115+ const CanonicalLoopSet &CanonicalLoops) {
116+ auto *CurrDFL = &DFL;
117+ unsigned PerfectSize = 1 ;
118+ for (; PerfectInfo.count (CurrDFL) && CurrDFL->getNumRegions () > 0 ;
119+ ++PerfectSize) {
120+ CurrDFL = dyn_cast<DFLoop>(*CurrDFL->region_begin ());
121+ if (!CurrDFL)
122+ return PerfectSize;
123+ auto CanonicalItr = CanonicalLoops.find_as (const_cast <DFLoop *>(CurrDFL));
124+ if (CanonicalItr == CanonicalLoops.end () || !(**CanonicalItr).isCanonical ())
125+ return PerfectSize;
126+ }
127+ return PerfectSize;
128+ }
111129} // namespace
112130
113131bool ClangDVMHSMParallelization::exploitParallelism (
114- const Loop &IR, const clang::ForStmt &AST,
132+ const DFLoop &IR, const clang::ForStmt &AST,
115133 const ClangSMParallelProvider &Provider,
116134 tsar::ClangDependenceAnalyzer &ASTRegionAnalysis,
117135 TransformationContext &TfmCtx) {
118136 auto &ASTDepInfo = ASTRegionAnalysis.getDependenceInfo ();
119137 if (!ASTDepInfo.get <trait::FirstPrivate>().empty () ||
120138 !ASTDepInfo.get <trait::LastPrivate>().empty ())
121139 return false ;
122- SmallString<128 > ParallelFor (" #pragma dvm parallel (1)" );
123- if (!ASTDepInfo.get <trait::Private>().empty ()) {
124- ParallelFor += " private" ;
125- addVarList (ASTDepInfo.get <trait::Private>(), ParallelFor);
126- }
127- addVarList (ASTDepInfo.get <trait::Reduction>(), ParallelFor);
128- ParallelFor += ' \n ' ;
129140 SmallString<128 > DVMHRegion (" #pragma dvm region" );
130141 SmallString<128 > DVMHActual, DVMHGetActual;
131142 auto &PI = Provider.get <ParallelLoopPass>().getParallelLoopInfo ();
132- if (!PI[&IR].isHostOnly () && ASTRegionAnalysis.evaluateDefUse ()) {
143+ bool HostOnly = false ;
144+ if (!PI[IR.getLoop ()].isHostOnly () && ASTRegionAnalysis.evaluateDefUse ()) {
133145 if (!ASTDepInfo.get <trait::ReadOccurred>().empty ()) {
134146 DVMHActual += " #pragma dvm actual" ;
135147 addVarList (ASTDepInfo.get <trait::ReadOccurred>(), DVMHActual);
@@ -150,7 +162,23 @@ bool ClangDVMHSMParallelization::exploitParallelism(
150162 }
151163 } else {
152164 DVMHRegion += " targets(HOST)" ;
165+ HostOnly = true ;
153166 }
167+ auto &PerfectInfo = Provider.get <ClangPerfectLoopPass>().getPerfectLoopInfo ();
168+ auto &CanonicalInfo = Provider.get <CanonicalLoopPass>().getCanonicalLoopInfo ();
169+ SmallString<128 > ParallelFor (" #pragma dvm parallel (" );
170+ if (HostOnly)
171+ ParallelFor += " 1" ;
172+ else
173+ Twine (getPerfectNestSize (IR, PerfectInfo, CanonicalInfo))
174+ .toStringRef (ParallelFor);
175+ ParallelFor += " )" ;
176+ if (!ASTDepInfo.get <trait::Private>().empty ()) {
177+ ParallelFor += " private" ;
178+ addVarList (ASTDepInfo.get <trait::Private>(), ParallelFor);
179+ }
180+ addVarList (ASTDepInfo.get <trait::Reduction>(), ParallelFor);
181+ ParallelFor += ' \n ' ;
154182 DVMHRegion += " \n {\n " ;
155183 // Add directives to the source code.
156184 auto &Rewriter = TfmCtx.getRewriter ();
0 commit comments