2929#include " tsar/Analysis/Clang/CanonicalLoop.h"
3030#include " tsar/Analysis/Clang/LoopMatcher.h"
3131#include " tsar/Analysis/Clang/PerfectLoop.h"
32+ #include " tsar/Analysis/Memory/DIArrayAccess.h"
3233#include " tsar/Analysis/Passes.h"
3334#include " tsar/Analysis/Parallel/Passes.h"
3435#include " tsar/Analysis/Parallel/ParallelLoop.h"
@@ -53,6 +54,7 @@ class ClangDVMHSMParallelization : public ClangSMParallelization {
5354 ClangDVMHSMParallelization () : ClangSMParallelization(ID) {
5455 initializeClangDVMHSMParallelizationPass (*PassRegistry::getPassRegistry ());
5556 }
57+
5658private:
5759 bool exploitParallelism (const DFLoop &IR, const clang::ForStmt &AST,
5860 const ClangSMParallelProvider &Provider,
@@ -113,7 +115,8 @@ void addVarList(const ClangDependenceAnalyzer::ReductionVarListT &VarInfoList,
113115void addOnClause (const DFLoop &DFL, const PerfectLoopInfo &PerfectInfo,
114116 const CanonicalLoopSet &CanonicalLoops,
115117 const MemoryMatchInfo &MemoryMatcher,
116- SmallVectorImpl<char > &Out) {
118+ SmallVectorImpl<std::pair<ObjectID, StringRef>> &ParallelNest,
119+ SmallVectorImpl<char > &Out, unsigned NestSize = 0 ) {
117120 Out.append ({' ' , ' o' , ' n' , ' (' });
118121 auto *CurrDFL = &DFL;
119122 auto CanonicalItr = CanonicalLoops.find_as (const_cast <DFLoop *>(CurrDFL));
@@ -129,10 +132,19 @@ void addOnClause(const DFLoop &DFL, const PerfectLoopInfo &PerfectInfo,
129132 Out.append (MatchItr->get <AST>()->getName ().begin (),
130133 MatchItr->get <AST>()->getName ().end ());
131134 Out.append ({' ]' });
135+ auto LoopID = CurrDFL->getLoop ()->getLoopID ();
136+ assert (LoopID &&
137+ " Loop ID must be available for the outermost parallel loop!" );
138+ ParallelNest.emplace_back (LoopID, MatchItr->get <AST>()->getName ());
132139 for (; PerfectInfo.count (CurrDFL) && CurrDFL->getNumRegions () > 0 ;) {
140+ if (NestSize > 0 && NestSize <= ParallelNest.size ())
141+ break ;
133142 CurrDFL = dyn_cast<DFLoop>(*CurrDFL->region_begin ());
134143 if (!CurrDFL)
135144 break ;
145+ auto LoopID = CurrDFL->getLoop ()->getLoopID ();
146+ if (!LoopID)
147+ break ;
136148 auto CanonicalItr = CanonicalLoops.find_as (const_cast <DFLoop *>(CurrDFL));
137149 if (CanonicalItr == CanonicalLoops.end () || !(**CanonicalItr).isCanonical ())
138150 break ;
@@ -145,6 +157,7 @@ void addOnClause(const DFLoop &DFL, const PerfectLoopInfo &PerfectInfo,
145157 Out.append (MatchItr->get <AST>()->getName ().begin (),
146158 MatchItr->get <AST>()->getName ().end ());
147159 Out.append ({' ]' });
160+ ParallelNest.emplace_back (LoopID, MatchItr->get <AST>()->getName ());
148161 }
149162 Out.append ({' )' });
150163}
@@ -189,12 +202,63 @@ bool ClangDVMHSMParallelization::exploitParallelism(
189202 auto &PerfectInfo = Provider.get <ClangPerfectLoopPass>().getPerfectLoopInfo ();
190203 auto &CanonicalInfo = Provider.get <CanonicalLoopPass>().getCanonicalLoopInfo ();
191204 auto &MemoryMatcher = Provider.get <MemoryMatcherImmutableWrapper>().get ();
192-
193205 SmallString<128 > ParallelFor (" #pragma dvm parallel" );
194- if (HostOnly)
195- ParallelFor += " (1)" ;
196- else
197- addOnClause (IR, PerfectInfo, CanonicalInfo, MemoryMatcher, ParallelFor);
206+ SmallVector<std::pair<ObjectID, StringRef>, 4 > ParallelNest;
207+ addOnClause (IR, PerfectInfo, CanonicalInfo, MemoryMatcher, ParallelNest,
208+ ParallelFor, HostOnly ? 1 : 0 );
209+ auto *AccessInfo = getAnalysis<DIArrayAccessWrapper>().getAccessInfo ();
210+ if (AccessInfo) {
211+ bool EmptyTie = true ;
212+ auto arraycmp = [](const DIEstimateMemory *LHS,
213+ const DIEstimateMemory *RHS) {
214+ return LHS->getVariable ()->getName () < RHS->getVariable ()->getName ();
215+ };
216+ std::map<DIEstimateMemory *, SmallVector<std::string, 5 >,
217+ decltype (arraycmp)>
218+ Mapping (arraycmp);
219+ for (auto &Access :
220+ AccessInfo->scope_accesses (ParallelNest.front ().first )) {
221+ if (!isa<DIEstimateMemory>(Access.getArray ()))
222+ continue ;
223+ auto MappingItr =
224+ Mapping.emplace (std::piecewise_construct,
225+ std::forward_as_tuple (cast<DIEstimateMemory>(Access.getArray ())),
226+ std::forward_as_tuple (Access.size (), " *" )).first ;
227+ auto StashSize{ ParallelFor.size () };
228+ for (auto *Subscript : Access) {
229+ if (!Subscript || MappingItr->second [Subscript->getDimension ()] != " *" )
230+ continue ;
231+ if (auto *Affine = dyn_cast<DIAffineSubscript>(Subscript)) {
232+ for (unsigned I = 0 , EI = Affine->getNumberOfMonoms (); I < EI; ++I) {
233+ if (Affine->getMonom (I).Value .isNullValue ())
234+ continue ;
235+ auto LoopItr = find_if (ParallelNest, [Affine, I](auto &Loop) {
236+ return Loop.first == Affine->getMonom (I).Column ;
237+ });
238+ if (LoopItr != ParallelNest.end ()) {
239+ MappingItr->second [Affine->getDimension ()] =
240+ ((Affine->getMonom (I).Value .isNegative () ? " -" : " " ) +
241+ LoopItr->second )
242+ .str ();
243+ EmptyTie = false ;
244+ }
245+ }
246+ }
247+ }
248+ }
249+ if (!EmptyTie) {
250+ ParallelFor += " tie(" ;
251+ for (auto &Map : Mapping) {
252+ if (all_of (Map.second , [](StringRef S) { return S == " *" ; }))
253+ continue ;
254+ ParallelFor +=
255+ cast<DIEstimateMemory>(Map.first )->getVariable ()->getName ();
256+ ParallelFor += " [" + join (Map.second , " ][" ) + " ]" ;
257+ ParallelFor += " ," ;
258+ }
259+ ParallelFor.back () = ' )' ;
260+ }
261+ }
198262 if (!ASTDepInfo.get <trait::Private>().empty ()) {
199263 ParallelFor += " private" ;
200264 addVarList (ASTDepInfo.get <trait::Private>(), ParallelFor);
0 commit comments