11/* !
22 * \file CGraphPartitioning.hpp
3- * \brief Headers for the classes realted to the algorithms that are used
3+ * \brief Headers for the classes related to the algorithms that are used
44 to divide the matrix acyclic graph into parallel partitions.
55 * \author A. Raj
66 * \version 8.2.0 "Harrier"
2626 * License along with SU2. If not, see <http://www.gnu.org/licenses/>.
2727 */
2828
29+ #pragma once
30+
2931#include " ../CConfig.hpp"
3032#include " ../geometry/CGeometry.hpp"
3133#include " ../geometry/dual_grid/CPoint.hpp"
3537 * \brief Abstract base class for defining graph partitioning algorithms
3638 * \author A. Raj
3739 *
38- * In order to use certain parallel algorithms in the solution process -
39- * whether with linear solvers or preconditioners - we require the matrix
40- * to be partitioned into certain parallel divisions. These maybe in the form
41- * of levels, blocks, colors and so on. Since a number of different algorithms
42- * can be used to split the graph, we've introduced a base class containing the
43- * "Partition" member function from which child classes of the specific
44- * algorithm can be derived. Currently, we are only using direct declarations
40+ * In order to use certain parallel algorithms in the solution process -
41+ * whether with linear solvers or preconditioners - we require the matrix
42+ * to be partitioned into certain parallel divisions. These may be in the form
43+ * of levels, blocks, colors and so on. Since a number of different algorithms
44+ * can be used to split the graph, we've introduced a base class containing the
45+ * "Partition" member function from which child classes of the specific
46+ * algorithm can be derived. Currently, we are only using direct declarations
4547 * of the derived classes in the code. However, this method was chosen as it
46- * allows us to pass different child class algorithms to a single implementation
48+ * allows us to pass different child class algorithms to a single implementation
4749 * of the function that requires it - similar to the CMatrixVectorProduct class.
4850 */
4951
template <class ScalarType>
class CGraphPartitioning {
 public:
  /*!
   * \brief Virtual destructor, so that derived partitioners can be destroyed through a base pointer.
   *        The class remains abstract because Partition is pure virtual.
   */
  virtual ~CGraphPartitioning() = default;

  /*!
   * \brief Interface of the partitioning algorithm; the exact meaning of the outputs is
   *        defined by the derived class.
   * \param[in,out] pointList - List of points handed to the algorithm.
   * \param[in,out] partitionOffsets - Offsets that delimit the partitions produced by the algorithm.
   * \param[in,out] chainPtr - Additional partition grouping information produced by the algorithm.
   */
  virtual void Partition(std::vector<ScalarType>& pointList, std::vector<ScalarType>& partitionOffsets,
                         std::vector<ScalarType>& chainPtr) = 0;
};
6062
6163template <class ScalarType >
6264
63- class CLevelScheduling final : public CGraphPartitioning<ScalarType> {
64-
65+ class CLevelScheduling final : public CGraphPartitioning<ScalarType> {
6566 private:
6667 ScalarType nPointDomain;
6768 CPoint* nodes;
68-
69+
6970 public:
7071 ScalarType nLevels;
7172 ScalarType maxLevelWidth;
7273 vector<ScalarType> levels;
7374
74- /* !
75+ /* !
7576 * \brief constructor of the class
7677 * \param[in] nPointDomain_ref - number of points associated with the problem
77- * \param[in] nodes - represents the relationships between the points
78+ * \param[in] nodes_ref - represents the relationships between the points
7879 */
79- inline CLevelScheduling<ScalarType>(ScalarType nPointDomain_ref, CPoint* nodes_ref)
80- : nPointDomain(nPointDomain_ref), nodes(nodes_ref)
81- { nLevels = 0ul ; maxLevelWidth = 0ul ; }
80+ inline CLevelScheduling<ScalarType>(ScalarType nPointDomain_ref, CPoint* nodes_ref)
81+ : nPointDomain(nPointDomain_ref), nodes(nodes_ref) {
82+ nLevels = 0ul ;
83+ maxLevelWidth = 0ul ;
84+ }
85+
86+ CLevelScheduling () = delete ; // Removing default constructor
8287
83- CLevelScheduling () = delete ; // Removing default constructor
88+ /* !
89+ * \brief Divides the levels into groups of chains depending on the preset GPU block and warp size.
90+ * \param[in] levelOffsets - Represents the vector array containing the ordered list of starting rows of each level.
91+ * \param[in] chainPtr - Represents the vector array containing the ordered list of starting levels of each chain.
92+ * \param[in] rowsPerBlock - Represents the maximum number of rows that can be accomodated per block.
93+ */
94+ void CalculateChain (vector<ScalarType> levelOffsets, vector<ScalarType>& chainPtr, int rowsPerBlock) {
95+ ScalarType levelWidth = 0 ;
96+ unsigned short chainLength = chainPtr.capacity ();
8497
85- void Reorder (vector<ScalarType>& pointList, vector<ScalarType>& inversePointList, vector<ScalarType> levelOffsets)
86- {
98+ /* This is not a magic number. We are simply initializing
99+ the point array with its first element that is always zero.*/
100+ chainPtr.push_back (0 );
101+
102+ for (ScalarType iLevel = 0ul ; iLevel < nLevels; iLevel++) {
103+ levelWidth = levelOffsets[iLevel + 1 ] - levelOffsets[iLevel];
104+ maxLevelWidth = std::max (levelWidth, maxLevelWidth);
105+
106+ if (levelWidth > rowsPerBlock) {
107+ if (chainPtr.back () != iLevel) {
108+ chainPtr.push_back (iLevel);
109+ }
110+
111+ chainPtr.push_back (iLevel + 1 );
112+ }
113+ }
114+
115+ chainPtr.push_back (nLevels);
116+ }
117+
118+ /* !
119+ * \brief Reorders the points according to the levels
120+ * \param[in] pointList - Ordered array that contains the list of all mesh points.
121+ * \param[in] inversePointList - Array utilized to access the index of each point in pointList.
122+ * \param[in] levelOffsets - Vector array containing the ordered list of starting rows of each level.
123+ */
124+ void Reorder (vector<ScalarType>& pointList, vector<ScalarType>& inversePointList, vector<ScalarType> levelOffsets) {
87125 for (auto localPoint = 0ul ; localPoint < nPointDomain; ++localPoint) {
88126 const auto globalPoint = pointList[localPoint];
89127 inversePointList[levelOffsets[levels[localPoint]]++] = globalPoint;
90128 }
91-
129+
92130 pointList = std::move (inversePointList);
93131 }
94132
95- void Partition (vector<ScalarType>& pointList, vector<ScalarType>& levelOffsets) override
96- {
133+ /* !
134+ * \brief Assigns a level to every point, builds the level offsets and reorders the points by level.
135+ * \param[in,out] pointList - Ordered array that contains the list of all mesh points; reordered by level on return.
136+ * \param[in,out] levelOffsets - Vector array containing the ordered list of starting rows of each level.
137+ * \param[in,out] chainPtr - Represents the vector array containing the ordered list of starting levels of each chain.
138+ */
139+ void Partition (vector<ScalarType>& pointList, vector<ScalarType>& levelOffsets,
140+ vector<ScalarType>& chainPtr) override {
97141 vector<ScalarType> inversePointList;
98142 inversePointList.reserve (nPointDomain);
99143 levels.reserve (nPointDomain);
@@ -111,29 +155,34 @@ class CLevelScheduling final : public CGraphPartitioning<ScalarType> {
111155
112156 for (auto adjPoints = 0u ; adjPoints < nodes->GetnPoint (globalPoint); adjPoints++) {
113157 const auto adjGlobalPoint = nodes->GetPoint (globalPoint, adjPoints);
114-
158+
115159 if (adjGlobalPoint < nPointDomain) {
116160 const auto adjLocalPoint = inversePointList[adjGlobalPoint];
117-
161+
118162 if (adjLocalPoint < localPoint) {
119- levels[localPoint] = std::max (levels[localPoint], levels[adjLocalPoint] + 1 );
163+ levels[localPoint] = std::max (levels[localPoint], levels[adjLocalPoint] + 1 );
164+ }
120165 }
121- }
122166 }
123167
124168 nLevels = std::max (nLevels, levels[localPoint] + 1 );
125- }
169+ }
126170
127171 levelOffsets.resize (nLevels + 1 );
128- for (auto iPoint = 0ul ; iPoint < nPointDomain; iPoint++) ++levelOffsets[levels[iPoint] + 1 ];
172+ for (auto iPoint = 0ul ; iPoint < nPointDomain; iPoint++) {
173+ ++levelOffsets[levels[iPoint] + 1 ];
174+ }
129175
130176 for (auto iLevel = 2ul ; iLevel <= nLevels; ++iLevel) {
131177 levelOffsets[iLevel] += levelOffsets[iLevel - 1 ];
132178 }
133179
134- for (auto elem = levelOffsets.begin (); elem != (levelOffsets.end () - 1 ); elem++) maxLevelWidth = std::max (*(elem+1 ) - *elem, maxLevelWidth);
135-
136180 Reorder (pointList, inversePointList, levelOffsets);
181+
182+ #ifdef HAVE_CUDA
183+ CalculateChain (levelOffsets, chainPtr, 20 );
184+ #elif
185+ chainPtr = NULL ;
186+ #endif
137187 }
138188};
139-
0 commit comments