diff --git a/include/graphblas/algorithms/hpcg/average_coarsener.hpp b/include/graphblas/algorithms/hpcg/average_coarsener.hpp new file mode 100644 index 000000000..41abed9e2 --- /dev/null +++ b/include/graphblas/algorithms/hpcg/average_coarsener.hpp @@ -0,0 +1,412 @@ + +/* + * Copyright 2022 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file average_coarsener.hpp + * @author Alberto Scolari (alberto.scolari@huawei.com) + * Utilities to build the coarsening matrix for an HPCG simulation. + */ + +#ifndef _H_GRB_ALGORITHMS_AVERAGE_COARSENER +#define _H_GRB_ALGORITHMS_AVERAGE_COARSENER + +#include +#include +#include +#include +#include +#include + +#include +#include + + +namespace grb { + + namespace algorithms { + + namespace hpcg { + + // forward declaration + template< + size_t DIMS, + typename CoordType, + typename ValueType + > + class AverageCoarsenerBuilder; + + /** + * Iterator class to generate the coarsening matrix that averages over the + * elements of the finer domain corresponding to the element of the coarser + * domain. + * + * The coarsening matrix averages \b all elements that are coarsened into + * one. + * + * This coarsening method requires some computation but should be relatively + * robust to noise or to partitioning strategies that parallelize the + * smoother (usually run before coarsening). + * + * This iterator is random-access. 
+			 *
+			 * @tparam DIMS number of dimensions
+			 * @tparam CoordType type storing the coordinates and the sizes
+			 * @tparam ValueType type of the nonzero: it must be able to represent
+			 *  1 / (number of finer elements averaged into one coarser element)
+			 */
+			template<
+				size_t DIMS,
+				typename CoordType,
+				typename ValueType
+			>
+			struct AverageGeneratorIterator {
+
+				friend AverageCoarsenerBuilder< DIMS, CoordType, ValueType >;
+
+				/** Numeric type of rows */
+				typedef CoordType RowIndexType;
+
+				/** Numeric type of columns */
+				typedef CoordType ColumnIndexType;
+
+				typedef typename grb::utils::multigrid::LinearizedNDimSystem<
+					CoordType,
+					grb::utils::multigrid::ArrayVectorStorage< DIMS, CoordType >
+				> LinearSystemType;
+
+				typedef typename LinearSystemType::Iterator LinearSystemIterType;
+
+				typedef AverageGeneratorIterator< DIMS, CoordType, ValueType > SelfType;
+
+				typedef std::array< CoordType, DIMS > ArrayType;
+
+				/**
+				 * Nonzero exposed by the iterator: row (coarser system), column (finer
+				 * system) and value.
+				 */
+				class ValueGenerator {
+
+					friend SelfType;
+
+
+				private:
+
+					RowIndexType _i;
+
+					ColumnIndexType _j;
+
+					ValueType _value;
+
+
+				public:
+
+					ValueGenerator(
+						RowIndexType i,
+						ColumnIndexType j,
+						ValueType value
+					) noexcept :
+						_i( i ),
+						_j( j ),
+						_value( value )
+					{}
+
+					ValueGenerator( const ValueGenerator & ) = default;
+
+					ValueGenerator & operator=( const ValueGenerator & ) = default;
+
+					inline RowIndexType i() const {
+						return _i;
+					}
+
+					inline ColumnIndexType j() const {
+						return _j;
+					}
+
+					inline ValueType v() const {
+						return _value;
+					}
+
+				};
+
+				// interface for std::random_access_iterator
+				typedef std::random_access_iterator_tag iterator_category;
+
+				typedef ValueGenerator value_type;
+
+				// must be a pointer type: operator->() returns the address of the
+				// stored value
+				typedef const value_type * pointer;
+
+				typedef const value_type & reference;
+
+				typedef typename LinearSystemIterType::difference_type difference_type;
+
+				AverageGeneratorIterator( const SelfType &o ) = default;
+
+				AverageGeneratorIterator( SelfType && ) = default;
+
+				SelfType & operator=( const SelfType & ) = default;
+
+				SelfType & operator=( SelfType && ) = default;
+
+				/**
+				 * Advances \c this by 1 in constant time.
+				 *
+				 * The position inside the finer subspace moves first; once all the
+				 * \a _num_neighbors finer elements of the current coarser element have
+				 * been visited, the coarser position advances and the subspace position
+				 * restarts from the beginning.
+				 */
+				SelfType & operator++() noexcept {
+					(void) ++_subspace_iter;
+					size_t subspace_position = _subspace_iter->get_linear_position();
+					if( subspace_position == _num_neighbors ) {
+						(void) ++_sys_iter;
+						_subspace_iter = _finer_subspace->begin();
+					}
+					update_coords();
+					return *this;
+				}
+
+				/**
+				 * Advances \c this by \p offset nonzeroes in constant time.
+				 *
+				 * The target position is split into a number of whole coarser elements
+				 * (quotient) and a position within the finer subspace (remainder). Plain
+				 * integer division is used instead of std::ldiv so that coordinates are
+				 * never funnelled through a (signed) \c long.
+				 */
+				SelfType & operator+=( size_t offset ) {
+					const size_t neighbors = static_cast< size_t >( _num_neighbors );
+					const size_t target =
+						static_cast< size_t >( _subspace_iter->get_linear_position() ) + offset;
+					_sys_iter += target / neighbors;
+					_subspace_iter = _finer_subspace->begin();
+					_subspace_iter += target % neighbors;
+					update_coords();
+					return *this;
+				}
+
+				/**
+				 * Computes the difference between \c this and \p o as the number of
+				 * nonzeroes separating them, i.e. consistently with operator+=() and
+				 * operator++(): each coarser element accounts for \a _num_neighbors
+				 * nonzeroes, plus the difference of the positions within the subspace.
+				 */
+				difference_type operator-( const SelfType &o ) const {
+					const difference_type coarse_distance = this->_sys_iter - o._sys_iter;
+					const difference_type subspace_distance =
+						static_cast< difference_type >( this->_subspace_iter->get_linear_position() ) -
+						static_cast< difference_type >( o._subspace_iter->get_linear_position() );
+					return coarse_distance * static_cast< difference_type >( _num_neighbors ) +
+						subspace_distance;
+				}
+
+				/**
+				 * Returns whether \c this and \p o differ, i.e. whether they point to
+				 * different coarser elements or to different finer elements within the
+				 * same coarser element.
+				 */
+				bool operator!=( const SelfType &o ) const {
+					return this->_sys_iter != o._sys_iter ||
+						this->_subspace_iter != o._subspace_iter;
+				}
+
+				/**
+				 * Returns whether \c this and \p o are equal.
+				 */
+				bool operator==( const SelfType &o ) const {
+					return ! this->operator!=( o );
+				}
+
+				reference operator*() const {
+					return _val;
+				}
+
+				pointer operator->() const {
+					return &_val;
+				}
+
+				/**
+				 * Returns the current row, within the coarser system.
+				 */
+				inline RowIndexType i() const {
+					return _val.i();
+				}
+
+				/**
+				 * Returns the current column, within the finer system.
+				 */
+				inline ColumnIndexType j() const {
+					return _val.j();
+				}
+
+				/**
+				 * Returns the averaging weight, which is the same for all nonzeroes:
+				 * 1 / (number of finer elements coarsened into one coarser element).
+				 */
+				inline ValueType v() const {
+					return _val.v();
+				}
+
+
+			private:
+
+				const LinearSystemType * _lin_sys;
+				const LinearSystemType * _finer_subspace;
+				const ArrayType * _steps;
+				CoordType _num_neighbors;
+				LinearSystemIterType _sys_iter;
+				LinearSystemIterType _subspace_iter;
+				value_type _val;
+
+				/**
+				 * Construct a new AverageGeneratorIterator object starting from the
+				 * LinearizedNDimSystem object \p system describing the \b coarser system
+				 * and the \b ratios \p steps between each finer and the corresponding
+				 * coarser dimension.
+				 *
+				 * Only pointers to the arguments are stored: they must outlive the
+				 * iterator.
+				 *
+				 * @param system LinearizedNDimSystem object describing the coarser system
+				 * @param finer_subspace LinearizedNDimSystem object describing the subspace
+				 *  of each element in the finer system
+				 * @param steps Ratios per dimension between finer and coarser system
+				 */
+				AverageGeneratorIterator(
+					const LinearSystemType &system,
+					const LinearSystemType &finer_subspace,
+					const ArrayType &steps
+				) noexcept :
+					_lin_sys( &system ),
+					_finer_subspace( &finer_subspace ),
+					_steps( &steps ),
+					_num_neighbors( std::accumulate( steps.cbegin(), steps.cend(), 1UL,
+						std::multiplies< CoordType >() ) ),
+					_sys_iter( system.begin() ),
+					_subspace_iter( finer_subspace.begin() ),
+					_val( 0, 0, static_cast< ValueType >( 1 ) /
+						static_cast< ValueType >( _num_neighbors ) )
+				{
+					update_coords();
+				}
+
+				/**
+				 * Refreshes row and column of the stored nonzero from the current
+				 * positions of the two internal iterators.
+				 */
+				void update_coords() noexcept {
+					_val._i = _sys_iter->get_linear_position();
+					_val._j = coarse_rows_to_finer_col();
+				}
+
+				/**
+				 * Returns the row coordinates converted to the finer system, to compute
+				 * the column value.
+				 *
+				 * Along each dimension the finer coordinate is
+				 * (coarser coordinate) * step + (coordinate within the subspace), and the
+				 * finer size is step * (coarser size): \a s accumulates the stride of the
+				 * current dimension within the linearized finer system.
+				 */
+				ColumnIndexType coarse_rows_to_finer_col() const noexcept {
+					ColumnIndexType finer = 0;
+					ColumnIndexType s = 1;
+					for( size_t i = 0; i < DIMS; i++ ) {
+						finer += s * _subspace_iter->get_position()[ i ];
+						s *= ( *_steps )[ i ];
+						finer += s * _sys_iter->get_position()[ i ];
+						s *= _lin_sys->get_sizes()[ i ];
+					}
+					return finer;
+				}
+
+			};
+
+			/**
+			 * Builder object to create iterators that generate an averaging-coarsening
+			 * matrix.
+			 *
+			 * It is a facility to generate beginning and end iterators and abstract the
+			 * logic away from users.
+			 *
+			 * @tparam DIMS number of dimensions
+			 * @tparam CoordType type storing the coordinates and the sizes
+			 * @tparam ValueType type of the nonzero: it must be able to represent
+			 *  1 / (number of finer elements averaged into one coarser element)
+			 */
+			template<
+				size_t DIMS,
+				typename CoordType,
+				typename ValueType
+			>
+			class AverageCoarsenerBuilder {
+
+			public:
+
+				typedef std::array< CoordType, DIMS > ArrayType;
+				typedef AverageGeneratorIterator< DIMS, CoordType, ValueType > Iterator;
+				typedef AverageCoarsenerBuilder< DIMS, CoordType, ValueType > SelfType;
+
+				/**
+				 * Construct a new AverageCoarsenerBuilder object from the sizes of finer
+				 * system and those of the coarser system; along each dimension the finer
+				 * size must be an exact multiple (at least twice) of the coarser size,
+				 * otherwise an exception is raised.
+				 *
+				 * @param _finer_sizes sizes of the finer system, one per dimension
+				 * @param _coarser_sizes sizes of the coarser system, one per dimension
+				 *
+				 * @throws std::invalid_argument if a coarser size is zero, or if a finer
+				 *  size is not an exact multiple (>= 2) of the corresponding coarser size
+				 */
+				AverageCoarsenerBuilder(
+					const ArrayType &_finer_sizes,
+					const ArrayType &_coarser_sizes
+				) :
+					system( _coarser_sizes.begin(), _coarser_sizes.end() ),
+					_finer_subspace( _coarser_sizes.cbegin(), _coarser_sizes.cend() ),
+					steps( DIMS )
+				{
+					for( size_t i = 0; i < DIMS; i++ ) {
+						// guard the division below against a zero divisor
+						if( _coarser_sizes[ i ] == 0 ) {
+							throw std::invalid_argument(
+								std::string( "coarser size of dimension " ) + std::to_string( i ) +
+								std::string( " is zero" ) );
+						}
+						// finer size MUST be an exact multiple of coarser_size
+						const CoordType quotient = _finer_sizes[ i ] / _coarser_sizes[ i ];
+						const CoordType remainder = _finer_sizes[ i ] % _coarser_sizes[ i ];
+						if( quotient < 2 || remainder != 0 ) {
+							throw std::invalid_argument(
+								std::string( "finer size of dimension " ) + std::to_string( i ) +
+								std::string( " is not an exact multiple of coarser size" ) );
+						}
+						steps[ i ] = quotient;
+					}
+					// the subspace of each coarser element has the ratios as sizes
+					_finer_subspace.retarget( steps );
+				}
+
+				AverageCoarsenerBuilder( const SelfType & ) = delete;
+
+				AverageCoarsenerBuilder( SelfType && ) = delete;
+
+				SelfType & operator=( const SelfType & ) = delete;
+
+				SelfType & operator=( SelfType && ) = delete;
+
+				/**
+				 * Returns the size of the coarser system, i.e. its number of elements
+				 * (the number of rows of the coarsening matrix).
+				 */
+				size_t system_size() const {
+					return system.system_size();
+				}
+
+				/**
+				 * Produces a beginning iterator to generate the coarsening matrix.
+				 *
+				 * The iterator stores pointers to members of this builder, which must
+				 * therefore outlive it.
+				 */
+				Iterator make_begin_iterator() {
+					return Iterator( system, _finer_subspace, steps );
+				}
+
+				/**
+				 * Produces an end iterator to stop the generation of the coarsening
+				 * matrix.
+				 *
+				 * It is obtained by advancing a beginning iterator by the total number of
+				 * nonzeroes, i.e. (coarser elements) * (finer elements per coarser
+				 * element).
+				 */
+				Iterator make_end_iterator() {
+					Iterator result( system, _finer_subspace, steps );
+					// do not trigger boundary checks
+					result += ( system_size() * _finer_subspace.system_size() );
+					return result;
+				}
+
+
+			private:
+
+				const grb::utils::multigrid::LinearizedNDimSystem<
+					CoordType,
+					grb::utils::multigrid::ArrayVectorStorage< DIMS, CoordType >
+				> system;
+
+				grb::utils::multigrid::LinearizedNDimSystem<
+					CoordType,
+					grb::utils::multigrid::ArrayVectorStorage< DIMS, CoordType >
+				> _finer_subspace;
+
+				/**
+				 * Array of steps, i.e. the per-dimension ratio between the finer size and
+				 * the coarser size, as computed by the constructor; it tells how much
+				 * each column coordinate (finer system) must be incremented when
+				 * incrementing the row coordinates.
+				 */
+				grb::utils::multigrid::ArrayVectorStorage< DIMS, CoordType > steps;
+			};
+
+		} // namespace hpcg
+
+	} // namespace algorithms
+
+} // namespace grb
+
+#endif // _H_GRB_ALGORITHMS_AVERAGE_COARSENER
+
diff --git a/include/graphblas/algorithms/hpcg/greedy_coloring.hpp b/include/graphblas/algorithms/hpcg/greedy_coloring.hpp
new file mode 100644
index 000000000..366465c41
--- /dev/null
+++ b/include/graphblas/algorithms/hpcg/greedy_coloring.hpp
@@ -0,0 +1,191 @@
+
+/*
+ * Copyright 2022 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file greedy_coloring.hpp
+ * @author Alberto Scolari (alberto.scolari@huawei.com)
+ * Utilities to partition the elements of a mesh via a simple, greedy coloring algorithm.
+ */
+
+#ifndef _H_GRB_ALGORITHMS_HPCG_GREEDY_COLORING
+#define _H_GRB_ALGORITHMS_HPCG_GREEDY_COLORING
+
+#include
+#include
+
+#include
+
+namespace grb {
+	namespace algorithms {
+
+		/**
+		 * Coloring algorithm for matrix generated by a \p DIMS - dimensional system.
+		 *
+		 * This function implements a \b greedy heuristic to color the rows of a matrix generated by
+		 * a \p DIMS - dimensional generator \p system, so that no two connected elements \a i,j
+		 * in the system (corresponding to a nonzero \a (i,j) entry in the matrix) have the same color.
+		 * If \p reorder_rows_per_color is false (as per default), the coloring information is stored into
+		 * \p row_colors, while \p color_counters stores the number of rows for each color.
+		 *
+		 * If \p reorder_rows_per_color is true, the function performs the additional step of \b re-ordering
+		 * the rows depending on their color: rows of color \a 0 are moved first, then rows of color \a 1
+		 * are moved to the following positions and so on. In this case, \p row_colors stores the new row number.
+		 * The new positions are handed out by advancing, for each row, the counter of its color starting from
+		 * the first position of that color; hence, on return, \p color_counters stores at each position \a i
+		 * the position following the last row of color \a i (i.e. the first position of color \a i+1, or the
+		 * number of rows for the last color).
+		 * NOTE(review): earlier documentation described \p color_counters[i] as the position of the \b first
+		 * row of color \a i; callers should confirm which convention they rely on.
+		 *
+		 * In both cases, \a color_counters.size() gives the number of found colors.
+		 *
+		 * This algorithm performs a \a global coloring of the input system, i.e. it must run on the entire system
+		 * \a before any partitioning occurs. Although this is not scalable, it should not be a problem for
+		 * most sizes, as the constants in front of this algorithm are very small. Implementing a distributed
+		 * coloring algorithm is anyway out of the scope of this prototype.
+		 *
+		 * Colors are by default assigned in a greedy way from the lowest one up, making this coloring scheme very
+		 * regular: close elements tend to have similar colors. This can be changed with \p lowest_color_first
+		 * \c = \c false , which assigns colors from the highest one. This may avoid "destructive interference"
+		 * with following coarsening schemes.
+		 *
+		 * An empty system (no rows) produces empty \p row_colors and \p color_counters.
+		 *
+		 * @tparam DIMS dimensions of the system
+		 * @tparam CoordType type of the coordinates
+		 * @tparam lowest_color_first start greedy assignment of colors from lowest first
+		 *
+		 * @param[in] system generator for a \p DIMS - dimensional system with halo
+		 * @param[out] row_colors if \p reorder_rows_per_color is false, stores the color of each row;
+		 *  if \p reorder_rows_per_color is true, stores the new position of each row, so that rows
+		 *  of the same color are grouped together; the initial content of the vector is destroyed
+		 * @param[out] color_counters if \p reorder_rows_per_color is false, stores the number of rows per color;
+		 *  if \p reorder_rows_per_color is true, stores at each position \a i the position in the re-ordered
+		 *  rows following the last (clustered) row of color \a i; the initial content of the vector is destroyed
+		 * @param[in] reorder_rows_per_color whether to do the clustering after the coloring
+		 */
+		template<
+			size_t DIMS,
+			typename CoordType,
+			bool lowest_color_first = true
+		> void hpcg_greedy_color_ndim_system(
+			const grb::utils::multigrid::LinearizedHaloNDimSystem< DIMS, CoordType > & system,
+			std::vector< CoordType > & row_colors,
+			std::vector< CoordType > & color_counters,
+			bool reorder_rows_per_color = false
+		) {
+			const CoordType nrows = system.system_size();
+			if( nrows == 0 ) {
+				// nothing to color: avoid touching row_colors[ 0 ] of an empty vector
+				row_colors.clear();
+				color_counters.clear();
+				return;
+			}
+			// value `nrows' means `uninitialized'; initialized colors go from 0 to nrow-1
+			// assign() (not insert()) so that any previous content is really discarded
+			row_colors.assign( nrows, nrows );
+			CoordType totalColors = 1;
+			row_colors[ 0 ] = 0; // first point gets color 0
+
+			// Finds colors in a greedy (a likely non-optimal) fashion.
+			typename grb::utils::multigrid::LinearizedHaloNDimSystem< DIMS, CoordType >::Iterator begin = system.begin();
+			begin.next_element(); // skip first row
+
+			std::vector< bool > assigned( totalColors );
+			while( begin.has_more_elements() ) {
+				CoordType curRow = begin->get_element_linear();
+
+				if( row_colors[ curRow ] != nrows ) {
+					// if color already assigned to curRow, move on to the next row:
+					// without advancing here the loop would never terminate
+					begin.next_element();
+					continue;
+				}
+				assigned.assign( totalColors, false );
+				CoordType currentlyAssigned = 0;
+
+				while( begin.has_more_neighbours() ) {
+					CoordType curCol = begin->get_neighbor_linear();
+					if( curCol < curRow ) {
+						assert( row_colors[ curCol ] < nrows ); // if curCol < curRow, curCol has already a color assigned
+						std::vector< bool >::reference color_is_assigned = assigned[ row_colors[ curCol ] ];
+						if( ! color_is_assigned ) {
+							// count how many colors are already assigned
+							(void)currentlyAssigned++;
+						}
+						// track which colors are assigned
+						color_is_assigned = true;
+					} // else // could take advantage of indices being sorted
+					begin.next_neighbour();
+				}
+
+				if( currentlyAssigned < totalColors ) {
+					// if there is at least one color left to use, look for it
+					// smallest possible
+					if( lowest_color_first ) {
+						// here, assign colors greedily starting from the lowest available one
+						for( CoordType j = 0; j < totalColors; ++j ) {
+							if( ! assigned[ j ] ) {
+								// if no neighbor with this color, use it for this row
+								row_colors[ curRow ] = j;
+								break;
+							}
+						}
+					} else {
+						// here, assign colors greedily starting from the highest available one
+						for( CoordType j = totalColors; j > 0; --j ) {
+							CoordType color = j - 1;
+							if( ! assigned[ color ] ) {
+								// if no neighbor with this color, use it for this row
+								row_colors[ curRow ] = color;
+								break;
+							}
+						}
+					}
+				} else {
+					// all existing colors are taken by already-colored neighbors: open a new color;
+					// curRow is still uncolored here because of the check at the top of the loop
+					assert( row_colors[ curRow ] == nrows );
+					row_colors[ curRow ] = totalColors;
+					(void)totalColors++;
+				}
+				begin.next_element();
+			}
+
+#ifdef _DEBUG
+			std::cout << "assigned colors: " << totalColors << " [ -> ]\n";
+			for( size_t i = 0; i < row_colors.size(); i++ ) {
+				std::cout << i << " -> " << row_colors[ i ] << ", ";
+			}
+			std::cout << std::endl;
+#endif
+
+			// count number of vertices per color
+			// assign() (not insert()) so that color_counters.size() is exactly the number of colors
+			color_counters.assign( totalColors, 0 );
+			for( CoordType i = 0; i < nrows; ++i ) {
+				(void)color_counters[ row_colors[ i ] ]++;
+			}
+
+			if( ! reorder_rows_per_color ) {
+				return;
+			}
+
+			// form in-place exclusive prefix scan: each counter becomes the first position of its color
+			CoordType first_position = 0;
+			for( CoordType i = 0; i < totalColors; ++i ) {
+				const CoordType rows_of_color = color_counters[ i ];
+				color_counters[ i ] = first_position;
+				first_position += rows_of_color;
+			}
+
+			// translate `colors' into a permutation; each counter is advanced once per row of
+			// its color, hence it ends at the position following the last row of that color
+			for( CoordType i = 0; i < nrows; ++i ) {
+				row_colors[ i ] = color_counters[ row_colors[ i ] ]++;
+			}
+		}
+
+	} // namespace algorithms
+} // namespace grb
+
+#endif // _H_GRB_ALGORITHMS_HPCG_GREEDY_COLORING
diff --git a/include/graphblas/algorithms/hpcg/hpcg.hpp b/include/graphblas/algorithms/hpcg/hpcg.hpp
deleted file mode 100644
index 6caf22a1c..000000000
--- a/include/graphblas/algorithms/hpcg/hpcg.hpp
+++ /dev/null
@@ -1,228 +0,0 @@
-
-/*
- * Copyright 2021 Huawei Technologies Co., Ltd.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file hpcg.hpp - * @author Alberto Scolari (alberto.scolari@huawei.com) - * @brief File with the main routine to run a full HPCG simulation, comprising multi-grid runs - * with Red-Black Gauss-Seidel smoothing. - * @date 2021-04-30 - */ - -#ifndef _H_GRB_ALGORITHMS_HPCG -#define _H_GRB_ALGORITHMS_HPCG - -#include - -#include "hpcg_data.hpp" -#include "multigrid_v_cycle.hpp" - - -namespace grb { - namespace algorithms { - - /** - * @brief High-Performance Conjugate Gradient algorithm implementation running entirely on GraphBLAS. - * - * Finds the solution x of an \f$ A x = b \f$ algebraic system by running the HPCG algorithm. - * The implementation here closely follows the reference HPCG benchmark used for the HPCG500 rank, - * visible at https://github.com/hpcg-benchmark/hpcg. - * The only difference is the usage of a Red-Black Gauss-Seidel smoother instead of the standard one - * for performance reasons, as the standard Gauss-Seidel algorithm is inherently sequential and not - * expressible in terms of standard linear algebra operations. - * In particular, this implementation (as the standard one) couples a standard CG algorithm with a V-cycle - * multi-grid solver to initially refine the tentative solution. This refinement step depends on the - * availability of coarsening information, which should be stored inside \p data; otherwise, - * the refinement is not performed and only the CG algorithm is run. For more information on inputs - * and on coarsening information, you may consult the \ref hpcg_data class documentation. 
- * - * This implementation assumes that the vectors and matrices inside \p data are all correctly initialized - * and populated with the proper values; in particular - * - hpcg_data#x with the initial tentative solution (iterative solutions are also stored here) - * - hpcg_data#A with the system matrix - * - hpcg_data#b with the right-hand side vector \f$ b \f$ - * - hpcg_data#A_diagonal with the diagonal values of the matrix - * - hpcg_data#color_masks with the color masks for this level - * - hpcg_data#coarser_level with the information for the coarser multi-grid run (if any) - * The other vectors are assumed to be inizialized (via the usual grb::Vector#Vector(size_t) constructor) - * but not necessarily populated with values, as they are internally populated when needed; hence, - * any previous values are overwritten. - * - * Failuers of GraphBLAS operations are handled by immediately stopping the execution and by returning - * the failure code. - * - * @tparam IOType type of result and intermediate vectors used during computation - * @tparam ResidualType type of the residual norm - * @tparam NonzeroType type of matrix values - * @tparam InputType type of values of the right-hand side vector b - * @tparam Ring the ring of algebraic operators zero-values - * @tparam Minus the minus operator for subtractions - * - * @param[in,out] data \ref hpcg_data object storing inputs, outputs and temporary vectors used for the computation, - * as long as the information for the recursive multi-grid runs - * @param[in] with_preconditioning whether to use pre-conditioning, i.e. 
to perform multi-grid runs - * @param[in] presmoother_steps number of pre-smoother steps, for multi-grid runs - * @param[in] postsmoother_steps nomber of post-smoother steps, for multi-grid runs - * @param[in] max_iterations maximum number if iterations the simulation may run for; once reached, - * the simulation stops even if the residual norm is above \p tolerance - * @param[in] tolerance the tolerance over the residual norm, i.e. the value of the residual norm to stop - * the simulation at - * @param[out] iterations numbers of iterations performed - * @param[out] norm_residual norm of the final residual - * @param[in] ring the ring to perform the operations on - * @param[in] minus the \f$ - \f$ operator for vector subtractions - * @return grb::RC::SUCCESS if the algorithm could correctly terminate, the error code of the first - * unsuccessful operation otherwise - */ - template< typename IOType, - typename ResidualType, - typename NonzeroType, - typename InputType, - class Ring = Semiring< grb::operators::add< IOType >, grb::operators::mul< IOType >, grb::identities::zero, grb::identities::one >, - class Minus = operators::subtract< IOType > > - grb::RC hpcg( hpcg_data< IOType, NonzeroType, InputType > &data, - bool with_preconditioning, - const size_t presmoother_steps, - const size_t postsmoother_steps, - const size_t max_iterations, - const ResidualType tolerance, - size_t &iterations, - ResidualType &norm_residual, - const Ring &ring = Ring(), - const Minus &minus = Minus() - ) { - ResidualType alpha; - - const grb::Matrix< NonzeroType > &A { data.A }; - grb::Vector< IOType > &x { data.x }; - const grb::Vector< InputType > &b { data.b }; - grb::Vector< IOType > &r { data.r }; // residual vector - grb::Vector< IOType > &p { data.p }; // direction vector - grb::Vector< IOType > &Ap { data.u }; // temp vector - grb::Vector< IOType > &z { data.z }; // pre-conditioned residual vector - grb::RC ret { SUCCESS }; - - ret = ret ? 
ret : grb::set( Ap, 0 ); - ret = ret ? ret : grb::set( r, 0 ); - ret = ret ? ret : grb::set( p, 0 ); - - ret = ret ? ret : grb::set( p, x ); - ret = ret ? ret : grb::mxv( Ap, A, x, ring ); // Ap = A * x - assert( ret == SUCCESS ); - - ret = ret ? ret : grb::eWiseApply( r, b, Ap, minus ); // r = b - Ap; - assert( ret == SUCCESS ); - - norm_residual = ring.template getZero< ResidualType >(); - ret = ret ? ret : grb::dot( norm_residual, r, r, ring ); // norm_residual = r' * r; - assert( ret == SUCCESS ); - - // compute sqrt to avoid underflow - norm_residual = std::sqrt( norm_residual ); - - // initial norm of residual - const ResidualType norm_residual_initial { norm_residual }; - ResidualType old_r_dot_z { 0.0 }, r_dot_z { 0.0 }, beta { 0.0 }; - size_t iter { 0 }; - -#ifdef HPCG_PRINT_STEPS - DBG_print_norm( p, "start p" ); - DBG_print_norm( Ap, "start Ap" ); - DBG_print_norm( r, "start r" ); -#endif - - do { -#ifdef HPCG_PRINT_STEPS - DBG_println( "========= iteration " << iter << " =========" ); -#endif - if( with_preconditioning ) { - ret = ret ? ret : internal::multi_grid( data, data.coarser_level, presmoother_steps, postsmoother_steps, ring, minus ); - assert( ret == SUCCESS ); - } else { - ret = ret ? ret : grb::set( z, r ); // z = r; - assert( ret == SUCCESS ); - } -#ifdef HPCG_PRINT_STEPS - DBG_print_norm( z, "initial z" ); -#endif - - ResidualType pAp; - - if( iter == 0 ) { - ret = ret ? ret : grb::set( p, z ); // p = z; - assert( ret == SUCCESS ); - - ret = ret ? ret : grb::dot( r_dot_z, r, z, ring ); // r_dot_z = r' * z; - assert( ret == SUCCESS ); - } else { - old_r_dot_z = r_dot_z; - - r_dot_z = ring.template getZero< ResidualType >(); - ret = ret ? ret : grb::dot( r_dot_z, r, z, ring ); // r_dot_z = r' * z; - assert( ret == SUCCESS ); - - beta = r_dot_z / old_r_dot_z; - ret = ret ? ret : grb::clear( Ap ); // Ap = 0; - ret = ret ? 
ret : grb::eWiseMulAdd( Ap, beta, p, z, ring ); // Ap += beta * p + z; - std::swap( Ap, p ); // p = Ap; - assert( ret == SUCCESS ); - } -#ifdef HPCG_PRINT_STEPS - DBG_print_norm( p, "middle p" ); -#endif - - ret = ret ? ret : grb::set( Ap, 0 ); - ret = ret ? ret : grb::mxv( Ap, A, p, ring ); // Ap = A * p; - assert( ret == SUCCESS ); -#ifdef HPCG_PRINT_STEPS - DBG_print_norm( Ap, "middle Ap" ); -#endif - pAp = static_cast< ResidualType >( 0.0 ); - ret = ret ? ret : grb::dot( pAp, Ap, p, ring ); // pAp = p' * Ap - assert( ret == SUCCESS ); - - alpha = r_dot_z / pAp; - - ret = ret ? ret : grb::eWiseMul( x, alpha, p, ring ); // x += alpha * p; - assert( ret == SUCCESS ); -#ifdef HPCG_PRINT_STEPS - DBG_print_norm( x, "end x" ); -#endif - - ret = ret ? ret : grb::eWiseMul( r, -alpha, Ap, ring ); // r += - alpha * Ap; - assert( ret == SUCCESS ); -#ifdef HPCG_PRINT_STEPS - DBG_print_norm( r, "end r" ); -#endif - - norm_residual = static_cast< ResidualType >( 0.0 ); - ret = ret ? ret : grb::dot( norm_residual, r, r, ring ); // residual = r' * r; - assert( ret == SUCCESS ); - - norm_residual = std::sqrt( norm_residual ); - - ++iter; - } while( iter < max_iterations && norm_residual / norm_residual_initial > tolerance && ret == SUCCESS ); - - iterations = iter; - return ret; - } - - } // namespace algorithms -} // namespace grb - -#endif // _H_GRB_ALGORITHMS_HPCG diff --git a/include/graphblas/algorithms/hpcg/hpcg_data.hpp b/include/graphblas/algorithms/hpcg/hpcg_data.hpp deleted file mode 100644 index 96b39856d..000000000 --- a/include/graphblas/algorithms/hpcg/hpcg_data.hpp +++ /dev/null @@ -1,195 +0,0 @@ - -/* - * Copyright 2021 Huawei Technologies Co., Ltd. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file hpcg_data.hpp - * @author Alberto Scolari (alberto.scolari@huawei.com) - * @brief Data structures to store HPCG input/output data. - * @date 2021-04-30 - */ - -#ifndef _H_GRB_ALGORITHMS_HPCG_DATA -#define _H_GRB_ALGORITHMS_HPCG_DATA - -#include -#include - -#include - - -namespace grb { - - namespace algorithms { - - /** - * @brief basic data container for the HPCG algorithm, storing \b only the - * data in common between the full CG run and the V-cycle multi-grid solver. - * Additional data are stored in inheriting daata structures. - * - * @tparam IOType type of values of the vectors for intermediate results - * @tparam NonzeroType type of the values stored inside the system matrix #A - */ - template< typename IOType, typename NonzeroType > - struct system_data { - - const std::size_t system_size; ///< size of the system, i.e. side of the #A - - grb::Matrix< NonzeroType > A; ///< system matrix - grb::Vector< IOType > A_diagonal; ///< vector with the diagonal of #A - grb::Vector< IOType > z; ///< multi-grid solution - grb::Vector< IOType > r; ///< residual - grb::Vector< IOType > smoother_temp; ///< for smoother's intermediate results - std::vector< grb::Vector< bool > > color_masks; ///< for color masks - - /** - * @brief Constructor building all the stored vectors and matrices. - * - * Stored vectors and matrices are constructed according to \p sys_size but \b not initialized - * to any value internally, as initialization is up to users's code. - * - * @param[in] sys_size the size of the underlying physical system, i.e. 
the size of vectors and the number - * of rows and columns of the #A matrix. - */ - system_data( std::size_t sys_size ) : - system_size( sys_size ), A( sys_size, sys_size ), A_diagonal( sys_size ), z( sys_size ), r( sys_size ), - // temp(sys_size), - smoother_temp( sys_size ) {} - - // for safety, disable copy semantics - system_data( const system_data & o ) = delete; - - system_data & operator=( const system_data & ) = delete; - }; - - /** - * @brief Data container for all multi-grid inputs and outputs. - * - * @tparam IOType Type of values of the vectors for intermediate results - * @tparam NonzeroType Type of the values stored inside the system matrix \p A - * and the coarsening matrix #Ax_finer - * - * This data structure stores information for a full multi-grid V cycle, i.e. - * - input and output vectors for solution, residual and temporary vectors - * - coarsening information, in particular the #coarsening_matrix that - * coarsens a larger system of size #finer_size to the current system - * of size #system_size - * - the next level of coarsening, pointed to by #coarser_level, possibly being \c nullptr - * if no further coarsening is desired; note that this information is automatically - * destructed on object destruction (if any) - * - * Vectors stored here refer to the \b coarsened system (with the exception of #Ax_finer), - * thus having size #system_size; this also holds for the system matrix #A, - * while #coarsening_matrix has size #system_size \f$ \times \f$ #finer_size. - * Hence, the typical usage of this data structure is to coarsen \b external vectors, e.g. vectors - * coming from another \code multi_grid_data \endcode object whose #system_size equals - * \code this-> \endcode #fines_size, via \code this-> \endcode #coarsening_matrix and store the coarsened - * vectors internally. 
Mimicing the recursive behavior of standard multi-grid simulations, - * the information for a further coarsening is stored inside #coarser_level, so that the - * hierarchy of coarsened levels is reflected inside this data structure. - * - * As for \ref system_data, internal vectors and matrices are initialized to the proper size, - * but their values are \b not initialized. - */ - template< typename IOType, typename NonzeroType > - struct multi_grid_data : public system_data< IOType, NonzeroType > { - - const std::size_t finer_size; ///< ssize of the finer system to coarse from; - ///< typically \c finer_size \code == 8 * \endcode #system_size - - grb::Vector< IOType > Ax_finer; ///< finer vector for intermediate computations, of size #finer_size - - grb::Matrix< NonzeroType > coarsening_matrix; ///< matrix of size #system_size \f$ \times \f$ #finer_size - ///< to coarsen an input vector of size #finer_size into a vector of size #system_size - - struct multi_grid_data< IOType, NonzeroType > * coarser_level; ///< pointer to next coarsening level, for recursive - ///< multi-grid V cycle implementations - - /** - * @brief Construct a new \c multi_grid_data_object by initializing internal data structures and setting - * #coarser_level to \c nullptr. - * @param[in] coarser_size size of the current system, i.e. size \b after coarsening - * @param[in] _finer_size size of the finer system, i.e. size of external objects \b before coarsening - */ - multi_grid_data( std::size_t coarser_size, std::size_t _finer_size ) : - system_data< IOType, NonzeroType >( coarser_size ), finer_size( _finer_size ), Ax_finer( finer_size ), coarsening_matrix( coarser_size, finer_size ) { - coarser_level = nullptr; - } - - /** - * @brief Destroys the \c multi_grid_data_object object by destroying #coarser_level. 
- */ - virtual ~multi_grid_data() { - if( coarser_level != nullptr ) { - delete coarser_level; - } - } - }; - - /** - * @brief Data stucture to store the data for a full HPCG run: system vectors and matrix, - * coarsening information and temporary vectors. - * - * This data structures contains all the needed vectors and matrices to solve a linear system - * \f$ A x = b \f$. As for \ref system_data, internal elements are built and their sizes properly initialized - * to #system_size, but internal values are \b not initialized, as they are left to user's logic. - * Similarly, the coarsening information in #coarser_level is to be initialized by users by properly - * building a \code multi_grid_data \endcode object and storing its pointer into - * #coarser_level; on destruction, #coarser_level will also be properly destroyed without - * user's intervention. - * - * @tparam IOType type of values of the vectors for intermediate results - * @tparam NonzeroType type of the values stored inside the system matrix #A - * @tparam InputType type of the values of the right-hand side vector #b - */ - template< typename IOType, typename NonzeroType, typename InputType > - struct hpcg_data : public system_data< IOType, NonzeroType > { - - grb::Vector< InputType > b; ///< right-side vector of known values - grb::Vector< IOType > u; ///< temporary vectors (typically for CG exploration directions) - grb::Vector< IOType > p; ///< temporary vector (typically for x refinements coming from the multi-grid run) - grb::Vector< IOType > x; // system solution being refined over the iterations: it us up to the user - ///< to set the initial solution value - - struct multi_grid_data< IOType, NonzeroType > * coarser_level; ///< information about the coarser system, for - ///< the multi-grid run - - /** - * @brief Construct a new \c hpcg_data object by building vectors and matrices and by setting - * #coarser_level to \c nullptr (i.e. no coarser level is assumed). 
- * - * @param[in] sys_size the size of the simulated system, i.e. of all the internal vectors and matrices - */ - hpcg_data( std::size_t sys_size ) : system_data< IOType, NonzeroType >( sys_size ), b( sys_size ), u( sys_size ), p( sys_size ), x( sys_size ) { - coarser_level = nullptr; - } - - /** - * @brief Destroy the \c hpcg_data object by destroying the #coarser_level informartion, if any. - */ - virtual ~hpcg_data() { - if( coarser_level != nullptr ) { - delete coarser_level; - } - } - }; - - } // namespace algorithms - -} // namespace grb - -#endif // _H_GRB_ALGORITHMS_HPCG_DATA - diff --git a/include/graphblas/algorithms/hpcg/matrix_building_utils.hpp b/include/graphblas/algorithms/hpcg/matrix_building_utils.hpp deleted file mode 100644 index 1facabe49..000000000 --- a/include/graphblas/algorithms/hpcg/matrix_building_utils.hpp +++ /dev/null @@ -1,170 +0,0 @@ - -/* - * Copyright 2021 Huawei Technologies Co., Ltd. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file hpcg_matrix_building_utils.hpp - * @author Alberto Scolari (alberto.scolari@huawei.com) - * @brief Utilities to build the matrices for HPCG simulations in an arbitrary number of dimensions. 
- * @date 2021-04-30 - */ - -#ifndef _H_GRB_ALGORITHMS_MATRIX_BUILDING_UTILS -#define _H_GRB_ALGORITHMS_MATRIX_BUILDING_UTILS - -#include -#include -#include -#include -#include -#include - -#include - -#include "ndim_matrix_builders.hpp" - - -namespace grb { - namespace algorithms { - - /** - * @brief Builds a \p DIMS -dimensional system matrix for HPCG simulation. - * - * This routine initializes \p M to a matrix representing a \p DIMS -dimensions system of sizes - * \p sys_sizes, with an iteration halo of size \p halo_size . The matrix diagonal values are initialized - * to \p diag_value while the other non-zero values are initialized to \p non_diag_value . - * - * @tparam DIMS system dimensions - * @tparam T type of matrix values - * @tparam B matrix GraphBLAS backend - * @param M the matrix to be initialized; it must be already constructed - * @param sys_sizes the sizes of the physical system - * @param halo_size the size of the halo of point to iterate in - * @param diag_value diagonal value - * @param non_diag_value value outside of the diagonal - * @return grb::RC the success value returned when trying to build the matrix - */ - template< std::size_t DIMS, typename T, enum grb::Backend B > - grb::RC build_ndims_system_matrix( grb::Matrix< T, B > & M, const std::array< std::size_t, DIMS > & sys_sizes, std::size_t halo_size, T diag_value, T non_diag_value ) { - static_assert( DIMS > 0, "DIMS must be > 0" ); - std::size_t n { std::accumulate( sys_sizes.cbegin(), sys_sizes.cend(), 1UL, std::multiplies< std::size_t >() ) }; - if( grb::nrows( M ) != n || grb::nrows( M ) != grb::ncols( M ) ) { - throw std::invalid_argument( "wrong matrix dimensions: matrix should " - "be square" - " and in accordance with given system " - "sizes" ); - } - grb::algorithms::matrix_generator_iterator< DIMS, T > begin( sys_sizes, 0UL, halo_size, diag_value, non_diag_value ); - grb::algorithms::matrix_generator_iterator< DIMS, T > end( sys_sizes, n, halo_size, diag_value, 
non_diag_value ); - return buildMatrixUnique( M, begin, end, grb::IOMode::SEQUENTIAL ); - } - - /** - * @brief Builds a coarsener matrix for an HPCG simulation. - * - * It initializes \p M as a rectangular matrix, with rows corresponding to the coarser system - * (of dimensions \p coarser_sizes - output) and columns corresponding to the finer system - * (of dimensions \p finer_sizes - input). The resulting coarsening matrix takes in input the finer system - * and coarsens it by keeping one element every \a S , where \a S is the ratio between the finer and - * the coarser dimension (computed for each dimension). In this way each \p DIMS -dimensional finer element - * corresponds to its bounding coarser element. - * - * For the coarsening to be feasible, the sizes of the finer system \b must be a multiple of those of the - * coarser system. If this condition is not met, an exception is thrown. - * - * @tparam DIMS system dimensions - * @tparam T type of matrix values - * @tparam B matrix GraphBLAS backend - * @param M the matrix to be initialized; it must be already constructed with proper dimensions - * @param coarser_sizes sizes of the coarser system - * @param finer_sizes sizes of the finer system; each one \b must be a multiple of the corresponding value - * in \p coarser_size , otherwise an exception is thrown - * @return grb::RC the success value returned when trying to build the matrix - */ - template< std::size_t DIMS, typename T, enum grb::Backend B > - grb::RC build_ndims_coarsener_matrix( grb::Matrix< T, B > & M, const std::array< std::size_t, DIMS > & coarser_sizes, const std::array< std::size_t, DIMS > & finer_sizes ) { - static_assert( DIMS > 0, "DIMS must be > 0" ); - std::size_t const rows { std::accumulate( coarser_sizes.cbegin(), coarser_sizes.cend(), 1UL, std::multiplies< std::size_t >() ) }; - for( std::size_t i { 0 }; i < coarser_sizes.size(); i++ ) { - std::size_t step = finer_sizes[ i ] / coarser_sizes[ i ]; - if( step * coarser_sizes[ i ] != 
finer_sizes[ i ] ) { - throw std::invalid_argument( "finer sizes should be a multiple of " - "coarser sizes" ); - } - } - std::size_t const cols { std::accumulate( finer_sizes.cbegin(), finer_sizes.cend(), 1UL, std::multiplies< std::size_t >() ) }; - if( grb::nrows( M ) != rows || grb::ncols( M ) != cols ) { - throw std::invalid_argument( "wrong matrix dimensions: matrix should " - "be rectangular" - " with rows == " - "and cols == " ); - } - - grb::algorithms::coarsener_generator_iterator< DIMS, T > begin( coarser_sizes, finer_sizes, 0 ); - grb::algorithms::coarsener_generator_iterator< DIMS, T > end( coarser_sizes, finer_sizes, rows ); - return buildMatrixUnique( M, begin, end, grb::IOMode::SEQUENTIAL ); - } - - /** - * @brief Populates \p masks with static color mask generated for a squared matrix of size \p matrix_size . - * - * Colors are built in the range [0, \p colors ), with the mask for color 0 being the array - * of values true in the positions \f$ [0, colors, 2*colors, ..., floor((system_size - 1)/colors) * color] \f$, - * for color 1 in the positions \f$ [1, 1+colors, 1+2*colors, ..., floor((system_size - 2)/colors) * color] \f$, - * etc.; the mask for color 0 is in \c masks[0], for color 1 in \c masks[1] and so on. - * - * The vectors stored in \p masks (assumed empty at the beginning) are built inside the function and populated - * only with the \c true values, leading to sparse vectors. This saves on storage space and allows - * GraphBLAS routines (like \c eWiseLambda() ) to iterate only on true values. 
- * - * @tparam B GraphBLAS backend for the vector - * @param masks output vector of color masks - * @param matrix_size size of the system matrix - * @param colors numbers of colors masks to build; it must be < \p matrix_size - * @return grb::RC the success value returned when trying to build the vector - */ - template< enum grb::Backend B > - grb::RC build_static_color_masks( std::vector< grb::Vector< bool, B > > & masks, std::size_t matrix_size, std::size_t colors ) { - if( ! masks.empty() ) { - throw std::invalid_argument( "vector of masks is expected to be " - "empty" ); - } - if( matrix_size < colors ) { - throw std::invalid_argument( "syztem size is < number of colors: too " - "small" ); - } - grb::RC rc { grb::SUCCESS }; - masks.reserve( colors ); - for( std::size_t i { 0U }; i < colors; i++ ) { - // build in-place, assuming the compiler deduces the right constructor according to B - masks.emplace_back( matrix_size ); - grb::Vector< bool > & mask = masks.back(); - // grb::set(mask, false); // DO NOT initialize false's explicitly, otherwise - // RBGS will touch them too and the runtime will increase! - for( std::size_t j = i; j < matrix_size; j += colors ) { - rc = grb::setElement( mask, true, j ); - assert( rc == grb::SUCCESS ); - if( rc != grb::SUCCESS ) - return rc; - } - } - return rc; - } - - } // namespace algorithms -} // namespace grb - -#endif // _H_GRB_ALGORITHMS_MATRIX_BUILDING_UTILS diff --git a/include/graphblas/algorithms/hpcg/multigrid_v_cycle.hpp b/include/graphblas/algorithms/hpcg/multigrid_v_cycle.hpp deleted file mode 100644 index f40296f91..000000000 --- a/include/graphblas/algorithms/hpcg/multigrid_v_cycle.hpp +++ /dev/null @@ -1,252 +0,0 @@ - -/* - * Copyright 2021 Huawei Technologies Co., Ltd. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file multigrid_v_cycle.hpp - * @author Alberto Scolari (alberto.scolari@huawei.com) - * @brief This file contains the routines for multi-grid solution refinement, including the main routine - * and those for coarsening and refinement of the tentative solution. - * @date 2021-04-30 - */ - -#ifndef _H_GRB_ALGORITHMS_MULTIGRID_V_CYCLE -#define _H_GRB_ALGORITHMS_MULTIGRID_V_CYCLE - -#include -#include - -#include - -#include "hpcg_data.hpp" -#include "red_black_gauss_seidel.hpp" - - -namespace grb { - namespace algorithms { - /** - * @brief Namespace for interfaces that should not be used outside of the algorithm namespace. - */ - namespace internal { - - /** - * @brief computes the coarser residual vector \p coarsening_data.r by coarsening - * \p coarsening_data.Ax_finer - \p r_fine via \p coarsening_data.coarsening_matrix. - * - * The coarsening information are stored inside \p coarsening_data. 
- * - * @tparam IOType type of result and intermediate vectors used during computation - * @tparam NonzeroType type of matrix values - * @tparam Ring the ring of algebraic operators zero-values - * @tparam Minus the minus operator for subtractions - * - * @param[in] r_fine fine residual vector - * @param[in,out] coarsening_data \ref multi_grid_data data structure storing the information for coarsening - * @param[in] ring the ring to perform the operations on - * @param[in] minus the \f$ - \f$ operator for vector subtractions - * @return grb::RC::SUCCESS if the algorithm could correctly terminate, the error code of the first - * unsuccessful operation otherwise - */ - template< typename IOType, - typename NonzeroType, - class Ring, - class Minus > - grb::RC compute_coarsening( const grb::Vector< IOType > & r_fine, // fine residual - struct multi_grid_data< IOType, NonzeroType > & coarsening_data, - const Ring & ring, - const Minus & minus ) { - RC ret { SUCCESS }; - ret = ret ? ret : grb::eWiseApply( coarsening_data.Ax_finer, r_fine, coarsening_data.Ax_finer, - minus ); // Ax_finer = r_fine - Ax_finer - assert( ret == SUCCESS ); - - // actual coarsening, from ncols(*coarsening_data->A) == *coarsening_data->system_size * 8 - // to *coarsening_data->system_size - ret = ret ? ret : grb::set( coarsening_data.r, 0 ); - ret = ret ? ret : grb::mxv( coarsening_data.r, coarsening_data.coarsening_matrix, coarsening_data.Ax_finer, - ring ); // r = coarsening_matrix * Ax_finer - return ret; - } - - /** - * @brief computes the prolongation of the coarser solution \p coarsening_data.z and stores it into - * \p x_fine. - * - * For prolongation, this function uses the matrix \p coarsening_data.coarsening_matrix by transposing it. 
- * - * @tparam IOType type of result and intermediate vectors used during computation - * @tparam NonzeroType type of matrix values - * @tparam Ring the ring of algebraic operators zero-values - * - * @param[out] x_fine the solution vector to store the prolonged solution into - * @param[in,out] coarsening_data information for coarsening - * @param[in] ring the ring to perform the operations on - * @return grb::RC::SUCCESS if the algorithm could correctly terminate, the error code of the first - * unsuccessful operation otherwise - */ - template< typename IOType, - typename NonzeroType, - class Ring > - grb::RC compute_prolongation( grb::Vector< IOType > & x_fine, // fine residual - struct multi_grid_data< IOType, NonzeroType > & coarsening_data, - const Ring & ring ) { - RC ret { SUCCESS }; - // actual refining, from *coarsening_data->syztem_size == nrows(*coarsening_data->A) / 8 - // to nrows(x_fine) - ret = ret ? ret : set( coarsening_data.Ax_finer, 0 ); - - ret = ret ? ret : grb::mxv< grb::descriptors::transpose_matrix >( coarsening_data.Ax_finer, coarsening_data.coarsening_matrix, coarsening_data.z, ring ); - assert( ret == SUCCESS ); - - ret = ret ? ret : grb::foldl( x_fine, coarsening_data.Ax_finer, ring.getAdditiveMonoid() ); // x_fine += Ax_finer; - assert( ret == SUCCESS ); - return ret; - } - - /** - * @brief Runs \p smoother_steps iteration of the Red-Black Gauss-Seidel smoother, with inputs and outputs stored - * inside \p data. 
- * - * @tparam IOType type of result and intermediate vectors used during computation - * @tparam NonzeroType type of matrix values - * @tparam Ring the ring of algebraic operators zero-values - * - * @param[in,out] data \ref system_data data structure with relevant inpus and outputs: system matrix, initial solution, - * residual, system matrix colors, temporary vectors - * @param[in] smoother_steps how many smoothing steps to run - * @param[in] ring the ring to perform the operations on - * @return grb::RC::SUCCESS if the algorithm could correctly terminate, the error code of the first - * unsuccessful operation otherwise - */ - template< typename IOType, typename NonzeroType, class Ring > - grb::RC run_smoother( system_data< IOType, NonzeroType > & data, const std::size_t smoother_steps, const Ring & ring ) { - RC ret { SUCCESS }; - - for( std::size_t i { 0 }; i < smoother_steps && ret == SUCCESS; i++ ) { - ret = ret ? ret : red_black_gauss_seidel( data, ring ); - assert( ret == SUCCESS ); - } - return ret; - } - - /** - * @brief Multi-grid V cycle implementation to refine a given solution. - * - * A full multi-grid run goes through the following steps: - * -# if \p presmoother_steps \f$ > 0 \f$, \p presmoother_steps of the Red-Black Gauss-Seidel smoother are run - * to improve on the initial solution stored into \p data.z - * -# the coarsening of \f$ r - A*z \f$ is computed to find the coarser residual vector - * -# a multi-grid run is recursively performed on the coarser system - * -# the tentative solution from the coarser multi-grid run is prolonged and added to the current tentative solution - * into \p data.z - * -# this solution is further smoothed for \p postsmoother_steps steps - * - * If coarsening information is not available, the multi-grid run consists in a single smmothing run. - * - * Failuers of GraphBLAS operations are handled by immediately stopping the execution and by returning - * the failure code. 
- * - * @tparam IOType type of result and intermediate vectors used during computation - * @tparam NonzeroType type of matrix values - * @tparam Ring the ring of algebraic operators zero-values - * @tparam Minus the minus operator for subtractions - * - * @param[in,out] data \ref multi_grid_data object storing the relevant data for the multi-grid run of the current - * clevel - * @param[in,out] coarsening_data pointer to information for the coarsening/refinement operations and for the - * recursive multi-grid run on the coarsened system; if \c nullptr, no coarsening/refinement occurs - * and only smoothing occurs on the current solution - * @param[in] presmoother_steps number of pre-smoother steps - * @param[in] postsmoother_steps number of post-smoother steps - * @param[in] ring the ring to perform the operations on - * @param[in] minus the \f$ - \f$ operator for vector subtractions - * @return grb::RC::SUCCESS if the algorithm could correctly terminate, the error code of the first - * unsuccessful operation otherwise - */ - template< typename IOType, typename NonzeroType, class Ring, class Minus > - grb::RC multi_grid( system_data< IOType, NonzeroType > & data, - struct multi_grid_data< IOType, NonzeroType > * const coarsening_data, - const size_t presmoother_steps, - const size_t postsmoother_steps, - const Ring & ring, - const Minus & minus ) { - RC ret { SUCCESS }; -#ifdef HPCG_PRINT_STEPS - DBG_println( "mg BEGINNING {" ); -#endif - - // clean destination vector - ret = ret ? ret : grb::set( data.z, 0 ); -#ifdef HPCG_PRINT_STEPS - DBG_print_norm( data.r, "initial r" ); -#endif - if( coarsening_data == nullptr ) { - // compute one round of Gauss Seidel and return - ret = ret ? 
ret : run_smoother( data, 1, ring ); - assert( ret == SUCCESS ); -#ifdef HPCG_PRINT_STEPS - DBG_print_norm( data.z, "smoothed z" ); - DBG_println( "} mg END" ); -#endif - return ret; - } - - struct multi_grid_data< IOType, NonzeroType > & cd { - *coarsening_data - }; - - // pre-smoother - ret = ret ? ret : run_smoother( data, presmoother_steps, ring ); - assert( ret == SUCCESS ); -#ifdef HPCG_PRINT_STEPS - DBG_print_norm( data.z, "pre-smoothed z" ); -#endif - - ret = ret ? ret : grb::set( cd.Ax_finer, 0 ); - ret = ret ? ret : grb::mxv( cd.Ax_finer, data.A, data.z, ring ); - assert( ret == SUCCESS ); - - ret = ret ? ret : compute_coarsening( data.r, cd, ring, minus ); - assert( ret == SUCCESS ); -#ifdef HPCG_PRINT_STEPS - DBG_print_norm( cd.r, "coarse r" ); -#endif - - ret = ret ? ret : multi_grid( cd, cd.coarser_level, presmoother_steps, postsmoother_steps, ring, minus ); - assert( ret == SUCCESS ); - - ret = ret ? ret : compute_prolongation( data.z, cd, ring ); - assert( ret == SUCCESS ); -#ifdef HPCG_PRINT_STEPS - DBG_print_norm( data.z, "prolonged z" ); -#endif - - // post-smoother - ret = ret ? ret : run_smoother( data, postsmoother_steps, ring ); - assert( ret == SUCCESS ); -#ifdef HPCG_PRINT_STEPS - DBG_print_norm( data.z, "post-smoothed z" ); - DBG_println( "} mg END" ); -#endif - - return ret; - } - - } // namespace internal - } // namespace algorithms -} // namespace grb - -#endif // _H_GRB_ALGORITHMS_MULTIGRID_V_CYCLE diff --git a/include/graphblas/algorithms/hpcg/ndim_matrix_builders.hpp b/include/graphblas/algorithms/hpcg/ndim_matrix_builders.hpp deleted file mode 100644 index c00eb65b2..000000000 --- a/include/graphblas/algorithms/hpcg/ndim_matrix_builders.hpp +++ /dev/null @@ -1,596 +0,0 @@ - -/* - * Copyright 2021 Huawei Technologies Co., Ltd. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file ndim_matrix_builders.hpp - * @author Alberto Scolari (alberto.scolari@huawei.com) - * @brief Utilities to build matrices for an HPCG simulation in a generic number of dimensions - * - * In particular, the main matrices are: - * - a system matrix, generated from an N-dimenional space of coordinates by iterating along - * each dimension in priority order, where the first dimension has highest priority and the last - * dimension least priority; for each point (row), all its N-dimensional neighbours within - * a given distance are generated for the column - * - a coarsening matrix, generated by iterating on a coarser system of N dimensions (row) and projecting - * each point to a corresponding system of finer sizes - * - * @date 2021-04-30 - */ - -#ifndef _H_GRB_ALGORITHMS_NDIM_MATRIX_BUILDERS -#define _H_GRB_ALGORITHMS_NDIM_MATRIX_BUILDERS - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -namespace grb { - - namespace algorithms { - - /** - * @brief Base class that iterates on DIMS dimensions starting from the first one. - * - * The coordinates are assumed to generate the row number in a matrix whose number of rows is - * the product of all sizes. This class generates row numbers for physical problems described as - * systems of linear equations in an n-dimensional space. 
- * - * Example of iterations in a 3D (x, y, z) system of size (4,3,2), with generated row numbers - * reported as '=> ROW': - * - z[0] - * - y[0] - * - x[0] => 0, x[1] => 1, x[2] => 2, x[3] => 3 - * - y[1] - * - x[0] => 4, x[1] => 5, x[2] => 6, x[3] => 7 - * - y[2] - * - x[0] => 8, x[1] => 9, x[2] => 10, x[3] => 11 - * - z[1] - * - y[0] - * - x[0] => 12, x[1] => 13, x[2] => 14, x[3] => 15 - * - y[1] - * - x[0] => 16, x[1] => 17, x[2] => 18, x[3] => 19 - * - y[2] - * - x[0] => 20, x[1] => 21, x[2] => 22, x[3] => 23 - * - * The main goal of this class is to be derived by other classes to generate matrices in an - * STL-iterator-fashion; hence, this class contains all the code for basic coordinate-to-row-column - * conversion in \p DIM dimensions and the basic logic to increment the row number. - * - * @tparam DIMS number os dimensions of the system - */ - template< std::size_t DIMS > - struct row_generator { - - using RowIndexType = std::size_t; ///< numeric type of rows - using array_t = std::array< RowIndexType, - DIMS >; ///< type for the array storing the coordinates. - - const array_t physical_sizes; ///< size of each dimension, starting from the one to be explored first - - /** - * @brief Construct a new row generator object - * @param[in] _sizes array of sizes of each dimension; no dimension should be 0, otherwise an exception - * is thrown - * @param[in] first_row first row to iterate from; it is allowed to be beyond the matrix size, e.g. 
to create - * an end iterator (no check occurs) - */ - row_generator( const array_t & _sizes, RowIndexType first_row ) : physical_sizes( _sizes ) { - static_assert( DIMS > 0, "DIMS should be higher than 0" ); - for( const auto i : _sizes ) { - if( i == static_cast< RowIndexType >( 0U ) ) { - throw std::invalid_argument( "All dimension sizes must " - "be > 0" ); - } - } - row_to_coords( first_row ); - } - - row_generator( const row_generator & o ) = default; - - row_generator( row_generator && o ) = default; - - protected: - // x: row_coords[0], y: row_coords[1], z: row_coords[2], ... - array_t row_coords; ///< n-D coordinates from which to compute the row - - /** - * @brief converts a row number into a n-D coordinates according to the sizes in #physical_sizes - * - * In case the input is higher than the nunber of rows, the last coordinate is allowed to - * go beyond its physical size. E.g., if the system has size (4,3,2) and \p rowcol is 24, - * the coordinates are (0,0,3). - * - * @param[in] rowcol row number to convert; it can be any number - */ - void row_to_coords( RowIndexType rowcol ) { - std::size_t s = 1; - for( std::size_t i { 0 }; i < row_coords.size() - 1; i++ ) - s *= physical_sizes[ i ]; - - for( typename array_t::size_type i { row_coords.size() - 1 }; i > 0; i-- ) { - row_coords[ i ] = rowcol / s; - rowcol -= row_coords[ i ] * s; - s /= physical_sizes[ i ]; - } - row_coords[ 0 ] = rowcol % physical_sizes[ 0 ]; - } - - /** - * @brief Pure function converting an array of coordinates into a row number, based on #physical_sizes. 
- * @param a the #array_t array of coordinates to convert - * @return #RowIndexType the row corresponding to the coordinates in \p a - */ - RowIndexType coords_to_rowcol( const array_t & a ) const { - RowIndexType row { 0 }; - RowIndexType s { 1 }; - for( typename array_t::size_type i { 0 }; i < a.size(); i++ ) { - row += s * a[ i ]; - s *= physical_sizes[ i ]; - } - return row; - } - - /** - * @brief Increment #row_coords in order to move to the next coordinate (according to the - * n-dimensional iteration order) and update #current_row accordingly. - * - * To be used by derived classes in order to generate the matrix, e.g. via the \c operator()++ - * operator prescribed for STL-like iterators. - */ - void increment_row() { - bool rewind; - typename array_t::size_type i { 0 }; - do { - typename array_t::value_type & coord = row_coords[ i ]; - // must rewind dimension if we wrap-around - typename array_t::value_type new_coord = ( coord + 1 ) % physical_sizes[ i ]; - rewind = new_coord < coord; - coord = new_coord; - ++i; - } while( rewind && i < row_coords.size() - 1 ); // rewind only the first N-1 coordinates - - // if we still have to rewind, increment the last coordinate, which is unbounded - if( rewind ) { - row_coords.back()++; - } - } - }; - - // =============================================================== - - /** - * @brief STL-like iterable class to generate the values for a matrix by iterating in an n-dimensional - * space along the coordinates. - * - * For each \f$ X=(x0, x1, ...,xn) \f$ point of the underlying (n+1)-dimensional space, - * this class iterates through the points of the n-dimensional halo of radius \p halo around \f$ X \f$, - * generating the row number corresponding to \f$ X \f$ and the column number corresponding to - * each halo point. At each coordinate \code (row, col) \endcode generated this way, the corresponding matrix value - * being generated depends on whether \code row == col \endcode. 
- * - * @tparam DIMS number of dimensions of the system - * @tparam HALO halo size, determining the number of points to iterate around and thus the column coordinates - * @tparam T type of matrix values - */ - template< std::size_t DIMS, typename T = double > - struct matrix_generator_iterator : public row_generator< DIMS > { - - using RowIndexType = typename row_generator< DIMS >::RowIndexType; - using ColumnIndexType = typename row_generator< DIMS >::RowIndexType; - using ValueType = T; - using array_t = typename row_generator< DIMS >::array_t; - using value_type = std::pair< std::pair< RowIndexType, ColumnIndexType >, T >; - - // halo may in future become a DIM-size array to iterate in arbitrary shapes - const RowIndexType halo; ///< number of points per dimension to iterate around - const ValueType diagonal_value; ///< value to be emitted when the object has moved to the diagonal - const ValueType non_diagonal_value; ///< value to emit outside of the diagonal - - /** - * @brief Construct a new \c matrix_generator_iterator object, setting the current row as \p row - * and emitting \p diag if the iterator has moved on the diagonal, \p non_diag otherwise. 
- * - * @param sizes array with the sizes along the dimensions - * @param row current row to initialize the matrix on - * @param _halo halo of points to iterate around; must be > 0 - * @param diag value to emit when on the diagonal - * @param non_diag value to emit outside the diagonal - */ - matrix_generator_iterator( const array_t & sizes, RowIndexType row, RowIndexType _halo, ValueType diag, ValueType non_diag ) : - row_generator< DIMS >( sizes, row ), halo( _halo ), diagonal_value( diag ), non_diagonal_value( non_diag ) { - if( halo <= 0 ) { - throw std::invalid_argument( "halo should be higher than 0" ); - } - for( const auto i : sizes ) { - if( i < static_cast< RowIndexType >( 2 * halo + 1 ) ) { - throw std::invalid_argument( "Iteration halo goes beyond system sizes" ); - } - } - current_values.first.first = row; - update_column_max_values(); - reset_all_columns(); - current_values.first.second = this->coords_to_rowcol( col_coords ); - current_values.second = v(); - } - - matrix_generator_iterator( const matrix_generator_iterator & o ) = default; - - matrix_generator_iterator( matrix_generator_iterator && o ) = default; - - /** - * @brief Increments the iterator by moving coordinates to the next (row, column) to iterate on. - * - * This operator internally increments the columns coordinates until wrap-around, when it increments - * the row coordinates and resets the column coordinates to the first possible columns; this column coordinate - * depends on the row coordinates according to the dimensions iteration order and on the parameter \p halo. 
- * - * @return matrix_generator_iterator& \c this object, with the updated state - */ - matrix_generator_iterator< DIMS, T > & operator++() { - bool must_rewind = increment_column(); - if( must_rewind ) { - this->increment_row(); - // after changing row, we must find the first non-zero column - reset_all_columns(); - current_values.first.first = this->coords_to_rowcol( this->row_coords ); - update_column_max_values(); - } - // trigger column update after row update, as a row update - // triggers a column update - current_values.first.second = this->coords_to_rowcol( col_coords ); - current_values.second = this->v(); - return *this; - } - - /** - * @brief Operator to compare \c this against \p o and return whether they differ. - * - * @param o object to compare \c this against - * @return true of the row or the column is different between \p o and \c this - * @return false if both row and column of \p o and \c this are equal - */ - bool operator!=( const matrix_generator_iterator< DIMS, T > & o ) const { - if( o.i() != this->i() ) { - return true; - } - return o.j() != this->j(); - } - - /** - * @brief Operator to compare \c this against \p o and return whether they are equal. - * - * @param o object to compare \c this against - * @return true of the row or the column is different between \p o and \c this - * @return false if both row and column of \p o and \c this are equal - */ - bool operator==( const matrix_generator_iterator< DIMS, T > & o ) const { - return o.i() == this->i() && o.j() == this->j(); - } - - /** - * @brief Operator returning the triple to directly access row, column and element values. - * - * Useful when building the matrix by copying the triple of coordinates and value, - * like for the BSP1D backend. - */ - const value_type & operator*() const { - return current_values; - } - - /** - * @brief Returns current row. - */ - inline RowIndexType i() const { - return current_values.first.first; - } - - /** - * @brief Returns current column. 
- */ - inline ColumnIndexType j() const { - return current_values.first.second; - } - - /** - * @brief Returns the current matrix value. - * - * @return ValueType #diagonal_value if \code row == column \endcode (i.e. if \code this-> \endcode - * #i() \code == \endcode \code this-> \endcode #j()), #non_diagonal_value otherwise - */ - inline ValueType v() const { - return j() == i() ? diagonal_value : non_diagonal_value; - } - - private: - // offsets w.r.t. rows - array_t col_coords; ///< coordinates corresponding to current column - array_t column_max_values; ///< maximum values for the column coordinates, to stop column increment - //// and reset the column coordinates - value_type current_values; ///< triple storing the current value for row, column and matrix element - - /** - * @brief Updates the maximum values each column coordinate can reach, according to the row coordinates. - * - * To be called after each row coordinates update. - */ - void update_column_max_values() { - for( std::size_t i { 0 }; i < column_max_values.size(); i++ ) { - column_max_values[ i ] = std::min( this->physical_sizes[ i ] - 1, this->row_coords[ i ] + halo ); - } - } - - /** - * @brief Resets the value of column dimension \p dim to the first possible value. - * - * The final value of #col_coords[dim] depends on the current row (#row_coords) and on the \p halo - * and is \f$ max(0, \f$ #row_coords \f$[dim])\f$. - * - * @param dim the dimension to reset - */ - void reset_column_coords( std::size_t dim ) { - // cannot use std::max because row_coords is unsigned and can wrap-around - col_coords[ dim ] = this->row_coords[ dim ] <= halo ? 0 : ( this->row_coords[ dim ] - halo ); - } - - /** - * @brief resets all values in #col_coords to the initial coordinates, - * iterating from on the current row. 
- */ - void reset_all_columns() { - for( std::size_t i { 0 }; i < col_coords.size(); i++ ) { - reset_column_coords( i ); - } - } - - /** - * @brief Increment the column according to the iteration order, thus resetting the column coordinates - * when the last possible column value for the current row has been reached. - * - * @return true if the column coordinates have been reset, and thus also the row must be incremented - * @return false if the column coordinates - */ - bool increment_column() { - bool rewind; - typename array_t::size_type i { 0 }; - do { - typename array_t::value_type & col = col_coords[ i ]; - // must rewind dimension if the column offset is already at the max value - // or if the column coordinates are already at the max value - rewind = ( col == column_max_values[ i ] ); - if( rewind ) { - // col = this->row_coords[i] == 0 ? 0 : this->row_coords[i] - (halo); - reset_column_coords( i ); - } else { - ++col; - } - ++i; - } while( rewind && i < col_coords.size() ); - - // if we change z, then we also must reset x and y; if only y, we must reset x, and so on - return rewind; - } - }; - - // =============================================================== - - /** - * @brief Class to generate the coarsening matrix of an underlying \p DIMS -dimensional system. - * - * This class coarsens a finer system to a coarser system by projecting each input value (column), - * espressed in finer coordinates, to an output (row) value espressed in coarser coordinates. - * The coarser sizes are assumed to be row_generator#physical_sizes, while the finer sizes are here - * stored inside #finer_sizes. - * - * The corresponding refinement matrix is obtained by transposing the coarsening matrix. 
- * - * @tparam DIMS number of dimensions of the system - * @tparam T type of matrix values - */ - template< std::size_t DIMS, typename T = double > - struct coarsener_generator_iterator : public row_generator< DIMS > { - - using RowIndexType = typename row_generator< DIMS >::RowIndexType; - using ColumnIndexType = typename row_generator< DIMS >::RowIndexType; - using ValueType = T; - using array_t = typename row_generator< DIMS >::array_t; - using value_type = std::pair< std::pair< RowIndexType, ColumnIndexType >, T >; - - // the sizes to project from - const array_t finer_sizes; ///< the size of the finer system (columns) - array_t steps; ///< array of steps, i.e. how much each column coordinate (finer system) must be - //// incremented when incrementing the row coordinates; is is the ration between - //// #finer_sizes and row_generator#physical_sizes - - /** - * @brief Construct a new \c coarsener_generator_iterator object from the coarser and finer sizes, - * setting its row at \p _current_row and the column at the corresponding value. - * - * Each finer size must be an exact multiple of the corresponding coarser size, otherwise the - * construction will throw an exception. 
- * - * @param _coarser_sizes sizes of the coarser system (rows) - * @param _finer_sizes sizes of the finer system (columns) - * @param _current_row row (in the coarser system) to set the iterator on - */ - coarsener_generator_iterator( const array_t & _coarser_sizes, const array_t & _finer_sizes, RowIndexType _current_row ) : - row_generator< DIMS >( _coarser_sizes, _current_row ), finer_sizes( _finer_sizes ), steps( { 0 } ) { - for( std::size_t i { 0 }; i < DIMS; i++ ) { - // finer size MUST be an exact multiple of coarser_size - typename array_t::value_type step { _finer_sizes[ i ] / _coarser_sizes[ i ] }; - if( step == 0 || finer_sizes[ i ] / step != this->physical_sizes[ i ] ) { - throw std::invalid_argument( std::string( "finer size " - "of " - "dimension" - " " ) + - std::to_string( i ) + - std::string( "is not an exact multiple of coarser " - "size" ) ); - } - steps[ i ] = step; - } - current_values.first.first = _current_row; - current_values.first.second = coords_to_finer_col(); - current_values.second = v(); - } - - coarsener_generator_iterator( const coarsener_generator_iterator & o ) = default; - - coarsener_generator_iterator( coarsener_generator_iterator && o ) = default; - - /** - * @brief Increments the row and the column according to the respective physical sizes, - * thus iterating onto the coarsening matrix coordinates. - * - * @return \code *this \endcode, i.e. the same object with the updates row and column - */ - coarsener_generator_iterator< DIMS, T > & operator++() { - this->increment_row(); - current_values.first.first = this->coords_to_rowcol( this->row_coords ); - current_values.first.second = coords_to_finer_col(); - current_values.second = v(); - return *this; - } - - /** - * @brief Returns whether \c this and \p o differ. 
- */ - bool operator!=( const coarsener_generator_iterator< DIMS, T > & o ) const { - if( this->i() != o.i() ) { - return true; - } - return this->j() != o.j(); - } - - /** - * @brief Returns whether \c this and \p o are equal. - */ - bool operator==( const coarsener_generator_iterator< DIMS, T > & o ) const { - return this->i() == o.i() && this->j() == o.j(); - } - - /** - * @brief Operator returning the triple to directly access row, column and element values. - * - * Useful when building the matrix by copying the triple of coordinates and value, - * like for the BSP1D backend. - */ - const value_type & operator*() const { - return current_values; - } - - /** - * @brief Returns the current row, according to the coarser system. - */ - inline RowIndexType i() const { - return current_values.first.first; - } - - /** - * @brief Returns the current column, according to the finer system. - */ - inline ColumnIndexType j() const { - return current_values.first.second; - } - - /** - * @brief Returns always 1, as the coarsening keeps the same value. - */ - inline ValueType v() const { - return static_cast< ValueType >( 1 ); - } - - private: - value_type current_values; ///< triple storing the current value for row, column and matrix element - - /** - * @brief Returns the row coordinates converted to the finer system, to compute - * the column value. 
- */ - ColumnIndexType coords_to_finer_col() const { - ColumnIndexType row { 0 }; - ColumnIndexType s { 1 }; - for( typename array_t::size_type i { 0 }; i < this->row_coords.size(); i++ ) { - s *= steps[ i ]; - row += s * this->row_coords[ i ]; - s *= this->physical_sizes[ i ]; - } - return row; - } - }; - - } // end namespace algorithms - -} // end namespace grb - -namespace std { - - /** - * Specialises the standard STL iterator traits for - * #grb::algorithms::matrix_generator_iterator - */ - template< size_t DIMS, typename T > - class iterator_traits< - grb::algorithms::matrix_generator_iterator< DIMS, T > - > { - - private: - - typedef grb::algorithms::matrix_generator_iterator< DIMS, T > SelfType; - - - public: - - typedef typename SelfType::ValueType value_type; - typedef const value_type * pointer; - typedef const value_type & reference; - typedef size_t difference_type; - typedef forward_iterator_tag iterator_category; - - }; - - template< size_t DIMS, typename T > - class iterator_traits< - grb::algorithms::coarsener_generator_iterator< DIMS, T > - > { - - private: - - typedef grb::algorithms::coarsener_generator_iterator< DIMS, T > SelfType; - - - public: - - typedef typename SelfType::ValueType value_type; - typedef const value_type * pointer; - typedef const value_type & reference; - typedef size_t difference_type; - typedef forward_iterator_tag iterator_category; - - }; - -} // end namespace std - -#endif // _H_GRB_ALGORITHMS_NDIM_MATRIX_BUILDERS - diff --git a/include/graphblas/algorithms/hpcg/red_black_gauss_seidel.hpp b/include/graphblas/algorithms/hpcg/red_black_gauss_seidel.hpp deleted file mode 100644 index 718e5015c..000000000 --- a/include/graphblas/algorithms/hpcg/red_black_gauss_seidel.hpp +++ /dev/null @@ -1,129 +0,0 @@ - -/* - * Copyright 2021 Huawei Technologies Co., Ltd. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file red_black_gauss_seidel.hpp - * @author Alberto Scolari (alberto.scolari@huawei.com) - * @brief Contains the routines to perform a forward-backward pass of a Red-Black Gauss-Seidel smoother. - * @date 2021-04-30 - */ - -#ifndef _H_GRB_ALGORITHMS_RED_BLACK_GAUSS_SEIDEL -#define _H_GRB_ALGORITHMS_RED_BLACK_GAUSS_SEIDEL - -#include - -#include - - -namespace grb { - namespace algorithms { - namespace internal { - - /** - * @brief Runs a single step of Red-Black Gauss-Seidel for a specific color. - * - * @tparam IOType type of result and intermediate vectors used during computation - * @tparam NonzeroType type of matrix values - * @tparam Ring the ring of algebraic operators zero-values - * - * @param[in] A the system matrix - * @param[in] A_diagonal a vector storing the diagonal elements of \p A - * @param[in] r the residual - * @param[in,out] x the initial solution to start from, and where the smoothed solution is stored to - * @param[out] smoother_temp a vector for temporary values - * @param[in] color_mask the mask of colors to filter the rows to smooth - * @param[in] ring the ring to perform the operations on - * @return grb::RC::SUCCESS if the algorithm could correctly terminate, the error code of the first - * unsuccessful operation otherwise - */ - template< typename IOType, typename NonzeroType, class Ring > - grb::RC __rbgs_single_step( const grb::Matrix< NonzeroType > & A, - const grb::Vector< IOType > & A_diagonal, - const grb::Vector< IOType > & r, - grb::Vector< IOType > & x, - grb::Vector< IOType > & smoother_temp, - 
const grb::Vector< bool > & color_mask, - const Ring & ring ) { - RC ret { SUCCESS }; - ret = ret ? ret : grb::set( smoother_temp, 0 ); - - // acc_temp[mask] = A[mask] * x[mask] - ret = ret ? ret : grb::mxv< grb::descriptors::safe_overlap >( smoother_temp, color_mask, A, x, ring ); - assert( ret == SUCCESS ); - - // TODO internal issue #201 - // Replace below with masked calls: - // x[mask] = r[mask] - smoother_temp[mask] + x[mask] .* diagonal[mask] - // x[mask] = x[maks] ./ diagonal[mask] - ret = ret ? ret : - grb::eWiseLambda( - [ &x, &r, &smoother_temp, &color_mask, &A_diagonal ]( const size_t i ) { - // if the mask was properly initialized, the check on the mask value is unnecessary; - // nonetheless, it is left not to violate the semantics of RBGS in case also the false values - // had been initialized (in which case the check is fundamental); if only true values were initialized, - // we expect CPU branch prediction to neutralize the branch cost - if( color_mask[ i ] ) { - IOType d = A_diagonal[ i ]; - IOType v = r[ i ] - smoother_temp[ i ] + x[ i ] * d; - x[ i ] = v / d; - } - }, - color_mask, x, r, smoother_temp, A_diagonal ); - assert( ret == SUCCESS ); - return ret; - } - - /** - * @brief Runs a single forward and backward pass of Red-Black Gauss-Seidel smoothing on the system stored in \p data. - * - * This routine performs a forward and a backward step of Red-Black Gauss-Seidel for each color stored in \p data.color_masks. - * Color stored inside this container are assumed to be mutually exclusive and to cover all rows of the solution vector<\b>, - * and no check is performed to ensure these assumptions hold. Hence, it is up to user logic to generate and pass correct - * coloring information. Otherwise, \b no guarantees hold on the result. 
- * - * @tparam IOType type of result and intermediate vectors used during computation - * @tparam NonzeroType type of matrix values - * @tparam Ring the ring of algebraic operators zero-values - * - * @param data \ref system_data data structure with relevant inpus and outputs: system matrix, initial solution, - * residual, system matrix colors, temporary vectors - * @param[in] ring the ring to perform the operations on - * @return grb::RC::SUCCESS if the algorithm could correctly terminate, the error code of the first - * unsuccessful operation otherwise - */ - template< typename IOType, typename NonzeroType, class Ring > - grb::RC red_black_gauss_seidel( system_data< IOType, NonzeroType > & data, const Ring & ring ) { - RC ret { SUCCESS }; - // forward step - std::vector< grb::Vector< bool > >::const_iterator end { data.color_masks.cend() }; - for( std::vector< grb::Vector< bool > >::const_iterator it { data.color_masks.cbegin() }; it != end && ret == SUCCESS; ++it ) { - ret = ret ? ret : __rbgs_single_step( data.A, data.A_diagonal, data.r, data.z, data.smoother_temp, *it, ring ); - } - // backward step - std::vector< grb::Vector< bool > >::const_reverse_iterator rend { data.color_masks.crend() }; - for( std::vector< grb::Vector< bool > >::const_reverse_iterator rit { data.color_masks.crbegin() }; rit != rend && ret == SUCCESS; ++rit ) { - ret = ret ? ret : __rbgs_single_step( data.A, data.A_diagonal, data.r, data.z, data.smoother_temp, *rit, ring ); - } - return ret; - } - - } // namespace internal - } // namespace algorithms -} // namespace grb - -#endif // H_GRB_ALGORITHMS_RED_BLACK_GAUSS_SEIDEL diff --git a/include/graphblas/algorithms/hpcg/single_point_coarsener.hpp b/include/graphblas/algorithms/hpcg/single_point_coarsener.hpp new file mode 100644 index 000000000..e412a630c --- /dev/null +++ b/include/graphblas/algorithms/hpcg/single_point_coarsener.hpp @@ -0,0 +1,324 @@ + +/* + * Copyright 2022 Huawei Technologies Co., Ltd. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file single_point_coarsener.hpp + * @author Alberto Scolari (alberto.scolari@huawei.com) + * Utilities to build the coarsening matrix for an HPCG simulation. + */ + +#ifndef _H_GRB_ALGORITHMS_HPCG_SINGLE_POINT_COARSENER +#define _H_GRB_ALGORITHMS_HPCG_SINGLE_POINT_COARSENER + +#include +#include +#include +#include +#include + +#include +#include + +namespace grb { + namespace algorithms { + + // forward declaration + template< + size_t DIMS, + typename CoordType, + typename ValueType + > class SinglePointCoarsenerBuilder; + + /** + * Iterator class to generate the coarsening matrix for an HPCG simulation. + * + * The coarsening matrix samples a single value from the finer space for every element + * of the coarser space; this value is the first one (i.e. the one with smallest coordinates) + * in the finer sub-space corresponding to each coarser element. + * + * This coarsening method is simple but can lead to unstable results, especially with certain combinations + * of smoothers and partitioning methods. + * + * This iterator is random-access. 
+		 *
+		 * @tparam DIMS number of dimensions
+		 * @tparam CoordType type storing the coordinates and the sizes
+		 * @tparam ValueType type of the nonzero: it must be able to represent 1 (the value to sample
+		 *  the finer value)
+		 */
+		template<
+			size_t DIMS,
+			typename CoordType,
+			typename ValueType
+		> struct SinglePointCoarsenerIterator {
+
+			friend SinglePointCoarsenerBuilder< DIMS, CoordType, ValueType >;
+
+			using RowIndexType = CoordType; ///< numeric type of rows
+			using ColumnIndexType = CoordType; ///< numeric type of columns
+			using LinearSystemType = grb::utils::multigrid::LinearizedNDimSystem< CoordType, grb::utils::multigrid::ArrayVectorStorage< DIMS, CoordType > >;
+			using LinearSystemIterType = typename LinearSystemType::Iterator;
+			using SelfType = SinglePointCoarsenerIterator< DIMS, CoordType, ValueType >;
+			using ArrayType = std::array< CoordType, DIMS >;
+
+			/**
+			 * Value the iterator points to: it stores the current row and column
+			 * and always reports 1 as the nonzero value.
+			 */
+			struct _HPCGValueGenerator {
+
+				// the enclosing iterator writes _i and _j directly when it moves
+				friend SelfType;
+
+				_HPCGValueGenerator(
+					RowIndexType i,
+					ColumnIndexType j
+				) noexcept :
+					_i( i ),
+					_j( j ) {}
+
+				_HPCGValueGenerator( const _HPCGValueGenerator & ) = default;
+
+				_HPCGValueGenerator & operator=( const _HPCGValueGenerator & ) = default;
+
+				inline RowIndexType i() const {
+					return _i;
+				}
+				inline ColumnIndexType j() const {
+					return _j;
+				}
+				inline ValueType v() const {
+					return static_cast< ValueType >( 1 );
+				}
+
+			private:
+				RowIndexType _i;
+				ColumnIndexType _j;
+			};
+
+			// interface for std::random_access_iterator
+			using iterator_category = std::random_access_iterator_tag;
+			using value_type = _HPCGValueGenerator;
+			// must be an actual pointer type: operator->() returns the address of the
+			// stored value, and std::iterator_traits exposes this alias to generic code
+			using pointer = const value_type *;
+			using reference = const value_type &;
+			using difference_type = typename LinearSystemIterType::difference_type;
+
+			SinglePointCoarsenerIterator( const SelfType & o ) = default;
+
+			SinglePointCoarsenerIterator( SelfType && o ) = default;
+
+			SelfType & operator=( const SelfType & ) = default;
+
+			SelfType & operator=( SelfType && ) = default;
+
+			/**
+			 * Advances \c this by 1 in constant time.
+			 */
+			SelfType & operator++() noexcept {
+				(void)++_sys_iter;
+				update_coords();
+				return *this;
+			}
+
+			/**
+			 * Advances \c this by \p offset in constant time.
+			 */
+			SelfType & operator+=( size_t offset ) {
+				_sys_iter += offset;
+				update_coords();
+				return *this;
+			}
+
+			/**
+			 * Computes the difference between \c this and \p o as integer.
+			 */
+			difference_type operator-( const SelfType & o ) const {
+				return this->_sys_iter - o._sys_iter;
+			}
+
+			/**
+			 * Returns whether \c this and \p o differ.
+			 */
+			bool operator!=( const SelfType & o ) const {
+				return this->_sys_iter != o._sys_iter;
+			}
+
+			/**
+			 * Returns whether \c this and \p o are equal.
+			 */
+			bool operator==( const SelfType & o ) const {
+				return ! this->operator!=( o );
+			}
+
+			/**
+			 * Returns a reference to the value (row, column, nonzero) at the current position.
+			 */
+			reference operator*() const {
+				return _val;
+			}
+
+			/**
+			 * Returns the address of the value (row, column, nonzero) at the current position.
+			 */
+			pointer operator->() const {
+				return &_val;
+			}
+
+			/**
+			 * Returns the current row, within the coarser system.
+			 */
+			inline RowIndexType i() const {
+				return _val.i();
+			}
+
+			/**
+			 * Returns the current column, within the finer system.
+			 */
+			inline ColumnIndexType j() const {
+				return _val.j();
+			}
+
+			/**
+			 * Returns always 1, as the coarsening keeps the same value.
+			 */
+			inline ValueType v() const {
+				return _val.v();
+			}
+
+		private:
+			const LinearSystemType * _lin_sys; ///< coarser system, not owned
+			const ArrayType * _steps; ///< per-dimension finer/coarser ratios, not owned
+			LinearSystemIterType _sys_iter; ///< position inside the coarser system
+			value_type _val; ///< row and column matching the current position
+
+			/**
+			 * Construct a new SinglePointCoarsenerIterator object starting from the LinearizedNDimSystem
+			 * object \p system describing the \b coarser system and the \b ratios \p steps between each finer and
+			 * the corresponding coarser dimension.
+			 *
+			 * Only the addresses of \p system and \p steps are stored, so both objects
+			 * must outlive the iterator.
+			 *
+			 * @param system LinearizedNDimSystem object describing the coarser system
+			 * @param steps ratios per dimension between finer and coarser system
+			 */
+			SinglePointCoarsenerIterator(
+				const LinearSystemType & system,
+				const ArrayType & steps
+			) noexcept :
+				_lin_sys( &system ),
+				_steps( &steps ),
+				_sys_iter( _lin_sys->begin() ),
+				_val( 0, 0 )
+			{
+				update_coords();
+			}
+
+			/**
+			 * Refreshes the stored row and column after the underlying iterator moved:
+			 * the row is the linear position reported by the system iterator, the
+			 * column is the matching index in the finer system.
+			 */
+			void update_coords() noexcept {
+				_val._i = _sys_iter->get_linear_position();
+				_val._j = coarse_rows_to_finer_col();
+			}
+
+			/**
+			 * Returns the row coordinates converted to the finer system, to compute
+			 * the column value.
+			 */
+			ColumnIndexType coarse_rows_to_finer_col() const noexcept {
+				ColumnIndexType finer = 0;
+				// s is the linear stride of dimension i in the finer system, already
+				// scaled by the step of dimension i (coarse coordinate -> finer coordinate)
+				ColumnIndexType s = 1;
+				for( size_t i = 0; i < DIMS; i++ ) {
+					s *= ( *_steps )[ i ];
+					finer += s * _sys_iter->get_position()[ i ];
+					// step * size of the system is the finer extent of dimension i
+					s *= _lin_sys->get_sizes()[ i ];
+				}
+				return finer;
+			}
+		};
+
+		/**
+		 * Builder object to create iterators that generate a coarsening matrix.
+		 *
+		 * It is a facility to generate beginning and end iterators and abstract the logic away from users.
+		 *
+		 * @tparam DIMS number of dimensions
+		 * @tparam CoordType type storing the coordinates and the sizes
+		 * @tparam ValueType type of the nonzero: it must be able to represent 1 (the value to sample
+		 *  the finer value)
+		 */
+		template<
+			size_t DIMS,
+			typename CoordType,
+			typename ValueType
+		> class SinglePointCoarsenerBuilder {
+		public:
+			using ArrayType = std::array< CoordType, DIMS >;
+			using Iterator = SinglePointCoarsenerIterator< DIMS, CoordType, ValueType >;
+			using SelfType = SinglePointCoarsenerBuilder< DIMS, CoordType, ValueType >;
+
+			/**
+			 * Construct a new SinglePointCoarsenerBuilder object from the sizes of finer system
+			 * and those of the coarser system; finer sizes must be an exact multiple of coarser sizes,
+			 * otherwise an exception is raised.
+			 *
+			 * @throws std::invalid_argument if, along any dimension, the coarser size is 0 or the
+			 *  finer size is not an exact multiple (by a factor of at least 2) of the coarser size
+			 */
+			SinglePointCoarsenerBuilder(
+				const ArrayType & _finer_sizes,
+				const ArrayType & _coarser_sizes
+			) :
+				system( _coarser_sizes.begin(),
+					_coarser_sizes.end() )
+			{
+				for( size_t i = 0; i < DIMS; i++ ) {
+					// std::ldiv has undefined behavior for a zero divisor: reject it explicitly
+					if( _coarser_sizes[ i ] == 0 ) {
+						throw std::invalid_argument( std::string( "coarser size of dimension " ) + std::to_string( i ) + std::string( " is zero" ) );
+					}
+					// finer size MUST be an exact multiple of coarser_size
+					// (a quotient below 2 also rejects equal or smaller finer sizes)
+					std::ldiv_t ratio = std::ldiv( _finer_sizes[ i ], _coarser_sizes[ i ] );
+					if( ratio.quot < 2 || ratio.rem != 0 ) {
+						throw std::invalid_argument( std::string( "finer size of dimension " ) + std::to_string( i ) + std::string( " is not an exact multiple of coarser size" ) );
+					}
+					steps[ i ] = ratio.quot;
+				}
+			}
+
+			// iterators keep pointers to the members of this object,
+			// hence the builder can be neither copied nor moved
+			SinglePointCoarsenerBuilder( const SelfType & ) = delete;
+
+			SinglePointCoarsenerBuilder( SelfType && ) = delete;
+
+			SelfType & operator=( const SelfType & ) = delete;
+
+			SelfType & operator=( SelfType && ) = delete;
+
+			/**
+			 * Returns the size reported by the underlying system, which is built from
+			 * the \b coarser sizes; make_end_iterator() advances by this amount.
+			 */
+			size_t system_size() const {
+				return system.system_size();
+			}
+
+			/**
+			 * Produces a beginning iterator to generate the coarsening matrix.
+			 */
+			Iterator make_begin_iterator() {
+				return Iterator( system, steps );
+			}
+
+			/**
+			 * Produces an end iterator to stop the generation of the coarsening matrix.
+			 */
+			Iterator make_end_iterator() {
+				Iterator result( system, steps );
+				result += system_size(); // do not trigger boundary checks
+				return result;
+			}
+
+		private:
+			const grb::utils::multigrid::LinearizedNDimSystem< CoordType,
+				grb::utils::multigrid::ArrayVectorStorage< DIMS, CoordType > > system;
+
+			///
+			/// array of steps, i.e. how much each column coordinate (finer system) must be
+			/// incremented when incrementing the row coordinates; it is the ratio between
+			/// the finer and the coarser sizes passed to the constructor
+			ArrayType steps;
+		};
+
+	} // namespace algorithms
+} // namespace grb
+#endif // _H_GRB_ALGORITHMS_HPCG_SINGLE_POINT_COARSENER
diff --git a/include/graphblas/algorithms/hpcg/system_builder.hpp b/include/graphblas/algorithms/hpcg/system_builder.hpp
new file mode 100644
index 000000000..84600414c
--- /dev/null
+++ b/include/graphblas/algorithms/hpcg/system_builder.hpp
@@ -0,0 +1,181 @@
+
+/*
+ * Copyright 2022 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @dir include/graphblas/algorithms/hpcg
+ * This folder contains the code specific to the HPCG benchmark implementation: generation of the physical system,
+ * generation of the single point coarsener and coloring algorithm.
+ */
+
+/**
+ * @file system_builder.hpp
+ * @author Alberto Scolari (alberto.scolari@huawei.com)
+ * Utilities to build the system matrix for an HPCG simulation in a generic number of dimensions.
+ */
+
+#ifndef _H_GRB_ALGORITHMS_HPCG_SYSTEM_BUILDER
+#define _H_GRB_ALGORITHMS_HPCG_SYSTEM_BUILDER
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+namespace grb {
+	namespace algorithms {
+
+		/**
+		 * Factory of the iterators producing the nonzeroes of an HPCG system matrix
+		 * for a \p DIMS -dimensional simulation mesh (Fourier-like heat propagation).
+		 *
+		 * @tparam DIMS dimensions of the mesh
+		 * @tparam CoordType type storing the coordinates and sizes of the matrix
+		 * @tparam ValueType nonzero type
+		 */
+		template<
+			size_t DIMS,
+			typename CoordType,
+			typename ValueType
+		> class HPCGSystemBuilder {
+		public:
+			/**
+			 * Functor choosing the nonzero value from the coordinates: the diagonal
+			 * value when row and column coincide, the non-diagonal value otherwise.
+			 */
+			struct HPCGDiagGenerator {
+
+				HPCGDiagGenerator(
+					ValueType diag,
+					ValueType non_diag
+				) noexcept :
+					_diag( diag ),
+					_non_diag( non_diag ) {}
+
+				HPCGDiagGenerator & operator=( const HPCGDiagGenerator & ) = default;
+
+				inline ValueType operator()(
+					const CoordType & i,
+					const CoordType & j
+				) const noexcept {
+					if( j == i ) {
+						return _diag;
+					}
+					return _non_diag;
+				}
+
+				ValueType _diag; ///< value emitted on the diagonal
+				ValueType _non_diag; ///< value emitted outside the diagonal
+			};
+
+			using HaloSystemType = grb::utils::multigrid::LinearizedHaloNDimSystem< DIMS, CoordType >;
+			using Iterator = grb::utils::multigrid::HaloMatrixGeneratorIterator< DIMS,
+				CoordType, ValueType, HPCGDiagGenerator >;
+
+			/**
+			 * Creates the builder from the description of the physical system.
+			 *
+			 * @param sizes sizes along each dimension
+			 * @param halo halo size
+			 * @param diag value along the diagonal, for self-interactions
+			 * @param non_diag value outside the diagonal, for element-element interaction
+			 *
+			 * @throws std::invalid_argument if \p halo is not positive or if any
+			 *  size is smaller than \p halo + 1
+			 */
+			HPCGSystemBuilder(
+				const std::array< CoordType, DIMS > & sizes,
+				CoordType halo,
+				ValueType diag,
+				ValueType non_diag
+			) :
+				_system( sizes, halo ),
+				_diag_generator( diag, non_diag )
+			{
+				if( halo <= 0 ) {
+					throw std::invalid_argument( "halo should be higher than 0" );
+				}
+				// every dimension must be able to host the halo
+				for( size_t dim = 0; dim < DIMS; dim++ ) {
+					if( sizes[ dim ] < halo + 1 ) {
+						throw std::invalid_argument( "Iteration halo goes beyond system sizes" );
+					}
+				}
+			}
+
+			HPCGSystemBuilder( const HPCGSystemBuilder & ) = default;
+
+			HPCGSystemBuilder( HPCGSystemBuilder && ) = default;
+
+			HPCGSystemBuilder & operator=( const HPCGSystemBuilder & ) = default;
+
+			HPCGSystemBuilder & operator=( HPCGSystemBuilder && ) = default;
+
+			/**
+			 * Number of elements of the mesh.
+			 */
+			size_t system_size() const {
+				return _system.base_system_size();
+			}
+
+			/**
+			 * Total number of neighbors for all elements of the mesh.
+			 */
+			size_t num_neighbors() const {
+				return _system.halo_system_size();
+			}
+
+			/**
+			 * Returns the HaloSystemType object describing the geometry of the
+			 * simulation mesh.
+			 */
+			const HaloSystemType & get_generator() const {
+				return _system;
+			}
+
+			/**
+			 * Builds the beginning iterator to generate the system matrix.
+			 */
+			Iterator make_begin_iterator() const {
+				return Iterator( _system, _diag_generator );
+			}
+
+			/**
+			 * Builds the end iterator to generate the system matrix: a beginning
+			 * iterator advanced by num_neighbors() positions.
+			 */
+			Iterator make_end_iterator() const {
+				Iterator past_the_end = make_begin_iterator();
+				past_the_end += num_neighbors();
+				return past_the_end;
+			}
+
+			/**
+			 * Returns the value emitted on the diagonal.
+			 */
+			ValueType get_diag_value() const {
+				return _diag_generator._diag;
+			}
+
+			/**
+			 * Returns the value emitted outside the diagonal.
+			 */
+			ValueType get_non_diag_value() const {
+				return _diag_generator._non_diag;
+			}
+
+		private:
+			HaloSystemType _system;
+			HPCGDiagGenerator _diag_generator;
+		};
+
+	} // namespace algorithms
+} // namespace grb
+
+#endif // _H_GRB_ALGORITHMS_HPCG_SYSTEM_BUILDER
diff --git a/include/graphblas/algorithms/hpcg/system_building_utils.hpp b/include/graphblas/algorithms/hpcg/system_building_utils.hpp
index 11adf82c1..37e6da311 100644
--- a/include/graphblas/algorithms/hpcg/system_building_utils.hpp
+++ b/include/graphblas/algorithms/hpcg/system_building_utils.hpp
@@ -1,6 +1,6 @@
 /*
- * Copyright 2021 Huawei Technologies Co., Ltd.
+ * Copyright 2022 Huawei Technologies Co., Ltd.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -16,140 +16,430 @@
  */
 /**
- * @file hpcg_system_building_utils.hpp
+ * @file system_building_utils.hpp
  * @author Alberto Scolari (alberto.scolari@huawei.com)
- * @brief Utilities to build an antire system for HPCG simulations in an arbitrary number of dimensions.
- * @date 2021-04-30
+ * Utilities to build an entire system for HPCG simulations in an arbitrary number of dimensions.
  */
-#ifndef _H_GRB_ALGORITHMS_SYSTEM_BUILDING_UTILS
-#define _H_GRB_ALGORITHMS_SYSTEM_BUILDING_UTILS
+#ifndef _H_GRB_ALGORITHMS_HPCG_SYSTEM_BUILDING_UTILS
+#define _H_GRB_ALGORITHMS_HPCG_SYSTEM_BUILDING_UTILS
+#include
 #include
 #include
+#include
 #include
+#include
 #include
+#include
+#include
 #include
+#include
+#include
+#include
-#include "hpcg_data.hpp"
-#include "matrix_building_utils.hpp"
-
+#include "average_coarsener.hpp"
+#include "greedy_coloring.hpp"
+#include "single_point_coarsener.hpp"
+#include "system_builder.hpp"
 namespace grb {
 	namespace algorithms {
 		/**
-		 * @brief Divide each value of \p source by \p step and store the result into \p destination.
+		 * Container of the parameters for HPCG simulation generation: physical system characteristics and
+		 * coarsening information.
+		 *
+		 * @tparam DIMS dimensions of the physical system
+		 * @tparam NonzeroType type of matrix values
+		 */
+		template<
+			size_t DIMS,
+			typename NonzeroType
+		> struct HPCGSystemParams {
+			std::array< size_t, DIMS > physical_sys_sizes; ///< sizes of the finest system, one per dimension
+			size_t halo_size; ///< halo forwarded to each system generator
+			NonzeroType diag_value; ///< value forwarded to each generator for the diagonal
+			NonzeroType non_diag_value; ///< value forwarded to each generator outside the diagonal
+			size_t min_phys_size; ///< smallest size along a dimension for which a level is still generated
+			size_t max_levels; ///< highest coarsening level generated (level 0 is the finest system)
+			size_t coarsening_step; ///< divisor applied to every size to move to the next coarser level
+		};
+
+		/**
+		 * Builds all required system generators for an entire multi-grid simulation; each generator
+		 * corresponds to a level of the HPCG system multi-grid, with increasingly coarser sizes, and can
+		 * generate the system matrix of that level. All pieces of information required to build
+		 * the levels are stored in \p params.
* - * @tparam DIMS size of passed arrays + * @tparam DIMS number of dimensions + * @tparam CoordType type storing the coordinates and the sizes + * @tparam NonzeroType type of the nonzero + * @param[in] params structure with the parameters to build an entire HPCG simulation + * @param[out] mg_generators std::vector of HPCGSystemBuilder, one per layer of the multi-grid */ - template< std::size_t DIMS > - void divide_array( std::array< std::size_t, DIMS > & destination, const std::array< std::size_t, DIMS > & source, std::size_t step ) { - for( std::size_t i { 0 }; i < destination.size(); i++ ) { - destination[ i ] = source[ i ] / step; + template< + size_t DIMS, + typename CoordType, + typename NonzeroType + > void hpcg_build_multigrid_generators( + const HPCGSystemParams< DIMS, NonzeroType > & params, + std::vector< grb::algorithms::HPCGSystemBuilder< DIMS, CoordType, NonzeroType > > & mg_generators + ) { + static_assert( DIMS > 0, "DIMS must be > 0" ); + + size_t const current_size = std::accumulate( params.physical_sys_sizes.cbegin(), + params.physical_sys_sizes.cend(), 1UL, std::multiplies< size_t >() ); + if( current_size > std::numeric_limits< CoordType >::max() ) { + throw std::domain_error( "CoordType cannot store the matrix coordinates" ); + } + size_t min_physical_size = *std::min_element( params.physical_sys_sizes.cbegin(), + params.physical_sys_sizes.cend() ); + if( min_physical_size < params.min_phys_size ) { + throw std::domain_error( "the initial system is too small" ); + } + + std::array< CoordType, DIMS > coord_sizes; + // type-translate coordinates + std::copy( params.physical_sys_sizes.cbegin(), params.physical_sys_sizes.cend(), + coord_sizes.begin() ); + + // generate hierarchical coarseners + for( size_t coarsening_level = 0UL; min_physical_size >= params.min_phys_size + && coarsening_level <= params.max_levels; coarsening_level++ ) { + + // build generator + mg_generators.emplace_back( coord_sizes, params.halo_size, params.diag_value, + 
params.non_diag_value ); + + // prepare for new iteration + min_physical_size /= params.coarsening_step; + std::for_each( coord_sizes.begin(), coord_sizes.end(), [ ¶ms ]( CoordType & v ) { + std::ldiv_t ratio = std::ldiv( v, params.coarsening_step ); + if( ratio.rem != 0 ) { + throw std::invalid_argument( std::string( "system size " ) + std::to_string( v ) + + std::string( " is not divisible by " ) + std::to_string( params.coarsening_step ) ); + } + v = ratio.quot; + } ); } } /** - * @brief Container of the parameter for HPCG simulation generation: physical system characteristics and - * coarsening information. + * Populates the system matrix \p M out of the builder \p system_generator. * - * @tparam DIMS dimensions of the physical system - * @tparam T type of matrix values + * The matrix \p M must have been previously allocated and initialized with the proper sizes, + * as this procedure only populates it with the nozeroes generated by \p system_generator. + * + * This function takes care of the parallelism by employing random-access iterators and by + * \b parallelizing the generation across multiple processes in case of distributed execution. 
*/ - template< std::size_t DIMS, typename T > - struct hpcg_system_params { - const std::array< std::size_t, DIMS > & physical_sys_sizes; - const std::size_t halo_size; - const std::size_t num_colors; - const T diag_value; - const T non_diag_value; - const std::size_t min_phys_size; - const std::size_t max_levels; - const std::size_t coarsening_step; - }; + template< + size_t DIMS, + typename CoordType, + typename NonzeroType, + typename Logger + > grb::RC hpcg_populate_system_matrix( + const grb::algorithms::HPCGSystemBuilder< DIMS, CoordType, NonzeroType > & system_generator, + grb::Matrix< NonzeroType > & M, Logger & logger + ) { + logger << "- generating system matrix..."; + typename grb::algorithms::HPCGSystemBuilder< DIMS, CoordType, NonzeroType >::Iterator + begin( system_generator.make_begin_iterator() ); + typename grb::algorithms::HPCGSystemBuilder< DIMS, CoordType, NonzeroType >::Iterator + end( system_generator.make_end_iterator() ); + grb::utils::partition_iteration_range_on_procs( spmd<>::nprocs(), spmd<>::pid(), + system_generator.num_neighbors(), begin, end ); + return buildMatrixUnique( M, begin, end, grb::IOMode::PARALLEL ); + } /** - * @brief Generates an entire HPCG problem according to the parameters in \p params , storing it in \p holder . + * Populates the coarsening data \p coarsener (in particular the coarsening matrix) from the + * builder of the finer system \p finer_system_generator and that of the coarser system + * \p coarser_system_generator. 
* - * @tparam DIMS dimensions of the system - * @tparam T type of matrix values - * @param holder std::unique_ptr to store the HPCG problem into - * @param params parameters container to build the HPCG problem - * @return grb::SUCCESS if every GraphBLAS operation (to generate vectors and matrices) succeeded, - * otherwise the first unsuccessful return value + * This function takes care of parallelizing the generation by using a random-access iterator + * to generate the coarsening matrix and by distributing the generation across nodes + * of a distributed system (if any). + * @tparam IterBuilderType type of the matrix builder, either SinglePointCoarsenerBuilder + * or AverageCoarsenerBuilder + * @tparam DIMS number of dimensions + * @tparam CoordType type storing the coordinates and the sizes + * @tparam NonzeroType type of the nonzero + * + * @param finer_system_generator object generating the finer system + * @param coarser_system_generator object generating the finer system + * @param coarsener structure with the matrix to populate */ - template< std::size_t DIMS, typename T = double > - grb::RC build_hpcg_system( std::unique_ptr< grb::algorithms::hpcg_data< T, T, T > > & holder, hpcg_system_params< DIMS, T > & params ) { - // n is the system matrix size - const std::size_t n { std::accumulate( params.physical_sys_sizes.cbegin(), params.physical_sys_sizes.cend(), 1UL, std::multiplies< std::size_t >() ) }; + template< + typename IterBuilderType, + size_t DIMS, + typename CoordType, + typename IOType, + typename NonzeroType + > grb::RC hpcg_populate_coarsener_any_builder( + const grb::algorithms::HPCGSystemBuilder< DIMS, CoordType, NonzeroType > & finer_system_generator, + const grb::algorithms::HPCGSystemBuilder< DIMS, CoordType, NonzeroType > & coarser_system_generator, + CoarseningData< IOType, NonzeroType > & coarsener + ) { + static_assert( DIMS > 0, "DIMS must be > 0" ); - grb::algorithms::hpcg_data< T, T, T > * data { new grb::algorithms::hpcg_data< T, T, T 
>( n ) }; + const std::array< CoordType, DIMS > & finer_sizes = finer_system_generator.get_generator().get_sizes(); + const std::array< CoordType, DIMS > & coarser_sizes = coarser_system_generator.get_generator().get_sizes(); + const size_t finer_size = finer_system_generator.system_size(); + const size_t coarser_size = coarser_system_generator.system_size(); - assert( ! holder ); // should be empty - holder = std::unique_ptr< grb::algorithms::hpcg_data< T, T, T > >( data ); + if( coarser_size >= finer_size ) { + throw std::invalid_argument( "wrong sizes" ); + } - // initialize the main (=uncoarsened) system matrix - grb::RC rc { grb::SUCCESS }; - rc = build_ndims_system_matrix< DIMS, T >( data->A, params.physical_sys_sizes, params.halo_size, params.diag_value, params.non_diag_value ); + size_t const rows = coarser_size; + size_t const cols = finer_size; - if( rc != grb::SUCCESS ) { - std::cerr << "Failure to generate the initial system (" << toString( rc ) << ") of size " << n << std::endl; - return rc; + assert( finer_sizes.size() == coarser_sizes.size() ); + + grb::Matrix< NonzeroType > & M = coarsener.coarsening_matrix; + if( grb::nrows( M ) != rows || grb::ncols( M ) != cols ) { + throw std::invalid_argument( "wrong matrix dimensions: matrix should be rectangular" + " with rows == and cols == " ); } - // set values of diagonal vector - set( data->A_diagonal, params.diag_value ); - - build_static_color_masks( data->color_masks, n, params.num_colors ); - - // initialize coarsening with additional pointers and dimensions copies to iterate and divide - grb::algorithms::multi_grid_data< T, T > ** coarser = &data->coarser_level; - assert( *coarser == nullptr ); - std::array< std::size_t, DIMS > coarser_sizes; - std::array< std::size_t, DIMS > previous_sizes( params.physical_sys_sizes ); - std::size_t min_physical_coarsened_size { *std::min_element( previous_sizes.cbegin(), previous_sizes.cend() ) / params.coarsening_step }; - // coarsen system sizes into 
coarser_sizes - divide_array( coarser_sizes, previous_sizes, params.coarsening_step ); - std::size_t coarsening_level = 0UL; - - // generate linked list of hierarchical coarseners - while( min_physical_coarsened_size >= params.min_phys_size && coarsening_level < params.max_levels ) { - assert( *coarser == nullptr ); - // compute size of finer and coarser matrices - std::size_t coarser_size { std::accumulate( coarser_sizes.cbegin(), coarser_sizes.cend(), 1UL, std::multiplies< std::size_t >() ) }; - std::size_t previous_size { std::accumulate( previous_sizes.cbegin(), previous_sizes.cend(), 1UL, std::multiplies< std::size_t >() ) }; - // build data structures for new level - grb::algorithms::multi_grid_data< double, double > * new_coarser { new grb::algorithms::multi_grid_data< double, double >( coarser_size, previous_size ) }; - // install coarser level immediately to cleanup in case of build error - *coarser = new_coarser; - // initialize coarsener matrix, system matrix and diagonal vector for the coarser level - rc = build_ndims_coarsener_matrix< DIMS >( new_coarser->coarsening_matrix, coarser_sizes, previous_sizes ); - if( rc != grb::SUCCESS ) { - std::cerr << "Failure to generate coarsening matrix (" << toString( rc ) << ")." << std::endl; - return rc; + IterBuilderType coarsener_builder( finer_sizes, coarser_sizes ); + typename IterBuilderType::Iterator begin( coarsener_builder.make_begin_iterator() ), end( coarsener_builder.make_end_iterator() ); + grb::utils::partition_iteration_range_on_procs( spmd<>::nprocs(), spmd<>::pid(), coarsener_builder.system_size(), begin, end ); + return buildMatrixUnique( M, begin, end, grb::IOMode::PARALLEL ); + } + + /** + * Populates a coarsener that samples one element every \a 2^DIMS . 
+ */ + template< + size_t DIMS, + typename CoordType, + typename IOType, + typename NonzeroType + > grb::RC hpcg_populate_coarsener( + const grb::algorithms::HPCGSystemBuilder< DIMS, CoordType, NonzeroType > & finer_system_generator, + const grb::algorithms::HPCGSystemBuilder< DIMS, CoordType, NonzeroType > & coarser_system_generator, + CoarseningData< IOType, NonzeroType > & coarsener + ) { + return hpcg_populate_coarsener_any_builder< + grb::algorithms::SinglePointCoarsenerBuilder< DIMS, CoordType, NonzeroType > >( + finer_system_generator, coarser_system_generator, coarsener ); + } + + /** + * Populates a coarsener that averages over \a 2^DIMS elements. + */ + template< + size_t DIMS, + typename CoordType, + typename IOType, + typename NonzeroType + > grb::RC hpcg_populate_coarsener_avg( + const grb::algorithms::HPCGSystemBuilder< DIMS, CoordType, NonzeroType > & finer_system_generator, + const grb::algorithms::HPCGSystemBuilder< DIMS, CoordType, NonzeroType > & coarser_system_generator, + CoarseningData< IOType, NonzeroType > & coarsener + ) { + return hpcg_populate_coarsener_any_builder< + grb::algorithms::hpcg::AverageCoarsenerBuilder< DIMS, CoordType, NonzeroType > >( + finer_system_generator, coarser_system_generator, coarsener ); + } + + namespace internal { + + /** + * Store row values based on their color into separate vectors. + * + * @param[in] row_colors for each row (corresponding to a vector position) its color + * @param[in] num_colors number of colors, i.e. 
/**
 * Stores the row indices into separate vectors according to their color.
 *
 * @tparam CoordType type of colors and row indices
 *
 * @param[in] row_colors for each row (corresponding to a vector position) its color
 * @param[in] num_colors number of colors, i.e. max across all values in \p row_colors + 1
 * @param[out] per_color_rows for each position \a i it stores an std::vector with all rows
 *  of color \a i inside \p row_colors, in increasing row order; it is resized to
 *  \p num_colors and rows are \b appended to its inner vectors
 *
 * @throws std::domain_error if \p CoordType cannot represent all row indices
 * @throws std::out_of_range if a color in \p row_colors is not in [0, \p num_colors )
 */
template< typename CoordType > void hpcg_split_rows_by_color(
	const std::vector< CoordType > & row_colors,
	size_t num_colors, std::vector< std::vector< CoordType > > & per_color_rows
) {
	const size_t num_rows = row_colors.size();
	// every row index is stored as a CoordType: refuse inputs it cannot index
	if( num_rows > 0
		&& num_rows - 1 > static_cast< size_t >( std::numeric_limits< CoordType >::max() ) ) {
		throw std::domain_error( "CoordType cannot store the row indices" );
	}

	per_color_rows.resize( num_colors );
	// iterate with size_t: a CoordType counter narrower than size_t would wrap
	// around and never reach row_colors.size()
	for( size_t row = 0; row < num_rows; row++ ) {
		// the cast also maps negative colors (signed CoordType) out of range
		const size_t color = static_cast< size_t >( row_colors[ row ] );
		if( color >= num_colors ) {
			throw std::out_of_range( "row color exceeds the number of colors" );
		}
		per_color_rows[ color ].push_back( static_cast< CoordType >( row ) );
	}
}
/**
 * Utility class implementing an iterator that always returns a \c true value.
 *
 * It is used to build mask vectors via buildVectorUnique(), where
 * all the non-zero positions are \c true.
 *
 * The iterator only tracks a position; the value it points to is a single constant
 * shared by all instances, so references and pointers obtained by dereferencing
 * remain valid after the iterator is destroyed.
 *
 * @tparam CoordType type of the internal coordinate
 */
template< typename CoordType > struct true_iter {

	using self_t = true_iter< CoordType >;
	using iterator_category = std::random_access_iterator_tag;
	using value_type = bool;
	using pointer = const bool *;
	using reference = const bool &;
	using difference_type = long;

	true_iter() = delete;

	/** Builds an iterator at position \p first. */
	true_iter( CoordType first ) : index( first ) {}

	true_iter( const self_t & ) = default;

	// usable because the class holds no const data member
	self_t & operator=( const self_t & ) = default;

	/** Two iterators are equal if and only if they are at the same position. */
	bool operator==( const self_t & other ) const noexcept {
		return this->index == other.index;
	}

	bool operator!=( const self_t & other ) const noexcept {
		return this->index != other.index;
	}

	/** Advances by one position. */
	self_t & operator++() noexcept {
		++index;
		return *this;
	}

	/** Advances by \p increment positions. */
	self_t & operator+=( size_t increment ) noexcept {
		index += increment;
		return *this;
	}

	/** Distance between the position of this iterator and that of \p other. */
	difference_type operator-( const self_t & other ) const noexcept {
		return static_cast< difference_type >( this->index - other.index );
	}

	pointer operator->() const noexcept {
		return &true_value();
	}

	reference operator*() const noexcept {
		return true_value();
	}

private:

	/**
	 * The one \c true value whose address is handed outside; it has static
	 * storage so that it outlives any iterator.
	 */
	static const bool & true_value() noexcept {
		static const bool value = true;
		return value;
	}

	CoordType index;
};
+ * + * Colors are built in the range [0, \p colors ), with the mask for color 0 being the array + * of values true in the positions \f$ [0, colors, 2*colors, ..., floor((system_size - 1)/colors) * color] \f$, + * for color 1 in the positions \f$ [1, 1+colors, 1+2*colors, ..., floor((system_size - 2)/colors) * color] \f$, + * etc.; the mask for color 0 is in \c masks[0], for color 1 in \c masks[1] and so on. + * + * The vectors stored in \p masks (assumed empty at the beginning) are built inside the function and populated + * only with the \c true values, leading to sparse vectors. This saves on storage space and allows + * GraphBLAS routines (like \c eWiseLambda() ) to iterate only on true values. + * + * @param masks output vector of color masks + * @param matrix_size size of the system matrix + * @param colors numbers of colors masks to build; it must be < \p matrix_size + * @return grb::RC the success value returned when trying to build the vector + */ + grb::RC hpcg_build_static_color_masks( + size_t matrix_size, + const std::vector< std::vector< size_t > > & per_color_rows, + std::vector< grb::Vector< bool > > & masks + ) { + if( ! 
masks.empty() ) { + throw std::invalid_argument( "vector of masks is expected to be empty" ); + } + for( size_t i = 0; i < per_color_rows.size(); i++ ) { + const std::vector< size_t > & rows = per_color_rows[ i ]; +#ifdef _DEBUG + { + std::cout << "\ncolor " << i << std::endl; + for( size_t row : rows ) { + std::cout << row << " "; + } + std::cout << std::endl; + } +#endif + masks.emplace_back( matrix_size ); + grb::Vector< bool > & output_mask = masks.back(); + std::vector< size_t >::const_iterator begin = rows.cbegin(); + std::vector< size_t >::const_iterator end = rows.cend(); + // partition_iteration_range( rows.size(), begin, end ); + grb::RC rc = grb::buildVectorUnique( output_mask, begin, end, + true_iter< size_t >( 0 ), true_iter< size_t >( rows.size() ), IOMode::SEQUENTIAL ); + if( rc != SUCCESS ) { + std::cerr << "error while creating output mask for color " << i << ": " << toString( rc ) << std::endl; + return rc; + } +#ifdef _DEBUG + { + std::cout << "mask color " << i << std::endl; + size_t count = 0; + for( const auto & v : output_mask ) { + std::cout << v.first << " "; + count++; + if( count > 20 ) + break; + } + std::cout << std::endl; + } +#endif + } + return grb::SUCCESS; + } + + } // namespace internal + + /** + * Populates the smoothing information \p smoothing_info for a Red-Black Gauss-Seidel smoother + * to be used for an HPCG simulation. The information about the mesh to smooth are passed + * via \p system_generator. + * + * Steps for the smoother generation: + * + * 1. the mesh elements (the system matrix rows) are colored via a greedy algorithm, so that + * no two neighboring elements have the same color; this phase colors the \b entire system + * and cannot be parallelized, even in a distributed system, since the current coloring algorithm + * is \b not distributed + * 2. rows are split according to their color + * 3. 
for each color \a c the color mask with the corresponding rows is generated: + * a dedicated sparse grb::Vector signals the rows of color \a c (by marking them as \c true + * ); such a vector allows updating all rows of color \a c in \b parallel when used as a mask + * to an mxv() operation (as done during smoothing) + */ + template< + size_t DIMS, + typename CoordType, + typename NonzeroType, + typename Logger + > grb::RC hpcg_populate_smoothing_data( + const grb::algorithms::HPCGSystemBuilder< DIMS, CoordType, NonzeroType > & system_generator, + SmootherData< NonzeroType > & smoothing_info, Logger & logger + ) { + grb::RC rc = set( smoothing_info.A_diagonal, system_generator.get_diag_value() ); + if( rc != grb::SUCCESS ) { + logger << "error: " << __LINE__ << std::endl; + return rc; + } + + logger << "- running coloring heuristics..."; + std::vector< CoordType > colors, color_counters; + hpcg_greedy_color_ndim_system( system_generator.get_generator(), colors, color_counters ); + std::vector< std::vector< CoordType > > per_color_rows; + internal::hpcg_split_rows_by_color( colors, color_counters.size(), per_color_rows ); + colors.clear(); + colors.shrink_to_fit(); + if( rc != grb::SUCCESS ) { + logger << "error: " << __LINE__ << std::endl; + return rc; } - return rc; + logger << "- found " << color_counters.size() << " colors," + << " generating color masks..."; + return internal::hpcg_build_static_color_masks( system_generator.system_size(), + per_color_rows, smoothing_info.color_masks ); } } // namespace algorithms } // namespace grb -#endif // _H_GRB_ALGORITHMS_SYSTEM_BUILDING_UTILS +#endif // _H_GRB_ALGORITHMS_HPCG_SYSTEM_BUILDING_UTILS diff --git a/include/graphblas/algorithms/multigrid/multigrid_building_utils.hpp b/include/graphblas/algorithms/multigrid/multigrid_building_utils.hpp new file mode 100644 index 000000000..9c95b50cc --- /dev/null +++ b/include/graphblas/algorithms/multigrid/multigrid_building_utils.hpp @@ -0,0 +1,101 @@ + +/* + * Copyright 2022 
/**
 * Allocates all the levels for an entire multi-grid simulation for the multi-grid v-cycle,
 * the coarsener and the smoother. This routine just allocates and initializes the data structures,
 * but does \b not populate them, which depends on the specific algorithms.
 *
 * Thanks to the templating, this routine is meant to be independent from the specific algorithm
 * chosen for the simulation, but simply implements the logic to move from one level (finer)
 * to the next one (coarser). To be used with any data structure, the constructor of each
 * structure must meet a certain interface, as explained in the following.
 *
 * Note: structures are allocated on the heap and managed via an std::unique_ptr, so that
 * growing the output vectors never moves (copies) the structures themselves.
 *
 * All sizes are validated \b before any allocation: if an exception is thrown because of
 * invalid sizes, the output vectors are left untouched.
 *
 * @tparam MGInfoType type holding the information to run the chosen multi-grid algorithm:
 *  its constructor must take in input the telemetry controller, the coarsening level
 *  (0 to \p mg_sizes.size() - 1) and the size of the system matrix for that level
 * @tparam CoarsenerInfoType type holding the information for the coarsener;
 *  its constructor must take in input the size of the finer system matrix and that of
 *  the coarser system matrix (in this order)
 * @tparam SmootherInfoType type holding the information for the smoother;
 *  its constructor must take in input the size of the system matrix for that level
 * @tparam TelControllerType telemetry controller type
 *
 * @param system_levels system data for each level; one element per level is appended
 * @param coarsener_levels at position \a i of this vector, data to coarsen from level \a i
 *  (system size \p mg_sizes [i] ) to level \a i+1 (system size \p mg_sizes [i+1] )
 * @param smoother_levels smoother data for each level; one element per level is appended
 * @param mg_sizes sizes of the system matrix for each level of the multi-grid; they must
 *  be strictly decreasing
 * @param tt telemetry controller, forwarded to the constructor of \p MGInfoType
 *
 * @throws std::invalid_argument if \p mg_sizes is empty or not strictly decreasing
 */
template<
	typename MGInfoType,
	typename CoarsenerInfoType,
	typename SmootherInfoType,
	typename TelControllerType
> void multigrid_allocate_data(
	std::vector< std::unique_ptr< MGInfoType > > & system_levels,
	std::vector< std::unique_ptr< CoarsenerInfoType > > & coarsener_levels,
	std::vector< std::unique_ptr< SmootherInfoType > > & smoother_levels,
	const std::vector< size_t > & mg_sizes,
	const TelControllerType & tt
) {
	if( mg_sizes.empty() ) {
		throw std::invalid_argument( "at least one size should be available" );
	}
	// validate all sizes upfront, so that no level is allocated for invalid inputs
	for( size_t i = 1; i < mg_sizes.size(); i++ ) {
		if( mg_sizes[ i ] >= mg_sizes[ i - 1 ] ) {
			throw std::invalid_argument( "system sizes not monotonically decreasing" );
		}
	}

	const size_t num_levels = mg_sizes.size();
	system_levels.reserve( system_levels.size() + num_levels );
	smoother_levels.reserve( smoother_levels.size() + num_levels );
	coarsener_levels.reserve( coarsener_levels.size() + num_levels - 1 );

	// each new object is owned by a unique_ptr *before* being handed to the
	// vector: passing a raw pointer to emplace_back() would leak the object
	// if the vector failed to grow
	for( size_t i = 0; i < num_levels; i++ ) {
		const size_t level_size = mg_sizes[ i ];
		if( i > 0 ) {
			// coarsener from the previous (finer) level to this (coarser) one
			coarsener_levels.push_back( std::unique_ptr< CoarsenerInfoType >(
				new CoarsenerInfoType( mg_sizes[ i - 1 ], level_size ) ) );
		}
		system_levels.push_back( std::unique_ptr< MGInfoType >(
			new MGInfoType( tt, i, level_size ) ) );
		smoother_levels.push_back( std::unique_ptr< SmootherInfoType >(
			new SmootherInfoType( level_size ) ) );
	}
}
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @dir include/graphblas/algorithms/multigrid + * This folder contains the implementation of the algorithms for a basic multi-grid V-cycle solver: + * Conjugate Gradient with multi-grid, a basic V-cycle multi-grid implementation, a single-matrix coarsener/ + * prolonger, an implementation of a Red-Black Gauss-Seidel smoother. These algorithms can be composed + * via their specific runners, as in the example HPCG benchmark. + */ + +/** + * @file multigrid_cg.hpp + * @author Alberto Scolari (alberto.scolari@huawei.com) + * Algorithm and runner for a Conjugate Gradient solver augmented with a multi-grid solver. + */ + +#ifndef _H_GRB_ALGORITHMS_MULTIGRID_CG +#define _H_GRB_ALGORITHMS_MULTIGRID_CG + +#include +#include + +#include +#include +#include + +#include "multigrid_data.hpp" + +namespace grb { + namespace algorithms { + + /** + * Data structure to store the vectors specific to the Conjugate Gradient algorithm, + * including inputs, outputs and temporary vectors. + * + * Input and output vectors use the same naming scheme as for the corresponding mathematics, + * where the equation to solve is conventionally written as \f$ A x = b \f$. 
+ * + * @tparam IOType type of values of the vectors for intermediate results + * @tparam NonzeroType type of the values stored inside the system matrix #A + * @tparam InputType type of the values of the right-hand side vector #b + */ + template< + typename IOType, + typename NonzeroType, + typename InputType + > struct MultiGridCGData { + + grb::Vector< InputType > b; ///< Right-side vector of known values. + grb::Vector< IOType > u; ///< temporary vectors (typically for CG exploration directions) + grb::Vector< IOType > p; ///< temporary vector (typically for x refinements coming from the multi-grid run) + grb::Vector< IOType > x; ///< system solution being refined over the iterations: it us up to the user + ///< to set the initial solution value to something meaningful + + /** + * Construct a new \c MultiGridCGData object by building its vectors with size \p sys_size. + */ + MultiGridCGData( size_t sys_size ) : + b( sys_size ), + u( sys_size ), + p( sys_size ), + x( sys_size ) {} + + grb::RC init_vectors( IOType zero ) { + grb::RC rc = grb::set( u, zero ); + rc = rc ? rc : grb::set( p, zero ); + return rc; + } + }; + + /** + * Structure for the output information of a CG run. + */ + template< typename ResidualType > struct CGOutInfo { + size_t iterations; ///< number of iterations performed + ResidualType norm_residual; ///< norm of the final residual + }; + + /** + * Runner object incapsulating all information to run a Conjugate Gradient solver + * with multi-grid. + * + * The multi-grid runner must be constructed separately (depending on the chosen algorithm) + * and move-transfered during construction of this runner. + * The \p MultiGridrunnerType must implement a functional interface whose input (from CG) + * is the structure with the system information for one level of the grid. 
+ * + * @tparam MGCGTypes types container for algebraic information (IOType, NonzeroType, + * InputType, ResidualType, Ring, Minus) + * @tparam MultiGridrunnerType type for the multi-grid runner object + * @tparam descr descriptors with statically-known data for computation and containers + * @tparam DbgOutputStreamType type for the debugging stream, i.e. the stream to trace simulation + * results alongside execution; the default type #grb::utils::telemetry::OutputStreamOff disables + * all output at compile time + */ + template< + typename MGCGTypes, + typename MultiGridRunnerType, + typename TelControllerType, + Descriptor descr = descriptors::no_operation, + typename DbgOutputStreamType = grb::utils::telemetry::OutputStreamOff + > struct MultiGridCGRunner : public grb::utils::telemetry::Timeable< TelControllerType > { + + // algebraic types + using IOType = typename MGCGTypes::IOType; + using NonzeroType = typename MGCGTypes::NonzeroType; + using InputType = typename MGCGTypes::InputType; + using ResidualType = typename MGCGTypes::ResidualType; + using Ring = typename MGCGTypes::Ring; + using Minus = typename MGCGTypes::Minus; + // input types for simulation (CG and MG) + using HPCGInputType = MultiGridCGData< IOType, NonzeroType, InputType >; + using MGRunnerType = MultiGridRunnerType; + + static_assert( std::is_default_constructible< Ring >::value, + "cannot construct the Ring with default values" ); + static_assert( std::is_default_constructible< Minus >::value, + "cannot construct the Minus operator with default values" ); + static_assert( std::is_move_constructible< MultiGridRunnerType >::value, + "cannot construct the Multi-Grid runner by move" ); + + Ring ring; ///< algebraic ring to be used + Minus minus; ///< minus operator to be used + bool with_preconditioning = true; ///< whether preconditioning is enabled + size_t max_iterations = 10; ///< max number of allowed iterations for CG: + ///< after that, the solver is halted and the result achieved so far 
returned + ResidualType tolerance = ring.template getZero< ResidualType >(); ///< ratio + ///< between initial residual and current residual that halts the solver + ///< if reached, for the solution is to be considered "good enough" + MultiGridRunnerType & mg_runner; ///< runner object for MG + DbgOutputStreamType dbg_logger; ///< logger to trace execution + + /** + * Construct a new MultiGridCGRunner object with the required MG runner. + * + * The debug logger is unavailable. + */ + MultiGridCGRunner( + const TelControllerType & tt, + MultiGridRunnerType & _mg_runner + ) : + grb::utils::telemetry::Timeable< TelControllerType >( tt ), + mg_runner( _mg_runner ), + dbg_logger() + { + static_assert( std::is_default_constructible< DbgOutputStreamType >::value ); + } + + /** + * Construct a new MultiGridCGRunner object with the required MG runner and + * the user-given debug logger. + */ + MultiGridCGRunner( + const TelControllerType & tt, + MultiGridRunnerType & _mg_runner, + DbgOutputStreamType & _dbg_logger + ) : + grb::utils::telemetry::Timeable< TelControllerType >( tt ), + mg_runner( _mg_runner ), + dbg_logger( _dbg_logger ) {} + + /** + * Functional operator to invoke a full CG-MG computation. + * + * @param grid_base base level of the grid + * @param cg_data data for CG + * @param out_info output information from CG + * @return grb::RC indicating the success or the error occurred + */ + inline grb::RC operator()( + typename MultiGridRunnerType::MultiGridInputType & grid_base, + MultiGridCGData< IOType, NonzeroType, InputType > & cg_data, + CGOutInfo< ResidualType > & out_info + ) { + this->start(); + grb::RC ret = multigrid_conjugate_gradient( cg_data, grid_base, out_info ); + this->stop(); + return ret; + } + + /** + * Conjugate Gradient algorithm implementation augmented by a Multi-Grid solver, + * inspired to the High Performance Conjugate Gradient benchmark. 
+ * + * This CG solver calls the MG solver at the beginning of each iteration to improve + * the initial solution via the residual (thanks to the smoother) and then proceeds with + * the standard CG iteration. + * + * Failures of GraphBLAS operations are handled by immediately stopping the execution and by returning + * the failure code. + * + * @param cg_data data for the CG solver only + * @param grid_base base (i.e., finer) level of the multi-grid, with the information of the physical system + * @param out_info solver output information + * @return grb::RC SUCCESS in case of succesful run + */ + grb::RC multigrid_conjugate_gradient( + HPCGInputType & cg_data, + typename MultiGridRunnerType::MultiGridInputType & grid_base, + CGOutInfo< ResidualType > & out_info + ) { + const grb::Matrix< NonzeroType > & A = grid_base.A; // system matrix + grb::Vector< IOType > & r = grid_base.r; // residual vector + grb::Vector< IOType > & z = grid_base.z; // pre-conditioned residual vector + grb::Vector< IOType > & x = cg_data.x; // initial (and final) solution + const grb::Vector< InputType > & b = cg_data.b; // right-side value + grb::Vector< IOType > & p = cg_data.p; // direction vector + grb::Vector< IOType > & Ap = cg_data.u; // temp vector + grb::RC ret = SUCCESS; + + const IOType io_zero = ring.template getZero< IOType >(); + ret = ret ? ret : grb::set( Ap, io_zero ); + ret = ret ? ret : grb::set( r, io_zero ); + ret = ret ? ret : grb::set( p, io_zero ); + + ret = ret ? ret : grb::set( p, x ); + // Ap = A * x + ret = ret ? ret : grb::mxv< descr >( Ap, A, x, ring ); + assert( ret == SUCCESS ); + // r = b - Ap + ret = ret ? ret : grb::eWiseApply< descr >( r, b, Ap, minus ); + assert( ret == SUCCESS ); + + const ResidualType residual_zero = ring.template getZero< ResidualType >(); + ResidualType norm_residual = residual_zero; + // norm_residual = r' * r + ret = ret ? 
ret : grb::dot< descr >( norm_residual, r, r, ring ); + assert( ret == SUCCESS ); + + // compute sqrt to avoid underflow + norm_residual = std::sqrt( norm_residual ); + + // initial norm of residual + out_info.norm_residual = norm_residual; + const ResidualType norm_residual_initial = norm_residual; + ResidualType old_r_dot_z = residual_zero, r_dot_z = residual_zero, beta = residual_zero; + size_t iter = 0; + + dbg_logger << ">>> start p: " << p << std::endl; + dbg_logger << ">>> start Ap: " << Ap << std::endl; + dbg_logger << ">>> start r: " << r << std::endl; + + do { + dbg_logger << "========= iteration " << iter << " =========" << std::endl; + + if( with_preconditioning ) { + ret = ret ? ret : mg_runner( grid_base ); + assert( ret == SUCCESS ); + } else { + // z = r + ret = ret ? ret : grb::set( z, r ); + assert( ret == SUCCESS ); + } + dbg_logger << ">>> initial z: " << z << std::endl; + + if( iter == 0 ) { + // p = z + ret = ret ? ret : grb::set< descr >( p, z ); + assert( ret == SUCCESS ); + // r_dot_z = r' * z + ret = ret ? ret : grb::dot< descr >( r_dot_z, r, z, ring ); + assert( ret == SUCCESS ); + } else { + old_r_dot_z = r_dot_z; + // r_dot_z = r' * z + r_dot_z = ring.template getZero< ResidualType >(); + ret = ret ? ret : grb::dot< descr >( r_dot_z, r, z, ring ); + assert( ret == SUCCESS ); + + beta = r_dot_z / old_r_dot_z; + // Ap = 0 + ret = ret ? ret : grb::set< descr >( Ap, io_zero ); + assert( ret == SUCCESS ); + // Ap += beta * p + ret = ret ? ret : grb::eWiseMul< descr >( Ap, beta, p, ring ); + assert( ret == SUCCESS ); + // Ap = Ap + z + ret = ret ? ret : grb::eWiseApply< descr >( Ap, Ap, z, ring.getAdditiveOperator() ); + assert( ret == SUCCESS ); + // p = Ap + std::swap( Ap, p ); + assert( ret == SUCCESS ); + } + dbg_logger << ">>> middle p: " << p << std::endl; + + // Ap = A * p + ret = ret ? ret : grb::set< descr >( Ap, io_zero ); + ret = ret ? 
ret : grb::mxv< descr >( Ap, A, p, ring ); + assert( ret == SUCCESS ); + dbg_logger << ">>> middle Ap: " << Ap << std::endl; + + // pAp = p' * Ap + ResidualType pAp = ring.template getZero< ResidualType >(); + ret = ret ? ret : grb::dot< descr >( pAp, Ap, p, ring ); + assert( ret == SUCCESS ); + + ResidualType alpha = r_dot_z / pAp; + // x += alpha * p + ret = ret ? ret : grb::eWiseMul< descr >( x, alpha, p, ring ); + assert( ret == SUCCESS ); + dbg_logger << ">>> end x: " << x << std::endl; + + // r += - alpha * Ap + ret = ret ? ret : grb::eWiseMul< descr >( r, -alpha, Ap, ring ); + assert( ret == SUCCESS ); + dbg_logger << ">>> end r: " << r << std::endl; + + // residual = r' * r + norm_residual = ring.template getZero< ResidualType >(); + ret = ret ? ret : grb::dot< descr >( norm_residual, r, r, ring ); + assert( ret == SUCCESS ); + + norm_residual = std::sqrt( norm_residual ); + + ++iter; + out_info.iterations = iter; + out_info.norm_residual = norm_residual; + } while( iter < max_iterations && norm_residual / norm_residual_initial > tolerance + && ret == SUCCESS ); + + return ret; + } + }; + + } // namespace algorithms +} // namespace grb + +#endif // _H_GRB_ALGORITHMS_MULTIGRID_CG diff --git a/include/graphblas/algorithms/multigrid/multigrid_data.hpp b/include/graphblas/algorithms/multigrid/multigrid_data.hpp new file mode 100644 index 000000000..a0a76191e --- /dev/null +++ b/include/graphblas/algorithms/multigrid/multigrid_data.hpp @@ -0,0 +1,101 @@ + +/* + * Copyright 2022 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file multigrid_data.hpp + * @author Alberto Scolari (alberto.scolari@huawei.com) + * Data structure definition to store the information of a single multi-grid level. + */ + +#ifndef _H_GRB_ALGORITHMS_HPCG_DATA +#define _H_GRB_ALGORITHMS_HPCG_DATA + +#include +#include + +#include +#include + +namespace grb { + + namespace algorithms { + + /** + * This data structure stores information for a \b single multi-grid level. This information + * dependes exclusively on the size of the underlying physical system. + + * + * Internal ALP/GraphBLAS containers are initialized to the proper size, + * but their values are \b not initialized as this depends on the specific algorithm chosen + * for the multi-grid solver. Populating them is user's task. + * + * @tparam IOType Type of values of the vectors for intermediate results + * @tparam NonzeroType Type of the values stored inside the system matrix \p A + * and the coarsening matrix #Ax_finer + * @tparam TelControllerType type of the controller for telemetry, to compile-time (de)activate + * the (mg_sm)_stopwatches + */ + template< + typename IOType, + typename NonzeroType, + typename TelControllerType + > struct MultiGridData { + + grb::utils::telemetry::Stopwatch< TelControllerType > mg_stopwatch; ///< stopwatch + ///< to measure the execution time in MG + grb::utils::telemetry::Stopwatch< TelControllerType > sm_stopwatch; ///< stopwatch + ///< to measure the execution time in the smoother + const size_t level; ///< level of the grid (0 for the finest physical system) + const size_t system_size; ///< size of the system, i.e. side of the #A system matrix + grb::Matrix< NonzeroType > A; ///< system matrix + grb::Vector< IOType > z; ///< multi-grid solution + grb::Vector< IOType > r; ///< residual + + /** + * Construct a new multigrid data object from level information and system size. 
+ */ + MultiGridData( + const TelControllerType & _tt, + size_t _level, + size_t sys_size + ) : + mg_stopwatch( _tt ), + sm_stopwatch( _tt ), + level( _level ), + system_size( sys_size ), + A( sys_size, sys_size ), + z( sys_size ), + r( sys_size ) {} + + // for safety, disable copy semantics + MultiGridData( const MultiGridData< IOType, NonzeroType, TelControllerType > & o ) = delete; + + MultiGridData< IOType, NonzeroType, TelControllerType > & operator=( + const MultiGridData< IOType, NonzeroType, TelControllerType > & ) = delete; + + grb::RC init_vectors( IOType zero ) { + grb::RC rc = grb::set( z, zero ); + rc = rc ? rc : grb::set( r, zero ); + return rc; + } + }; + + } // namespace algorithms + +} // namespace grb + +#endif // _H_GRB_ALGORITHMS_HPCG_DATA diff --git a/include/graphblas/algorithms/multigrid/multigrid_v_cycle.hpp b/include/graphblas/algorithms/multigrid/multigrid_v_cycle.hpp new file mode 100644 index 000000000..bd9a393a4 --- /dev/null +++ b/include/graphblas/algorithms/multigrid/multigrid_v_cycle.hpp @@ -0,0 +1,240 @@ + +/* + * Copyright 2022 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file multigrid_v_cycle.hpp + * @author Alberto Scolari (alberto.scolari@huawei.com) + * This file contains the routines for multi-grid solution refinement, including the main routine + * and those for coarsening and refinement of the tentative solution. 
+ */ + +#ifndef _H_GRB_ALGORITHMS_MULTIGRID_V_CYCLE +#define _H_GRB_ALGORITHMS_MULTIGRID_V_CYCLE + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "multigrid_data.hpp" + + +namespace grb { + namespace algorithms { + + /** + * Callable object to invoke the V-cycle multi-grid algorithm, which also requires + * a smoother and a coarsener object. + * + * It is built by transferring into it the state of both the smoother and the coarsener, + * in order to avoid use-after-free issues. + * + * @tparam MGTypes types container for algebraic information (IOType, NonzeroType, Ring, Minus) + * @tparam MGSmootherType type of the smoother runner, with prescribed methods for the various + * smoothing steps + * @tparam CoarsenerType type of the coarsener runner, with prescribed methods for coarsening + * @tparam descr descriptors with statically-known data for computation and containers + * @tparam DbgOutputStreamType type for the debugging stream, i.e. the stream to trace simulation + * results alongside execution; the default type #grb::utils::telemetry::OutputStreamOff disables + * all output at compile time + */ + template< + typename MGTypes, + typename MGSmootherType, + typename CoarsenerType, + typename TelControllerType, + Descriptor descr = descriptors::no_operation, + typename DbgOutputStreamType = grb::utils::telemetry::OutputStreamOff + > struct MultiGridRunner { + + using self_t = MultiGridRunner< MGTypes, MGSmootherType, CoarsenerType, TelControllerType, descr >; + // algebraic types + using IOType = typename MGTypes::IOType; + using NonzeroType = typename MGTypes::NonzeroType; + using Ring = typename MGTypes::Ring; + using Minus = typename MGTypes::Minus; + using MultiGridInputType = MultiGridData< IOType, NonzeroType, TelControllerType >; + // runners + using SmootherRunnerType = MGSmootherType; + using CoarsenerRunnerType = CoarsenerType; + + static_assert( std::is_default_constructible< Ring >::value, + "cannot 
construct the Ring with default values" ); + static_assert( std::is_default_constructible< Minus >::value, + "cannot construct the Minus operator with default values" ); + + // check the interface between HPCG and MG match + static_assert( std::is_base_of< typename MGSmootherType::SmootherInputType, MultiGridInputType >::value, + "input type of the Smoother kernel must match the input from Multi-Grid" ); + + MGSmootherType & smoother_runner; ///< object to run the smoother + CoarsenerType & coarsener_runner; ///< object to run the coarsener + DbgOutputStreamType dbg_logger; ///< logger to trace execution + + std::vector< std::unique_ptr< MultiGridInputType > > system_levels; ///< levels of the grid (finest first) + Ring ring; ///< algebraic ring + Minus minus; ///< minus operator + + // operator to extract the reference out of an std::unique_ptr object + struct __extractor { + MultiGridInputType * operator()( + typename std::vector< std::unique_ptr< MultiGridInputType > >::reference & ref ) { + return ref.get(); + } + + const MultiGridInputType * operator()( + typename std::vector< std::unique_ptr< MultiGridInputType > >::const_reference & ref ) const { + return ref.get(); + } + }; + + using __unique_ptr_extractor = grb::utils::IteratorValueAdaptor< + typename std::vector< std::unique_ptr< MultiGridInputType > >::iterator, __extractor >; + + /** + * Construct a new MultiGridRunner object by moving in the state of the pre-built + * smoother and coarsener. + * + * The debug logger is deactivated. + */ + MultiGridRunner( + MGSmootherType & _smoother_runner, + CoarsenerType & _coarsener_runner + ) : + smoother_runner( _smoother_runner ), + coarsener_runner( _coarsener_runner ) + { + static_assert( std::is_default_constructible< DbgOutputStreamType >::value ); + } + + /** + * Construct a new MultiGridRunner object by moving in the state of the pre-built + * smoother and coarsener and with a user-given debug logger. 
+ */ + MultiGridRunner( + MGSmootherType & _smoother_runner, + CoarsenerType & _coarsener_runner, + DbgOutputStreamType & _dbg_logger + ) : + smoother_runner( _smoother_runner ), + coarsener_runner( _coarsener_runner ), + dbg_logger( _dbg_logger ) {} + + /** + * Operator to invoke a full multi-grid run starting from the given level. + */ + inline grb::RC operator()( MultiGridInputType & system ) { + return this->operator()( __unique_ptr_extractor( system_levels.begin() += system.level ), + __unique_ptr_extractor( system_levels.end() ) ); + } + + /** + * Operator to invoke a multi-grid run among given levels. + */ + inline grb::RC operator()( + __unique_ptr_extractor begin, + const __unique_ptr_extractor end + ) { + begin->mg_stopwatch.start(); + grb::RC ret = multi_grid( begin, end ); + begin->mg_stopwatch.stop(); + return ret; + } + + /** + * Multi-grid V cycle implementation to refine a given solution. + * + * A full multi-grid run goes through the following steps: + * + * 1. calls the pre-smoother to improve on the initial solution stored into \p mgiter_begin->z + * 2. coarsens the residual vector + * 3. recursively solves the coarser system + * 4. prolongs the coarser solution into the \p mgiter_begin->z + * 5. further smooths the solution wih a post-smoother call + * + * The algorithm moves across grid levels via the STL-like iterators \p mgiter_begin + * and \p mgiter_end and accesses the grid data via the former (using the operator \c * ): when + * \p mgiter_begin \c == \p mgiter_end , a smoothing round is invoked and the recursion halted. + * + * Failuers of GraphBLAS operations are handled by immediately stopping the execution + * and returning the failure code. 
+ * + * @param mgiter_begin iterator pointing to the current level of the multi-grid + * @param mgiter_end end iterator, indicating the end of the recursion + * @return grb::RC if the algorithm could correctly terminate, the error code of the first + * unsuccessful operation otherwise + */ + grb::RC multi_grid( + __unique_ptr_extractor mgiter_begin, + const __unique_ptr_extractor mgiter_end + ) { + RC ret = SUCCESS; + assert( mgiter_begin != mgiter_end ); + MultiGridInputType & finer_system = *mgiter_begin; + ++mgiter_begin; + + dbg_logger << "mg BEGINNING {" << std::endl; + + // clean destination vector + ret = ret ? ret : grb::set< descr >( finer_system.z, ring.template getZero< IOType >() ); + dbg_logger << ">>> initial r: " << finer_system.r << std::endl; + + if( ! ( mgiter_begin != mgiter_end ) ) { + // compute one round of Gauss Seidel and return + ret = ret ? ret : smoother_runner.nonrecursive_smooth( finer_system ); + assert( ret == SUCCESS ); + dbg_logger << ">>> smoothed z: " << finer_system.z << std::endl; + dbg_logger << "} mg END" << std::endl; + return ret; + } + MultiGridInputType & coarser_system = *mgiter_begin; + + // pre-smoother + ret = ret ? ret : smoother_runner.pre_smooth( finer_system ); + assert( ret == SUCCESS ); + dbg_logger << ">>> pre-smoothed z: " << finer_system.z << std::endl; + + ret = ret ? ret : coarsener_runner.coarsen_residual( finer_system, coarser_system ); + assert( ret == SUCCESS ); + dbg_logger << ">>> coarse r: " << coarser_system.r << std::endl; + + ret = ret ? ret : this->operator()( mgiter_begin, mgiter_end ); + assert( ret == SUCCESS ); + + ret = ret ? ret : coarsener_runner.prolong_solution( coarser_system, finer_system ); + assert( ret == SUCCESS ); + dbg_logger << ">>> prolonged z: " << finer_system.z << std::endl; + + // post-smoother + ret = ret ? 
ret : smoother_runner.post_smooth( finer_system ); + assert( ret == SUCCESS ); + dbg_logger << ">>> post-smoothed z: " << finer_system.z << std::endl; + dbg_logger << "} mg END" << std::endl; + + return ret; + } + }; + + } // namespace algorithms +} // namespace grb + +#endif // _H_GRB_ALGORITHMS_MULTIGRID_V_CYCLE diff --git a/include/graphblas/algorithms/multigrid/red_black_gauss_seidel.hpp b/include/graphblas/algorithms/multigrid/red_black_gauss_seidel.hpp new file mode 100644 index 000000000..3b558e9f1 --- /dev/null +++ b/include/graphblas/algorithms/multigrid/red_black_gauss_seidel.hpp @@ -0,0 +1,244 @@ + +/* + * Copyright 2022 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file red_black_gauss_seidel.hpp + * @author Alberto Scolari (alberto.scolari@huawei.com) + * Contains the routines to perform a forward-backward pass of a Red-Black Gauss-Seidel smoother. + */ + +#ifndef _H_GRB_ALGORITHMS_RED_BLACK_GAUSS_SEIDEL +#define _H_GRB_ALGORITHMS_RED_BLACK_GAUSS_SEIDEL + +#include + +#include + +#include "multigrid_data.hpp" + +namespace grb { + namespace algorithms { + + /** + * Data structures to run the RBGS smoother on a single level of the multi-grid. 
+ */ + template< typename IOType > struct SmootherData { + + grb::Vector< IOType > A_diagonal; ///< vector with the diagonal of #A + grb::Vector< IOType > smoother_temp; ///< for smoother's intermediate results + std::vector< grb::Vector< bool > > color_masks; ///< for color masks + + /** + * Construct a new SmootherData object from the level size. + */ + SmootherData( size_t sys_size ) : + A_diagonal( sys_size ), + smoother_temp( sys_size ) {} + + // for safety, disable copy semantics + SmootherData( const SmootherData & o ) = delete; + + SmootherData & operator=( const SmootherData & ) = delete; + + grb::RC init_vectors( IOType zero ) { + return grb::set( smoother_temp, zero ); + } + }; + + /** + * Runner object for the RBGS smoother, with multiple methods for each type of smoothing step: + * pre-, post- and non-recursive, as invoked during a full run of a multi-grid V-cycle. + * + * It stores the information to smooth each level of the grid, to be initalized separately. + * + * @tparam SmootherTypes container of algebraic tyoes for the smoother (IOType, NonzeroType, Ring) + * @tparam TelControllerType telemetry controller to (de)activate time tracing within passed MultiGridData objects + * @tparam descr descriptors with statically-known data for computation and containers + */ + template< + class SmootherTypes, + typename TelControllerType, + Descriptor descr = descriptors::no_operation + > struct RedBlackGSSmootherRunner { + + // algebraic types + using IOType = typename SmootherTypes::IOType; + using NonzeroType = typename SmootherTypes::NonzeroType; + using Ring = typename SmootherTypes::Ring; + using Minus = typename SmootherTypes::Minus; + using Divide = typename SmootherTypes::Divide; + using SmootherInputType = MultiGridData< IOType, NonzeroType, TelControllerType >; ///< external + ///< input structure + using SmootherDataType = SmootherData< IOType >; ///< smoothing information + ///< and temporary variables (per MG level) + + size_t presmoother_steps = 
1UL; ///< number of pre-smoother steps + size_t postsmoother_steps = 1UL; ///< number of post-smoother steps + size_t non_recursive_smooth_steps = 1UL; ///< number of smoother steps for the last grid level + std::vector< std::unique_ptr< SmootherDataType > > levels; ///< for each grid level, + ///< the smoothing data (finest first) + Ring ring; ///< the algebraic ring + Minus minus; + Divide divide; + + static_assert( std::is_default_constructible< Ring >::value, + "cannot construct the Ring operator with default values" ); + + inline grb::RC pre_smooth( SmootherInputType & data ) { + return run_smoother( data, presmoother_steps ); + } + + inline grb::RC post_smooth( SmootherInputType & data ) { + return run_smoother( data, postsmoother_steps ); + } + + inline grb::RC nonrecursive_smooth( SmootherInputType & data ) { + return run_smoother( data, non_recursive_smooth_steps ); + } + + protected: + /** + * Runs \p smoother_steps iteration of the Red-Black Gauss-Seidel smoother, + * with inputs and outputs stored inside \p data. + * + * This is an internal method called by all user-facing methods, because this specific + * smoother performs all smoothing steps the same way. + */ + grb::RC run_smoother( + SmootherInputType & data, + const size_t smoother_steps + ) { + RC ret = SUCCESS; + + SmootherDataType & smoothing_info = *( levels.at( data.level ).get() ); + + data.sm_stopwatch.start(); + for( size_t i = 0; i < smoother_steps && ret == SUCCESS; i++ ) { + ret = ret ? ret : red_black_gauss_seidel( data, smoothing_info ); + assert( ret == SUCCESS ); + } + data.sm_stopwatch.stop(); + return ret; + } + + /** + * Runs a single step of Red-Black Gauss-Seidel for a specific color. 
+ * + * @param[in,out] data structure with external containers, corresponsign to an MG level: + * vector to smooth, system matrix, residual + * @param[in,out] smoothing_info smoothing-specific information: temporary vectors, color masks + * @return grb::RC::SUCCESS if the algorithm could correctly terminate, the error code of the first + * unsuccessful operation otherwise + */ + grb::RC red_black_gauss_seidel_single_step( + SmootherInputType & data, + SmootherDataType & smoothing_info, + size_t color + ) { + const grb::Matrix< NonzeroType > & A = data.A; + const grb::Vector< IOType > & A_diagonal = smoothing_info.A_diagonal; + const grb::Vector< IOType > & r = data.r; + grb::Vector< IOType > & z = data.z; + grb::Vector< IOType > & smoother_temp = smoothing_info.smoother_temp; + const grb::Vector< bool > & color_mask = smoothing_info.color_masks[ color ]; + + // smoother_temp[color_mask] = A[color_mask] * z[color_mask] + // use the structural descriptors, assuming ONLY the values of the current color are set + // note that if this assumption does not hold, also the following eWiseLambda() is wrong + RC ret = grb::mxv< grb::descriptors::safe_overlap | grb::descriptors::structural >( + smoother_temp, color_mask, A, z, ring ); + assert( ret == SUCCESS ); + + // TODO internal issue #201 + // Replace below with masked calls: + // z[mask] = r[mask] - smoother_temp[mask] + z[mask] .* diagonal[mask] + // z[mask] = z[maks] ./ diagonal[mask] + +// by default use foldl()'s, although eWiseLambda() might be more performing +// TODO: leave this choice for future experimentation +#if defined(RBGS_EWL) + Ring & ri = ring; + Minus & mi = minus; + Divide & di = divide; + + ret = ret ? 
ret : + grb::eWiseLambda( + [ &z, &r, &smoother_temp, &color_mask, &A_diagonal , + &ri, &mi, &di ]( const size_t i ) { + IOType d = A_diagonal[ i ]; + IOType v; + ri.getMultiplicativeOperator().apply( z[ i ], d, v ); + ri.getAdditiveOperator().apply( v, r[ i ], v ); + mi.apply( v, smoother_temp[ i ], v ); + di.apply( v, d, z[ i ] ); + }, + color_mask, z, r, smoother_temp, A_diagonal ); +#else + grb::foldl( z, color_mask, A_diagonal, ring.getMultiplicativeOperator() ); + grb::foldl( z, color_mask, smoother_temp, minus ); + grb::foldl( z, color_mask, r, ring.getAdditiveOperator() ); + grb::foldl( z, color_mask, A_diagonal, divide ); +#endif + assert( ret == SUCCESS ); + return ret; + } + + /** + * Runs a single forward and backward pass of Red-Black Gauss-Seidel smoothing + * on the system stored in \p data. + * + * This routine performs a forward and a backward step of Red-Black Gauss-Seidel for each color + * stored in \p data.color_masks. Colors stored inside this container + * are assumed to be mutually exclusive and to cover all rows of the solution vector<\b>, + * and no check is performed to ensure these assumptions hold. Hence, it is up to user logic + * to pass correct coloring information. Otherwise, \b no guarantees hold on the result. + * + * @param[in,out] data structure with external containers, corresponsign to an MG level: + * vector to smooth, system matrix, residual + * @param[in,out] smoothing_info smoothing-specific information: temporary vectors, color masks + * @return grb::RC::SUCCESS if the algorithm could correctly terminate, the error code of the first + * unsuccessful operation otherwise + */ + grb::RC red_black_gauss_seidel( + SmootherInputType & data, + SmootherDataType & smoothing_info + ) { + RC ret = SUCCESS; + // zero the temp output just once, assuming proper masking avoids + // interference among different colors + ret = ret ? 
ret : grb::set< descr >( smoothing_info.smoother_temp, + ring.template getZero< IOType >() ); + + // forward step + for( size_t color = 0; color < smoothing_info.color_masks.size(); ++color ) { + ret = red_black_gauss_seidel_single_step( data, smoothing_info, color ); + } + ret = ret ? ret : grb::set< descr >( smoothing_info.smoother_temp, + ring.template getZero< IOType >() ); + + // backward step + for( size_t color = smoothing_info.color_masks.size(); color > 0; --color ) { + ret = red_black_gauss_seidel_single_step( data, smoothing_info, color - 1 ); + } + return ret; + } + }; + + } // namespace algorithms +} // namespace grb + +#endif // H_GRB_ALGORITHMS_RED_BLACK_GAUSS_SEIDEL diff --git a/include/graphblas/algorithms/multigrid/single_matrix_coarsener.hpp b/include/graphblas/algorithms/multigrid/single_matrix_coarsener.hpp new file mode 100644 index 000000000..40f8163f5 --- /dev/null +++ b/include/graphblas/algorithms/multigrid/single_matrix_coarsener.hpp @@ -0,0 +1,197 @@ + +/* + * Copyright 2022 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file single_matrix_coarsener.hpp + * @author Alberto Scolari (alberto.scolari@huawei.com) + * Implementation of a coarsener using the same matrix for both coarsening and prolongation. 
+ */ + +#ifndef _H_GRB_ALGORITHMS_HPCG_SINGLE_MATRIX_COARSENER +#define _H_GRB_ALGORITHMS_HPCG_SINGLE_MATRIX_COARSENER + +#include +#include + +#include + +#include "multigrid_data.hpp" + +namespace grb { + namespace algorithms { + + /** + * Structure storing the data for the coarsener. + */ + template< + typename IOType, + typename NonzeroType + > struct CoarseningData { + + grb::Matrix< NonzeroType > coarsening_matrix; ///< matrix of size #system_size \f$ \times \f$ #finer_size + ///< to coarsen an input vector of size #finer_size into a vector of size #system_size + grb::Vector< IOType > Ax_finer; ///< finer vector for intermediate computations, of size #finer_size + + /** + * Construct a new CoarseningData object by initializing internal data structures. + * + * @param[in] _finer_size size of the finer system, i.e. size of external objects \b before coarsening + * @param[in] coarser_size size of the current system, i.e. size \b after coarsening + */ + CoarseningData( + size_t _finer_size, + size_t coarser_size + ) : + coarsening_matrix( coarser_size, _finer_size ), + Ax_finer( _finer_size ) {} + + grb::RC init_vectors( IOType zero ) { + return grb::set( Ax_finer, zero ); + } + }; + + /** + * Runner structure, holding the data to coarsen the levels of a multi-grid simulation. + * + * This coarsener just uses the same matrix to perform the coarsening (via an mxv()) + * and the prolongation, using it transposed. 
+ */ + template< + class CoarsenerTypes, + typename TelControllerType, + Descriptor descr = descriptors::no_operation + > struct SingleMatrixCoarsener { + + // algebraic types + using IOType = typename CoarsenerTypes::IOType; + using NonzeroType = typename CoarsenerTypes::NonzeroType; + using Ring = typename CoarsenerTypes::Ring; + using Minus = typename CoarsenerTypes::Minus; + + using MultiGridInputType = MultiGridData< IOType, NonzeroType, TelControllerType >; ///< input data from MG + using CoarseningDataType = CoarseningData< IOType, NonzeroType >; ///< internal data + ///< with coarsening information + + static_assert( std::is_default_constructible< Ring >::value, + "cannot construct the Ring with default values" ); + static_assert( std::is_default_constructible< Minus >::value, + "cannot construct the Minus operator with default values" ); + + /** + * Data to coarsen each level, from finer to coarser. + */ + std::vector< std::unique_ptr< grb::algorithms::CoarseningData< IOType, NonzeroType > > > coarsener_levels; + Ring ring; + Minus minus; + + /** + * Method required by MultiGridRunner before the recursive call, to coarsen + * the residual vector of \p finer (the finer system) into the residual of + * \p coarser (the coarser system). + */ + inline grb::RC coarsen_residual( + const MultiGridInputType & finer, + MultiGridInputType & coarser + ) { + // first compute the residual + CoarseningData< IOType, NonzeroType > & coarsener = *coarsener_levels[ finer.level ]; + grb::RC ret = grb::set< descr >( coarsener.Ax_finer, ring.template getZero< IOType >() ); + ret = ret ? ret : grb::mxv< descr >( coarsener.Ax_finer, finer.A, finer.z, ring ); + + return ret ? ret : compute_coarsening( finer.r, coarser.r, coarsener ); + } + + /** + * Method required by MultiGridRunner after the recursive call, to "prolong" the coarser solution + * into the finer solution. 
+ */ + inline grb::RC prolong_solution( + const MultiGridInputType & coarser, + MultiGridInputType & finer + ) { + return compute_prolongation( coarser.z, finer.z, *coarsener_levels[ finer.level ] ); + } + + protected: + /** + * computes the coarser residual vector \p CoarseningData.r by coarsening + * \p coarsening_data.Ax_finer - \p r_fine via \p coarsening_data.coarsening_matrix. + * + * The coarsening information are stored inside \p CoarseningData. + * + * @param[in] r_fine fine residual vector + * @param[out] r_coarse coarse residual vector, the output + * @param[in,out] coarsening_data \ref MultiGridData data structure storing the information for coarsening + * @return grb::RC::SUCCESS if the algorithm could correctly terminate, the error code of the first + * unsuccessful operation otherwise + */ + grb::RC compute_coarsening( + const grb::Vector< IOType > & r_fine, + grb::Vector< IOType > & r_coarse, + CoarseningData< IOType, NonzeroType > & coarsening_data + ) { + RC ret = SUCCESS; + ret = ret ? ret : grb::eWiseApply< descr >( coarsening_data.Ax_finer, r_fine, + coarsening_data.Ax_finer, minus ); // Ax_finer = r_fine - Ax_finer + assert( ret == SUCCESS ); + + // actual coarsening, from ncols(*coarsening_data->A) == *coarsening_data->system_size * 8 + // to *coarsening_data->system_size + ret = ret ? ret : grb::set< descr >( r_coarse, ring.template getZero< IOType >() ); + ret = ret ? ret : grb::mxv< descr >( r_coarse, coarsening_data.coarsening_matrix, + coarsening_data.Ax_finer, ring ); // r = coarsening_matrix * Ax_finer + return ret; + } + + /** + * computes the prolongation of the coarser solution \p coarsening_data.z and stores it into + * \p z_fine. + * + * For prolongation, this function uses the matrix \p coarsening_data.coarsening_matrix by transposing it. 
+ * + * @param[out] z_coarse input solution vector, to be coarsened + * @param[out] z_fine the solution vector to store the prolonged solution into + * @param[in,out] coarsening_data information for coarsening + * @return grb::RC::SUCCESS if the algorithm could correctly terminate, the error code of the first + * unsuccessful operation otherwise + */ + grb::RC compute_prolongation( + const grb::Vector< IOType > & z_coarse, + grb::Vector< IOType > & z_fine, // fine residual + grb::algorithms::CoarseningData< IOType, NonzeroType > & coarsening_data + ) { + RC ret = SUCCESS; + // actual refining, from *coarsening_data->syztem_size == nrows(*coarsening_data->A) / 8 + // to nrows(z_fine) + ret = ret ? ret : grb::set< descr >( coarsening_data.Ax_finer, + ring.template getZero< IOType >() ); + + ret = ret ? ret : grb::mxv< descr | grb::descriptors::transpose_matrix >( + coarsening_data.Ax_finer, coarsening_data.coarsening_matrix, z_coarse, ring ); + assert( ret == SUCCESS ); + + ret = ret ? ret : grb::foldl< descr >( z_fine, coarsening_data.Ax_finer, + ring.getAdditiveMonoid() ); // z_fine += Ax_finer; + assert( ret == SUCCESS ); + return ret; + } + }; + + } // namespace algorithms +} // namespace grb + +#endif // _H_GRB_ALGORITHMS_HPCG_SINGLE_MATRIX_COARSENER diff --git a/include/graphblas/nonblocking/matrix.hpp b/include/graphblas/nonblocking/matrix.hpp index 251e2037d..5554d78ae 100644 --- a/include/graphblas/nonblocking/matrix.hpp +++ b/include/graphblas/nonblocking/matrix.hpp @@ -50,7 +50,6 @@ #include #include -#include #include #include diff --git a/include/graphblas/reference/vector.hpp b/include/graphblas/reference/vector.hpp index f0db908b2..e6167a868 100644 --- a/include/graphblas/reference/vector.hpp +++ b/include/graphblas/reference/vector.hpp @@ -478,18 +478,19 @@ namespace grb { // perform straight copy fwd_iterator it = start; - for( size_t i = 0; start != end && i < _coordinates.size(); ++i ) { + for( size_t i = 0; it != end && i < _coordinates.size(); 
++i ) { // flag coordinate as assigned if( _coordinates.assign( i ) ) { if( descr & descriptors::no_duplicates ) { return ILLEGAL; } // nonzero already existed, so fold into existing one - foldl( _raw[ i ], *it++, dup ); + foldl( _raw[ i ], *it, dup ); } else { // new nonzero, so overwrite - _raw[ i ] = static_cast< D >( *it++ ); + _raw[ i ] = static_cast< D >( *it ); } + ++it; } // write back final position @@ -538,7 +539,9 @@ namespace grb { nnz_iterator nnz = nnz_start; ind_iterator ind = ind_start; while( nnz != nnz_end || ind != ind_end ) { - const size_t i = static_cast< size_t >( *ind++ ); + const size_t i = static_cast< size_t >( *ind ); + ++ind; + // sanity check if( i >= _coordinates.size() ) { return MISMATCH; @@ -547,10 +550,11 @@ namespace grb { if( descr & descriptors::no_duplicates ) { return ILLEGAL; } - foldl( _raw[ i ], *nnz++, dup ); + foldl( _raw[ i ], *nnz, dup ); } else { - _raw[ i ] = static_cast< D >( *nnz++ ); + _raw[ i ] = static_cast< D >( *nnz ); } + ++nnz; } // done diff --git a/include/graphblas/utils/iterators/IteratorValueAdaptor.hpp b/include/graphblas/utils/iterators/IteratorValueAdaptor.hpp new file mode 100644 index 000000000..ebac6ca02 --- /dev/null +++ b/include/graphblas/utils/iterators/IteratorValueAdaptor.hpp @@ -0,0 +1,161 @@ + +/* + * Copyright 2022 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
/**
 * Adaptor for an iterator, to extract the value pointed to by the * operator.
 * It wraps an iterator under the same interface, using an object of type
 * \a AdaptorType to adapt the returned value: the adaptor is called on the
 * dereferenced inner iterator and must return a pointer to the adapted value.
 *
 * Members that only make sense under certain conditions (construction from
 * the sole iterator, random-access arithmetic) are member templates, so that
 * they are discarded via SFINAE instead of breaking the instantiation of the
 * whole class when the condition does not hold.
 *
 * @tparam InnerIterType type of the underlying iterator
 * @tparam AdaptorType type of the adaptor; it must be copy-constructible and
 *                     copy-assignable, and default-constructible if the
 *                     single-argument constructors are used
 */
template<
	typename InnerIterType,
	typename AdaptorType
> struct IteratorValueAdaptor {

	static_assert( std::is_copy_constructible< AdaptorType >::value,
		"AdaptorType must be copy-constructible" );
	static_assert( std::is_copy_assignable< AdaptorType >::value,
		"AdaptorType must be copy-assignable" );

	typedef typename std::decay<
		decltype( *std::declval< AdaptorType >()( *std::declval< InnerIterType >() ) )
	>::type value_type;
	typedef value_type & reference;
	typedef value_type * pointer;
	typedef const value_type * const_pointer;
	typedef typename std::iterator_traits< InnerIterType >::iterator_category iterator_category;
	typedef typename std::iterator_traits< InnerIterType >::difference_type difference_type;

	/** Whether the underlying iterator is a random-access one. */
	static constexpr bool is_random_access = std::is_base_of<
		std::random_access_iterator_tag, iterator_category >::value;

	InnerIterType iter;
	AdaptorType adaptor;

	using SelfType = IteratorValueAdaptor< InnerIterType, AdaptorType >;

	/**
	 * Construct a new IteratorValueAdaptor object from an actual iterator.
	 * The adaptor is built via its default constructor.
	 *
	 * Available only if \a AdaptorType is default-constructible.
	 */
	template<
		typename A = AdaptorType,
		typename std::enable_if< std::is_default_constructible< A >::value, int >::type = 0
	> IteratorValueAdaptor( const InnerIterType &_iter ) :
		iter( _iter ),
		adaptor() {}

	/**
	 * Construct a new IteratorValueAdaptor object from an iterator and an
	 * existing adaptor object.
	 */
	IteratorValueAdaptor(
		const InnerIterType &_iter,
		const AdaptorType &_adaptor
	) :
		iter( _iter ),
		adaptor( _adaptor ) {}

	/**
	 * Construct a new IteratorValueAdaptor object from an actual iterator.
	 * The adaptor is built via its default constructor.
	 *
	 * Available only if \a AdaptorType is default-constructible.
	 *
	 * @param _iter the underlying iterator, to be moved
	 */
	template<
		typename A = AdaptorType,
		typename std::enable_if< std::is_default_constructible< A >::value, int >::type = 0
	> IteratorValueAdaptor( InnerIterType &&_iter ) :
		iter( std::move( _iter ) ),
		adaptor() {}

	/**
	 * Construct a new IteratorValueAdaptor object from an actual iterator
	 * and an existing adaptor object by moving their state.
	 */
	IteratorValueAdaptor(
		InnerIterType &&_iter,
		AdaptorType &&_adaptor
	) :
		iter( std::move( _iter ) ),
		adaptor( std::move( _adaptor ) ) {}

	IteratorValueAdaptor() = delete;

	// since it is an iterator, we MUST have copy and move semantics
	IteratorValueAdaptor( const SelfType & ) = default;

	IteratorValueAdaptor( SelfType && ) = default;

	SelfType& operator=( const SelfType & ) = default;

	SelfType& operator=( SelfType && ) = default;

	/** Two adaptors differ if and only if their underlying iterators differ. */
	bool operator!=( const SelfType & o ) const { return o.iter != iter; }

	bool operator==( const SelfType & o ) const { return ! operator!=( o ); }

	/** Returns the adapted value the underlying iterator currently points to. */
	reference operator*() { return *adaptor( *iter ); }

	// a const iterator does not imply a const pointee, hence the same
	// reference type as the non-const overload (a top-level const applied to
	// a reference type is ignored anyway)
	reference operator*() const { return *adaptor( *iter ); }

	pointer operator->() { return adaptor( *iter ); }

	const_pointer operator->() const { return adaptor( *iter ); }

	SelfType& operator++() { ++iter; return *this; }

	/**
	 * Advances the underlying iterator by \p offset positions.
	 *
	 * Available only if the underlying iterator is random-access.
	 */
	template<
		bool RandomAccess = is_random_access,
		typename std::enable_if< RandomAccess, int >::type = 0
	> SelfType & operator+=( const size_t offset ) {
		iter += offset;
		return *this;
	}

	/**
	 * Returns the distance between the underlying iterators of \c this and
	 * \p other.
	 *
	 * Available only if the underlying iterator is random-access.
	 */
	template<
		bool RandomAccess = is_random_access,
		typename std::enable_if< RandomAccess, int >::type = 0
	> difference_type operator-( const SelfType &other ) const {
		return iter - other.iter;
	}
};
/**
 * Partitions a collection of \p num_elements elements across \p num_procs
 * processes in contiguous chunks of ceil( num_elements / num_procs ) elements
 * and computes the bounds of the chunk assigned to \p this_proc.
 *
 * Both outputs are offsets from the beginning of the collection, clamped to
 * \p num_elements: the local partition is the half-open range
 * [ \p first_offset, \p local_size ).
 *
 * \note Despite its name, \p local_size receives the offset \b past the last
 *       local element, not the number of local elements; the latter is
 *       \p local_size - \p first_offset.
 *
 * @tparam T size type
 * @param[in] num_procs total number of processes
 * @param[in] this_proc ID of current process
 * @param[in] num_elements total number of elements in the collection
 * @param[out] first_offset offset to the first element of the local partition
 * @param[out] local_size offset past the last element of the local partition
 */
template< typename T > void partition_collection_size(
	size_t num_procs,
	size_t this_proc,
	T num_elements,
	T& first_offset,
	T& local_size
) {
	// chunk size, rounded up so that all elements are covered
	const T chunk = ( num_elements + num_procs - 1 ) / num_procs;

	const T chunk_begin = chunk * static_cast< T >( this_proc );
	first_offset = std::min( chunk_begin, num_elements );

	local_size = std::min( first_offset + chunk, num_elements );
}

/**
 * Partitions an iteration range across processes according to the given
 * information.
 *
 * With \p num_procs processes, \p this_proc < \p num_procs and a collection
 * of \p num_elements elements delimited by \p begin and \p end, it splits the
 * collection in contiguous chunks (see partition_collection_size()) and
 * updates \p begin and \p end so that they delimit the chunk designated by
 * \p this_proc. The resulting range is empty if there are more processes than
 * chunks.
 *
 * With a single process the range is left untouched.
 *
 * Note: the number of processes and the ID of the current process is expected
 * in input not to introduce dependencies on separate code paths.
 *
 * @tparam IterT iterator type; it must be random-access
 * @param[in] num_procs number of processes
 * @param[in] this_proc ID of current process
 * @param[in] num_elements number of elements of the collection; it must equal
 *                         \code end - begin \endcode
 * @param[in,out] begin on input, beginning of the whole collection; on output,
 *                      beginning of the local partition
 * @param[in,out] end on input, end of the whole collection; on output, end of
 *                    the local partition
 */
template< typename IterT > void partition_iteration_range_on_procs(
	size_t num_procs,
	size_t this_proc,
	size_t num_elements,
	IterT &begin,
	IterT &end
) {
	static_assert( std::is_base_of< std::random_access_iterator_tag,
		typename std::iterator_traits< IterT >::iterator_category >::value,
		"the given iterator is not a random access one" );
	assert( this_proc < num_procs );
	assert( num_elements == static_cast< size_t >( end - begin ) );

	if( num_procs != 1 ) {
		size_t chunk_first, chunk_past_last;
		partition_collection_size( num_procs, this_proc, num_elements,
			chunk_first, chunk_past_last );

		// move end first: it is computed from the still-unmodified begin
		const bool shrink_end = chunk_past_last < num_elements;
		if( shrink_end ) {
			end = begin;
			end += chunk_past_last;
		}
		const bool advance_begin = chunk_first > 0;
		if( advance_begin ) {
			begin += chunk_first;
		}
	}
}
/**
 * Computes the difference \p a \a - \p b and returns it as the given
 * type \p DiffType.
 *
 * The magnitude of the difference and the representability check are both
 * computed in a wide unsigned type, so that the result is correct also when
 * \p SizeType is signed or narrower than \p DiffType.
 *
 * @tparam DiffType signed type of the result
 * @tparam SizeType integral type of the inputs
 * @param[in] a minuend
 * @param[in] b subtrahend
 * @return the value \p a \a - \p b
 * @throws std::range_error if the magnitude of the difference exceeds the
 *                          maximum value of \p DiffType
 */
template<
	typename DiffType,
	typename SizeType
> DiffType compute_signed_distance(
	const SizeType a,
	const SizeType b
) {
	static_assert( std::is_signed< DiffType >::value, "DiffType should be signed" );
	typedef unsigned long long WideType;

	const bool non_negative = !( a < b );
	const SizeType larger = non_negative ? a : b;
	const SizeType smaller = non_negative ? b : a;
	// unsigned arithmetic wraps around, hence this is the exact magnitude
	// even if larger and smaller are signed values of opposite sign
	const WideType magnitude = static_cast< WideType >( larger )
		- static_cast< WideType >( smaller );
	if( magnitude > static_cast< WideType >( std::numeric_limits< DiffType >::max() ) ) {
		throw std::range_error( "cannot represent difference" );
	}
	const DiffType result = static_cast< DiffType >( magnitude );
	return non_negative ? result : -result;
}
/**
 * Array with fixed size based on std::array with an interface compliant to
 * what other classes in the geometry namespace expect, like #storage() and
 * #dimensions() methods.
 *
 * It describes a vector of dimensions #dimensions().
 *
 * Since there is no external storage, moving is no cheaper than copying:
 * no move operation is declared, so that rvalues are simply copied.
 *
 * @tparam DIMS the dimensions of the vector
 * @tparam DataType the data type of the vector elements
 */
template<
	size_t DIMS,
	typename DataType
> class ArrayVectorStorage : public std::array< DataType, DIMS > {
public:
	using VectorStorageType = std::array< DataType, DIMS > &;
	using ConstVectorStorageType = const std::array< DataType, DIMS > &;
	using SelfType = ArrayVectorStorage< DIMS, DataType >;

	/**
	 * Construct a new Array Vector Storage object of given dimensions;
	 * internal values are \b not initialized.
	 *
	 * @param _dimensions number of dimensions; it must be equal to \p DIMS
	 * @throws std::invalid_argument if \p _dimensions differs from \p DIMS
	 */
	ArrayVectorStorage( size_t _dimensions ) {
		static_assert( DIMS > 0, "cannot allocate 0-sized array" );
		if( _dimensions != DIMS ) {
			throw std::invalid_argument( "given dimensions must match the type dimensions" );
		}
	}

	ArrayVectorStorage() = delete;

	/** Copies all \p DIMS elements of \p o. */
	ArrayVectorStorage( const SelfType & o ) noexcept {
		std::copy_n( o.cbegin(), DIMS, this->begin() );
	}

	/** Copies all \p DIMS elements of \p original. */
	SelfType & operator=( const SelfType & original ) noexcept {
		std::copy_n( original.begin(), DIMS, this->begin() );
		return *this;
	}

	// Move constructor and move assignment are intentionally NOT declared
	// (rather than deleted): a deleted move operation still takes part in
	// overload resolution and makes any use of an rvalue ill-formed, while
	// with no declaration rvalues bind to the copy operations above.

	/**
	 * Returns the geometrical dimensions of this vector, i.e. of the
	 * geometrical space it refers to.
	 */
	constexpr size_t dimensions() const {
		return DIMS;
	}

	/**
	 * Returns a reference to the underlying storage object.
	 */
	inline VectorStorageType storage() {
		return *this;
	}

	/**
	 * Returns a const reference to the underlying storage object.
	 */
	inline ConstVectorStorageType storage() const {
		return *this;
	}
};
/**
 * Array with fixed size (i.e. decided at object creation) allocated on the
 * heap with an interface compliant to what other classes in the geometry
 * namespace expect, like storage() and dimensions() methods.
 *
 * It describes a vector of dimensions #dimensions().
 *
 * The object owns its storage. Only copy semantics is provided: no move
 * operation is declared, so that rvalues are copied and an object never
 * loses its storage.
 *
 * @tparam DataType the data type of the vector elements
 */
template< typename DataType > class DynamicVectorStorage {

	size_t _dimensions;
	DataType * _storage;

	/** Releases the owned storage, if any. */
	void clean() {
		if( this->_storage != nullptr ) {
			delete[] this->_storage;
		}
	}

public:
	// iterator fields
	using reference = DataType &;
	using const_reference = const DataType &;
	using iterator = DataType *;
	using const_iterator = const DataType *;
	using pointer = DataType *;
	using const_pointer = const DataType *;

	using VectorStorageType = DataType *;
	// NOTE(review): this is a pointer to non-const data, hence the const
	// overload of storage() grants write access; it looks like it was meant
	// to be const DataType * -- confirm against the users of this alias
	using ConstVectorStorageType = DataType *;
	using SelfType = DynamicVectorStorage< DataType >;

	/**
	 * Constructs a vector with \p num_dimensions elements, allocated via
	 * \c new[] (hence default-initialized).
	 *
	 * @param num_dimensions number of elements; it must be greater than 0
	 * @throws std::invalid_argument if \p num_dimensions is 0
	 */
	DynamicVectorStorage( size_t num_dimensions ) :
		_dimensions( num_dimensions ),
		_storage( nullptr )
	{
		if( num_dimensions == 0 ) {
			throw std::invalid_argument( "dimensions cannot be 0" );
		}
		this->_storage = new DataType[ num_dimensions ];
	}

	DynamicVectorStorage() = delete;

	/** Allocates a storage as large as that of \p o and copies its elements. */
	DynamicVectorStorage( const SelfType & o ) :
		_dimensions( o._dimensions ),
		_storage( new DataType[ o._dimensions ] )
	{
		std::copy_n( o._storage, o._dimensions, this->_storage );
	}

	/**
	 * Copies the elements of \p original into \c this, re-allocating the
	 * storage only if the dimensions differ.
	 *
	 * If the allocation of the new storage fails, \c this is left untouched.
	 */
	SelfType & operator=( const SelfType & original ) {
		if( this == &original ) {
			return *this;
		}
		if( original._dimensions != this->_dimensions ) {
			// allocate BEFORE releasing the old storage: if new[] throws,
			// _storage must not be left dangling (the destructor would
			// otherwise delete it a second time)
			DataType * const fresh = new DataType[ original._dimensions ];
			this->clean();
			this->_storage = fresh;
			this->_dimensions = original._dimensions;
		}
		std::copy_n( original._storage, original._dimensions, this->_storage );
		return *this;
	}

	// Move constructor and move assignment are intentionally NOT declared
	// (rather than deleted): a deleted move operation still takes part in
	// overload resolution and makes any use of an rvalue ill-formed, while
	// with no declaration rvalues bind to the copy operations above.

	~DynamicVectorStorage() {
		this->clean();
	}

	/** Returns the number of elements of this vector. */
	size_t dimensions() const {
		return this->_dimensions;
	}

	inline iterator begin() {
		return this->_storage;
	}

	inline iterator end() {
		return this->_storage + this->_dimensions;
	}

	inline const_iterator begin() const {
		return this->_storage;
	}

	inline const_iterator end() const {
		return this->_storage + this->_dimensions;
	}

	inline const_iterator cbegin() const {
		return this->_storage;
	}

	inline const_iterator cend() const {
		return this->_storage + this->_dimensions;
	}

	/** Returns the pointer to the underlying storage. */
	inline VectorStorageType storage() {
		return this->_storage;
	}

	/** Returns the pointer to the underlying storage. */
	inline ConstVectorStorageType storage() const {
		return this->_storage;
	}

	/** Unchecked access to the element at position \p pos. */
	inline reference operator[]( size_t pos ) {
		return *( this->_storage + pos );
	}

	/** Unchecked access to the element at position \p pos. */
	inline const_reference operator[]( size_t pos ) const {
		return *( this->_storage + pos );
	}
};
+ */ + +/** + * @dir include/graphblas/utils/multigrid + * This folder contains various utilities to describe an N-dimensional mesh (possibly with halo) + * and iterate through its elements and through the neighbors of each element, possible generating + * a matrix out of this information. + * + * These facilities are used to generate system matrices and various inputs for multi-grid simulations. + */ + +/** + * @file halo_matrix_generator_iterator.cpp + * @author Alberto Scolari (alberto.scolari@huawei.com) + * Definition of HaloMatrixGeneratorIterator. + */ + +#ifndef _H_GRB_ALGORITHMS_MULTIGRID_HALO_MATRIX_GENRATOR_ITERATOR +#define _H_GRB_ALGORITHMS_MULTIGRID_HALO_MATRIX_GENRATOR_ITERATOR + +#include + +#include "array_vector_storage.hpp" +#include "linearized_halo_ndim_system.hpp" +#include "linearized_ndim_iterator.hpp" +#include "linearized_ndim_system.hpp" + +namespace grb { + namespace utils { + namespace multigrid { + + /** + * Iterator type to generate a matrix on top of the couples - of an + * \p DIMS -dimensional mesh. + * + * This iterator is random-access and meets the the interface of an ALP/GraphBLAS + * input iterator, i.e. an object of this type \a it has methods \a i(), \a j() and + * \a v() to describe a nonzero triplet (row index, column index and value, respectively). + * + * This data structure is based on the LinearizedHaloNDimIterator class, esentially wrapping the + * underlying element index as \a i() and the neighbor index as \a j(); the value \a v() + * is user-customizable via a functor of type \p ValueCallable, which emits the nonzero + * of type \p ValueType based on the passed values of \a i() and \a j(). 
+ * + * @tparam DIMS number of dimensions + * @tparam CoordType tyoe storing the coordinate and the system sizes along each dimension + * @tparam ValueType type of nonzeroes + * @tparam ValueCallable callable object producing the nonzero value based on \a i() and \a j() + */ + template< + size_t DIMS, + typename CoordType, + typename ValueType, + typename ValueCallable + > struct HaloMatrixGeneratorIterator { + + static_assert( std::is_copy_constructible< ValueCallable >::value, + "ValueCallable must be copy-constructible" ); + + using RowIndexType = CoordType; ///< numeric type of rows + using ColumnIndexType = CoordType; + using LinearSystemType = LinearizedHaloNDimSystem< DIMS, RowIndexType >; + using SelfType = HaloMatrixGeneratorIterator< DIMS, CoordType, ValueType, ValueCallable >; + using Iterator = typename LinearSystemType::Iterator; + + struct HaloPoint { + + friend SelfType; + + HaloPoint( + const ValueCallable & value_producer, + RowIndexType i, + ColumnIndexType j + ) noexcept : + _value_producer( value_producer ), + _i( i ), + _j( j ) {} + + HaloPoint( const HaloPoint & ) = default; + + HaloPoint & operator=( const HaloPoint & ) = default; + + inline RowIndexType i() const { + return _i; + } + inline ColumnIndexType j() const { + return _j; + } + inline ValueType v() const { + return _value_producer( _i, _j ); + } + + private: + ValueCallable _value_producer; + RowIndexType _i; + ColumnIndexType _j; + }; + + // interface for std::random_access_iterator + using iterator_category = std::random_access_iterator_tag; + using value_type = HaloPoint; + using pointer = value_type; + using reference = value_type; + using difference_type = typename Iterator::difference_type; + + /** + * Construct a new \c HaloMatrixGeneratorIterator object, setting the current row as \p row + * and emitting \p diag if the iterator has moved on the diagonal, \p non_diag otherwise. 
+ * + * @param sizes array with the sizes along the dimensions + * @param _halo halo of points to iterate around; must be > 0 + * @param diag value to emit when on the diagonal + * @param non_diag value to emit outside the diagonal + */ + HaloMatrixGeneratorIterator( + const LinearSystemType & system, + const ValueCallable & value_producer + ) noexcept : + _val( value_producer, 0, 0 ), + _lin_system( &system ), + _sys_iter( system.begin() ) + { + update_coords(); + } + + HaloMatrixGeneratorIterator( const SelfType & ) = default; + + SelfType & operator=( const SelfType & ) = default; + + /** + * Increments the iterator by moving coordinates to the next (row, column) to iterate on. + * + * This operator internally increments the columns coordinates until wrap-around, when it increments + * the row coordinates and resets the column coordinates to the first possible columns; + * this column coordinate depends on the row coordinates according to the dimensions + * iteration order and on the parameter \p halo. + * + * @return HaloMatrixGeneratorIterator& \c this object, with the updated state + */ + SelfType & operator++() noexcept { + (void)++_sys_iter; + update_coords(); + return *this; + } + + SelfType & operator+=( size_t offset ) { + _sys_iter += offset; + update_coords(); + return *this; + } + + difference_type operator-( const SelfType & other ) const { + return this->_sys_iter - other._sys_iter; + } + + /** + * Operator to compare \c this against \p o and return whether they differ. + * + * @param o object to compare \c this against + * @return true of the row or the column is different between \p o and \c this + * @return false if both row and column of \p o and \c this are equal + */ + bool operator!=( const SelfType & o ) const { + return this->_sys_iter != o._sys_iter; + } + + /** + * Operator to compare \c this against \p o and return whether they are equal. 
+ * + * @param o object to compare \c this against + * @return true of the row or the column is different between \p o and \c this + * @return false if both row and column of \p o and \c this are equal + */ + bool operator==( const SelfType & o ) const { + return ! operator!=( o ); + } + + /** + * Operator returning the triple to directly access row, column and element values. + * + * Useful when building the matrix by copying the triple of coordinates and value, + * like for the BSP1D backend. + */ + reference operator*() const { + return _val; + } + + pointer operator->() const { + return &_val; + } + + /** + * Returns the current row. + */ + inline RowIndexType i() const { + return _val.i(); + } + + /** + * Returns the current column. + */ + inline ColumnIndexType j() const { + return _val.j(); + } + + /** + * Returns the current matrix value. + * + * @return ValueType #diagonal_value if \code row == column \endcode (i.e. if \code this-> \endcode + * #i() \code == \endcode \code this-> \endcode #j()), #non_diagonal_value otherwise + */ + inline ValueType v() const { + return _val.v(); + } + + private: + value_type _val; + const LinearSystemType * _lin_system; + Iterator _sys_iter; + + void update_coords() { + _val._i = _sys_iter->get_element_linear(); + _val._j = _sys_iter->get_neighbor_linear(); + } + }; + + } // namespace multigrid + } // namespace utils +} // namespace grb + +#endif // _H_GRB_ALGORITHMS_MULTIGRID_HALO_MATRIX_GENRATOR_ITERATOR diff --git a/include/graphblas/utils/multigrid/linearized_halo_ndim_iterator.hpp b/include/graphblas/utils/multigrid/linearized_halo_ndim_iterator.hpp new file mode 100644 index 000000000..6c020c39d --- /dev/null +++ b/include/graphblas/utils/multigrid/linearized_halo_ndim_iterator.hpp @@ -0,0 +1,391 @@ + +/* + * Copyright 2022 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file linearized_halo_ndim_iterator.cpp + * @author Alberto Scolari (alberto.scolari@huawei.com) + * Definition of LinearizedHaloNDimSystem. + */ + +#ifndef _H_GRB_ALGORITHMS_MULTIGRID_LINEARIZED_HALO_NDIM_ITERATOR +#define _H_GRB_ALGORITHMS_MULTIGRID_LINEARIZED_HALO_NDIM_ITERATOR + +#include +#include +#include +#include + +#include + +#include "array_vector_storage.hpp" +#include "linearized_ndim_iterator.hpp" +#include "linearized_ndim_system.hpp" + +namespace grb { + namespace utils { + namespace multigrid { + + // forward declaration + template< + size_t DIMS, + typename SizeType + > class LinearizedHaloNDimSystem; + + /** + * Class to iterate over the \b neighbors of a system with halo: by advancing the iterator, + * the user can traverse all neighbors of all elements one-by-one, in order, for example, to + * emit all possible copies element-neighbor. + * + * Example: for a 2-dimensional 3 x 3 system with halo 1, with elements numbered as in + * + * 0 1 2 + * 3 4 5 + * 6 7 8 + * + * the emitted couples are: + * + * 0-0, 0-1, 0-3, 0-4; 1-0, 1-1, 1-2, 1-3, 1-4, 1-5; 2-1, 2-2, 2-4, 2-5; + * 3-0, 3-1, 3-3, 3-4; 4-0, 4-1, 4-2, 4-3, 4-4, 4-5, 4-6, 4-7, 4-8; and so on. + * + * It implements two interfaces for iteration. The first is a standard STL-like + * interface meeting the random-access requirements, with operators \a ++, \a *, \a ->, + * \a +=, \a -, \a ==; these facilities iterate over \b all neighbors of the underlying system, + * automatically updating the corresponding element the neighbor is associated to. 
+ * The second interface is a custom (Java-like) one that allows to iterate separately over elements + * and their neighbors: the user can query whether more elements exist, move to the next element, + * iterate over the neighbors of the current element, query whether more neighbors exist for the + * current element. + * + * The state of this structure essentially contains: + * + * 1. a const-pointer to a LinearizedHaloNDimSystem object, storing the geometry + * information of the N-dimensional system. + * 2. the iterator to the current element (which in turn provides the element's vector + * and linear coordinates) + * 3. the vector coordinate of the current neighbor + * 4. the linear coordinate of the current neighbor + * 5. information about the current element's neighbors space: + * 1. the N-dimensional sub-space of neighbors w.r.t. the current element: this + * LinearizedHaloNDimSystem object stores the sizes of the neighbors's sub-space + * centered around the current element (at most 2 * halo + 1 per dimension, if the current + * element is an inner one); hence, it computes coordinates and provides iterators that are + * \b relative to the current element + * 2. vector coordinates of the first neighbor of the current element, in the main system + * (i.e. \b not relative); this allows computing any neighbor as the sum of this vector + * plus its relative coordinates in the neighbors' sub-space + * 3. iterator to the current neighbor, built out of the relative sub-space, to actually iterate + * over the current element's neighbors + * 4. iterator to the last neighbor of the current element, to stop the iteration over neighbors + * and advance to the next element. + * + * The above-mentioned methods to advance the iterator \c this (over neighbors or elements) + * take care of updating these structures properly, keeping the state \b always coherent. 
+ * + * @tparam DIMS syztem number of dimensions + * @tparam SizeType type of coordinates and of sizes (must be large enough to describe the size + * of the system along each direction) + */ + template< size_t DIMS, typename SizeType > + class LinearizedHaloNDimIterator { + + using SystemType = LinearizedHaloNDimSystem< DIMS, SizeType >; + using VectorType = ArrayVectorStorage< DIMS, SizeType >; + using VectorIteratorType = LinearizedNDimIterator< SizeType, VectorType >; + + public: + using ConstVectorReference = typename VectorIteratorType::ConstVectorReference; + using SelfType = LinearizedHaloNDimIterator< DIMS, SizeType >; + + /** + * Structure holding the information about a neighbor in a system: its linear + * and vector coordinates and the element it is neighbor of (in the form of both + * linear and vectoor coordinate). + */ + struct HaloNDimElement { + private: + // for linearization + const SystemType * _system; + + // for iteration + VectorIteratorType _element_iter; // coordinates iterator + + VectorType _neighbor; // the current neighbor + SizeType _position; + + public: + friend SelfType; + + HaloNDimElement() = delete; + + HaloNDimElement( const HaloNDimElement & ) = default; + + HaloNDimElement( HaloNDimElement && ) = delete; + + HaloNDimElement( const SystemType & system ) noexcept : + _system( &system ), + _element_iter( system ), + _neighbor( DIMS ), + _position( 0 ) + { + std::fill_n( this->_neighbor.begin(), DIMS, 0 ); + } + + HaloNDimElement & operator=( const HaloNDimElement & ) = default; + + /** + * Get the element as vector coordinates. + */ + ConstVectorReference get_element() const { + return this->_element_iter->get_position(); + } + + /** + * Get the element as linear coordinates. + */ + size_t get_element_linear() const { + return this->_system->ndim_to_linear( this->_element_iter->get_position() ); + } + + /** + * Get the neighbor as vector coordinates. 
+ */ + ConstVectorReference get_neighbor() const { + return this->_neighbor; + } + + /** + * Get the neighbor as linear coordinates. + */ + size_t get_neighbor_linear() const { + return this->_system->ndim_to_linear( this->_neighbor ); + } + + /** + * Get the (unique) neighbor number in the system. + */ + SizeType get_position() const { + return this->_position; + } + }; + + // interface for std::random_access_iterator + using iterator_category = std::random_access_iterator_tag; + using value_type = HaloNDimElement; + using pointer = const HaloNDimElement *; + using reference = const HaloNDimElement &; + using difference_type = signed long; + + LinearizedHaloNDimIterator() = delete; + + /** + * Construct a new LinearizedHaloNDimIterator object from the underlying system + * \p system (whose geometry information is used to iterate). The constructed object + * points to the first neighbor of the first element, i.e. the one with vector coordinates + * \a [0,0,...,0]. + * + * IF \p system is not valid anymore, then also \c this is not. + */ + LinearizedHaloNDimIterator( const SystemType & system ) noexcept : + _point( system ), + _neighbors_subspace( DIMS, system.halo() + 1 ), + _neighbors_start( DIMS ), + _neighbor_iter( this->_neighbors_subspace ), + _neighbor_end( VectorIteratorType::make_system_end_iterator( this->_neighbors_subspace ) ) + { + std::fill_n( this->_neighbors_start.begin(), DIMS, 0 ); + } + + LinearizedHaloNDimIterator( const SelfType & ) = default; + + SelfType & operator=( const SelfType & ) = default; + + bool operator!=( const SelfType & other ) const { + return this->_point._position != other._point._position; // use linear coordinate + } + + reference operator*() const { + return this->_point; + } + + pointer operator->() const { + return &( this->_point ); + } + + /** + * Tells whether the current element has more neighbor available (on which the user + * has not iterated yet). 
+ */ + bool has_more_neighbours() const { + return this->_neighbor_iter != this->_neighbor_end; + } + + /** + * Moves \c this to point to the next neighbor (if any, exception otherwise). + * + * Does \b not advance the element, which should be done manually via #next_element(). + */ + void next_neighbour() { + if( ! has_more_neighbours() ) { + throw std::out_of_range( "the current element has no more neighbors" ); + } + ++( this->_neighbor_iter ); + this->on_neighbor_iter_update(); + this->_point._position++; + } + + /** + * Tells whether the system has more elements. + */ + bool has_more_elements() const { + return this->_point.get_element_linear() != ( this->_point._system )->base_system_size(); + } + + /** + * Moves \c this to point to the next element, setting the neighbor as the first one. + */ + void next_element() { + if( ! has_more_elements() ) { + throw std::out_of_range( "the system has no more elements" ); + } + size_t num_neighbours = this->_neighbors_subspace.system_size(); + size_t neighbour_position_offset = + this->_neighbors_subspace.ndim_to_linear( this->_neighbor_iter->get_position() ); + ++( this->_point._element_iter ); + this->on_element_advance(); + this->_point._position -= neighbour_position_offset; + this->_point._position += num_neighbours; + } + + /** + * Moves \c this to point to the next neighbor, also advancing the element if needed. + */ + SelfType & operator++() noexcept { + ++( this->_neighbor_iter ); + if( ! has_more_neighbours() ) { + ++( this->_point._element_iter ); + this->on_element_advance(); + + } else { + this->on_neighbor_iter_update(); + } + this->_point._position++; + return *this; + } + + /** + * Moves \c this ahead of \p offste neighbors, also advancing the element if necessary. 
+ */ + SelfType & operator+=( size_t offset ) { + if( offset == 1UL ) { + return this->operator++(); + } + const size_t final_position = this->_point._position + offset; + if( final_position > this->_point._system->halo_system_size() ) { + throw std::range_error( "neighbor linear value beyond system" ); + } + VectorType final_element( DIMS ); + size_t neighbor_index = + this->_point._system->neighbour_linear_to_element( final_position, final_element ); + + this->_point._element_iter = VectorIteratorType( *this->_point._system, final_element.cbegin() ); + this->_point._position = final_position; + + this->on_element_update(); + this->_neighbors_subspace.linear_to_ndim( neighbor_index, final_element ); + + this->_neighbor_iter = VectorIteratorType( this->_neighbors_subspace, final_element.cbegin() ); + this->_neighbor_end = VectorIteratorType::make_system_end_iterator( this->_neighbors_subspace ); + this->on_neighbor_iter_update(); + + return *this; + } + + /** + * Returns the difference between \c this and \p other in the linear space of neighbors, + * i.e. how many times \p other must be advanced in order to point to the same neighbor of \c this. + * + * It throws if the result cannot be stored as a difference_type variable. + */ + difference_type operator-( const SelfType & other ) const { + return grb::utils::compute_signed_distance< difference_type, SizeType >( _point.get_position(), + other._point.get_position() ); + } + + /** + * Utility to build an iterator to the end of the system \p system. + * + * The implementation depends on the logic of operator++. 
+ */ + static SelfType make_system_end_iterator( const SystemType & system ) { + SelfType result( system ); + // go to the very first point outside of space + result._point._element_iter = VectorIteratorType::make_system_end_iterator( system ); + result.on_element_advance(); + result._point._position = system.halo_system_size(); + return result; + } + + private: + HaloNDimElement _point; + LinearizedNDimSystem< SizeType, VectorType > _neighbors_subspace; + VectorType _neighbors_start; + VectorIteratorType _neighbor_iter; // iterator in the sub-space of neighbors (0-based) + VectorIteratorType _neighbor_end; + + /** + * To be called when the iterator pointing to the neighbor is updated in order to update + * the actual neighbor's coordinates. + */ + inline void on_neighbor_iter_update() { + for( size_t i = 0; i < DIMS; i++ ) { + this->_point._neighbor[ i ] = this->_neighbors_start[ i ] + + this->_neighbor_iter->get_position()[ i ]; + } + } + + /** + * To be called after the iterator pointing to the element is updated in order to + * reset the information about the neighbor. + */ + void on_element_update() { + // reset everything + VectorType neighbors_range( DIMS ); + this->_point._system->compute_neighbors_range( this->_point._element_iter->get_position(), + this->_neighbors_start, neighbors_range ); + // re-target _neighbors_subspace + this->_neighbors_subspace.retarget( neighbors_range ); + } + + /** + * To be called after the iterator pointing to the element is updated in order to update + * all information about the neighbor, like iterator, sorrounding halo and coordinates. 
*/
	void on_element_advance() {
		// recompute the first neighbor and the neighbors' range of the new element
		this->on_element_update();

		// restart the neighbor iteration inside the re-targeted sub-space
		this->_neighbor_iter = VectorIteratorType( this->_neighbors_subspace );
		this->_neighbor_end = VectorIteratorType::make_system_end_iterator( this->_neighbors_subspace );

		// refresh the absolute coordinates of the neighbor now pointed to
		this->on_neighbor_iter_update();
	}
};

		} // namespace multigrid
	} // namespace utils
} // namespace grb

#endif // _H_GRB_ALGORITHMS_MULTIGRID_LINEARIZED_HALO_NDIM_ITERATOR
diff --git a/include/graphblas/utils/multigrid/linearized_halo_ndim_system.hpp b/include/graphblas/utils/multigrid/linearized_halo_ndim_system.hpp
new file mode 100644
index 000000000..34e16069d
--- /dev/null
+++ b/include/graphblas/utils/multigrid/linearized_halo_ndim_system.hpp
@@ -0,0 +1,559 @@

/*
 * Copyright 2022 Huawei Technologies Co., Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * @file linearized_halo_ndim_system.hpp
 * @author Alberto Scolari (alberto.scolari@huawei.com)
 * Definition of LinearizedHaloNDimSystem.
+ */ + +#ifndef _H_GRB_ALGORITHMS_MULTIGRID_LINEARIZED_HALO_NDIM_SYSTEM +#define _H_GRB_ALGORITHMS_MULTIGRID_LINEARIZED_HALO_NDIM_SYSTEM + +#include +#include +#include +#include +#ifdef _DEBUG +#include +#endif + +#include "array_vector_storage.hpp" +#include "dynamic_vector_storage.hpp" +#include "linearized_halo_ndim_iterator.hpp" +#include "linearized_ndim_system.hpp" +#include "ndim_vector.hpp" + +namespace grb { + namespace utils { + namespace multigrid { + + /** + * Structure to represent an N-dimensional space (or \a system) of given sizes and to + * iterate on both the \a elements of the N-dimensional system and the N-dimensional + * \a neighbors of each element within a given \p halo. This facility takes into account + * the various cases where the element is at the corner, edge or face of the N-dimensional + * system, to which different neighbors correspond. Both elements and their neighbors are + * vectors in the N-dimensional system and as such described via both N-dimensional coordinates + * and a linear coordinate. + * + * This structure returns the number of elements of the underlying N-dimensional system + * (the \a base system) via #base_system_size() and the total sum of neighbors of all + * system elements via #halo_system_size(). + * + * The peculiar feature of this structure is the method #neighbour_linear_to_element(), to translate + * a neighbor index (i.e. a value from \a 0 to #halo_system_size(), uniquely identifying an element + * as neighbor of an element) to the N-dimensional coordinates of the corresponding elements in a time + * that is constant with respect to the input value (it depends on \p DIMS and the halo size). 
* This facility allows the iterators of a LinearizedHaloNDimSystem to be random-access: when advancing
 * an iterator by an \a offset via the \a += method, the logic:
 *
 * - increments the index of the current neighbor (stored inside the iterator) by \a offset, thus
 *  computing the index of the destination neighbor (constant time)
 * - translates the index of the destination neighbor to its base element's coordinates via
 *  #neighbour_linear_to_element() (constant time)
 *
 * The same method also returns the index of the destination neighbor within the sub-space of the base
 * element's neighbors: hence, the logic can compute in constant time the destination base element
 * and its destination neighbor. The constant time of this translation is achieved by pre-computing
 * the number of neighbors for each element along each dimension: for example, inner elements in
 * a 3D mesh with halo 1 have 27 neighbors. Thus, it suffices in principle to divide the neighbor
 * index by 27 to compute the base element of a neighbor. Care must be taken for elements at the
 * sides of each dimension: for example, a corner element on a face has 8 neighbors, while a corner
 * element in an internal slab (a 2D "plane" in a 3D mesh) has 12 neighbors. The pre-computed
 * information and the logic also account for this.
+ * + * @tparam DIMS number of dimensions of the system + * @tparam SizeType type storing the system sizes and offsets + */ + template< + size_t DIMS, + typename SizeType + > class LinearizedHaloNDimSystem : + public LinearizedNDimSystem< SizeType, ArrayVectorStorage< DIMS, SizeType > > { + public: + using VectorType = ArrayVectorStorage< DIMS, SizeType >; + using ConstVectorStorageType = typename VectorType::ConstVectorStorageType; + using SelfType = LinearizedHaloNDimSystem< DIMS, SizeType >; + using BaseType = LinearizedNDimSystem< SizeType, VectorType >; + using Iterator = LinearizedHaloNDimIterator< DIMS, SizeType >; + + /** + * Construct a new LinearizedHaloNDimSystem object with given sizes and halo. + * + * The size of \p sizes must be exactly \p DIMS. Each size must be so that there is at least + * en element in the system with full halo neighors, i.e. for each size \a s + * s >= 2 * halo + 1 (otherwise an exception is thrown). + */ + LinearizedHaloNDimSystem( + ConstVectorStorageType sizes, + SizeType halo + ) : + BaseType( sizes.cbegin(), sizes.cend() ), + _halo( halo ) + { + for( SizeType __size : sizes ) { + if( __size < halo + 1 ) { + throw std::invalid_argument( + std::string( "the halo (" + std::to_string( halo ) + + std::string( ") goes beyond a system size (" ) + + std::to_string( __size ) + std::string( ")" ) ) ); + } + } + + this->_system_size = init_neigh_to_base_search( + this->get_sizes(), _halo, this->_dimension_limits ); + assert( this->_dimension_limits.size() == DIMS ); + } + + LinearizedHaloNDimSystem() = delete; + + LinearizedHaloNDimSystem( const SelfType & ) = default; + + LinearizedHaloNDimSystem( SelfType && ) = delete; + + ~LinearizedHaloNDimSystem() noexcept {} + + SelfType & operator=( const SelfType & ) = default; + + SelfType & operator=( SelfType && ) = delete; + + /** + * Builds an iterator from the beginning of the system, i.e. from vector \a [0,0,...,0]. 
+ * The iterator iterates on each neighbor and allows iterating on each element and on + * its neighbors. + */ + Iterator begin() const { + return Iterator( *this ); + } + + /** + * Build an iterator marking the end of the system; it should not be accessed. + */ + Iterator end() const { + return Iterator::make_system_end_iterator( *this ); + } + + /** + * Returns the size of the entire system, i.e. the number of neighbors of all elements. + */ + size_t halo_system_size() const { + return this->_system_size; + } + + /** + * Returns the size of the base system, i.e. number of elements (not considering neighbors). + */ + size_t base_system_size() const { + return this->BaseType::system_size(); + } + + /** + * Returns the halo size. + */ + size_t halo() const { + return this->_halo; + } + + /** + * Computes the first neighbor and the size of the N-dimensional range of neighbors + * around the given element's coordinates for the system \c this. + * + * @param[in] element_coordinates coordinates of the element to iterate around + * @param[out] neighbors_start first neighbor around \p element_coordinates to iterate from + * @param[out] neighbors_range vector of halos around \p element_coordinates; + * if \p element_coordinates is an inner point, all values equal #halo(), they are smaller + * otherwise (on corner, edge, or face). + */ + void compute_neighbors_range( + const VectorType & element_coordinates, + VectorType & neighbors_start, + VectorType & neighbors_range + ) const noexcept { + compute_first_neigh_and_range( this->get_sizes(), + this->_halo, element_coordinates, neighbors_start, neighbors_range ); + } + + /** + * Maps the linear index \p neighbor_linear of a neighbor to the vector \p base_element_vector + * of the corresponding element \p neighbor_linear is neighbor of, and returns the neighbor's + * number within the sub-space of \p base_element_vector 's neighbors. 
+ * + * @param[in] neighbor_linear linear coordinate of input neighbor + * @param[out] base_element_vector vector of coordinates that identify which element + * \p neighbor_linear is neighbor of + * @return size_t the neighbor number w.r.t. to the corresponding element: if \a e is the system + * element \p neighbor_linear is neighbor of and \a e has \a n neighbors, then the return value + * \a 0<=iget_sizes(), this->_system_size, + this->_dimension_limits, this->_halo, neighbor_linear, base_element_vector ); + } + + private: + const SizeType _halo; + std::vector< NDimVector< SizeType, SizeType, + DynamicVectorStorage< SizeType > > > _dimension_limits; + size_t _system_size; + + /** + * Computes the total number of neighbors along a certain dimension and configuration by accumulating + * the neighbors along the smaller dimensions. + * + * The logic uses this buffer to iterate over the configurations of + * the previous dimension. Example: to compute in 3D the neighbors of an inner row of a face + * (configuration [0,1,0], dimension 1 - y), the logic needs the neighbors of + * en edge element and of an element internal to a face of the mesh, corresponding to + * the configurations [0,1,0] and [1,1,0], respectively. Hence, the caller + * must initialize a buffer with the values [X,1,0] (\a X meaning don't care) and pass + * as \p coords_buffer the pointer to the first position (the \a X ), where this function + * will write all possible values [0, \p halo ) to access the number of neighbors + * of the configurations of the previous dimension via \p prev_neighs and accumulate them. 
+ * + * @param[in] prev_neighs neighbors in the configurations of the previous dimension + * @param[in,out] coords_buffer pointer to the first position of the configuration buffer + * for this dimension + * @param[in] halo halo size + * @param[in] local_size size (i.e., number of elements) along the current dimension, + * including the edges + * @return size_t the total number of neighbors for this configuration and this dimension + */ + static size_t accumulate_dimension_neighbours( + const NDimVector< SizeType, SizeType, DynamicVectorStorage< SizeType > > & prev_neighs, + SizeType * coords_buffer, + size_t halo, + size_t local_size + ) { + size_t neighs = 0; + size_t h = 0; + for( ; h < halo && local_size > 1; h++ ) { + *coords_buffer = h; + + const size_t local_neighs = prev_neighs.at( coords_buffer ); + neighs += 2 * local_neighs; // the 2 sides + local_size -= 2; + } + *coords_buffer = h; + neighs += local_size * prev_neighs.at( coords_buffer ); // innermost elements + return neighs; + } + + /** + * Computes the number of neighbors for each configuration along dimension 0: + * corner, edge, face, inner element. + * + * Example: in a 3D system with \p halo = 1, the configurations along dimension 0 are 8: + * 1. z axis - face: + * 1. y axis - top row: corner element (8 neighbors), edge element (12 neighbors) + * 2 y axis - inner row: edge element (12 neighbors), face inner element (18 neighbors) + * 2. z axis - inner slab: + * 1. 
y axis - top row: edge element (12 neighbors), face inner element (18 neighbors) + * 2 y axis - inner row: face inner element (18 neighbors), inner element (27 neighbors) + * + * @param[in] halo halo size + * @param[out] config_neighbors the storage object for each configuration + */ + static void compute_dim0_neighbors( + size_t halo, + NDimVector< SizeType, SizeType, DynamicVectorStorage< SizeType > > & config_neighbors + ) { + using it_type = typename NDimVector< SizeType, SizeType, + DynamicVectorStorage< SizeType > >::DomainIterator; + it_type end = config_neighbors.domain_end(); + for( it_type it = config_neighbors.domain_begin(); it != end; ++it ) { + size_t res = 1; + for( size_t h : it->get_position() ) + res *= ( h + 1 + halo ); + config_neighbors.at( it->get_position() ) = res; + } + } + + /** + * Initializes the search space of neighbors for the -> translation. + * + * This function populates an std::vector<> with the number of neighors for each dimension + * and each configuration (corner, edge, face, inner). 
* Along each dimension \a d, it stores an \a n -dimensional vector
	 * NDimVector< SizeType, SizeType, DynamicVectorStorage< SizeType > > (n = 2 ^ d) with all
	 * possible numbers of neighbors along that dimension, depending on the position of the element
	 * (corner, edge, face, inner volume); for example, for 3 dimensions:
	 * - dimension 2 (z axis) moves along "slabs" of a 3D system, where the total number of neighbors
	 *  depends on whether the slab is a face of the mesh or an internal slab (2 possible configurations:
	 *  face slabs or inner slabs)
	 * - dimension 1 (y axis) moves along "rows" within each slab, whose total number of neighbors
	 *  depends on whether the row is at the extreme sides (top or bottom of the face) or inside;
	 *  in turn, each type of slab has different geometry (face slabs comprise mesh corners, edges and
	 *  faces, while inner slabs comprise edges, faces and inner elements), thus resulting in
	 *  2*2 different configurations of dimension-1 total neighbors
	 * - dimension 0 (x axis) moves along "column" elements within each row, where the first (or last)
	 *  column has a different number of neighbors than the inner ones; here again there are two
	 *  configurations for each dimension-1 configuration, leading to a total of 8 dimension-0
	 *  configurations
	 * Within each dimension \a d, each configuration (as per the above explanation) can be identified
	 * via a vector of <em>N - d</em> coordinates; to limit the data storage, every dimension stores the
	 * total number of neighbors only at the first side and inside, since the second side is identical
	 * to the first one: for example, along the z axis the first and last slab (those on the two extremes)
	 * have the same size, and only one is stored. Therefore, with halo = 1 a vector identifying
	 * a configuration is composed only of 0s and 1s. For example, the vector <em>[0,1,0]</em> identifies:
	 * - rightmost 0 (z axis): first (or last) slab, i.e. face slab
	 * - (middle) 1 (y axis): inner row
	 * - leftmost 0 (x axis): first (or last) element, i.e.
on the edge of the mesh + * In a 3D space with halo = 1, this element has 12 neighbors (it is on the edge of a face). + * + * @paragraph[in] vector of sizes sizes of the N-dimensional system + * @param[in] halo halo size + * @param[out] dimension_limits the std::vector<> with the neighbors information for each dimension + * and each configuration + * @return size_t the number of neighbors of the entire system + */ + static size_t init_neigh_to_base_search( + typename LinearizedNDimSystem< SizeType, ArrayVectorStorage< DIMS, SizeType > + >::ConstVectorReference sizes, + size_t halo, + std::vector< NDimVector< SizeType, SizeType, DynamicVectorStorage< SizeType > > > & dimension_limits + ) { + using nd_vec = NDimVector< SizeType, SizeType, DynamicVectorStorage< SizeType > >; + using nd_vec_iterator = typename nd_vec::DomainIterator; + + std::vector< size_t > halo_sizes( DIMS, halo + 1 ); + dimension_limits.emplace_back( halo_sizes ); + // initialize values + compute_dim0_neighbors( halo, dimension_limits[ 0 ] ); + for( size_t i = 1; i < DIMS; i++ ) { + std::vector< size_t > halos( DIMS - i, halo + 1 ); + dimension_limits.emplace_back( halos ); + } + + std::array< SizeType, DIMS > prev_coords_buffer; // store at most DIMS values + SizeType * const prev_coords = prev_coords_buffer.data(); + SizeType * const second = prev_coords + 1; // store previous coordinates from second position + for( size_t dimension = 1; dimension < DIMS; dimension++ ) { + const nd_vec & prev_neighs { dimension_limits[ dimension - 1 ] }; + nd_vec & current_neighs { dimension_limits[ dimension ] }; + + nd_vec_iterator end = current_neighs.domain_end(); + for( nd_vec_iterator it = current_neighs.domain_begin(); it != end; ++it ) { + typename nd_vec::ConstDomainVectorReference current_halo_coords = it->get_position(); + + std::copy( it->get_position().cbegin(), it->get_position().cend(), second ); + size_t local_size = sizes[ dimension - 1 ]; + const size_t neighs = accumulate_dimension_neighbours( 
prev_neighs, + prev_coords, halo, local_size ); + current_neighs.at( current_halo_coords ) = neighs; + } + } + return accumulate_dimension_neighbours( dimension_limits[ DIMS - 1 ], + prev_coords, halo, sizes.back() ); + } + + /** + * For the given system (with sizes \p _system_sizes), the given halo size \p halo, + * the given element's coordinates \p element_coordinates, computes the coordinates + * of the first neighbor of \p element_coordinates into \p neighbors_start (within the main system) + * and the range of neighbors of \p element_coordinates, i.e. the sub-space of neighbors of + * \p element_coordinates; hence, \p neighbors_range stores at most 2 *<\em> \p halo + * + 1 per coordinate. + * + * @param[in] _system_sizes sizes of the N-dimensional system + * @param[in] halo halo size + * @param[in] element_coordinates coordinates of the considered element + * @param[out] neighbors_start stores the (absolute) coordinates of the first neighbor + * of \p element_coordinates + * @param[out] neighbors_range stores the range of neighbors around \p element_coordinates + */ + static void compute_first_neigh_and_range( + const ArrayVectorStorage< DIMS, SizeType > & _system_sizes, + const SizeType halo, + const ArrayVectorStorage< DIMS, SizeType > & element_coordinates, + ArrayVectorStorage< DIMS, SizeType > & neighbors_start, + ArrayVectorStorage< DIMS, SizeType > & neighbors_range + ) { + for( SizeType i = 0; i < DIMS /* - 1*/; i++ ) { + const SizeType start = element_coordinates[ i ] <= halo ? 
0 : + element_coordinates[ i ] - halo; + const SizeType end = std::min( element_coordinates[ i ] + halo, _system_sizes[ i ] - 1 ); + neighbors_start[ i ] = start; + neighbors_range[ i ] = end - start + 1; + } + } + +#ifdef _DEBUG + template< typename IterType > + static std::ostream & print_sequence( IterType begin, IterType end ) { + for( ; begin != end; ++begin ) { + std::cout << *begin << ' '; + } + return std::cout; + } +#endif + + /** + * Maps a neighbor's linear coordinate \p neighbor_linear to the element \p element_vector it is + * neighbor of and also returns the neighbor index of \p neighbor_linear within the sub-space + * of \p element_vector's neighbors. + * + * @param[in] sizes main system sizes along all dimensions + * @param[in] system_size total size of the neighbors system, i.e. the total number of neighbors + * @param[in] neighbors_per_dimension along each dimension \a d, it stores an \a n -dimensional vector + * NDimVector> (n = 2 ^ d) with all + * possible numbers of neighbors along that dimension, depending on the position of the element + * (corner, edge, face, inner volume) + * @param[in] halo halo size + * @param[in] neighbor_linear linear coordinate of the neighbor + * @param[out] element_vector coordinates vector representing the element \p neighbor_linear is + * neighbor of + * @return size_t the index of the neighbor within the element's neighbors + */ + static size_t map_neigh_to_base_and_index( + const std::array< SizeType, DIMS > & sizes, + size_t system_size, + const std::vector< NDimVector< SizeType, SizeType, DynamicVectorStorage< SizeType > > + > & neighbors_per_dimension, + SizeType halo, + SizeType neighbor_linear, + ArrayVectorStorage< DIMS, SizeType > & element_vector + ) { + if( neighbor_linear > system_size ) { + throw std::invalid_argument( "neighbor number ( " + std::to_string( neighbor_linear ) + + " ) >= system size ( " + std::to_string( system_size ) + " )" ); + } + ArrayVectorStorage< DIMS, SizeType > configuration( 
DIMS ); +#ifdef _DEBUG + size_t * const halo_coords_end = configuration.data() + DIMS; +#endif + std::fill_n( configuration.begin(), DIMS, 0 ); + + for( size_t _dim = DIMS; _dim > 0; _dim-- ) { + // each iteration looks for the base element along a dimension via the number of neighbors + // each element has: once previous_neighs reaches neighbor_linear, the corresponding + // base element is found; if the control reaches the end, this means it must explore + // the following dimension to find the base element: this is why dimensions are explored + // starting from the highest, because moving along a higher dimension means "skipping" + // more neighbors; then the search "zooms in"to a smaller dimension to find the base element + + // start from highest dimension + const size_t dimension = _dim - 1; + // how many elements along this dimension + const size_t dimension_size = sizes[ dimension ]; + // configurations of neighbors along this dimension + // (e.g., corner, edge; or edge, inner element) + const NDimVector< SizeType, SizeType, DynamicVectorStorage< SizeType > > & neighbors = + neighbors_per_dimension[ dimension ]; + + // coordinate to modify to identify each configuration + SizeType * const halo_coords_begin = configuration.data() + dimension; +#ifdef _DEBUG + std::cout << "DIMENSION " << dimension << std::endl << "- setup - neighbour " + << neighbor_linear << std::endl << "\thalo : "; + print_sequence( halo_coords_begin, halo_coords_end ) << std::endl; +#endif + size_t h = 0; // configuration type along this dimension + size_t previous_neighs = 0; + *halo_coords_begin = h; + // account for neighbors in the first elements along the dimension, within halo distance: + // these elements have a number of neighbors that depends on the distance h + // and on the configuration + size_t halo_max_neighs = neighbors.at( halo_coords_begin ); + while( h < halo && neighbor_linear >= previous_neighs + halo_max_neighs ) { + h++; + *halo_coords_begin = h; + previous_neighs 
+= halo_max_neighs; + halo_max_neighs = neighbors.at( halo_coords_begin ); + } +#ifdef _DEBUG + std::cout << "- initial halo - neighbour " << neighbor_linear + << std::endl << "\th " << h << std::endl << "\thalo : "; + print_sequence( halo_coords_begin, halo_coords_end ) << std::endl; + std::cout << "\thalo_max_neighs " << halo_max_neighs << std::endl; +#endif + if( h < halo ) { + // we have already counted enough neighbors: neighbor_linear is thus a neighbor + // of one of the first (< halo) elements along this dimension: go to next dimension + element_vector[ dimension ] = h; + neighbor_linear -= previous_neighs; +#ifdef _DEBUG + std::cout << "end neighbour " << neighbor_linear << std::endl; +#endif + continue; + } + // saturation occurred: the base element is beyond the halo: go on with the search + + // inner elements have the same number of neighbors halo_max_neighs: compute + // the base element via division + const size_t distance_from_halo = ( neighbor_linear - previous_neighs ) / halo_max_neighs; +#ifdef _DEBUG + std::cout << "- before middle elements - neighbour " << neighbor_linear << std::endl + << "\tprevious_neighs " << previous_neighs << std::endl + << "\thalo_max_neighs " << halo_max_neighs << std::endl + << "\tdistance_from_halo " << distance_from_halo << std::endl + << "\tdimension_size " << dimension_size << std::endl; +#endif + if( distance_from_halo < dimension_size - 2 * halo ) { + // the base element is one of the internal elements along this dimension: + // hence return its diatance from the halo + the halo itself (= distance from + // beginning of the space) + element_vector[ dimension ] = distance_from_halo + halo; + neighbor_linear -= ( previous_neighs + distance_from_halo * halo_max_neighs ); +#ifdef _DEBUG + std::cout << "end neighbour " << neighbor_linear << std::endl; +#endif + continue; + } + // base element is even beyond inner elements, it might be among the elements at the end, + // which also have different numbers of neighbors 
(specular to initial elements) + previous_neighs += ( dimension_size - 2 * halo ) * halo_max_neighs; +#ifdef _DEBUG + std::cout << "- after middle elements -neighbour " << neighbor_linear << std::endl; + std::cout << "\tprevious_neighs " << previous_neighs << std::endl; + std::cout << "\thalo_max_neighs " << halo_max_neighs << std::endl; +#endif + // look for base the element at the end of the dimension: specular search to beginning, + // just with h decreasing + h = halo - 1; + *halo_coords_begin = h; + halo_max_neighs = neighbors.at( halo_coords_begin ); + while( h > 0 && neighbor_linear >= previous_neighs + halo_max_neighs ) { + h--; + *halo_coords_begin = h; + previous_neighs += halo_max_neighs; + halo_max_neighs = neighbors.at( halo_coords_begin ); + } + neighbor_linear -= previous_neighs; +#ifdef _DEBUG + std::cout << "- final halo - neighbour " << neighbor_linear << std::endl; + std::cout << "\tadding h " << h << " previous_neighs " << previous_neighs << std::endl; +#endif + // ( dimension_size - 1 ) because coordinates are 0-based and neighbor + // is "inside" range [ previous_neighs, previous_neighs + halo_max_neighs ] + element_vector[ dimension ] = dimension_size - 1 - h; +#ifdef _DEBUG + std::cout << "end neighbour " << neighbor_linear << std::endl; +#endif + } + return neighbor_linear; + } + }; + + } // namespace multigrid + } // namespace utils +} // namespace grb + +#endif // _H_GRB_ALGORITHMS_MULTIGRID_LINEARIZED_HALO_NDIM_SYSTEM diff --git a/include/graphblas/utils/multigrid/linearized_ndim_iterator.hpp b/include/graphblas/utils/multigrid/linearized_ndim_iterator.hpp new file mode 100644 index 000000000..a4ae8af5e --- /dev/null +++ b/include/graphblas/utils/multigrid/linearized_ndim_iterator.hpp @@ -0,0 +1,241 @@ + +/* + * Copyright 2022 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file linearized_ndim_iterator.hpp + * @author Alberto Scolari (alberto.scolari@huawei.com) + * Definition of LinearizedNDimIterator. + */ + +#ifndef _H_GRB_ALGORITHMS_MULTIGRID_NDIM_ITERATOR +#define _H_GRB_ALGORITHMS_MULTIGRID_NDIM_ITERATOR + +#include +#include +#include +#include +#include + +#include + +#include "array_vector_storage.hpp" + +namespace grb { + namespace utils { + namespace multigrid { + + // forward declaration for default + template< + typename SizeType, + typename InternalVectorType + > class LinearizedNDimSystem; + + /** + * Iterator object couled to a LinearizedNDimSystem: each object points to a vector + * in the creating LinearizedNDimSystem#dimensions()-dimensions space, to which also a + * linear position is associated; both the vector and the linear position can be retrieved + * via the \a -> method. + * + * It meets the requirements of a random access iterator. 
+ * + * @tparam SizeType integral type to store the size of each dimension + * @tparam InternalStorageType internal vector type to store the sizes + */ + template< + typename SizeType, + typename InternalVectorType + > class LinearizedNDimIterator { + public: + using VectorType = InternalVectorType; + using LinNDimSysType = LinearizedNDimSystem< SizeType, VectorType >; + using ConstVectorReference = const VectorType &; + using SelfType = LinearizedNDimIterator< SizeType, InternalVectorType >; + + /** + * Structure describing a couple vector/linear coordinate: the vector + * can be obtained via #get_position() while the linear coordinate via + * #get_linear_position(). + */ + struct NDimPoint { + private: + const LinNDimSysType * system; // pointer because of copy assignment + VectorType coords; + + public: + friend SelfType; + + NDimPoint() = delete; + + NDimPoint( const NDimPoint & ) = default; + + NDimPoint( NDimPoint && ) = delete; + + NDimPoint( const LinNDimSysType & _system ) noexcept : + system( &_system ), + coords( _system.dimensions() ) + { + std::fill_n( this->coords.begin(), _system.dimensions(), 0 ); + } + + NDimPoint & operator=( const NDimPoint & ) = default; + + inline ConstVectorReference get_position() const { + return coords; + } + + size_t get_linear_position() const { + return system->ndim_to_linear( coords ); + } + }; + + // interface for std::random_access_iterator + using iterator_category = std::random_access_iterator_tag; + using value_type = NDimPoint; + using pointer = const value_type *; + using reference = const value_type &; + using difference_type = signed long; + + /** + * Construct a new LinearizedNDimIterator object from the original LinNDimSysType + * object, storing the information about system dimensionality and sizes. The referenced + * vector is the first one in the system, i.e. with all coordinates being \a 0. + * + * If \p _system is not a valid object anymore, all iterators created from it are also + * not valid. 
+ */ + LinearizedNDimIterator( const LinNDimSysType & _system ) noexcept : _p( _system ) {} + + /** + * Construct a new LinearizedNDimIterator object from the original LinNDimSysType + * object, storing the information about system dimensionality and sizes. The referenced + * vector is initialized with the coordinates referenced via the iterator \p begin, + * which should have at least \p _system.dimensions() valid successors. + * + * If \p _system is not a valid object anymore, all iterators created from it are also + * not valid. + */ + template< typename IterT > LinearizedNDimIterator( + const LinNDimSysType & _system, + IterT begin + ) noexcept : + _p( _system ) + { + std::copy_n( begin, _system.dimensions(), this->_p.coords.begin() ); + } + + LinearizedNDimIterator() = delete; + + LinearizedNDimIterator( const SelfType & original ) : _p( original._p ) {} + + SelfType & operator=( const SelfType & original ) = default; + + ~LinearizedNDimIterator() {} + + /** + * Moves to the next vector in the multi-dimensional space, corresponding to + * advancing the linear coordinate by 1. + */ + SelfType & operator++() noexcept { + bool rewind = true; + // rewind only the first N-1 coordinates + for( size_t i = 0; i < this->_p.system->dimensions() - 1 && rewind; i++ ) { + SizeType & coord = this->_p.coords[ i ]; + // must rewind dimension if we wrap-around + SizeType plus = coord + 1; + rewind = plus >= this->_p.system->get_sizes()[ i ]; + coord = rewind ? 0 : plus; + } + // if we still have to rewind, increment the last coordinate, which is unbounded + if( rewind ) { + this->_p.coords[ this->_p.system->dimensions() - 1 ]++; + } + return *this; + } + + /** + * Moves \p _offset vectors ahead in the multi-dimensional space, corresponding to + * advancing the linear coordinate by \p _offset. + * + * If the destination vector is outside of the system (i.e. 
the corresponding + * linear coordinate is beyond the underlying LinearizedNDimSystem#system_size()), + * an exception is thrown. + */ + SelfType & operator+=( size_t offset ) { + size_t linear = _p.get_linear_position() + offset; + if( linear > _p.system->system_size() ) { + throw std::invalid_argument( "increment is too large" ); + } + if( offset == 1 ) { + return operator++(); + } + _p.system->linear_to_ndim( linear, _p.coords ); + return *this; + } + + /** + * Returns the difference between \p _other and \c this in the linear space. + * + * It throws if the result cannot be stored as a difference_type variable. + */ + difference_type operator-( const SelfType & other ) const { + return grb::utils::compute_signed_distance< difference_type, SizeType >( + _p.get_linear_position(), other._p.get_linear_position() ); + } + + reference operator*() const { + return this->_p; + } + + pointer operator->() const { + return &( this->_p ); + } + + bool operator!=( const SelfType & o ) const { + const size_t dims = this->_p.system->dimensions(); + if( dims != o._p.system->dimensions() ) { + throw std::invalid_argument( "system sizes do not match" ); + } + bool equal = true; + for( size_t i = 0; i < dims && equal; i++ ) { + equal &= ( this->_p.coords[ i ] == o._p.coords[ i ] ); + } + return ! equal; + } + + /** + * Facility to build an end iterator. + * + * Its implementation depending on the logic in operator++. 
+ */ + static SelfType make_system_end_iterator( const LinNDimSysType & _system ) { + // fill with 0s + SelfType iter( _system ); + size_t last = iter->system->dimensions() - 1; + // store last size in last position + iter._p.coords[ last ] = iter->system->get_sizes()[ last ]; + return iter; + } + + private: + NDimPoint _p; + }; + + } // namespace multigrid + } // namespace utils +} // namespace grb + +#endif // _H_GRB_ALGORITHMS_MULTIGRID_NDIM_ITERATOR diff --git a/include/graphblas/utils/multigrid/linearized_ndim_system.hpp b/include/graphblas/utils/multigrid/linearized_ndim_system.hpp new file mode 100644 index 000000000..c4b62707a --- /dev/null +++ b/include/graphblas/utils/multigrid/linearized_ndim_system.hpp @@ -0,0 +1,287 @@ + +/* + * Copyright 2022 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file linearized_ndim_system.cpp + * @author Alberto Scolari (alberto.scolari@huawei.com) + * Definition of \p LinearizedNDimSystem. + */ + +#ifndef _H_GRB_ALGORITHMS_MULTIGRID_NDIM_SYSTEM_LINEARIZER +#define _H_GRB_ALGORITHMS_MULTIGRID_NDIM_SYSTEM_LINEARIZER + +#include +#include +#include +#include +#include +#include +#include + +#include "linearized_ndim_iterator.hpp" +#include "ndim_system.hpp" + +namespace grb { + namespace utils { + namespace multigrid { + + /** + * Extends an NDimSystem by linearizing it, i.e. 
it provides facilities to map a vector in + * NDimSystem#dimensions() dimensions to a linear value ranging from \a 0 to #system_size() (excluded) + * and vice versa. Such a linearized representation allows user logic to iterate over the system: + * iterators are indeed available via #begin()/#end(). Consecutive system elements along dimension 0 + * are mapped to consecutive linear values, while elements consecutive along dimension 1 + * are mapped at offset #get_offsets()[1] = #get_sizes()[0], elements along dimension 2 + * are mapped at offset #get_offsets()[2] = #get_sizes()[0] * #get_sizes()[0], and so on. + * + * Further facilities are methods to map users' vectors from linear to NDimSystem#dimensions()-dimensional + * or vice versa and also to "retaget" the system, i.e. to represent a system of same dimensionality + * but different sizes; this last feature is a mere performance optimization aimed at + * reusing existing objects instead of deleting them and allocating new memory. + * + * @tparam SizeType integral type to store the size of each dimension + * @tparam InternalStorageType internal vector type to store the sizes + */ + template< + typename SizeType, + typename InternalVectorType + > class LinearizedNDimSystem : public NDimSystem< SizeType, InternalVectorType > { + public: + static_assert( std::is_integral< SizeType >::value, "SizeType must be an integral type" ); + + using BaseType = NDimSystem< SizeType, InternalVectorType >; + using SelfType = LinearizedNDimSystem< SizeType, InternalVectorType >; + using VectorType = typename BaseType::VectorType; + using VectorReference = typename BaseType::VectorReference; + using ConstVectorReference = typename BaseType::ConstVectorReference; + using VectorStorageType = typename VectorType::VectorStorageType; + using ConstVectorStorageType = typename VectorType::ConstVectorStorageType; + using Iterator = LinearizedNDimIterator< SizeType, InternalVectorType >; + + /** + * Construct a new LinearizedNDimSystem 
object from an iterable range, + * where each iterator's position stores the size along each dimension; example: + * *begin is the size along dimension 0, *(++begin) is the size along dimension 1 ... + */ + template< typename IterT > + LinearizedNDimSystem( + IterT begin, + IterT end + ) noexcept : + BaseType( begin, end ), + _offsets( std::distance( begin, end ) ) + { + this->_system_size = compute_range_product( begin, end, this->_offsets.begin() ); + } + + /** + * Construct a new LinearizedNDimSystem object with dimensions \p _sizes.size() + * and sizes stored in \p _sizes. + */ + LinearizedNDimSystem( const std::vector< size_t > & _sizes ) noexcept : + LinearizedNDimSystem( _sizes.cbegin(), _sizes.cend() ) {} + + /** + * Construct a new LinearizedNDimSystem object with \p _dimensions dimensions + * and sizes all equal to \p max_value. + */ + LinearizedNDimSystem( + size_t _dimensions, + size_t _size + ) noexcept : + BaseType( _dimensions, _size ), + _offsets( _dimensions ), + _system_size( _dimensions ) + { + SizeType v = 1; + for( size_t i = 0; i < _dimensions; i++ ) { + this->_offsets[ i ] = v; + v *= _size; + } + this->_system_size = v; + } + + LinearizedNDimSystem() = delete; + + LinearizedNDimSystem( const SelfType & original ) = default; + + LinearizedNDimSystem( SelfType && original ) noexcept : + BaseType( std::move( original ) ), + _offsets( std::move( original._offsets ) ), + _system_size( original._system_size ) + { + original._system_size = 0; + } + + ~LinearizedNDimSystem() {} + + SelfType & operator=( const SelfType & ) = default; + + SelfType & operator=( SelfType && original ) = delete; + + /** + * Computes the size of the system, i.e. its number of elements; + * this corresponds to the product of the sizes along all dimensions. + */ + inline size_t system_size() const { + return this->_system_size; + } + + /** + * Get the offsets of the system, i.e. by how many linear elements moving along + * a dimension corresponds to. 
+ */ + inline ConstVectorReference get_offsets() const { + return this->_offsets; + } + + /** + * Computes the #dimensions()-dimensions vector the linear value in input corresponds to. + * + * @param[in] linear linear index + * @param[out] output output vector \p linear corresponds to + */ + void linear_to_ndim( + size_t linear, + VectorReference output + ) const { + if( linear > this->_system_size ) { + throw std::range_error( "linear value beyond system" ); + } + for( size_t _i = this->_offsets.dimensions(); _i > 0; _i-- ) { + const size_t dim = _i - 1; + const size_t coord = linear / this->_offsets[ dim ]; + output[ dim ] = coord; + linear -= ( coord * this->_offsets[ dim ] ); + } + assert( linear == 0 ); + } + + /** + * Computes the linear value the input vector corresponds to; this method takes in input + * a const reference to \p InternalVectorType and checks whether each value in the input + * vector \p ndim_vector is within the system sizes (otherwise it throws). + */ + size_t ndim_to_linear_check( ConstVectorReference ndim_vector ) const { + return this->ndim_to_linear_check( ndim_vector.storage() ); + } + + /** + * Computes the linear value the input vector corresponds to; this method takes in input + * a const reference to the underlying storage of \p InternalVectorType and checks + * whether each value in the input vector \p ndim_vector is within the system sizes + * (otherwise it throws). 
+ */ + size_t ndim_to_linear_check( ConstVectorStorageType ndim_vector ) const { + size_t linear = 0; + for( size_t i = 0; i < this->dimensions(); i++ ) { + if( ndim_vector[ i ] >= this->get_sizes()[ i ] ) { + throw std::invalid_argument( "input vector beyond system sizes" ); + } + } + return ndim_to_linear( ndim_vector ); + } + + /** + * Computes the linear value the input vector corresponds to; this method takes in input + * a const reference to \p InternalVectorType but does not check whether each value in the input + * vector \p ndim_vector is within the system sizes. + */ + size_t ndim_to_linear( ConstVectorReference ndim_vector ) const { + return this->ndim_to_linear( ndim_vector.storage() ); + } + + /** + * Computes the linear value the input vector corresponds to; this method takes in input + * a const reference to the underlying storage of \p InternalVectorType but does not check + * whether each value in the input vector \p ndim_vector is within the system sizes. + */ + size_t ndim_to_linear( ConstVectorStorageType ndim_vector ) const { + size_t linear = 0; + for( size_t i = 0; i < this->dimensions(); i++ ) { + linear += this->_offsets[ i ] * ndim_vector[ i ]; + } + return linear; + } + + // must be same dimensionality + /** + * Retargets the current object to describe a system with the same number of dimensions + * and sizes \p _new_sizes. If the number of dimensions of \p _new_sizes does not match + * #dimensions(), an exception is thrown. 
+ */ + void retarget( ConstVectorReference _new_sizes ) { + if( _new_sizes.dimensions() != this->_sizes.dimensions() ) { + throw std::invalid_argument( "new system must have same dimensions as previous: new " + + std::to_string( _new_sizes.dimensions() ) + ", old " + + std::to_string( this->_sizes.dimensions() ) ); + } + this->_sizes = _new_sizes; // copy + this->_system_size = compute_range_product( _new_sizes.begin(), _new_sizes.end(), + this->_offsets.begin() ); + } + + /** + * Returns a beginning iterator to the #dimensions()-dimensional system \c this describes. + * The provided iterator references a system point, described both via its #dimensions()-dimensional + * coordinates and via a linear value from \a 0 to #system_size() (excluded). + */ + Iterator begin() const { + return Iterator( *this ); + } + + /** + * Return an iterator to the end of the system; this iterator should not be + * referenced nor incremented. + */ + Iterator end() const { + return Iterator::make_system_end_iterator( *this ); + } + + private: + VectorType _offsets; + size_t _system_size; + + /** + * Incrementally computes the product of the input iterator's range, storing each value + * into the position pointed to the output iterator; the accumulation starts from 1 + * (also the first output values), and the last accumulated value is returned directly + * (and not stored). This assumes that the output container can store at least as many values + * as in the input range. 
+ */ + template< + typename IterIn, + typename IterOut + > static size_t compute_range_product( + IterIn in_begin, + IterIn in_end, + IterOut out_begin + ) { + size_t prod = 1; + for( ; in_begin != in_end; ++in_begin, ++out_begin ) { + *out_begin = prod; + prod *= *in_begin; + } + return prod; + } + }; + + } // namespace multigrid + } // namespace utils +} // namespace grb + +#endif // _H_GRB_ALGORITHMS_MULTIGRID_NDIM_SYSTEM_LINEARIZER diff --git a/include/graphblas/utils/multigrid/ndim_system.hpp b/include/graphblas/utils/multigrid/ndim_system.hpp new file mode 100644 index 000000000..5df62ace2 --- /dev/null +++ b/include/graphblas/utils/multigrid/ndim_system.hpp @@ -0,0 +1,118 @@ + +/* + * Copyright 2022 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file ndim_system.cpp + * @author Alberto Scolari (alberto.scolari@huawei.com) + * Definition of NDimSystem. + */ + +#ifndef _H_GRB_ALGORITHMS_MULTIGRID_NDIM_SYSTEM +#define _H_GRB_ALGORITHMS_MULTIGRID_NDIM_SYSTEM + +#include +#include +#include +#include + +namespace grb { + namespace utils { + namespace multigrid { + + /** + * Describes a #dimensions()-dimensional system by storing its size along each dimension. + * + * It is meant to represent a grid of #dimensions() dimensions and size #get_sizes()[d] + * for each dimension \a d in the interval [0, #dimensions())<\em>. 
/**
 * Stores the size of a grid along each of its #dimensions() dimensions.
 *
 * The size along dimension \a d, with \a d in the interval
 * [0, #dimensions()), is available as #get_sizes()[d].
 *
 * @tparam SizeType integral type to store the size of each dimension
 * @tparam InternalVectorType internal vector type to store the sizes; this
 *         class constructs it from a number of dimensions and calls its
 *         \a begin() and \a dimensions() methods
 */
template<
	typename SizeType,
	typename InternalVectorType
> class NDimSystem {
public:
	static_assert( std::is_integral< SizeType >::value, "SizeType must be an integral type" );

	typedef InternalVectorType VectorType;
	typedef VectorType & VectorReference;
	typedef const VectorType & ConstVectorReference;
	typedef NDimSystem< SizeType, InternalVectorType > SelfType;

	// no default construction and no moves: only copies are allowed
	NDimSystem() = delete;

	NDimSystem( SelfType && ) = delete;

	SelfType & operator=( SelfType && original ) = delete;

	NDimSystem( const SelfType & ) = default;

	SelfType & operator=( const SelfType & original ) = default;

	/**
	 * Builds the system from the range [\p begin, \p end), each value being
	 * the size along one dimension, in order.
	 *
	 * The number of dimensions is \a std::distance(begin,end), hence a
	 * random-access iterator is advisable for performance.
	 *
	 * @tparam IterType iterator type
	 * @param begin range begin
	 * @param end end of range
	 */
	template< typename IterType >
	NDimSystem( IterType begin, IterType end ) noexcept :
		_sizes( std::distance( begin, end ) )
	{
		auto destination = this->_sizes.begin();
		for( IterType source = begin; source != end; ++source, ++destination ) {
			*destination = *source;
		}
	}

	/**
	 * Builds the system from an std::vector<>: its length is the number of
	 * dimensions and its values are the sizes.
	 */
	NDimSystem( const std::vector< size_t > & _sizes ) noexcept :
		NDimSystem( _sizes.cbegin(), _sizes.cend() ) {}

	/**
	 * Builds a system with \p _dimensions dimensions, all of size \p max_size.
	 */
	NDimSystem( size_t _dimensions, size_t max_size ) noexcept : _sizes( _dimensions ) {
		auto slot = this->_sizes.begin();
		for( size_t filled = 0; filled < _dimensions; ++filled, ++slot ) {
			*slot = max_size;
		}
	}

	/**
	 * Sizes of the system, as a const reference to the internal vector.
	 */
	inline ConstVectorReference get_sizes() const noexcept {
		return _sizes;
	}

	/**
	 * Number of dimensions of the system.
	 */
	inline size_t dimensions() const noexcept {
		return this->_sizes.dimensions();
	}

protected:
	InternalVectorType _sizes;
};
+ */ + +/** + * @file ndim_vector.hpp + * @author Alberto Scolari (alberto.scolari@huawei.com) + * Definition of NDimVector. + */ + +#ifndef _H_GRB_ALGORITHMS_MULTIGRID_NDIM_VECTOR +#define _H_GRB_ALGORITHMS_MULTIGRID_NDIM_VECTOR + +#include +#include +#include +#include +#include + +#include "linearized_ndim_system.hpp" + +namespace grb { + namespace utils { + namespace multigrid { + + /** + * Maps an N-dimensional vector to an array of data. + * + * The user constructs an object by passing the sizes (as an N-dimensional vector) + * of the iteration space and accesses the stored data via an N-dimensional vector of coordinates. + * + * Example: if the user constructs an NDimVector with 3D sizes \a [2,3,4], she can access data + * via a 3D coordinates vector of ranges \a [0-1]x[0-2]x[0-3] (here \a x denoting the cartesian product) + * by using the #at() method. + * + * This facility allows associating a value of type \p DataType to, for example, + * each element of an N-dimensional grid. 
+ * + * @tparam DataType type of data stored in the array + * @tparam SizeType type for the components of the N-dimensional vector: + * the maximum number of stored data is thus \f$ std::numeric_limits::max()^N \f$ + * @tparam InternalVectorType storage type of the internal N-dimensional vector + */ + template< + typename DataType, + typename SizeType, + typename InternalVectorType + > class NDimVector { + public: + static_assert( std::is_default_constructible< DataType >::value, + "the stored type is not default constructible" ); + static_assert( std::is_integral< SizeType >::value, "SizeType must be integral" ); + + using ConstDomainVectorReference = typename LinearizedNDimSystem< SizeType, + InternalVectorType >::ConstVectorReference; + using ConstDomainVectorStorageType = typename InternalVectorType::ConstVectorStorageType; + using DomainIterator = typename LinearizedNDimSystem< SizeType, InternalVectorType >::Iterator; + using Selftype = NDimVector< DataType, SizeType, InternalVectorType >; + + NDimVector() = delete; + + /** + * Construct a new NDimVector object with sizes read from the iteration range + * and number of dimensions equal to the range distance; the data values are + * \b not initialized. + */ + template< typename IterT > NDimVector( + IterT begin, + IterT end + ) : _linearizer( begin, end ) { + this->data = new DataType[ _linearizer.system_size() ]; + } + + /** + * Construct a new NDimVector object with sizes read from the \p _sizes + * and number of dimensions equal to \p _sizes.size(); the data values are + * \b not initialized. 
+ */ + NDimVector( const std::vector< size_t > & _sizes ) : + NDimVector( _sizes.cbegin(), _sizes.cend() ) {} + + NDimVector( const Selftype & original ) : + _linearizer( original._linearizer ), + data( new DataType[ original.data_size() ] ) + { + std::copy_n( original.data, original.data_size(), this->data ); + } + + NDimVector( Selftype && original ) noexcept : + _linearizer( std::move( original._linearizer ) ) { + this->data = original.data; + original.data = nullptr; + } + + Selftype & operator=( const Selftype & original ) = delete; + + Selftype & operator=( Selftype && original ) = delete; + + ~NDimVector() { + this->clean_mem(); + } + + /** + * Number of dimensions of the underlying geometrical space. + */ + size_t dimensions() const { + return this->_linearizer.dimensions(); + } + + /** + * Size of the the underlying geometrical space, i.e. number of stored data elements. + */ + size_t data_size() const { + return this->_linearizer.system_size(); + } + + /** + * Access the data element at N-dimension coordinate given by the iterable + * \p coordinates. + */ + inline DataType & at( ConstDomainVectorReference coordinates ) { + return this->data[ this->get_coordinate( coordinates.storage() ) ]; + } + + /** + * Const-access the data element at N-dimension coordinate given by the iterable + * \p coordinates. + */ + inline const DataType & at( ConstDomainVectorReference coordinates ) const { + return this->data[ this->get_coordinate( coordinates.storage() ) ]; + } + + /** + * Access the data element at N-dimension coordinate given by the vector + * storage object \p coordinates. + */ + inline DataType & at( ConstDomainVectorStorageType coordinates ) { + return this->data[ this->get_coordinate( coordinates ) ]; + } + + /** + * Const-access the data element at N-dimension coordinate given by the vector + * storage object \p coordinates. 
+ */ + inline const DataType & at( ConstDomainVectorStorageType coordinates ) const { + return this->data[ this->get_coordinate( coordinates ) ]; + } + + /** + * Returns an iterator to the beginning of the N-dimensional underlyign space, + * i.e. a vector \a [0,0,0,...,0]. + */ + DomainIterator domain_begin() const { + return this->_linearizer.begin(); + } + + /** + * Returns an iterator to the end of the N-dimensional underlyign space. + * This iterator should not be referenced nor incremented. + */ + DomainIterator domain_end() const { + return this->_linearizer.end(); + } + + private: + const LinearizedNDimSystem< SizeType, InternalVectorType > _linearizer; + DataType * data; + + inline size_t get_coordinate( ConstDomainVectorStorageType coordinates ) const { + return this->_linearizer.ndim_to_linear( coordinates ); + } + + inline size_t get_coordinate( DomainIterator coordinates ) const { + return this->_linearizer.ndim_to_linear( coordinates ); + } + + void clean_mem() { + if( this->data == nullptr ) { + delete[] this->data; + } + } + }; + + } // namespace multigrid + } // namespace utils +} // namespace grb + +#endif // _H_GRB_ALGORITHMS_MULTIGRID_NDIM_VECTOR diff --git a/include/graphblas/utils/telemetry/CSVWriter.hpp b/include/graphblas/utils/telemetry/CSVWriter.hpp new file mode 100644 index 000000000..d92d5efd1 --- /dev/null +++ b/include/graphblas/utils/telemetry/CSVWriter.hpp @@ -0,0 +1,354 @@ + +/* + * Copyright 2023 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file CSVWriter.hpp + * @author Alberto Scolari (alberto.scolar@huawei.com) + * + * Definition for the CSVWriter class. + */ + +#ifndef _H_GRB_UTILS_TELEMETRY_CSV_WRITER +#define _H_GRB_UTILS_TELEMETRY_CSV_WRITER + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "TelemetryBase.hpp" + +namespace grb { + namespace utils { + namespace telemetry { + + /// standard CSV separator + static constexpr char STD_CSV_SEP = ','; + + template< class U, class... Us > + struct __is_csv_printable { + static constexpr bool value = std::is_arithmetic< U >::value; + }; + + template< class U1, class U2, class... Us > + struct __is_csv_printable< U1, U2, Us... > { + static constexpr bool value = __is_csv_printable< U1 >::value + && __is_csv_printable< U2, Us... >::value; + }; + + /** + * Class to store numerical information in form of lines and emit it as a CSV, with + * heading, field separator and newlines. + * + * The user should add an line at once via #add_line( UTypes && ) and can + * then output it to an \a std::ostream or a file, together with the + * heading specified at construction. The output is a fully compliant CSV file + * that can be read by common tools like spreadsheets and parsers (e.g. Pandas, + * https://pandas.pydata.org/). This class allows easily emitting telemetry + * information and importing them into advanced tools for thourough analysis. + * + * This implementation assumes telemetry is enabled, since a specialization for + * disabled telemetry follows. + * It internally allocates memory dynamically to store the lines. + * Only numerical information can be stored. 
+ * + * @tparam TelControllerType type for the telemetry controller + * @tparam enabled whether telemetry is enabled + * @tparam T1 numerical type of the first field to store (at least one is required) + * @tparam Ts numerical types of the following fields to store + */ + template< + typename TelControllerType, + bool enabled, + class T1, + class... Ts + > class CSVWriter : + public TelemetryBase< TelControllerType, enabled > { + public: + static_assert( __is_csv_printable< T1, Ts... >::value, "not all types are printable" ); + + using self_t = CSVWriter< TelControllerType, enabled, T1, Ts... >; + + using base_t = TelemetryBase< TelControllerType, enabled >; + + class CSVLastTuple { + public: + CSVLastTuple( const self_t & _csv ) : csv( _csv ) {} + + CSVLastTuple( const CSVLastTuple & clt ) : csv( clt.csv ) {} + + inline friend std::ostream & operator<<( std::ostream & stream, const CSVLastTuple & t ) { + return t.csv.write_last_line_to_stream( stream ); + } + + private: + const self_t & csv; + }; + + CSVWriter() = delete; + + /** + * Full constructor for a CSVWriter. + * + * @param tt telemetry controller + * @param _headers CSV headers, whose number must match the number of T types to print + * @param _separator field separator for printing + * @param size hint size for initial memory allocation (dynamic allocation may occur anyway) + */ + CSVWriter( + const TelControllerType & tt, + std::initializer_list< const char * > _headers, + char _separator, + size_t size + ) : + base_t( tt ), + separator( _separator ) + { + if( _headers.size() != NUM_FIELDS ) { + throw std::runtime_error( "wrong number of headers, it must match the unmber of line elements" ); + } + // emplace anyway, so that the object is always in a consistent state and can be + // activated/deactivated at runtime + for( const auto & h : _headers ) { + headers.emplace_back( h ); + } + if( ! 
tt.is_active() ) { + return; + } + lines.reserve( size ); + // zero to force physical allocation + // std::memset( reinterpret_cast< void * >( lines.data() ), 0, lines.size() * sizeof( tuple_t ) ); + } + + /** + * Construct a new CSVWriter object assuming a comma separator and an initial + * amount of lines to store. + */ + CSVWriter( + const TelControllerType & tt, + std::initializer_list< const char * > _headers + ) : CSVWriter( tt, _headers, STD_CSV_SEP, 10 ) {} + + CSVWriter( const self_t & ) = delete; + + CSVWriter( self_t && ) = delete; + + self_t & operator=( const self_t & ) = delete; + + self_t & operator=( self_t && ) = delete; + + /** + * Add a line to the CSV, i.e., store the numerical information internally. + * + * @tparam UTypes information types whose number must match the number of fields in the CSV; + * these types must also be implicitly convertible to the corresponding T1, Ts... types + */ + template< class... UTypes > + void add_line( UTypes &&... vs ) { + if( this->is_active() ) { + lines.emplace_back( std::forward< UTypes >( vs )... ); + } + } + + /** + * Remove all lines from the CSV. + */ + void clear() { + lines.clear(); + } + + /** + * Emit the last line of the CSV into \p stream as actual text, i.e. with the fields separated. + * Does not print the newline. + * + * If there is no line stored, the behavior is undefined. + * + * @param stream stream to write into + * @return std::ostream& \p stream itself + */ + std::ostream & write_last_line_to_stream( std::ostream & stream ) const { + if( lines.size() > 0 && this->is_active() ) { + write_line( stream, lines.back() ); + } + return stream; + } + + /** + * Returns an object that can be streamed into an std::cout stream via the \a << operator + * in order to print the last line stored. + * + * If there is no line stored, the behavior is undefined. 
+ */ + CSVLastTuple last_line() const { + if( lines.size() == 0 ) { + throw std::runtime_error( "no measures" ); + } + return CSVLastTuple( *this ); + } + + /** + * Write the entire CSV into \p stream, with heading (heading, separated fields with newline). + */ + std::ostream & write_to_stream( std::ostream & stream ) const { + if( ! this->is_active() ) { + return stream; + } + write_header( stream ); + stream << NEW_LINE; + for( const tuple_t & line : lines ) { + write_line( stream, line ); + stream << NEW_LINE; + } + return stream; + } + + /** + * Creates a new file named \p name (or overwrites an existing one) and stores the entire CSV + * into it. + */ + void write_to_file( const char * name ) const { + if( ! this->is_active() ) { + return; + } + std::ofstream file( name ); + if( ! file.is_open() ) { + throw std::runtime_error( "cannot open file" ); + } + write_to_stream( file ); + file.close(); + } + + private: + static constexpr char NEW_LINE = '\n'; + + static constexpr size_t NUM_FIELDS = sizeof...( Ts ) + 1; + + using tuple_t = std::tuple< T1, Ts... 
>; + + std::vector< std::string > headers; + const char separator; + std::vector< tuple_t > lines; + + std::ostream & write_header( std::ostream & stream ) const { + stream << headers[ 0 ]; + for( size_t i = 1; i < headers.size(); i++ ) { + stream << separator << headers[ i ]; + } + return stream; + } + + void write_line( std::ostream & stream, const tuple_t & line ) const { + write_val< 0 >( stream, line ); + } + + // recursive case + template< size_t OFFS > + inline void write_val( std::ostream & stream, typename std::enable_if < OFFS< NUM_FIELDS - 1, const tuple_t & >::type _tup ) const { + stream << std::get< OFFS >( _tup ) << separator; + write_val< OFFS + 1 >( stream, _tup ); // tail recursion + } + + // base case + template< size_t OFFS > + inline void write_val( std::ostream & stream, typename std::enable_if< OFFS == NUM_FIELDS - 1, const tuple_t & >::type _tup ) const { + (void)separator; + stream << std::get< OFFS >( _tup ); + } + }; + + /** + * Temaplate specialization that assumes disabled telemetry: no state is kept, + * operations produce no result when invoked (no output into streams, no file creation). + * + * @tparam TelControllerType + * @tparam T1 + * @tparam Ts + */ + template< + typename TelControllerType, + class T1, + class... Ts + > class CSVWriter< TelControllerType, false, T1, Ts... > : + public TelemetryBase< TelControllerType, false > { + public: + static_assert( __is_csv_printable< T1, Ts... >::value, "not all types are printable" ); + + using self_t = CSVWriter< TelControllerType, false, T1, Ts... 
>; + + using base_t = TelemetryBase< TelControllerType, false >; + + CSVWriter() = delete; + + CSVWriter( + const TelControllerType & tt, + std::initializer_list< const char * >, + char, + size_t + ) : base_t( tt ) {} + + CSVWriter( + const TelControllerType & tt, + std::initializer_list< const char * > _headers + ) : CSVWriter( tt, _headers, STD_CSV_SEP, 10 ) {} + + CSVWriter( const self_t & ) = delete; + + CSVWriter( self_t && ) = delete; + + self_t & operator=( const self_t & ) = delete; + + self_t & operator=( self_t && ) = delete; + + template< class... UTypes > void add_line( UTypes &&... ) { + static_assert( sizeof...( UTypes ) == sizeof...( Ts ) + 1 ); + } + + void clear() {} + + std::ostream & write_last_line_to_stream( std::ostream & stream ) const { + return stream; + } + + char last_line() const { + return '\0'; + } + + std::ostream & write_to_stream( std::ostream & stream ) const { + return stream; + } + + void write_to_file( const char * name ) const { + (void)name; + } + }; + + /** + * Implementation of CSVWriter for enabled telemetry, with implemented operations. + */ + template< class T1, class... Ts > + using StaticCSVWriter = CSVWriter< TelemetryControllerAlwaysOn, true, T1, Ts... >; + + } // namespace telemetry + } // namespace utils +} // namespace grb + +#endif // _H_GRB_UTILS_TELEMETRY_CSV_WRITER diff --git a/include/graphblas/utils/telemetry/OutputStream.hpp b/include/graphblas/utils/telemetry/OutputStream.hpp new file mode 100644 index 000000000..3d7c9fb1b --- /dev/null +++ b/include/graphblas/utils/telemetry/OutputStream.hpp @@ -0,0 +1,255 @@ + +/* + * Copyright 2023 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file OutputStream.hpp + * @author Alberto Scolari (alberto.scolar@huawei.com) + * + * Definition for the OutputStream class. + */ + +#ifndef _H_GRB_UTILS_TELEMETRY_OUTPUT_STREAM +#define _H_GRB_UTILS_TELEMETRY_OUTPUT_STREAM + +#include +#include +#include +#include + +#include "TelemetryBase.hpp" + +namespace grb { + namespace utils { + namespace telemetry { + + /** + * SFINAE-based class to check whether the type \p T can be input to an std::ostream + * via the \a << operator. + */ + template< typename T > struct is_ostream_input { + private: + + template< typename U > static constexpr bool is_input( + typename std::enable_if< std::is_same< + // this means that the expression std::cout << obj is valid, where obj is of type T + decltype( std::declval< std::ostream& >() << std::declval< U >() ), + std::ostream& >::value, nullptr_t >::type + ) { + return true; + } + + template< typename U > static constexpr bool is_input( ... ) { + return false; + } + + public: + static constexpr bool value = is_input< T >( nullptr ); + }; + + /** + * Telemetry-controllable output stream with basic interface, based on the \a << operator. + * + * It accepts in input any type \a std::ostream accepts. In addition, it also accepts + * the internl #OutputStreamLazy type, which marks callable objects and allows + * lazy evaluation of their result if the telemetry is active; if not, the object is + * not called, avoiding runtime costs. This functionality allows paying time and memory + * costs of computation only if really needed. 
+			 *
+			 * @tparam TelControllerType type of the telemetry controller
+			 * @tparam enabled whether telemetry is enabled for this type
+			 */
+			template<
+				typename TelControllerType,
+				bool enabled = TelControllerType::enabled
+			> class OutputStream : public TelemetryBase< TelControllerType, enabled > {
+			public:
+				using self_t = OutputStream< TelControllerType, enabled >;
+
+				using base_t = TelemetryBase< TelControllerType, enabled >;
+
+				/**
+				 * Marker object to indicate that the stored callable object is to be called
+				 * in a lazy way, i.e., only if output is active.
+				 *
+				 * The callable object is stored inside an \a std::function.
+				 *
+				 * @tparam RetType return type of the callable object, to be printed
+				 */
+				template< typename RetType > class OutputStreamLazy {
+
+					const std::function< RetType() > f;
+
+				public:
+					static_assert( is_ostream_input< RetType >::value );
+
+					template< class F > OutputStreamLazy( F&& _f ) : f( std::forward< F >( _f ) ) {}
+
+					/** Invokes the stored callable object and returns its result. */
+					RetType operator()() const { return f(); }
+				};
+
+				/**
+				 * Convenience function to create an #OutputStreamLazy object from
+				 * a callable one, inferring all template parameters automatically.
+				 *
+				 * @tparam CallableType type of the given callable object
+				 * @tparam RetType return type of the callable object, to be printed
+				 * @param f callable object
+				 * @return OutputStreamLazy< RetType > object marking lazy evaluation for printing
+				 */
+				template<
+					typename CallableType,
+					typename RetType = decltype( std::declval< CallableType >()() )
+				> static OutputStreamLazy< RetType > makeLazy( CallableType&& f ) {
+					static_assert( is_ostream_input< RetType >::value );
+					return OutputStreamLazy< RetType >( std::forward< CallableType >( f ) );
+				}
+
+				/**
+				 * Construct a new Output Stream object from a telemetry controller \p _tt
+				 * and an output stream \p _out (usually \a std::cout).
+				 *
+				 * The stream \p _out is stored by reference.
+				 */
+				OutputStream(
+					const TelControllerType & _tt,
+					std::ostream & _out
+				) :
+					TelemetryBase< TelControllerType, enabled >( _tt ),
+					out( _out )
+				{}
+
+				/**
+				 * Copy constructor.
+				 */
+				OutputStream( const self_t & _outs ) = default;
+
+				OutputStream & operator=( const self_t & _out ) = delete;
+
+				/**
+				 * Stream input operator, enabled for all types std::ostream supports.
+				 * The value is forwarded to the underlying stream only if \a this is active.
+				 */
+				template< typename T > inline typename std::enable_if< is_ostream_input< T >::value,
+					self_t & >::type operator<<( T&& v ) {
+					if ( this->is_active() ) {
+						out << std::forward< T >( v );
+					}
+					return *this;
+				}
+
+				/**
+				 * Specialization of the \a << operator for stream manipulators, to support
+				 * \a std::endl and similar manipulators.
+				 */
+				inline self_t & operator<<( std::ostream& (*func)( std::ostream& ) ) {
+					if ( this->is_active() ) {
+						out << func;
+					}
+					return *this;
+				}
+
+				/**
+				 * Specialization of the \a << operator for lazy evaluation of callable objects.
+				 *
+				 * A callable object can be wrapped into an #OutputStreamLazy object in order
+				 * to be called only if necessary, i.e., only if the stream \a this is active.
+				 * In this case, the internal callable object is called, its result is materialized
+				 * and sent into the stream.
+				 *
+				 * To conveniently instantiate an #OutputStreamLazy to pass to this operator,
+				 * see #makeLazy(CallableType&&).
+				 *
+				 * @tparam F template argument of the given #OutputStreamLazy, i.e., the return
+				 *  type of the wrapped callable object
+				 * @param fun wrapper of the callable object
+				 * @return self_t & the stream itself
+				 */
+				template< class F > inline typename std::enable_if<
+					is_ostream_input< decltype( std::declval< OutputStreamLazy< F > >()() ) >::value,
+					self_t & >::type operator<<( const OutputStreamLazy< F >& fun ) {
+					if ( this->is_active() ) {
+						out << fun();
+					}
+					return *this;
+				}
+
+			private:
+				std::ostream & out;
+			};
+
+			/**
+			 * Template specialization of OutputStream
+			 * for deactivated telemetry: no information is stored, no output produced.
+			 */
+			template<
+				typename TelControllerType
+			> class OutputStream< TelControllerType, false > :
+				public TelemetryBase< TelControllerType, false > {
+			public:
+				using self_t = OutputStream< TelControllerType, false >;
+
+
+				/**
+				 * Marker for lazily evaluated callable objects: in this specialization
+				 * the callable object passed at construction is discarded and never called.
+				 *
+				 * @tparam RetType return type of the callable object
+				 */
+				template< typename RetType > struct OutputStreamLazy {
+
+					static_assert( is_ostream_input< RetType >::value );
+
+					template< class F > OutputStreamLazy( F&& ) {}
+
+					/** Returns a placeholder value, without calling anything. */
+					constexpr char operator()() const { return '\0'; }
+				};
+
+				/**
+				 * Creates an #OutputStreamLazy marker from the callable object \p f,
+				 * inferring all template parameters automatically.
+				 */
+				template<
+					typename CallableType,
+					typename RetType = decltype( std::declval< CallableType >()() )
+				> static OutputStreamLazy< RetType > makeLazy( CallableType&& f ) {
+					static_assert( is_ostream_input< RetType >::value );
+					return OutputStreamLazy< RetType >( std::forward< CallableType >( f ) );
+				}
+
+				OutputStream() = default;
+
+				/**
+				 * Constructor with the same signature as the enabled implementation;
+				 * the given output stream is ignored.
+				 */
+				OutputStream( const TelControllerType & _tt, std::ostream & ) :
+					TelemetryBase< TelControllerType, false >( _tt ) {}
+
+				OutputStream( const self_t & _out ) = default;
+
+				OutputStream & operator=( const self_t & _out ) = delete;
+
+				/**
+				 * Overload for stream manipulators like \a std::endl, printing nothing.
+				 */
+				inline self_t & operator<<( std::ostream& (*)( std::ostream& ) ) {
+					return *this;
+				}
+
+				/**
+				 * All-capturing implementation for the input stream operator, printing nothing.
+				 *
+				 * This operator is convenient especially for debugging cases.
+				 * In case of "normal" stream types used with custom data types, the user
+				 * must extend them manually to print the custom data type. If the user uses a
+				 * deactivated stream (for example as a default template parameter to disable
+				 * logging by default), she needs not extend it for custom types in order
+				 * to make it compile, which is especially nonsensical when the output is deactivated.
+				 */
+				template< typename T > self_t & operator<<( T&& ) {
+					return *this;
+				}
+			};
+
+			/// Always active output stream, mainly for debugging purposes.
+			using OutputStreamOn = OutputStream< TelemetryControllerAlwaysOn, true >;
+
+			/// Always inactive output stream
+			using OutputStreamOff = OutputStream< TelemetryControllerAlwaysOff, false >;
+
+		}
+	}
+}
+
+#endif // _H_GRB_UTILS_TELEMETRY_OUTPUT_STREAM
diff --git a/include/graphblas/utils/telemetry/Stopwatch.hpp b/include/graphblas/utils/telemetry/Stopwatch.hpp
new file mode 100644
index 000000000..f599ede03
--- /dev/null
+++ b/include/graphblas/utils/telemetry/Stopwatch.hpp
@@ -0,0 +1,238 @@
+
+/*
+ * Copyright 2023 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file Stopwatch.hpp
+ * @author Alberto Scolari (alberto.scolar@huawei.com)
+ *
+ * Definition for the Stopwatch class.
+ */
+
+#ifndef _H_GRB_UTILS_TELEMETRY_STOPWATCH
+#define _H_GRB_UTILS_TELEMETRY_STOPWATCH
+
+#include
+
+#include "TelemetryBase.hpp"
+
+namespace grb {
+	namespace utils {
+		namespace telemetry {
+
+			/**
+			 * Type to store time duration in nanoseconds, which is the default time granularity.
+			 */
+			using duration_nano_t = size_t;
+
+			/**
+			 * Duration as floating point type, for time granularities coarser than nanoseconds.
+			 */
+			using duration_float_t = double;
+
+			/**
+			 * Base class for Stopwatch, with common logic.
+			 */
+			class StopwatchBase {
+			public:
+
+				/**
+				 * Convert nanoseconds to microseconds, returned as floating point type duration_float_t.
+				 */
+				static inline duration_float_t nano2Micro( duration_nano_t nano ) {
+					return static_cast< duration_float_t >( nano ) / 1000UL;
+				}
+
+				/**
+				 * Convert nanoseconds to milliseconds, returned as floating point type duration_float_t.
+				 */
+				static inline duration_float_t nano2Milli( duration_nano_t nano ) {
+					return static_cast< duration_float_t >( nano ) / 1000000UL;
+				}
+
+				/**
+				 * Convert nanoseconds to seconds, returned as floating point type duration_float_t.
+				 */
+				static inline duration_float_t nano2Sec( duration_nano_t nano ) {
+					return static_cast< duration_float_t >( nano ) / 1000000000UL;
+				}
+			};
+
+			/**
+			 * Class with functionalities to measure elapsed time for telemetry purposes: start, stop, reset.
+			 *
+			 * The time granularity is nanoseconds.
+			 *
+			 * Copy semantics is not available.
+			 *
+			 * This implementation assumes telemetry is enabled and the active state is controlled via
+			 * a telemetry controller of type \p TelControllerType.
+			 *
+			 * @tparam TelControllerType underlying telemetry controller type
+			 * @tparam enabled whether it is compile-time enabled
+			 */
+			template<
+				typename TelControllerType,
+				bool enabled = TelControllerType::enabled
+			> class Stopwatch :
+				public StopwatchBase, public TelemetryBase< TelControllerType, enabled > {
+
+				typedef typename std::chrono::high_resolution_clock clock_t;
+
+				typedef typename std::chrono::nanoseconds duration_t;
+
+				typedef typename std::chrono::high_resolution_clock::time_point time_point_t;
+
+				duration_t elapsedTime; ///< measured elapsed time so far, i.e.,
+					///< accumulated time periods between successive calls to #start() and #stop()
+
+				time_point_t beginning; ///< time instant of last call to #start()
+
+			public:
+				/**
+				 * Construct a new Stopwatch object from a telemetry controller.
+				 *
+				 * @param tt underlying telemetry controller, to be (de)activated at runtime
+				 */
+				Stopwatch( const TelControllerType & tt ) :
+					StopwatchBase(),
+					// this primary template is instantiated only for enabled == true,
+					// since a partial specialization exists for enabled == false
+					TelemetryBase< TelControllerType, true >( tt ),
+					elapsedTime( duration_t::zero() ) {}
+
+				Stopwatch( const Stopwatch< TelControllerType, enabled > & ) = delete;
+
+				/**
+				 * Start measuring time.
+				 *
+				 * Subsequent calls to this method "reset" the measure of elapsed time: if the user calls #start()
+				 * twice and then #stop(), the elapsed time accumulated internally after the call to #stop() is
+				 * the time elapsed from the \b second call of #start() to the call to #stop().
+				 */
+				inline void start() {
+					if( this->is_active() ) {
+						beginning = clock_t::now();
+					}
+				}
+
+				/**
+				 * Stops time measurement, returning the elapsed time since the last #start() invocation.
+				 * Elapsed time is internally accounted only if this method is invoked.
+				 *
+				 * If the stopwatch is not active, it returns 0 and accounts nothing.
+				 */
+				inline duration_nano_t stop() {
+					duration_nano_t count = 0;
+					if( this->is_active() ) {
+						time_point_t end = clock_t::now();
+						duration_t d = end - beginning;
+						count = d.count();
+						elapsedTime += d;
+					}
+					return count;
+				}
+
+				/**
+				 * Returns the elapsed time, which is accounted \b only if #stop() is called.
+				 *
+				 * The value of the elapsed time is not erased, so that successive calls return
+				 * the same value.
+				 */
+				inline duration_nano_t getElapsedNano() const {
+					return static_cast< duration_nano_t >( elapsedTime.count() );
+				}
+
+				/**
+				 * To be called on a stopped watch, it returns the elapsed time and sets it to 0.
+				 * The elapsed time is set to 0 only if the stopwatch is active.
+				 */
+				inline duration_nano_t reset() {
+					duration_nano_t r = getElapsedNano();
+					if( this->is_active() ) {
+						elapsedTime = duration_t::zero();
+					}
+					return r;
+				}
+
+				/**
+				 * Stops the watch, sets the elapsed time to 0, starts it again
+				 * and returns the elapsed time accumulated until the #stop() internally
+				 * called, i.e., the value #reset() returns (which includes all periods
+				 * accounted since the last reset, not only the last one).
+				 */
+				inline duration_nano_t restart() {
+					stop();
+					duration_nano_t r = reset();
+					start();
+					return r;
+				}
+			};
+
+			/**
+			 * Template specialization of Stopwatch for disabled telemetry:
+			 * no state is stored, all functions are inactive.
+			 */
+			template<
+				typename TelControllerType
+			> class Stopwatch< TelControllerType, false > :
+				public StopwatchBase, public TelemetryBase< TelControllerType, false > {
+			public:
+				Stopwatch( const TelControllerType & tt ) :
+					StopwatchBase(),
+					TelemetryBase< TelControllerType, false >( tt ) {}
+
+				Stopwatch( const Stopwatch< TelControllerType, false > & ) = delete;
+
+				constexpr inline void start() {}
+
+				constexpr inline duration_nano_t stop() {
+					return static_cast< duration_nano_t >( 0 );
+				}
+
+				constexpr inline duration_nano_t getElapsedNano() const {
+					return static_cast< duration_nano_t >( 0 );
+				}
+
+				constexpr inline duration_nano_t reset() {
+					return static_cast< duration_nano_t >( 0 );
+
+				}
+
+				constexpr inline duration_nano_t restart() {
+					return static_cast< duration_nano_t >( 0 );
+				}
+
+			};
+
+			/**
+			 * Always active stopwatch, requiring no telemetry controller for construction.
+			 * Mainly for debugging purposes.
+			 */
+			class ActiveStopwatch : public Stopwatch< TelemetryControllerAlwaysOn, true > {
+			public:
+
+				using base_t = Stopwatch< TelemetryControllerAlwaysOn, true >;
+
+				// NOTE(review): the base class is initialized before the member tt, hence it
+				// receives a reference to a not-yet-constructed object; this relies on the
+				// TelemetryBase specialization for TelemetryControllerAlwaysOn not reading it
+				ActiveStopwatch():
+					base_t( tt ),
+					tt( true ) {}
+
+				ActiveStopwatch( const ActiveStopwatch & ) = delete;
+
+			private:
+				TelemetryControllerAlwaysOn tt;
+			};
+
+		} // namespace telemetry
+	} // namespace utils
+} // namespace grb
+
+#endif // _H_GRB_UTILS_TELEMETRY_STOPWATCH
diff --git a/include/graphblas/utils/telemetry/Telemetry.hpp b/include/graphblas/utils/telemetry/Telemetry.hpp
new file mode 100644
index 000000000..3da512b82
--- /dev/null
+++ b/include/graphblas/utils/telemetry/Telemetry.hpp
@@ -0,0 +1,51 @@
+
+/*
+ * Copyright 2023 Huawei Technologies Co., Ltd.
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @dir include/graphblas/utils/telemetry + * This folder contains all telemetry functionalities, i.e., those meant to measure + * and report code execution in detail. They are designed with three goals in mind: + * -# compile-time control: all functionalities can be activated or deactivated + * at compile-time; if deactivated, they incur no runtime and memory cost + * -# fine granularity: since telemetry is complex and very application-specific, + * they allow fine-grained measurement and reporting; hence, they are also meant + * to be conveniently integrated into an existing application at fine granularity + * -# no pre-processor cluttering: multiple specializations may exist for the same functionality, + * for example to avoid memory or runtime costs if telemetry is deactivated; all + * implementations \b must compile against the same code paths, to avoid verbose + * insertion of #ifdef or similar directives on user's behalf. + * + * See the documentation of TelemetryController.hpp for some basic examples. + */ + +/** + * @file Telemetry.hpp + * @author Alberto Scolari (alberto.scolar@huawei.com) + * + * Convenience all-include header for all telemetry-related functionalities. 
+ */ + +#ifndef _H_GRB_UTILS_TELEMETRY_TELEMETRY +#define _H_GRB_UTILS_TELEMETRY_TELEMETRY + +#include "TelemetryController.hpp" +#include "Stopwatch.hpp" +#include "Timeable.hpp" +#include "CSVWriter.hpp" +#include "OutputStream.hpp" + +#endif // _H_GRB_UTILS_TELEMETRY_TELEMETRY diff --git a/include/graphblas/utils/telemetry/TelemetryBase.hpp b/include/graphblas/utils/telemetry/TelemetryBase.hpp new file mode 100644 index 000000000..04773591a --- /dev/null +++ b/include/graphblas/utils/telemetry/TelemetryBase.hpp @@ -0,0 +1,123 @@ + +/* + * Copyright 2023 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file TelemetryBase.hpp + * @author Alberto Scolari (alberto.scolar@huawei.com) + * + * Definition for the TelemetryBase class. + */ + +#ifndef _H_GRB_UTILS_TELEMETRY_TELEMETRY_BASE +#define _H_GRB_UTILS_TELEMETRY_TELEMETRY_BASE + +#include "TelemetryController.hpp" + +namespace grb { + namespace utils { + namespace telemetry { + + /** + * Base class provided as a convenience, exposing whether the telemetry is active. + * + * Default construction is unavailable, because telemetry functionalities need an + * underlying telemetry controller to know whether they are enabled and active. + * + * Instead, copy construction is available for inheriting classes to easily implement copy semantics + * if needed; the copy shares the same telemetry controller of the original object via a reference. 
+			 *
+			 * This implementation corresponds to enabled telemetry and stores an actual
+			 * telemetry controller at runtime to be notified about its active state.
+			 *
+			 * The controller is stored by reference: it must outlive this object.
+			 *
+			 * @tparam TelControllerType type of the underlying telemetry controller,
+			 *  usually derived from TelemetryControllerBase
+			 * @tparam enabled whether the current type is enabled (usually equals to TelControllerType::enabled)
+			 */
+			template<
+				typename TelControllerType,
+				bool enabled = TelControllerType::enabled
+			> class TelemetryBase {
+
+				const TelControllerType & telemetry_Controller;
+
+			public:
+				static_assert( is_telemetry_controller< TelControllerType >::value,
+					"type TelControllerType does not implement Telemetry Controller interface" );
+
+				using self_t = TelemetryBase< TelControllerType, enabled >;
+
+				/** Binds this object to the telemetry controller \p tt, without copying it. */
+				TelemetryBase( const TelControllerType & tt ): telemetry_Controller( tt ) {}
+
+				/** The copy refers to the same telemetry controller as \p tb. */
+				TelemetryBase( const self_t & tb ) : telemetry_Controller( tb.telemetry_Controller ) {}
+
+				self_t & operator=( const self_t & ) = delete;
+
+				/** Whether the underlying telemetry controller is currently active. */
+				bool is_active() const { return telemetry_Controller.is_active(); }
+			};
+
+			/**
+			 * Template specialization for disabled telemetry: no state, no activity.
+			 *
+			 * @tparam TelControllerType
+			 */
+			template <
+				typename TelControllerType
+			> class TelemetryBase< TelControllerType, false > {
+			public:
+				static_assert( is_telemetry_controller< TelControllerType >::value,
+					"type TelControllerType does not implement Telemetry Controller interface" );
+
+				using self_t = TelemetryBase< TelControllerType, false >;
+
+				TelemetryBase() = default;
+
+				/** Accepts a telemetry controller for API compliance and ignores it. */
+				TelemetryBase( const TelControllerType & ) {}
+
+				TelemetryBase( const self_t & ) = default;
+
+				self_t & operator=( const self_t & ) = delete;
+
+				/** Disabled telemetry is never active. */
+				constexpr bool is_active() const { return false; }
+			};
+
+			/**
+			 * Specialization of TelemetryBase for enabled and always active telemetry,
+			 * mainly for debugging purposes: it is always active.
+			 *
+			 * For API compliance, it accepts an always-on telemetry controller, but does not store it.
+			 */
+			template<> class TelemetryBase< TelemetryControllerAlwaysOn, true > {
+			public:
+				static_assert( is_telemetry_controller< TelemetryControllerAlwaysOn >::value,
+					"type TelControllerType does not implement Telemetry Controller interface" );
+
+				using self_t = TelemetryBase< TelemetryControllerAlwaysOn, true >;
+
+				/** The controller \p tt is neither read nor stored. */
+				TelemetryBase( const TelemetryControllerAlwaysOn & tt ) { (void) tt; }
+
+				TelemetryBase( const self_t & tb ) = default;
+
+				self_t & operator=( const self_t & ) = delete;
+
+				/** Always-on telemetry is always active. */
+				constexpr bool is_active() const { return true; }
+			};
+
+		}
+	}
+}
+
+#endif // _H_GRB_UTILS_TELEMETRY_TELEMETRY_BASE
diff --git a/include/graphblas/utils/telemetry/TelemetryController.hpp b/include/graphblas/utils/telemetry/TelemetryController.hpp
new file mode 100644
index 000000000..f32c9ca21
--- /dev/null
+++ b/include/graphblas/utils/telemetry/TelemetryController.hpp
@@ -0,0 +1,328 @@
+
+/*
+ * Copyright 2023 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file TelemetryController.hpp
+ * @author Alberto Scolari (alberto.scolar@huawei.com)
+ *
+ * This file defines the basic functionalities for Telemetry Controllers, i.e.,
+ * objects that enable/disable telemetry at compile-time and runtime.
+ * + * A telemetry controller can be \b enabled (at compile-time) to produce the code for telemetry and must be + * \b activated at runtime to emit actual telemetry information. Activation depends on runtime information + * (e.g., user's input) and may change dynamically \a after the controller is instantiated. + * If a controller is \b disabled, no telemetry code is generated at compile-time in any compliant telemetry functionality; + * hence, any (de)activation of a disabled telemetry controller is simply ignored and produces no result. + * In any case, the code must compile under all conditions, in order to avoid verbose + * pre-processing \a #if conditions. + * + * A typical instantiation of a telemetry controller in a user's application looks as follows: + * + * \code{.cpp} + * ENABLE_TELEMETRY_CONTROLLER( my_controller_t ) + * DEFINE_TELEMETRY_CONTROLLER( my_controller_t ) + * + * int main() { + * my_controller_t my_controller( true ); + * if( my_controller.is_active() ) { + * std::cout << "my_controller is active"; + * } else { + * std::cout << "my_controller is NOT active"; + * if( !my_controller_t::enabled ) { + * std::cout << ", because it was deactivated at compile-time"; + * } + * } + * std::cout << std::endl; + * return 0; + * } + * \endcode + * + * where the activation directive \a ENABLE_TELEMETRY_CONTROLLER is present only if the controller + * is to be activated. Users should indeed comment/uncomment this directive to disable/enable telemetry + * while debugging, or may add extra pre-processing logic to control it during compilation, like + * + * \code{.cpp} + * #ifdef __I_WANT_my_controller_t_ENABLED__ + * ENABLE_TELEMETRY_CONTROLLER( my_controller_t ) + * #endif + * DEFINE_TELEMETRY_CONTROLLER( my_controller_t ) + * \endcode + * + * Note that the \a ENABLE_TELEMETRY_CONTROLLER directive (if present) must come \b before the + * \a DEFINE_TELEMETRY_CONTROLLER directive, otherwise compilation errors occur. 
/* Include guard for the telemetry controller facilities; the two standard
 * headers below supply the type traits and std::declval used further down. */
#ifndef _H_GRB_UTILS_TELEMETRY_TELEMETRY_CONTROLLER
#define _H_GRB_UTILS_TELEMETRY_TELEMETRY_CONTROLLER

#include <type_traits> // std::enable_if, std::is_same, std::decay
#include <utility> // std::declval< T >()

namespace grb {
	namespace utils {
		namespace telemetry {

			/**
			 * Returns whether a telemetry controller is enabled at compile-time. By default
			 * it is \b not: this primary template always returns \a false, while the macro
			 * #ENABLE_TELEMETRY_CONTROLLER emits a specialization that returns \a true.
			 *
			 * @tparam T type associated to the telemetry controller (its "enabler" type)
			 * @return false always, unless a specialization for \p T exists
			 */
			template< typename T > constexpr bool is_controller_enabled() { return false; }

			/**
			 * Class that encapsulates the logic to enable/disable telemetry at compile-time
			 * or at runtime.
			 *
			 * Telemetry can be completely disabled at compile-time (e.g., to avoid any code generation
			 * and overhead) or can be controlled at runtime, based on external conditions (e.g.,
			 * user's input, cluster node number, ...).
			 *
			 * In the following, the field #enabled encodes the compile-time information, while
			 * the field \a active (if present) and the corresponding getter #is_active() tell
			 * whether the controller is \a active at runtime. Hence, users of telemetry controllers
			 * should always use the #is_active() method to check whether telemetry is active, while
			 * implementations of telemetry controllers should implement this method also based on the
			 * value of the #enabled field, possibly "short-circuiting" when #enabled is \a false.
			 * The specialization for \p en = \a false does exactly this, disabling telemetry at
			 * compile-time and ignoring any runtime information.
			 *
			 * Copy-assignment is deleted, because a controller stores just one piece of information
			 * (whether it is active) and a copy would essentially behave as a new object.
			 * Users should rather create new controllers themselves or pass around references
			 * to the same controller, in order to centralize control via a single controller object.
			 * Note that this (enabled) implementation still defaults the copy constructor, whereas
			 * the specialization for \p en = \a false deletes it.
			 *
			 * No move operation is declared, since an "empty" controller makes no sense.
			 *
			 * This implementation assumes \p en = \a true, because a specialization for
			 * \p en = \a false exists (hence #enabled is set as \a true at compile-time).
			 *
			 * @tparam en whether telemetry is enabled (\p en = \a false has a
			 *  dedicated template specialization)
			 */
			template< bool en > class TelemetryControllerBase {
			public:
				using self_t = TelemetryControllerBase< en >;

				/**
				 * Construct a new Telemetry Controller Base object, specifying the \a active state.
				 *
				 * @param _active whether the controller is \a active or not
				 */
				TelemetryControllerBase( bool _active ) : active( _active ) {}

				TelemetryControllerBase() = delete;

				TelemetryControllerBase( const self_t & ) = default;

				TelemetryControllerBase& operator=( const self_t & ) = delete;

				/**
				 * Tells whether the controller is \a active.
				 */
				bool is_active() const { return this->active; }

				/**
				 * Set the \a active status of the controller at runtime.
				 *
				 * @param _active whether to activate the controller
				 */
				void inline set_active( bool _active ) {
					this->active = _active;
				}

				/**
				 * Whether telemetry is compile-time enabled (here always).
				 */
				static constexpr bool enabled = true;

			protected:
				/** Runtime state: whether telemetry is currently active. */
				bool active;
			};

			/**
			 * Template specialization for compile-time disabled telemetry,
			 * whose functionalities are all disabled.
			 *
			 * The controller is \b disabled, and modifications to
			 * its \a active status are ignored.
			 */
			template< > class TelemetryControllerBase< false > {
			public:
				using self_t = TelemetryControllerBase< false >;

				/**
				 * Construct a new Telemetry Controller Base object with runtime information.
				 *
				 * Here, runtime information is ignored, as this implementation disables any telemetry.
				 *
				 * @param _enabled whether telemetry is runtime-active (ignored here)
				 */
				TelemetryControllerBase( bool _enabled ) {
					(void) _enabled;
				}

				TelemetryControllerBase() = delete;

				TelemetryControllerBase( const self_t & ) = delete;

				TelemetryControllerBase& operator=( const self_t & ) = delete;

				/**
				 * Whether telemetry is runtime-active.
				 *
				 * @return false always (this controller can never be active)
				 */
				constexpr bool inline is_active() const { return false; }

				/**
				 * Set the active status of the telemetry controller.
				 *
				 * This \a disabled implementation ignores the input.
				 */
				void inline set_active( bool ) {}

				/**
				 * Whether telemetry is compile-time enabled (never here).
				 */
				static constexpr bool enabled = false;
			};

			/**
			 * Convenience definition of an always-off telemetry controller.
			 */
			using TelemetryControllerAlwaysOff = TelemetryControllerBase< false >;

			/**
			 * Always active controller, useful especially for prototyping scenarios.
			 *
			 * It stores no state: both the constructor argument and any call to
			 * #set_active() are ignored, and #is_active() always returns \a true.
			 */
			class TelemetryControllerAlwaysOn {
			public:
				/**
				 * Construct an always-on controller.
				 *
				 * @param _enabled ignored; accepted for interface compatibility with
				 *  the other controllers
				 */
				TelemetryControllerAlwaysOn( bool _enabled ) {
					(void) _enabled;
				}

				TelemetryControllerAlwaysOn() = default;

				TelemetryControllerAlwaysOn( const TelemetryControllerAlwaysOn & ) = default;

				TelemetryControllerAlwaysOn& operator=( const TelemetryControllerAlwaysOn & ) = delete;

				/**
				 * Tells whether the controller is \a active, which is in this case always true.
				 */
				constexpr bool is_active() const { return true; }

				/**
				 * Set the active status of the telemetry controller.
				 *
				 * This \a always-on implementation ignores the input.
				 */
				void inline set_active( bool ) {}

				/**
				 * Whether telemetry is compile-time enabled (here always).
				 */
				static constexpr bool enabled = true;
			};

			/**
			 * SFINAE-based structure to check whether \p T is a telemetry controller, i.e.
			 * - it has a member named \a enabled whose (decayed) type is \a bool
			 * - it has an \a is_active() method returning (a type decaying to) \a bool
			 * - it has a \a set_active(bool) method returning \a void
			 *
			 * Each check is a pair of overloads: the constrained one takes a \a bool* and
			 * is viable only if the probed expression is well-formed with the expected
			 * type, the fallback takes an ellipsis; calling them with \a nullptr selects
			 * the constrained overload whenever it is viable.
			 */
			template< typename T > struct is_telemetry_controller {
			private:
				template< typename U > static constexpr bool has_enabled_field(
					typename std::enable_if<
						std::is_same< typename std::decay< decltype( U::enabled ) >::type, bool >::value,
						bool * >::type
				) {
					return true;
				}

				template< typename U > static constexpr bool has_enabled_field( ... ) { return false; }

				template< typename U > static constexpr bool has_is_active_method(
					typename std::enable_if<
						std::is_same< typename std::decay<
							decltype( std::declval< U >().is_active() )
						>::type, bool >::value, bool * >::type
				) {
					return true;
				}

				template< typename U > static constexpr bool has_is_active_method( ... ) { return false; }

				template< typename U > static constexpr bool has_set_active_method(
					typename std::enable_if<
						std::is_same< decltype( std::declval< U >().set_active( true ) ), void >::value,
						bool * >::type
				) {
					return true;
				}

				template< typename U > static constexpr bool has_set_active_method( ... ) { return false; }

			public:
				/** \a true if and only if \p T passes all three checks above. */
				static constexpr bool value = has_enabled_field< T >( nullptr )
					&& has_is_active_method< T >( nullptr ) && has_set_active_method< T >( nullptr ) ;
			};
		}

	}
}

// Name of the Controller Enabler, i.e., a type that controls whether a telemetry controller is enabled
#define __TELEMETRY_CONTROLLER_ENABLER_NAME( name ) __ ## name ## _Enabler

// Name of the Telemetry Controller type
#define __TELEMETRY_CONTROLLER_NAME( name ) name ## _cls

/**
 * Defines a telemetry controller, i.e., a custom type derived from TelemetryControllerBase.
 *
 * This macro defines the associated controller enabler type, which controls whether the
 * controller is enabled at compile-time via grb::utils::telemetry::is_controller_enabled;
 * the controller is by default \b disabled, i.e., it derives from
 * TelemetryControllerBase< false > unless #ENABLE_TELEMETRY_CONTROLLER( name ) appeared before.
 */
#define DEFINE_TELEMETRY_CONTROLLER( name ) \
	class __TELEMETRY_CONTROLLER_ENABLER_NAME( name ) {}; \
	using name = class __TELEMETRY_CONTROLLER_NAME( name ) : \
		public grb::utils::telemetry::TelemetryControllerBase< \
			grb::utils::telemetry::is_controller_enabled< \
				__TELEMETRY_CONTROLLER_ENABLER_NAME( name ) >() > { \
	public: \
		using base_t = grb::utils::telemetry::TelemetryControllerBase< \
			grb::utils::telemetry::is_controller_enabled< \
				__TELEMETRY_CONTROLLER_ENABLER_NAME( name ) >() >; \
		__TELEMETRY_CONTROLLER_NAME( name )( bool _enabled ) : base_t( _enabled ) {} \
	};

/**
 * Enables a telemetry controller through its associated enabler type, by
 * forward-declaring the enabler and specializing
 * grb::utils::telemetry::is_controller_enabled for it to return \a true.
 *
 * Once enabled, it can be runtime activated.
 */
#define ENABLE_TELEMETRY_CONTROLLER( name ) class __TELEMETRY_CONTROLLER_ENABLER_NAME( name ); \
	namespace grb { namespace utils { namespace telemetry { \
		template<> constexpr bool is_controller_enabled< \
			__TELEMETRY_CONTROLLER_ENABLER_NAME( name ) >() { return true; } \
	} } }

#endif // _H_GRB_UTILS_TELEMETRY_TELEMETRY_CONTROLLER
+ */ + +/** + * @file Timeable.hpp + * @author Alberto Scolari (alberto.scolar@huawei.com) + * + * Definition for the Timeable class. + */ + +#ifndef _H_GRB_UTILS_TELEMETRY_TIMEABLE +#define _H_GRB_UTILS_TELEMETRY_TIMEABLE + +#include "Stopwatch.hpp" + +namespace grb { + namespace utils { + namespace telemetry { + + /** + * Facility for inheriting classes that want to time interal operations: + * this class provides protected methods to measure elapsed time and public methods to expose + * elapsed time and allow resetting the internal elapsed time. + * + * @tparam TelControllerType type of telemetry controller + * @tparam enabled whether telemetry is enabled + */ + template< + typename TelControllerType, + bool enabled = TelControllerType::enabled + > class Timeable { + public: + using self_t = Timeable< TelControllerType, enabled >; + + Timeable( const TelControllerType & tt ) { + (void) tt; + } + + Timeable( const self_t & ) = default; + + Timeable& operator=( const self_t & ) = delete; + + /** + * Get the elapsed time, in nanoseconds. + */ + constexpr inline duration_nano_t getElapsedNano() const { + return static_cast< duration_nano_t >( 0 ); + } + + /** + * Reset the internal value of elapsed time. + */ + constexpr inline duration_nano_t reset() { + return static_cast< duration_nano_t >( 0 ); + } + + protected: + + /** + * Starts measuring the elapsed time. + */ + inline void start() {} + + /** + * Stops measuring elapsed time. + */ + constexpr inline duration_nano_t stop() { + return static_cast< duration_nano_t >( 0 ); + } + + }; + + /** + * Implementation of Timeable for enabled telemetry. + * + * @tparam TelControllerType type of telemetry controller. 
+ */ + template< typename TelControllerType > class Timeable< TelControllerType, true > { + public: + using self_t = Timeable< TelControllerType, true >; + + Timeable( const TelControllerType & tt ) : swatch( tt ) {} + + Timeable( const self_t & ) = default; + + Timeable& operator=( const self_t & ) = delete; + + inline duration_nano_t getElapsedNano() const { + return swatch.getElapsedNano(); + } + + inline duration_nano_t reset() { + return swatch.reset(); + } + + protected: + inline void start() { + swatch.start(); + } + + inline duration_nano_t stop() { + return swatch.stop(); + } + + private: + Stopwatch< TelControllerType > swatch; + }; + + using StaticTimeable = Timeable< TelemetryControllerAlwaysOn, true >; + + } + } +} + +#endif // _H_GRB_UTILS_TELEMETRY_TIMEABLE diff --git a/tests/smoke/hpcg.cpp b/tests/smoke/hpcg.cpp index d84c157e0..e2f5644c2 100644 --- a/tests/smoke/hpcg.cpp +++ b/tests/smoke/hpcg.cpp @@ -1,6 +1,6 @@ /* - * Copyright 2021 Huawei Technologies Co., Ltd. + * Copyright 2022 Huawei Technologies Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,404 +18,650 @@ /** * @file hpcg_test.cpp * @author Alberto Scolari (alberto.scolari@huawei.com) - * @brief Test for HPCG simulations on N-dimensional physical problems. + * Test for HPCG simulations on N-dimensional physical problems. * * This test strictly follows the parameter and the formulation of the reference HPCG * benchmark impementation in https://github.com/hpcg-benchmark/hpcg. - * - * @date 2021-04-30 */ +#include #include #include #include #include +#include +#include #include +#include #include #include #include -#include #include - -// here we define a custom macro and do not use NDEBUG since the latter is not defined for smoke tests -#ifdef HPCG_PRINT_STEPS - -// HPCG_PRINT_STEPS requires defining the following symbols - -/** - * @brief simply prints \p args on a dedicated line. 
//========== MAIN PROBLEM PARAMETERS =========
// default simulation parameters, set as in reference HPCG
// users can input different ones via the cmd line
constexpr size_t PHYS_SYSTEM_SIZE_DEF = 16UL;
// passed to HPCGSystemParams in build_3d_system()
constexpr size_t PHYS_SYSTEM_SIZE_MIN = 2UL;
constexpr size_t MAX_COARSENING_LEVELS = 3UL;
constexpr size_t MAX_ITERATIONS_DEF = 56UL;
constexpr size_t SMOOTHER_STEPS_DEF = 1;

// internal values defining the simulated physical system
constexpr double SYSTEM_DIAG_VALUE = 26.0;
constexpr double SYSTEM_NON_DIAG_VALUE = -1.0;
constexpr size_t BAND_WIDTH_3D = 13UL;
constexpr size_t HALO_RADIUS = 1U;
constexpr double MAX_NORM = 4.0e-14;
//============================================

using namespace grb;
using namespace algorithms;

// prefix prepended to the log lines that mark the main phases of the test
static const char * const TEXT_HIGHLIGHT = "===> ";

// default types
using value_t = double;

// bundle of the value types and algebraic structures handed as a single template
// argument to the smoother, coarsener, multi-grid and CG runner types below;
// all value types are currently the same value_t
struct HPCGTypes {
	using IOType = value_t;
	using NonzeroType = value_t;
	using InputType = value_t;
	using ResidualType = value_t;
	using Ring = Semiring< grb::operators::add< NonzeroType >,grb::operators::mul< NonzeroType >,
		grb::identities::zero, grb::identities::one >;
	using Minus = operators::subtract< NonzeroType >;
	using Divide = operators::divide< NonzeroType >;
};

// file-level shorthands for the types bundled in HPCGTypes
using IOType = typename HPCGTypes::IOType;
using NonzeroType = typename HPCGTypes::NonzeroType;
using InputType = typename HPCGTypes::InputType;
using ResidualType = typename HPCGTypes::ResidualType;
using Ring = typename HPCGTypes::Ring;

// type of the coordinates used by the system builder
using coord_t = size_t;

// descriptor passed to all the runner types below
constexpr Descriptor hpcg_desc = descriptors::dense;

// telemetry control: controllers and output stream types for telemetry
// they can be (de)activated at compile-time by (un)commenting the respective ENABLE_TELEMETRY_CONTROLLER() macro
ENABLE_TELEMETRY_CONTROLLER( dist_controller_t )
DEFINE_TELEMETRY_CONTROLLER( dist_controller_t )
using DistStream = grb::utils::telemetry::OutputStream< dist_controller_t >;

ENABLE_TELEMETRY_CONTROLLER( hpcg_controller_t )
DEFINE_TELEMETRY_CONTROLLER( hpcg_controller_t )

ENABLE_TELEMETRY_CONTROLLER( mg_controller_t )
DEFINE_TELEMETRY_CONTROLLER( mg_controller_t )

// the enabling macro is commented out, hence dbg_controller_t is compile-time disabled
// ENABLE_TELEMETRY_CONTROLLER( dbg_controller_t )
DEFINE_TELEMETRY_CONTROLLER( dbg_controller_t )
using DBGStream = grb::utils::telemetry::OutputStream< dbg_controller_t >;

// CSV writer types: the trailing template arguments are the types of the columns
// (see the add_line() calls in grbProgram)
using duration_t = utils::telemetry::duration_nano_t;
using hpcg_csv_t = utils::telemetry::CSVWriter< hpcg_controller_t, hpcg_controller_t::enabled,
	size_t, duration_t >;
using mg_csv_t = utils::telemetry::CSVWriter< mg_controller_t, mg_controller_t::enabled,
	size_t, size_t, duration_t, duration_t >;

// assembled types for simulation runners and input/output structures
using smoother_runner_t = grb::algorithms::RedBlackGSSmootherRunner< HPCGTypes,
	mg_controller_t, hpcg_desc >;
using smoothing_data_t = typename smoother_runner_t::SmootherDataType;

using coarsener_runner_t = grb::algorithms::SingleMatrixCoarsener< HPCGTypes,
	mg_controller_t, hpcg_desc >;
using coarsening_data_t = typename coarsener_runner_t::CoarseningDataType;

using mg_runner_t = MultiGridRunner< HPCGTypes, smoother_runner_t, coarsener_runner_t,
	mg_controller_t, hpcg_desc, DBGStream >;
using mg_data_t = typename mg_runner_t::MultiGridInputType;

using hpcg_runner_t = MultiGridCGRunner< HPCGTypes, mg_runner_t, hpcg_controller_t,
	hpcg_desc, DBGStream >;
using hpcg_data_t = typename hpcg_runner_t::HPCGInputType;

// Stopwatch type, to measure various setup phases
using Stw = utils::telemetry::ActiveStopwatch;


// allow DBGStream to print grb::Vector's in a lazy way (i.e., no code generated if deactivated)
// the call operator returns the dot product of the referenced vector with itself under Ring
struct dotter {
	// non-owning reference: the vector must outlive this object
	const grb::Vector< IOType > & v;

	ResidualType operator()() const {
		Ring ring;
		ResidualType r = 0;
		// NOTE(review): the return code of grb::dot is discarded here
		grb::dot( r, v, v, ring );
		return r;
	}
};
// streams a grb::Vector into a DBGStream by wrapping it into a lazily-evaluated dotter,
// after setting the stream precision to 7 digits
static inline DBGStream & operator<<( DBGStream & stream, const grb::Vector< IOType > & v ) {
	stream << std::setprecision( 7 );
	return stream << DBGStream::makeLazy( dotter{ v } );
}

// various algebraic zeros
static const IOType io_zero = Ring().template getZero< IOType >();
static const NonzeroType nz_zero = Ring().template getZero< NonzeroType >();
static const InputType input_zero = Ring().template getZero< InputType >();
static const ResidualType residual_zero = Ring().template getZero< ResidualType >();

// input/output structure (serializable for distributed execution),
// with the parameters for the HPCG simulation
// maximum length of a CSV path, excluding the terminating '\0'
static constexpr size_t MAX_CSV_PATH_LENGTH = 255;

struct simulation_input {
	// physical parameters for the multi-grid
	size_t nx, ny, nz;
	size_t max_coarsening_levels;
	// solver options
	bool use_average_coarsener;
	size_t inner_test_repetitions;
	size_t max_iterations;
	size_t smoother_steps;
	bool evaluation_run;
	bool no_preconditioning;
	// logging options: these are serializable for launcher invocation
	// fixed-size character buffers, with room for the terminating '\0'
	std::array< char, MAX_CSV_PATH_LENGTH + 1 > hpcg_csv;
	std::array< char, MAX_CSV_PATH_LENGTH + 1 > mg_csv;
	bool hpcg_log;
	bool mg_log;

	// only makes the two path buffers empty strings
	// NOTE(review): all other fields are left uninitialized here; presumably the
	// caller sets them before use -- confirm against the argument parser
	simulation_input() {
		hpcg_csv[ 0 ] = '\0';
		mg_csv[ 0 ] = '\0';
	}

	simulation_input( const simulation_input & ) = default;
};

// output of the test; all fields with an initializer start from a "successful, nothing run" state
struct output {
	RC error_code = SUCCESS;
	size_t inner_test_repetitions = 0;
	grb::utils::TimerResults times;
	std::unique_ptr< PinnedVector< IOType > > pinnedVector;
	NonzeroType square_norm_diff = nz_zero;
	CGOutInfo< NonzeroType > cg_out = { 0, nz_zero };
};

#ifdef HPCG_PRINT_SYSTEM
// routine to print the system matrices: the matrix of the first level, then for each
// coarsener its coarsening matrix followed by the system matrix of the next level
static void print_system( const std::vector< std::unique_ptr< mg_data_t > > & system_levels,
	const std::vector< std::unique_ptr< coarsening_data_t > > & coarsener_levels ) {
	assert( spmd<>::nprocs() == 1 ); // distributed printing of system not implemented
	print_matrix( system_levels[ 0 ]->A, 70, "A" );
	for( size_t i = 0; i < coarsener_levels.size(); i++ ) {
		print_matrix( coarsener_levels[ i ]->coarsening_matrix, 50, "COARSENING MATRIX" );
		// coarsener i sits between levels i and i + 1
		print_matrix( system_levels[ i + 1 ]->A, 50, "COARSER SYSTEM MATRIX" );
	}
}
#endif
The input is the vector of system sizes \p mg_sizes, with sizes in + * monotonically \b decreasing order (finest system first). + * + * This routine is algorithm-agnositc, as long as the constructors of the data types meet the requirements + * explained in \ref multigrid_allocate_data(). */ -static RC build_3d_system( std::unique_ptr< hpcg_data< double, double, double > > & holder, const system_input & in ) { - const std::array< size_t, 3 > physical_sys_sizes { in.nx, in.ny, in.nz }; - struct hpcg_system_params< 3, double > params { - physical_sys_sizes, HALO_RADIUS, BAND_WIDTH_3D * 2 + 1, SYSTEM_DIAG_VALUE, SYSTEM_NON_DIAG_VALUE, PHYS_SYSTEM_SIZE_MIN, in.max_coarsening_levels, 2 - }; - - return build_hpcg_system< 3, double >( holder, params ); -} - -#ifdef HPCG_PRINT_SYSTEM -static void print_system( const hpcg_data< double, double, double > & data ) { - print_matrix( data.A, 70, "A" ); - multi_grid_data< double, double > * coarser = data.coarser_level; - while( coarser != nullptr ) { - print_matrix( coarser->coarsening_matrix, 50, "COARSENING MATRIX" ); - print_matrix( coarser->A, 50, "COARSER SYSTEM MATRIX" ); - coarser = coarser->coarser_level; - } +static void allocate_system_structures( + std::vector< std::unique_ptr< mg_data_t > > & system_levels, + std::vector< std::unique_ptr< coarsening_data_t > > & coarsener_levels, + std::vector< std::unique_ptr< smoothing_data_t > > & smoother_levels, + std::unique_ptr< hpcg_data_t > & cg_system_data, + const std::vector< size_t > & mg_sizes, + const mg_controller_t & mg_controller, + DistStream & logger +) { + Stw timer; + + hpcg_data_t * data = new hpcg_data_t( mg_sizes[ 0 ] ); + cg_system_data = std::unique_ptr< hpcg_data_t >( data ); + logger << "allocating data for the MultiGrid simulation..."; + timer.start(); + multigrid_allocate_data( system_levels, coarsener_levels, smoother_levels, mg_sizes, mg_controller ); + logger << " time (ms) " << Stw::nano2Milli( timer.restart() ) << std::endl; + + // zero all vectors 
+ logger << "zeroing all vectors..."; + grb::RC rc = data->init_vectors( io_zero ); + ASSERT_RC_SUCCESS( rc ); + std::for_each( system_levels.begin(), system_levels.end(), + []( std::unique_ptr< mg_data_t > & s ) { + ASSERT_RC_SUCCESS( s->init_vectors( io_zero ) ); + } ); + std::for_each( coarsener_levels.begin(), coarsener_levels.end(), + []( std::unique_ptr< coarsening_data_t > & s ) { + ASSERT_RC_SUCCESS( s->init_vectors( io_zero ) ); + } ); + std::for_each( smoother_levels.begin(), smoother_levels.end(), + []( std::unique_ptr< smoothing_data_t > & s ) { + ASSERT_RC_SUCCESS( s->init_vectors( io_zero ) ); + } ); + logger << " time (ms) " << Stw::nano2Milli( timer.stop() ) << std::endl; } -#endif -#ifdef HPCG_PRINT_STEPS -template< typename T, - class Ring = Semiring< grb::operators::add< T >, grb::operators::mul< T >, grb::identities::zero, grb::identities::one > - > -void print_norm( const grb::Vector< T > & r, const char * head, const Ring & ring ) { - T norm; - RC ret = grb::dot( norm, r, r, ring ); // residual = r' * r; - (void)ret; - assert( ret == SUCCESS ); - std::cout << ">>> "; - if( head != nullptr ) { - std::cout << head << ": "; +/** + * Builds and initializes a 3D system for an HPCG simulation according to the given 3D system sizes. + * It allocates the data structures and populates them according to the algorithms chosen for HPCG. 
+ */ +static void build_3d_system( + std::vector< std::unique_ptr< mg_data_t > > & system_levels, + std::vector< std::unique_ptr< coarsening_data_t > > & coarsener_levels, + std::vector< std::unique_ptr< smoothing_data_t > > & smoother_levels, + std::unique_ptr< hpcg_data_t > & cg_system_data, + const simulation_input & in, + const mg_controller_t & tt, + DistStream & logger +) { + constexpr size_t DIMS = 3; + using builder_t = grb::algorithms::HPCGSystemBuilder< DIMS, coord_t, NonzeroType >; + Stw timer; + + HPCGSystemParams< DIMS, NonzeroType > params = { { in.nx, in.ny, in.nz }, HALO_RADIUS, + SYSTEM_DIAG_VALUE, SYSTEM_NON_DIAG_VALUE, PHYS_SYSTEM_SIZE_MIN, in.max_coarsening_levels, 2 }; + + std::vector< builder_t > mg_generators; + logger << "building HPCG generators for " << ( in.max_coarsening_levels + 1 ) << " levels..."; + timer.start(); + // construct the builder_t generator for each grid level, which depends on the system physics + hpcg_build_multigrid_generators( params, mg_generators ); + logger << " time (ms) " << Stw::nano2Milli( timer.stop() ) << std::endl; + logger << "built HPCG generators for " << mg_generators.size() << " levels" << std::endl; + + // extract the size for each level + std::vector< size_t > mg_sizes; + std::transform( mg_generators.cbegin(), mg_generators.cend(), std::back_inserter( mg_sizes ), + []( const builder_t & b ) { + return b.system_size(); + } ); + // given the sizes, allocate the data structures for all the inputs of the algorithms + allocate_system_structures( system_levels, coarsener_levels, smoother_levels, + cg_system_data, mg_sizes, tt, logger ); + assert( mg_generators.size() == system_levels.size() ); + assert( mg_generators.size() == smoother_levels.size() ); + assert( mg_generators.size() - 1 == coarsener_levels.size() ); // coarsener acts between two levels + + // for each grid level, populate the data structures according to the specific algorithm + // and track the time for diagnostics purposes + for( size_t i 
= 0; i < mg_generators.size(); i++ ) { + logger << "SYSTEM LEVEL " << i << std::endl; + auto & sizes = mg_generators[ i ].get_generator().get_sizes(); + logger << " sizes: "; + for( size_t s = 0; s < DIMS - 1; s++ ) { + logger << sizes[ s ] << " x "; + } + logger << sizes[ DIMS - 1 ] << std::endl; + logger << " populating system matrix: "; + timer.start(); + grb::RC rc = hpcg_populate_system_matrix( mg_generators[ i ], + system_levels.at( i )->A, logger ); + ASSERT_RC_SUCCESS( rc ); + logger << " time (ms) " << Stw::nano2Milli( timer.restart() ) << std::endl; + + logger << " populating smoothing data: "; + rc = hpcg_populate_smoothing_data( mg_generators[ i ], *smoother_levels[ i ], + logger ); + logger << " time (ms) " << Stw::nano2Milli( timer.stop() ) << std::endl; + ASSERT_RC_SUCCESS( rc ); + + if( i > 0 ) { + logger << " populating coarsening data: "; + timer.start(); + if( ! in.use_average_coarsener ) { + rc = hpcg_populate_coarsener( mg_generators[ i - 1 ], mg_generators[ i ], + *coarsener_levels[ i - 1 ] ); + } else { + rc = hpcg_populate_coarsener_avg( mg_generators[ i - 1 ], mg_generators[ i ], + *coarsener_levels[ i - 1 ] ); + } + logger << " time (ms) " << Stw::nano2Milli( timer.stop() ) << std::endl; + ASSERT_RC_SUCCESS( rc ); + } } - std::cout << norm << std::endl; } -#endif /** - * @brief Main test, building an HPCG problem and running the simulation closely following the + * Main test, building an HPCG problem and running the simulation closely following the * parameters in the reference HPCG test. 
*/ void grbProgram( const simulation_input & in, struct output & out ) { // get user process ID - assert( spmd<>::pid() < spmd<>::nprocs() ); - grb::utils::Timer timer; - timer.reset(); + const size_t pid = spmd<>::pid(); + Stw timer; + + // standard logger: active only on master node + dist_controller_t dist( pid == 0 ); + // separate thousands when printing integers + class IntegerSeparation : public std::numpunct< char > { + char do_thousands_sep() const override { + return '\''; + } + std::string do_grouping() const override { + return "\03"; + } + }; + std::locale old_locale = std::cout.imbue( std::locale( std::cout.getloc(), new IntegerSeparation ) ); + DistStream logger( dist, std::cout ); - // assume successful run - out.error_code = SUCCESS; - RC rc { SUCCESS }; + logger << "beginning input generation..." << std::endl; // wrap hpcg_data inside a unique_ptr to forget about cleaning chores - std::unique_ptr< hpcg_data< double, double, double > > hpcg_state; - rc = build_3d_system( hpcg_state, in ); + std::unique_ptr< hpcg_data_t > hpcg_state; + + // measure HPCG execution time by default on master + hpcg_controller_t hpcg_controller( pid == 0 ); + // measure MG and smoother only if the user requested it + mg_controller_t mg_controller( pid == 0 && in.mg_log ); + + // trace execution of CG and MG only on master + dbg_controller_t dbg_controller( pid == 0 ); + DBGStream dbg_stream( dbg_controller, std::cout ); + + // define the main runners and initialize the options of its components + coarsener_runner_t coarsener; + smoother_runner_t smoother; + smoother.presmoother_steps = smoother.postsmoother_steps = in.smoother_steps; + smoother.non_recursive_smooth_steps = 1UL; + mg_runner_t mg_runner( smoother, coarsener, dbg_stream ); + hpcg_runner_t hpcg_runner( hpcg_controller, mg_runner, dbg_stream ); + hpcg_runner.tolerance = residual_zero; + hpcg_runner.with_preconditioning = ! 
in.no_preconditioning; + + timer.start(); + // build the entire multi-grid system + build_3d_system( mg_runner.system_levels, coarsener.coarsener_levels, smoother.levels, + hpcg_state, in, mg_controller, logger ); + logger << "input generation time (ms): " << Stw::nano2Milli( timer.restart() ) << std::endl; - if( rc != SUCCESS ) { - std::cerr << "Failure to generate the system (" << toString( rc ) << ")." << std::endl; - out.error_code = rc; - return; - } #ifdef HPCG_PRINT_SYSTEM - if( spmd<>::pid() == 0 ) { - print_system( *hpcg_state ); + if( pid == 0 ) { + print_system( mg_runner.system_levels, coarsener.coarsener_levels ); } #endif - Matrix< double > & A { hpcg_state->A }; - Vector< double > & x { hpcg_state->x }; - Vector< double > & b { hpcg_state->b }; + Matrix< NonzeroType > & A = mg_runner.system_levels[ 0 ]->A; + Vector< IOType > & x = hpcg_state->x; + Vector< NonzeroType > & b = hpcg_state->b; // set vectors as from standard HPCG benchmark - set( x, 1.0 ); - set( b, 0.0 ); - rc = grb::mxv( b, A, x, grb::Semiring< grb::operators::add< double >, grb::operators::mul< double >, grb::identities::zero, grb::identities::one >() ); - set( x, 0.0 ); + RC rc = set( x, 1.0 ); + ASSERT_RC_SUCCESS( rc ); + rc = set( b, nz_zero ); + ASSERT_RC_SUCCESS( rc ); + rc = grb::mxv( b, A, x, Ring() ); + ASSERT_RC_SUCCESS( rc ); + rc = set( x, io_zero ); + ASSERT_RC_SUCCESS( rc ); #ifdef HPCG_PRINT_SYSTEM - if( spmd<>::pid() == 0 ) { + if( pid == 0 ) { print_vector( x, 50, "X" ); print_vector( b, 50, "B" ); } #endif - out.times.preamble = timer.time(); + out.times.preamble = Stw::nano2Milli( timer.restart() ); - const bool with_preconditioning = ! 
in.no_preconditioning; - if( in.evaluation_run ) { - out.test_repetitions = 0; - timer.reset(); - rc = hpcg( *hpcg_state, with_preconditioning, in.smoother_steps, in.smoother_steps, in.max_iterations, 0.0, out.performed_iterations, out.residual ); - double single_time = timer.time(); - if( rc == SUCCESS ) { - rc = collectives<>::reduce( single_time, 0, operators::max< double >() ); - } - out.times.useful = single_time; - out.test_repetitions = static_cast< size_t >( 1000.0 / single_time ) + 1; - } else { - // do benchmark - timer.reset(); - for( size_t i = 0; i < in.test_repetitions && rc == SUCCESS; ++i ) { - rc = set( x, 0.0 ); - assert( rc == SUCCESS ); - rc = hpcg( *hpcg_state, with_preconditioning, in.smoother_steps, in.smoother_steps, in.max_iterations, 0.0, out.performed_iterations, out.residual ); - out.test_repetitions++; - if( rc != SUCCESS ) { - break; - } - } - double time_taken { timer.time() }; - out.times.useful = time_taken / static_cast< double >( out.test_repetitions ); - // sleep( 1 ); - } + mg_data_t & grid_base = *mg_runner.system_levels[ 0 ]; - if( spmd<>::pid() == 0 ) { - if( rc == SUCCESS ) { - if( in.evaluation_run ) { - std::cout << "Info: cold HPCG completed within " << out.performed_iterations << " iterations. Last computed residual is " << out.residual << ". Time taken was " << out.times.useful - << " ms. Deduced inner repetitions parameter of " << out.test_repetitions << " to take 1 second or more per inner benchmark." << std::endl; - } else { - std::cout << "Average time taken for each of " << out.test_repetitions << " HPCG calls (hot start): " << out.times.useful << std::endl; - } - } else { - std::cerr << "Failure: call to HPCG did not succeed (" << toString( rc ) << ")." << std::endl; + // do a cold run to warm the system up + logger << TEXT_HIGHLIGHT << "beginning cold run..." 
<< std::endl; + hpcg_runner.max_iterations = 1; + rc = hpcg_runner( grid_base, *hpcg_state, out.cg_out ); + logger << " time (ms): " << Stw::nano2Milli( timer.restart() ) << std::endl; + ASSERT_RC_SUCCESS( rc ); + + // restore CG options to user-given values + hpcg_runner.max_iterations = in.max_iterations; + logger << TEXT_HIGHLIGHT << "beginning solver..." << std::endl; + out.inner_test_repetitions = 0; + out.times.useful = 0.0; + + // initialize CSV writers (if activated) + hpcg_csv_t hpcg_csv( hpcg_controller, { "repetition", "time" } ); + mg_csv_t mg_csv( mg_controller, { "repetition", "level", "mg time", "smoother time" } ); + timer.reset(); + + // do benchmark: each repetition restarts from x = 0 and records its timings + for( size_t i = 0; i < in.inner_test_repetitions; ++i ) { + rc = set( x, io_zero ); + ASSERT_RC_SUCCESS( rc ); + logger << TEXT_HIGHLIGHT << "beginning iteration: " << i << std::endl; + rc = hpcg_runner( grid_base, *hpcg_state, out.cg_out ); + ASSERT_RC_SUCCESS( rc ); + hpcg_csv.add_line( i, hpcg_runner.getElapsedNano() ); + logger << "repetition,duration (ns): " << hpcg_csv.last_line() << std::endl; + for( const auto & mg_level : mg_runner.system_levels ) { + mg_csv.add_line( i, mg_level->level, mg_level->mg_stopwatch.getElapsedNano(), + mg_level->sm_stopwatch.getElapsedNano() ); + mg_level->mg_stopwatch.reset(); + mg_level->sm_stopwatch.reset(); } + hpcg_runner.reset(); + + out.inner_test_repetitions++; + } + timer.stop(); + out.times.useful += Stw::nano2Milli( timer.getElapsedNano() ); + if( in.evaluation_run ) { + // get maximum execution time among processes + rc = collectives<>::reduce( out.times.useful, 0, operators::max< double >() ); + // do not lose the outcome of the reduction: report it to the caller + out.error_code = rc; + // restore previous output options also on this early exit + std::cout.imbue( old_locale ); + return; } + out.times.useful /= static_cast< double >( in.inner_test_repetitions ); + + logger << TEXT_HIGHLIGHT << "repetitions,average time (ms): " << out.inner_test_repetitions + << ", " << out.times.useful << std::endl; + // restore previous output options + std::cout.imbue( old_locale ); // start postamble - timer.reset(); - // set error code + 
timer.restart(); + // set error code to caller out.error_code = rc; - Semiring< grb::operators::add< double >, grb::operators::mul< double >, grb::identities::zero, grb::identities::one > ring; grb::set( b, 1.0 ); - out.square_norm_diff = 0.0; - grb::eWiseMul( b, -1.0, x, ring ); - grb::dot( out.square_norm_diff, b, b, ring ); + grb::eWiseMul( b, -1.0, x, Ring() ); + out.square_norm_diff = nz_zero; + grb::dot( out.square_norm_diff, b, b, Ring() ); // output - out.pinnedVector = std::unique_ptr< PinnedVector< double > >( new PinnedVector< double >( x, SEQUENTIAL ) ); + out.pinnedVector.reset( new PinnedVector< NonzeroType >( x, SEQUENTIAL ) ); // finish timing - const double time_taken { timer.time() }; - out.times.postamble = time_taken; + + // write measurements into CSV files + if( in.hpcg_log ) { + hpcg_csv.write_to_file( in.hpcg_csv.data() ); + } + if( in.mg_log ) { + mg_csv.write_to_file( in.mg_csv.data() ); + } + out.times.postamble = Stw::nano2Milli( timer.stop() ); } +#define thcout ( std::cout << TEXT_HIGHLIGHT ) + /** - * @brief Parser the command-line arguments to extract the simulation information and checks they are valid. + * Parses the command-line arguments to extract the simulation information and checks they are valid. */ static void parse_arguments( simulation_input &, size_t &, double &, int, char ** ); int main( int argc, char ** argv ) { simulation_input sim_in; size_t test_outer_iterations; - double max_residual_norm; + double max_diff_norm; - parse_arguments( sim_in, test_outer_iterations, max_residual_norm, argc, argv ); + parse_arguments( sim_in, test_outer_iterations, max_diff_norm, argc, argv ); thcout << "System size x: " << sim_in.nx << std::endl; thcout << "System size y: " << sim_in.ny << std::endl; thcout << "System size z: " << sim_in.nz << std::endl; + thcout << "Coarsener: " << ( sim_in.use_average_coarsener ? 
"average" : + "single point sampler" ) << std::endl; thcout << "System max coarsening levels " << sim_in.max_coarsening_levels << std::endl; - thcout << "Test repetitions: " << sim_in.test_repetitions << std::endl; + thcout << "Test repetitions: " << sim_in.inner_test_repetitions << std::endl; thcout << "Max iterations: " << sim_in.max_iterations << std::endl; - thcout << "Direct launch: " << std::boolalpha << sim_in.evaluation_run << std::noboolalpha << std::endl; - thcout << "No conditioning: " << std::boolalpha << sim_in.no_preconditioning << std::noboolalpha << std::endl; + thcout << "Is evaluation run: " << std::boolalpha << sim_in.evaluation_run + << std::noboolalpha << std::endl; + thcout << "Conditioning: " << std::boolalpha << !sim_in.no_preconditioning + << std::noboolalpha << std::endl; thcout << "Smoother steps: " << sim_in.smoother_steps << std::endl; thcout << "Test outer iterations: " << test_outer_iterations << std::endl; - thcout << "Maximum norm for residual: " << max_residual_norm << std::endl; + thcout << "Maximum norm for residual: " << max_diff_norm << std::endl; // the output struct struct output out; // set standard exit code - grb::RC rc { SUCCESS }; + grb::RC rc = SUCCESS; // launch estimator (if requested) if( sim_in.evaluation_run ) { grb::Launcher< AUTOMATIC > launcher; + // run just one inner iteration for evaluation purposes + sim_in.inner_test_repetitions = 1; + thcout << "beginning evaluation run..." 
<< std::endl; rc = launcher.exec( &grbProgram, sim_in, out, true ); - if( rc == SUCCESS ) { - sim_in.test_repetitions = out.test_repetitions; - } else { - thcout << "launcher.exec returns with non-SUCCESS error code " << grb::toString( rc ) << std::endl; - std::exit( -1 ); - } + ASSERT_RC_SUCCESS( rc ); + ASSERT_EQ( out.inner_test_repetitions, 1 ); + // compute number of inner repetitions to achieve at least 1s duration + sim_in.inner_test_repetitions = static_cast< size_t >( 1000.0 / out.times.useful ) + 1; + thcout << "Evaluation run" << std::endl + << " computed residual: " << out.cg_out.norm_residual << std::endl + << " iterations: " << out.cg_out.iterations << std::endl + << " time taken (ms): " << out.times.useful << std::endl + << " deduced inner repetitions for 1s duration: " << sim_in.inner_test_repetitions + << std::endl; } // launch full benchmark grb::Benchmarker< AUTOMATIC > benchmarker; + thcout << "beginning test run..." << std::endl; rc = benchmarker.exec( &grbProgram, sim_in, out, 1, test_outer_iterations, true ); ASSERT_RC_SUCCESS( rc ); - thcout << "Benchmark completed successfully and took " << out.performed_iterations << " iterations to converge with residual " << out.residual << std::endl; - - if( ! out.pinnedVector ) { - thcerr << "no output vector to inspect" << std::endl; - } else { - const PinnedVector< double > &solution { *out.pinnedVector }; - thcout << "Size of x is " << solution.size() << std::endl; - if( solution.size() > 0 ) { - print_vector( solution, 30, "SOLUTION" ); - } else { - thcerr << "ERROR: solution contains no values" << std::endl; - } - } - ASSERT_RC_SUCCESS( out.error_code ); - - double residual_norm { sqrt( out.square_norm_diff ) }; - thcout << "Residual norm: " << residual_norm << std::endl; - - ASSERT_LT( residual_norm, max_residual_norm ); + thcout << "completed successfully!" 
<< std::endl + << " final residual: " << out.cg_out.norm_residual << std::endl + << " solver iterations: " << out.cg_out.iterations << std::endl + << " total time (ms): " << out.times.useful << std::endl; + + // check result vector, stored inside a pinned vector + ASSERT_TRUE( out.pinnedVector ); + const PinnedVector< double > & solution = *out.pinnedVector; + ASSERT_EQ( solution.size(), sim_in.nx * sim_in.ny * sim_in.nz ); + + // check norm of solution w.r.t. expected solution (i.e. vector of all 1) + double diff_norm = sqrt( out.square_norm_diff ); + thcout << "Norm of difference vector: | - | = " + << diff_norm << std::endl; + ASSERT_LT( diff_norm, max_diff_norm ); thcout << "Test OK" << std::endl; return 0; } -static void parse_arguments( simulation_input & sim_in, size_t & outer_iterations, double & max_residual_norm, int argc, char ** argv ) { +static const char * const empty = ""; +static void parse_arguments( simulation_input & sim_in, size_t & outer_iterations, + double & max_diff_norm, int argc, char ** argv ) { argument_parser parser; + const char *hpcg_csv, *mg_csv; + parser.add_optional_argument( "--nx", sim_in.nx, PHYS_SYSTEM_SIZE_DEF, "physical system size along x" ) .add_optional_argument( "--ny", sim_in.ny, PHYS_SYSTEM_SIZE_DEF, "physical system size along y" ) .add_optional_argument( "--nz", sim_in.nz, PHYS_SYSTEM_SIZE_DEF, "physical system size along z" ) - .add_optional_argument( "--max_coarse-levels", sim_in.max_coarsening_levels, DEF_COARSENING_LEVELS, - "maximum level for coarsening; 0 means no coarsening; note: actual " - "level may be limited" + .add_optional_argument( "--max-coarse-levels", sim_in.max_coarsening_levels, MAX_COARSENING_LEVELS, + "maximum level for coarsening; 0 means no coarsening; note: actual level may be limited" " by the minimum system dimension" ) - .add_optional_argument( "--test-rep", sim_in.test_repetitions, grb::config::BENCHMARKING::inner(), "consecutive test repetitions before benchmarking" ) - 
.add_optional_argument( "--init-iter", outer_iterations, grb::config::BENCHMARKING::outer(), "test repetitions with complete initialization" ) - .add_optional_argument( "--max_iter", sim_in.max_iterations, MAX_ITERATIONS_DEF, "maximum number of HPCG iterations" ) - .add_optional_argument( "--max-residual-norm", max_residual_norm, MAX_NORM, - "maximum norm for the residual to be acceptable (does NOT limit " - "the execution of the algorithm)" ) - .add_optional_argument( "--smoother-steps", sim_in.smoother_steps, SMOOTHER_STEPS_DEF, "number of pre/post-smoother steps; 0 disables smoothing" ) + .add_optional_argument( "--inner-iterations", sim_in.inner_test_repetitions, 1, + "consecutive test repetitions before benchmarking" ) + .add_optional_argument( "--outer-iterations", outer_iterations, 1, + "test repetitions with complete initialization" ) + .add_optional_argument( "--max-cg-iterations", sim_in.max_iterations, MAX_ITERATIONS_DEF, + "maximum number of CG iterations" ) + .add_optional_argument( "--max-difference-norm", max_diff_norm, MAX_NORM, "maximum acceptable" + " norm | - | (does NOT limit the execution of the algorithm)" ) + .add_optional_argument( "--smoother-steps", sim_in.smoother_steps, SMOOTHER_STEPS_DEF, + "number of pre/post-smoother steps; 0 disables smoothing" ) .add_option( "--evaluation-run", sim_in.evaluation_run, false, - "launch single run directly, without benchmarker (ignore " - "repetitions)" ) - .add_option( "--no-preconditioning", sim_in.no_preconditioning, false, "do not apply pre-conditioning via multi-grid V cycle" ); + "launch single run directly, without benchmarker (ignore repetitions)" ) + .add_option( "--no-preconditioning", sim_in.no_preconditioning, false, + "do not apply pre-conditioning via multi-grid V cycle" ) + .add_optional_argument( "--hpcg-csv", hpcg_csv, empty, + "file for HPCG run measurements (overwrites any previous)" ) + .add_optional_argument( "--mg-csv", mg_csv, empty, + "file for Multigrid run measurements 
(overwrites any previous)" ) + .add_option( "--use-average-coarsener", sim_in.use_average_coarsener, false, + "coarsen by averaging instead of by sampling a single point (slower, but more accurate)" ); parser.parse( argc, argv ); - // check for valid values - size_t ssize { std::max( next_pow_2( sim_in.nx ), PHYS_SYSTEM_SIZE_MIN ) }; - if( ssize != sim_in.nx ) { - std::cout << "Setting system size x to " << ssize << " instead of " << sim_in.nx << std::endl; - sim_in.nx = ssize; - } - ssize = std::max( next_pow_2( sim_in.ny ), PHYS_SYSTEM_SIZE_MIN ); - if( ssize != sim_in.ny ) { - std::cout << "Setting system size y to " << ssize << " instead of " << sim_in.ny << std::endl; - sim_in.ny = ssize; - } - ssize = std::max( next_pow_2( sim_in.nz ), PHYS_SYSTEM_SIZE_MIN ); - if( ssize != sim_in.nz ) { - std::cout << "Setting system size z to " << ssize << " instead of " << sim_in.nz << std::endl; - sim_in.nz = ssize; - } if( sim_in.max_coarsening_levels > MAX_COARSENING_LEVELS ) { - std::cout << "Setting max coarsening level to " << MAX_COARSENING_LEVELS << " instead of " << sim_in.max_coarsening_levels << std::endl; - sim_in.max_coarsening_levels = MAX_COARSENING_LEVELS; + std::cerr << "ERROR: max coarsening level is " << sim_in.max_coarsening_levels << + "; at most " << MAX_COARSENING_LEVELS << " is allowed" << std::endl; + std::exit( -1 ); } - if( sim_in.test_repetitions == 0 ) { - std::cerr << "ERROR no test runs selected: set \"--test-rep >0\"" << std::endl; + if( sim_in.inner_test_repetitions == 0 ) { + std::cerr << "ERROR no test runs selected: set \"--inner-iterations\" > 0" << std::endl; std::exit( -1 ); } if( sim_in.max_iterations == 0 ) { - std::cout << "Setting number of iterations to 1" << std::endl; - sim_in.max_iterations = 1; + std::cerr << "ERROR no CG iterations selected: set \"--max-cg-iterations > 0\"" << std::endl; + std::exit( -1 ); + } + + // check sizes + const size_t max_system_divider = 1 << sim_in.max_coarsening_levels; + for( size_t s : { 
sim_in.nx, sim_in.ny, sim_in.nz } ) { + // split each size into the coarsest size (quot) and what is left over (rem) + std::lldiv_t div_res = std::div( static_cast< long long >( s ), + static_cast< long long >( max_system_divider ) ); + if( div_res.rem != 0 ) { + std::cerr << "ERROR: system size " << s << " cannot be coarsened " << sim_in.max_coarsening_levels + << " times because it is not exactly divisible" << std::endl; + std::exit( -1 ); + } + if( div_res.quot < static_cast< long long >( PHYS_SYSTEM_SIZE_MIN ) ) { + std::cerr << "ERROR: system size " << s << " cannot be coarsened " << sim_in.max_coarsening_levels + << " times because it is too small" << std::endl; + std::exit( -1 ); + } + if( div_res.quot % 2 != 0 ) { + // the remainder is zero at this point: the value to report is the quotient + std::cerr << "ERROR: the coarsest size " << div_res.quot << " is not even" << std::endl; + std::exit( -1 ); + } } -} + // check output CSV file names: an empty name disables the corresponding log + size_t len = std::strlen( hpcg_csv ); + if( ( sim_in.hpcg_log = len > 0 ) ) { + if( len > MAX_CSV_PATH_LENGTH ) { + std::cerr << "HPCG CSV file name is too long!" << std::endl; + std::exit( -1 ); + } + std::strncpy( sim_in.hpcg_csv.data(), hpcg_csv, MAX_CSV_PATH_LENGTH ); + } + len = std::strlen( mg_csv ); + if( ( sim_in.mg_log = len > 0 ) ) { + if( len > MAX_CSV_PATH_LENGTH ) { + std::cerr << "MG CSV file name is too long!" << std::endl; + std::exit( -1 ); + } + std::strncpy( sim_in.mg_csv.data(), mg_csv, MAX_CSV_PATH_LENGTH ); + } +} diff --git a/tests/utils/matrix_generators.hpp b/tests/utils/matrix_generators.hpp index be45890c6..65fe789be 100644 --- a/tests/utils/matrix_generators.hpp +++ b/tests/utils/matrix_generators.hpp @@ -35,6 +35,7 @@ #include #include +#include namespace grb { @@ -114,28 +115,6 @@ namespace grb { namespace internal { - /** - * Computes the difference between \a a and \a b and returns it as the given - * type \a DiffT. - * - * Raises an exception if \a DiffT cannot store the difference. 
- */ - template< - typename SizeT, - typename DiffT - > - DiffT compute_distance( - const SizeT a, - const SizeT b - ) { - const SizeT diff = std::max( a, b ) - std::min( a, b ); - if( diff > static_cast< SizeT >( std::numeric_limits< DiffT >::max() ) ) { - throw std::range_error( "cannot represent difference" ); - } - DiffT result = static_cast< DiffT >( diff ); - return a >= b ? result : -result ; - } - /** * Stores the coordinate for a generator of diagonal matrices. */ @@ -240,9 +219,8 @@ namespace grb { typename SelfType::difference_type operator-( const SelfType &other ) const { - return internal::compute_distance< - size_t, typename SelfType::difference_type - >( this->_v.coord, other._v.coord ); + return compute_signed_distance< typename SelfType::difference_type, + size_t >( this->_v.coord, other._v.coord ); } typename SelfType::pointer operator->() { return &_v; } @@ -461,9 +439,8 @@ namespace grb { const size_t this_position = coords_to_linear( _v.size, _v.row, _v.col ); const size_t other_position = coords_to_linear( other._v.size, other._v.row, other._v.col ); - return internal::compute_distance< - size_t, typename SelfType::difference_type - >( this_position, other_position ); + return compute_signed_distance< typename SelfType::difference_type, + size_t >( this_position, other_position ); } typename SelfType::pointer operator->() { return &_v; } @@ -584,9 +561,8 @@ namespace grb { typename SelfType::difference_type operator-( const SelfType &other ) const { - return internal::compute_distance< - size_t, typename SelfType::difference_type - >( this->_v.offset, other._v.offset ); + return compute_signed_distance< typename SelfType::difference_type, + size_t >( this->_v.offset, other._v.offset ); } typename SelfType::pointer operator->() { return &_v; }