diff --git a/include/CMakeLists.txt b/include/CMakeLists.txt index 714e6034b..5b48b9e55 100644 --- a/include/CMakeLists.txt +++ b/include/CMakeLists.txt @@ -219,30 +219,20 @@ install( DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/graphblas/interfaces/" install( TARGETS algorithms EXPORT GraphBLASTargets ) -# generate the spblas header with the library prefix -configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/transition/spblas.h.in - ${CMAKE_CURRENT_BINARY_DIR}/transition/spblas.h @ONLY -) - # this target lists the transition path headers # these are plain C headers and do not have any dependences -add_library( transition_headers INTERFACE ) +add_library( transition INTERFACE ) target_include_directories( - transition_headers INTERFACE + transition INTERFACE $ - $ $ ) -install( FILES ${CMAKE_CURRENT_BINARY_DIR}/transition/spblas.h - DESTINATION "${INCLUDE_INSTALL_DIR}/transition" -) - install( DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/transition/" - DESTINATION "${INCLUDE_INSTALL_DIR}/transition" + DESTINATION "${GRB_INCLUDE_INSTALL_DIR}/../transition/" FILES_MATCHING REGEX "${HEADERS_REGEX}" ) -install( TARGETS transition_headers EXPORT GraphBLASTargets ) +install( TARGETS transition EXPORT GraphBLASTargets ) diff --git a/include/graphblas.hpp b/include/graphblas.hpp index 7a61f9599..a539a5c0d 100644 --- a/include/graphblas.hpp +++ b/include/graphblas.hpp @@ -38,11 +38,6 @@ * -# generalised sparse linear algebra, \ref GraphBLAS; * -# vertex-centric programming, \ref Pregel. * - * Additionally, to ease integration with existing software, ALP defines - * so-called \ref TRANS libraries, which presently includes (partial) - * implementations of the \ref SPARSEBLAS and \ref SPBLAS (de-facto) standards, - * as well as an interface for numerical \ref TRANS_SOLVERS. - * * Several other programming interfaces are under design at present. * * For authors who contributed to ALP, please see the NOTICE file. 
diff --git a/include/graphblas/algorithms/conjugate_gradient.hpp b/include/graphblas/algorithms/conjugate_gradient.hpp index 27616ec77..b87cfa080 100644 --- a/include/graphblas/algorithms/conjugate_gradient.hpp +++ b/include/graphblas/algorithms/conjugate_gradient.hpp @@ -27,7 +27,7 @@ #define _H_GRB_ALGORITHMS_CONJUGATE_GRADIENT #include -#include +#include #include #include @@ -144,8 +144,7 @@ namespace grb { * performance semantics, with the exception of getters such as #grb::nnz, are * specific to the backend selected during compilation. */ - template< - Descriptor descr = descriptors::no_operation, + template< Descriptor descr = descriptors::no_operation, typename IOType, typename ResidualType, typename NonzeroType, @@ -155,20 +154,19 @@ namespace grb { grb::identities::zero, grb::identities::one >, class Minus = operators::subtract< IOType >, - class Divide = operators::divide< IOType >, - typename RSI, typename NZI, Backend backend + class Divide = operators::divide< IOType > > grb::RC conjugate_gradient( - grb::Vector< IOType, backend > &x, - const grb::Matrix< NonzeroType, backend, RSI, RSI, NZI > &A, - const grb::Vector< InputType, backend > &b, + grb::Vector< IOType > &x, + const grb::Matrix< NonzeroType > &A, + const grb::Vector< InputType > &b, const size_t max_iterations, ResidualType tol, size_t &iterations, ResidualType &residual, - grb::Vector< IOType, backend > &r, - grb::Vector< IOType, backend > &u, - grb::Vector< IOType, backend > &temp, + grb::Vector< IOType > &r, + grb::Vector< IOType > &u, + grb::Vector< IOType > &temp, const Ring &ring = Ring(), const Minus &minus = Minus(), const Divide ÷ = Divide() @@ -326,7 +324,7 @@ namespace grb { assert( ret == SUCCESS ); if( ret == SUCCESS ) { - tol *= std::sqrt( grb::utils::is_complex< IOType >::modulus( bnorm ) ); + tol *= sqrt( grb::utils::is_complex< IOType >::modulus( bnorm ) ); } size_t iter = 0; @@ -419,7 +417,7 @@ namespace grb { // return correct error code if( ret == SUCCESS ) { - if( 
std::sqrt( residual ) >= tol ) { + if( sqrt( residual ) >= tol ) { // did not converge within iterations return FAILED; } diff --git a/include/graphblas/algorithms/triangle_count.hpp b/include/graphblas/algorithms/triangle_count.hpp new file mode 100644 index 000000000..6cb9f2ca7 --- /dev/null +++ b/include/graphblas/algorithms/triangle_count.hpp @@ -0,0 +1,249 @@ + +/* + * Copyright 2023 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file + * + * Implements the triangle counting algorithm, using different methods. 
+ * + * @author Benjamin Lozes + * @date: May 10th, 2023 + */ + +#ifndef _H_GRB_TRIANGLE_COUNT +#define _H_GRB_TRIANGLE_COUNT + +#include +#include +#include + +#include + +#include + +namespace grb { + + namespace algorithms { + + enum class TriangleCountAlgorithm { Burkhardt, Cohen, Sandia_TT }; + + std::map< TriangleCountAlgorithm, std::string > TriangleCountAlgorithmNames = { + { TriangleCountAlgorithm::Burkhardt, "Burkhardt" }, + { TriangleCountAlgorithm::Cohen, "Cohen" }, + { TriangleCountAlgorithm::Sandia_TT, "Sandia_TT" } + }; + + template< + class Semiring, class MulMonoid, class SumMonoid, + Descriptor descr_mxm = descriptors::no_operation, + Descriptor descr_ewa = descriptors::no_operation, + Descriptor descr_reduce = descriptors::no_operation, + typename D1, typename RIT1, typename CIT1, typename NIT1, + typename D2, typename RIT2, typename CIT2, typename NIT2, + typename D3, typename RIT3, typename CIT3, typename NIT3, + typename D4, typename RIT4, typename CIT4, typename NIT4, + typename D5, typename RIT5, typename CIT5, typename NIT5, + typename D6 + > + RC triangle_count_generic( + size_t & count, + Matrix< D1, config::default_backend, RIT1, CIT1, NIT1 > & MXM_out, + const Matrix< D2, config::default_backend, RIT2, CIT2, NIT2 > & MXM_lhs, + const Matrix< D3, config::default_backend, RIT3, CIT3, NIT3 > & MXM_rhs, + Matrix< D4, config::default_backend, RIT4, CIT4, NIT4 > & EWA_out, + const Matrix< D5, config::default_backend, RIT5, CIT5, NIT5 > & EWA_rhs, + const D6 div_factor, + const Semiring mxm_semiring = Semiring(), + const MulMonoid ewiseapply_monoid = MulMonoid(), + const SumMonoid sumreduce_monoid = SumMonoid() + ) { + if( ( &MXM_out == &MXM_lhs ) || ( &MXM_out == &MXM_rhs ) ) { + return ILLEGAL; + } + + RC rc = SUCCESS; + + // Compute MXM_out = Mlhs * Mrhs + rc = rc ? rc : mxm< descr_mxm >( MXM_out, MXM_lhs, MXM_rhs, mxm_semiring, Phase::RESIZE ); + rc = rc ? 
rc : mxm< descr_mxm >( MXM_out, MXM_lhs, MXM_rhs, mxm_semiring, Phase::EXECUTE ); + + // Compute MXM_out .*= EWA_rhs + // FIXME: Replace by a foldl( Matrix[in,out], Matrix[in], Monoid ) - not implemented yet + // Will then become: + // rc = rc ? rc : eWiseApply< descr_ewa >( MXM_out, MXM_out, EWA_rhs, ewiseapply_monoid, Phase::RESIZE ); + // rc = rc ? rc : eWiseApply< descr_ewa >( MXM_out, MXM_out, EWA_rhs, ewiseapply_monoid, Phase::EXECUTE ); + // Instead of: + rc = rc ? rc : eWiseApply< descr_ewa >( EWA_out, MXM_out, EWA_rhs, ewiseapply_monoid, Phase::RESIZE ); + rc = rc ? rc : eWiseApply< descr_ewa >( EWA_out, MXM_out, EWA_rhs, ewiseapply_monoid, Phase::EXECUTE ); + + // Compute a sum reduction over into + count = static_cast< size_t >( 0 ); + rc = rc ? rc : foldl< descr_reduce >( count, EWA_out, sumreduce_monoid ); + + // Apply the div_factor to the reduction result + count /= div_factor; + + return rc; + } + + /** + * Given a graph, indicates how many triangles are contained within. + * + * @tparam D The type of the matrix non-zero values. + * + * @param[out] count The number of triangles. + * Any prior contents will be ignored. + * @param[in] A The input graph. + * @param[in,out] MXM_out Buffer matrix with the same dimensions as the input + * graph. Any prior contents will be ignored. + * @param[in] L Lower triangular matrix of the input graph (optional) + * @param[in] U Lower triangular matrix of the input graph (optional) + * + * + * @returns #grb::SUCCESS When the computation completes successfully. + * @returns #grb::MISMATCH If the dimensions of the input matrices/buffers + * are incompatible. + * @returns #grb::ILLEGAL If the given algorithm does not exist. + * @returns #grb::PANIC If an unrecoverable error has been encountered. The + * output as well as the state of ALP/GraphBLAS is + * undefined. + * + * \par Performance semantics + * + * -# This function does not allocate nor free dynamic memory, nor shall it + * make any system calls. 
+ * + * For performance semantics regarding work, inter-process data movement, + * intra-process data movement, synchronisations, and memory use, please see + * the specification of the ALP primitives this function relies on. These + * performance semantics, with the exception of getters such as #grb::nnz, are + * specific to the backend selected during compilation. + */ + template< + Descriptor descr = descriptors::no_operation, + typename D1, typename RIT1, typename CIT1, typename NIT1, + typename D2, typename RIT2, typename CIT2, typename NIT2, + typename D3, typename RIT3, typename CIT3, typename NIT3, + typename D4, typename RIT4, typename CIT4, typename NIT4, + class Semiring = Semiring< operators::add< D1 >, + operators::mul< D1 >, + identities::zero, + identities::one >, + class MulMonoid = Monoid< operators::mul< D1 >, + identities::one >, + class SumMonoid = Monoid< operators::add< size_t, D1, size_t >, + identities::zero > + > + RC triangle_count( + const TriangleCountAlgorithm algo, + size_t & count, + const Matrix< D1, config::default_backend, RIT1, CIT1, NIT1 > & A, + Matrix< D2, config::default_backend, RIT2, CIT2, NIT2 > & MXM_out, + Matrix< D3, config::default_backend, RIT3, CIT3, NIT3 > & EWA_out, + Matrix< D4, config::default_backend, RIT4, CIT4, NIT4 > & L = { 0, 0 }, + Matrix< D4, config::default_backend, RIT4, CIT4, NIT4 > & U = { 0, 0 } + ) { + // Static assertions + static_assert( std::is_integral< D1 >::value, "Type D1 must be integral" ); + + // Sanity checks + if( nrows( A ) != ncols( A ) ) { + std::cerr << "Matrix A must be square" << std::endl; + return MISMATCH; + } + if( ncols( L ) != nrows( L ) ) { + std::cerr << "Matrix L must be square" << std::endl; + return MISMATCH; + } + if( nrows( A ) != ncols( L ) ) { + std::cerr << "Matrices A and L must have the same dimensions" << std::endl; + return MISMATCH; + } + if( ncols( U ) != nrows( U ) ) { + std::cerr << "Matrix U must be square" << std::endl; + return MISMATCH; + } + if( nrows( A 
) != ncols( U ) ) { + std::cerr << "Matrices A and U must have the same dimensions" << std::endl; + return MISMATCH; + } + if( ncols( MXM_out ) != nrows( MXM_out ) ) { + std::cerr << "Matrix MXM_out must be square" << std::endl; + return MISMATCH; + } + if( nrows( A ) != ncols( MXM_out ) ) { + std::cerr << "Matrices A and MXM_out must have the same dimensions" << std::endl; + return MISMATCH; + } + if( ncols( EWA_out ) != nrows( EWA_out ) ) { + std::cerr << "Matrix EWA_out must be square" << std::endl; + return MISMATCH; + } + if( nrows( A ) != ncols( EWA_out ) ) { + std::cerr << "Matrices A and EWA_out must have the same dimensions" << std::endl; + return MISMATCH; + } + + // Dispatch to the appropriate algorithm + switch( algo ) { + case TriangleCountAlgorithm::Burkhardt: { + return triangle_count_generic< + Semiring, MulMonoid, SumMonoid, + descr | descriptors::transpose_right + >( count, MXM_out, A, A, EWA_out, A, 6UL ); + } + + case TriangleCountAlgorithm::Cohen: { + if( nrows( L ) == 0 || ncols( L ) == 0 ) { + std::cerr << "Matrix L must be provided for the Cohen algorithm" << std::endl; + return MISMATCH; + } + if( nrows( U ) == 0 || ncols( U ) == 0 ) { + std::cerr << "Matrix U must be provided for the Cohen algorithm" << std::endl; + return MISMATCH; + } + + return triangle_count_generic< + Semiring, MulMonoid, SumMonoid + >( count, MXM_out, L, U, EWA_out, A, 2UL ); + } + + case TriangleCountAlgorithm::Sandia_TT: { + if( ( nrows( U ) == 0 || ncols( U ) == 0 ) && ( nrows( L ) == 0 || ncols( L ) == 0 ) ) { + std::cerr << "Matrix L or U must be provided for the Sandia_TT algorithm" << std::endl; + return MISMATCH; + } + + const Matrix< D4, config::default_backend, RIT4, CIT4, NIT4 > & T = ( nrows( U ) == 0 || ncols( U ) == 0 ) ? 
L : U; + return triangle_count_generic< + Semiring, MulMonoid, SumMonoid + >( count, MXM_out, T, T, EWA_out, T, 1UL ); + } + + default: + std::cerr << "Unknown TriangleCountAlgorithm enum value" << std::endl; + return ILLEGAL; + } + + return SUCCESS; + } + + } // namespace algorithms + +} // namespace grb + +#endif // _H_GRB_TRIANGLE_COUNT diff --git a/include/graphblas/backends.hpp b/include/graphblas/backends.hpp index 653348112..3fd2f0ec1 100644 --- a/include/graphblas/backends.hpp +++ b/include/graphblas/backends.hpp @@ -29,9 +29,6 @@ #ifndef _H_GRB_BACKENDS #define _H_GRB_BACKENDS -#include -#include - namespace grb { @@ -221,42 +218,6 @@ namespace grb { }; - /** - * Converts a backend identifier to a human-readable string. - * - * @param[in] backend The backend whose string to return. - * - * @return The name of the given \a backend as a C++ string. - */ - static inline std::string toString( const enum grb::Backend backend ) { - switch( backend ) { - case grb::Backend::reference: return "reference"; - case grb::Backend::reference_omp: return "reference_omp"; - case grb::Backend::hyperdags: return "hyperdags"; - case grb::Backend::nonblocking: return "nonblocking"; - case grb::Backend::shmem1D: return "shmem1D"; - case grb::Backend::NUMA1D: return "NUMA1D"; - case grb::Backend::GENERIC_BSP: return "GENERIC_BSP"; - case grb::Backend::BSP1D: return "BSP1D"; - case grb::Backend::doublyBSP1D: return "doublyBSP1D"; - case grb::Backend::BSP2D: return "BSP2D"; - case grb::Backend::autoBSP: return "autoBSP"; - case grb::Backend::optBSP: return "optBSP"; - case grb::Backend::hybrid: return "hybrid"; - case grb::Backend::hybridSmall: return "hybridSmall"; - case grb::Backend::hybridMid: return "hybridMid"; - case grb::Backend::hybridLarge: return "hybridLarge"; - case grb::Backend::minFootprint: return "minFootprint"; - case grb::Backend::banshee: return "banshee"; - case grb::Backend::banshee_ssr: return "banshee_ssr"; - default: - const int backend_id = static_cast< 
int >( backend ); - std::cerr << "Warning, std::string( const grb::Backend ): unknown backend " - << backend_id << " encountered, please submit a bug report.\n"; - return "unknown_backend(id=" + std::to_string( backend_id ) + ")"; - } - } - } // namespace grb #endif diff --git a/include/graphblas/base/vector.hpp b/include/graphblas/base/vector.hpp index 5eb4ad83a..c00ca6e53 100644 --- a/include/graphblas/base/vector.hpp +++ b/include/graphblas/base/vector.hpp @@ -236,28 +236,6 @@ namespace grb { (void) n; } - /** - * Creates a dense ALP/GraphBLAS vector. - * - * This constructor takes an initialiser list of values that will be copied - * into this vector. The size of the vector will be equal to the number of - * elements in the initialiser list. - * - * For backends with more than one user process, the size of \a vals is the - * global vector size, and the contents of \a vals are processed using - * sequential I/O semantics. - * - * @see #grb::IOMode For the difference between sequential and parallel I/O - * modes. - * - * \note There is only a difference if there are more than one user process. - * - * @param[in] vals The values to be copied into this vector. - */ - Vector( const std::initializer_list< D > &vals ) { - (void) vals; - } - /** * Move constructor. * diff --git a/include/graphblas/bsp1d/vector.hpp b/include/graphblas/bsp1d/vector.hpp index a0d44829f..1e85db74e 100644 --- a/include/graphblas/bsp1d/vector.hpp +++ b/include/graphblas/bsp1d/vector.hpp @@ -2377,67 +2377,6 @@ namespace grb { #endif } - /** - * Constructs a BSP1D vector. - * - * @see Full description in base backend. - * - * \internal - * This constructor initialises the local vector and synchronises the global - * vector once. 
- * - * TODO rewrite below logic using an iterator filter (GitHub PR 233, issue - * 228) - * \endinternal - */ - Vector( const std::initializer_list< D > &vals ) - : Vector( vals.size(), vals.size() ) - { -#ifdef _DEBUG - std::cerr << "In Vector< BSP1D >::Vector( initializer_list ) constructor\n"; -#endif - RC ret = SUCCESS; - const size_t n = vals.size(); - const internal::BSP1D_Data &data = internal::grb_BSP1D.cload(); - - // Set all the local values - for( size_t i = 0; i < vals.size(); i++ ) { - const D val = *( vals.begin() + i ); - - // check if local - // if( (i / x._b) % data.P != data.s ) { - if( data.s != - internal::Distribution< BSP1D >::global_index_to_process_id( - i, n, data.P - ) - ) { - continue; - } - - // local, so translate index and perform requested operation - const size_t local_index = - internal::Distribution< BSP1D >::global_index_to_local( i, n, data.P ); -#ifdef _DEBUG - std::cout << data.s << ", grb::setElement translates global index " - << i << " to " << local_index << "\n"; -#endif - ret = ret - ? ret - : setElement( _local, val, local_index, EXECUTE ); - } - - // Synchronise once between all processes - if( SUCCESS != - collectives< BSP1D >::allreduce( ret, operators::any_or< RC >() ) - ) { - throw std::runtime_error( "grb::Vector< BSP1D >::Vector( initializer_list ): " - "collective::allreduce failed." ); - } - - // on successful execute, sync new nnz count - updateNnz(); - } - /** * Copy constructor. 
* diff --git a/include/graphblas/hyperdags/io.hpp b/include/graphblas/hyperdags/io.hpp index 09b16634a..db1f09e54 100644 --- a/include/graphblas/hyperdags/io.hpp +++ b/include/graphblas/hyperdags/io.hpp @@ -180,8 +180,7 @@ namespace grb { typename T > RC set( - Vector< DataType, hyperdags, Coords > &x, - const T val, + Vector< DataType, hyperdags, Coords > &x, const T val, const Phase &phase = EXECUTE, const typename std::enable_if< !grb::is_object< DataType >::value && diff --git a/include/graphblas/hyperdags/vector.hpp b/include/graphblas/hyperdags/vector.hpp index 02c0f7734..5f422399e 100644 --- a/include/graphblas/hyperdags/vector.hpp +++ b/include/graphblas/hyperdags/vector.hpp @@ -161,15 +161,6 @@ namespace grb { register_vector(); } - Vector( const std::initializer_list< T > vals ) : vector( vals ) - { -#ifdef _DEBUG - std::cout << "In Vector< hyperdags >::Vector( initializer_list )" - << " constructor\n"; -#endif - register_vector(); - } - ~Vector() { #ifdef _DEBUG std::cout << "Vector (hyperdags) destructor\n"; diff --git a/include/graphblas/nonblocking/blas1.hpp b/include/graphblas/nonblocking/blas1.hpp index f970da41a..f9f14cafc 100644 --- a/include/graphblas/nonblocking/blas1.hpp +++ b/include/graphblas/nonblocking/blas1.hpp @@ -548,7 +548,7 @@ namespace grb { typename Monoid::D3 global = monoid.template getIdentity< typename Monoid::D3 >(); - size_t local_reduced_size = NONBLOCKING::numThreads() * + size_t local_reduced_size = sysconf( _SC_NPROCESSORS_ONLN ) * config::CACHE_LINE_SIZE::value(); IOType local_reduced[ local_reduced_size ]; @@ -10550,7 +10550,7 @@ namespace grb { typename AddMonoid::D3 reduced = addMonoid.template getIdentity< typename AddMonoid::D3 >(); - size_t reduced_size = NONBLOCKING::numThreads() * + size_t reduced_size = sysconf( _SC_NPROCESSORS_ONLN ) * config::CACHE_LINE_SIZE::value(); typename AddMonoid::D3 array_reduced[ reduced_size ]; diff --git a/include/graphblas/nonblocking/coordinates.hpp 
b/include/graphblas/nonblocking/coordinates.hpp index ad9e3c670..bcb4cf42a 100644 --- a/include/graphblas/nonblocking/coordinates.hpp +++ b/include/graphblas/nonblocking/coordinates.hpp @@ -232,21 +232,6 @@ namespace grb { } } - /** - * Sets this data structure to a dummy placeholder for a dense structure. - * - * This structure will be immutable, and does not support the majority of - * operations this class defines; use dense coordinates with care. - */ - void setDense( const size_t dim ) noexcept { - _assigned = nullptr; - _stack = nullptr; - _buffer = nullptr; - _n = dim; - _cap = dim; - _buf = 0; - } - inline bool assign( const size_t i ) noexcept { if( _n == _cap ) { return true; @@ -265,7 +250,7 @@ namespace grb { } template< bool maybe_invalid = false > - inline void local_assignAll() noexcept { + inline void local_assignAll( ) noexcept { if( maybe_invalid || _n != _cap ) { if( _assigned != nullptr ) { assert( _stack != nullptr ); @@ -318,21 +303,8 @@ namespace grb { } } - inline void assignAll() noexcept { - // this operates on the global coordinates, not on a local view of it - #pragma omp parallel - { - size_t start, end; - config::OMP::localRange( start, end, 0, _cap ); - for( size_t i = start; i < end; ++i ) { - _assigned[ i ] = true; - _stack[ i ] = i; - } - } - _n = _cap; - } - inline void clear() noexcept { + if( _n == _cap ) { #ifndef NDEBUG if( _assigned == nullptr && _cap > 0 ) { @@ -340,6 +312,7 @@ namespace grb { assert( dense_coordinates_may_not_call_clear ); } #endif + #pragma omp parallel for schedule( dynamic, config::CACHE_LINE_SIZE::value() ) for( size_t i = 0; i < _cap; ++i ) { _assigned[ i ] = false; diff --git a/include/graphblas/nonblocking/matrix.hpp b/include/graphblas/nonblocking/matrix.hpp index 00d0e5ebb..251e2037d 100644 --- a/include/graphblas/nonblocking/matrix.hpp +++ b/include/graphblas/nonblocking/matrix.hpp @@ -419,32 +419,6 @@ namespace grb { const Matrix< InputType, nonblocking, RIT, CIT, NIT > & ); - // Native interface 
friends - - friend const grb::Matrix< - D, nonblocking, - ColIndexType, ColIndexType, NonzeroIndexType - > - internal::wrapCRSMatrix< D, ColIndexType, NonzeroIndexType, nonblocking >( - const D *__restrict__ const, - const ColIndexType *__restrict__ const, - const NonzeroIndexType *__restrict__ const, - const size_t, const size_t - ); - - friend grb::Matrix< - D, nonblocking, - ColIndexType, ColIndexType, NonzeroIndexType - > - internal::wrapCRSMatrix< D, ColIndexType, NonzeroIndexType, nonblocking >( - D *__restrict__ const, - ColIndexType *__restrict__ const, - NonzeroIndexType *__restrict__ const, - const size_t, const size_t, const size_t, - char * const, char * const, - D *__restrict__ const - ); - private: diff --git a/include/graphblas/nonblocking/vector.hpp b/include/graphblas/nonblocking/vector.hpp index a70c48521..9b57d8c24 100644 --- a/include/graphblas/nonblocking/vector.hpp +++ b/include/graphblas/nonblocking/vector.hpp @@ -197,46 +197,12 @@ namespace grb { friend class PinnedVector< D, nonblocking >; - // Native interface friends - - template< typename ValueType, Backend backend > - friend Vector< - ValueType, backend, - internal::Coordinates< - config::IMPLEMENTATION< backend >::coordinatesBackend() - > - > internal::wrapRawVector( const size_t n, ValueType *__restrict__ const - raw ); - - template< typename ValueType, Backend backend > - friend const Vector< - ValueType, backend, - internal::Coordinates< - config::IMPLEMENTATION< backend >::coordinatesBackend() - > - > internal::wrapRawVector( const size_t n, const ValueType *__restrict__ const raw ); - private: Vector< D, reference, MyCoordinates > ref; - protected: - - /** - * \internal Internal constructor that wraps around an existing raw dense - * vector. This constructor results in a dense vector whose - * structure is immutable. Any invalid use incurs UB; use with care. 
- */ - Vector( const size_t n, D *__restrict__ const raw ) : ref( n, raw ) { -#ifdef _DEBUG - std::cerr << "In Vector< nonblocking > constructor that wraps around an " - << "external raw array.\n"; -#endif - } - - public: /** @see Vector::value_type. */ @@ -255,14 +221,8 @@ namespace grb { typedef typename Vector< D, reference, MyCoordinates >::const_iterator const_iterator; - Vector( const size_t n, const size_t nz ) : ref( n, nz ) { - // pipeline execution is not required here as this is a grb::Vector - // declaration -#ifdef _DEBUG - std::cerr << "In Vector< nonblocking >::Vector( size_t, size_t )" - << " constructor\n"; -#endif - } + + Vector( const size_t n, const size_t nz ) : ref( n, nz ) {} Vector( const size_t n ) : Vector( n, n ) { @@ -273,15 +233,6 @@ namespace grb { #endif } - Vector( const std::initializer_list< D > vals ) : ref( vals ) { - // pipeline execution is not required here as this is a grb::Vector - // declaration -#ifdef _DEBUG - std::cerr << "In Vector< nonblocking >::Vector( initializer_list )" - << " constructor\n"; -#endif - } - Vector() : Vector( 0 ) {} Vector( const Vector< D, nonblocking, MyCoordinates > &x ) : diff --git a/include/graphblas/reference/blas2.hpp b/include/graphblas/reference/blas2.hpp index 865f7bc51..40acf6531 100644 --- a/include/graphblas/reference/blas2.hpp +++ b/include/graphblas/reference/blas2.hpp @@ -2573,10 +2573,9 @@ namespace grb { j_start = A.n / 2; assert( A.n > 0 ); while( j_start < A.n && !( - A.CCS.col_start[ j_start ] <= start && - start < A.CCS.col_start[ j_start + 1 ] - ) - ) { + A.CCS.col_start[ j_start ] <= + start && start < A.CCS.col_start[ j_start + 1 ] + ) ) { #ifdef _DEBUG #ifdef _H_GRB_REFERENCE_OMP_BLAS2 #pragma omp critical @@ -2609,32 +2608,33 @@ namespace grb { j_left_range = 0; j_right_range = A.n; j_end = A.n / 2; - while( j_end < A.n && !( + if( j_end < A.CCS.col_start[ A.n ] ) { + while( j_end < A.n && !( A.CCS.col_start[ j_end ] <= end && end < A.CCS.col_start[ j_end + 1 ] - ) - ) { 
+ ) ) { #ifdef _DEBUG #ifdef _H_GRB_REFERENCE_OMP_BLAS2 - #pragma omp critical + #pragma omp critical #endif - std::cout << "\t binary search for " << end << " in [ " << j_left_range - << ", " << j_right_range << " ) = [ " << A.CCS.col_start[ j_left_range ] - << ", " << A.CCS.col_start[ j_right_range ] << " ). " - << "Currently tried and failed at " << j_end << "\n"; -#endif - if( j_right_range == j_left_range ) { - assert( false ); - break; - } else if( A.CCS.col_start[ j_end ] > end ) { - j_right_range = j_end; - } else { - j_left_range = j_end + 1; + std::cout << "\t binary search for " << end << " in [ " << j_left_range + << ", " << j_right_range << " ) = [ " << A.CCS.col_start[ j_left_range ] + << ", " << A.CCS.col_start[ j_right_range ] << " ). " + << "Currently tried and failed at " << j_end << "\n"; +#endif + if( j_right_range == j_left_range ) { + assert( false ); + break; + } else if( A.CCS.col_start[ j_end ] > end ) { + j_right_range = j_end; + } else { + j_left_range = j_end + 1; + } + assert( j_right_range >= j_left_range ); + j_end = j_right_range - j_left_range; + j_end /= 2; + j_end += j_left_range; } - assert( j_right_range >= j_left_range ); - j_end = j_right_range - j_left_range; - j_end /= 2; - j_end += j_left_range; } if( j_start > j_end ) { j_start = j_end; diff --git a/include/graphblas/reference/vector.hpp b/include/graphblas/reference/vector.hpp index fcd2516d8..f0db908b2 100644 --- a/include/graphblas/reference/vector.hpp +++ b/include/graphblas/reference/vector.hpp @@ -244,8 +244,6 @@ namespace grb { friend class PinnedVector< D, BSP1D >; - friend class Vector< D, nonblocking, internal::Coordinates< nonblocking > >; - template< typename ValueType, Backend backend > friend Vector< ValueType, backend, @@ -857,28 +855,6 @@ namespace grb { #endif } - /** - * Constructs a reference vector. - * - * @see Full description in base backend. 
- */ - Vector( const std::initializer_list< D > &vals ) - : Vector( vals.size(), vals.size() ) - { -#ifdef _DEBUG - std::cerr << "In Vector< reference >::Vector( initializer_list )" - << " constructor\n"; -#endif - -#ifdef _H_GRB_REFERENCE_OMP_VECTOR - #pragma omp parallel for simd -#endif - for( size_t i = 0; i < vals.size(); ++i ) { - _raw[ i ] = *( vals.begin() + i ); - } - _coordinates.assignAll(); - } - /** * The default constructor creates an empty vector and should never be * used explicitly. diff --git a/include/transition/blas_sparse.h b/include/transition/blas_sparse.h index 83e186f8e..63839723e 100644 --- a/include/transition/blas_sparse.h +++ b/include/transition/blas_sparse.h @@ -21,39 +21,6 @@ * This is the ALP implementation of a subset of the NIST Sparse BLAS standard. * While the API is standardised, this header makes some implementation-specific * extensions. - * - * @author A. N. Yzelman - * @date 2023 - */ - -/** - * \defgroup TRANS Transition path - * - * The transition path libraries enable integrating ALP with existing software. - * It operates by exposing several of its functionalities via established C - * interfaces and established data formats in order to facilitate the transition - * of legacy software to ALP. Ideally, users of transition interfaces need only - * re-compile and link their software; in some cases, trivial modifications - * might be required to migrate to transition interfaces, e.g., changing the - * prefix of called functions. - * - * The currently exposed interfaces are: - * - \ref SPARSEBLAS; - * - \ref SPBLAS; and - * - \ref TRANS_SOLVERS. - * - * All of these transition libraries show-case ALP's ability to quickly wrap - * around external APIs, thus simplifying integration of ALP-backed code with - * existing software. 
We do note, however, that the direct use of the native C++ - * ALP API may lead to higher performance than the use of these transition path - * interfaces, and that in some cases the legacy interface itself is what makes - * achieving such higher performance impossible. - * - * The current transition path interfaces are at an *experimental prototype - * phase*; in particular, not all primitives in a given standard API are - * currently implemented. For \ref SPARSEBLAS in particular, additional support - * or coverage may freely be requested in GitHub issue #14. For other - * interfaces, feel free to open new issues or to contact the maintainers. */ #ifndef _H_ALP_SPARSEBLAS_NIST @@ -65,41 +32,6 @@ extern "C" { #endif -/** - * \defgroup SPARSEBLAS SparseBLAS - * \ingroup TRANS - * - * A SparseBLAS implementation enabled by ALP/GraphBLAS - * - * ALP provides a (presently partial) implementation of the Sparse BLAS standard - * as defined by the BLAS forum and in the following paper: - * - Duff, Iain S., Michael A. Heroux, and Roldan Pozo. "An overview of the - * sparse basic linear algebra subprograms: The new standard from the BLAS - * technical forum." ACM Transactions on Mathematical Software (TOMS) 28(2), - * 2002, pp. 239-267. - * - * We also provide a couple of extensions over this standard, in particular to - * add support for sparse vectors. Such extensions are prefixed by - * EXTBLAS_ and extblas_, such as, for example, - * - #EXTBLAS_dusv_begin and - * - #extblas_sparse_vector. - * This prefix can be configured differently, please refer to the developer - * documentation if looking for this option. - * - * The functionalities defined by the standard of course retain the prefix - * defined by the standard: BLAS_ and blas_, such as, e.g., - * - #BLAS_duscr_begin and - * - #blas_sparse_matrix. - * - * The implementation of this standard is done by mapping back to the equivalent - * ALP/GraphBLAS primitives. 
By default, ALP builds both sequential and shared- - * memory parallel SparseBLAS libraries. It does so simply by compiling the same - * ALP-based SparseBLAS implementation with a sequential and a shared-memory ALP - * backend, respectively. - * - * @{ - */ - /** * The possible transposition types. * @@ -261,8 +193,8 @@ int BLAS_dusmm( int EXTBLAS_dusmsv( const enum blas_trans_type transa, const double alpha, const blas_sparse_matrix A, - const EXTBLAS_TYPE( sparse_vector ) x, - EXTBLAS_TYPE( sparse_vector ) y + const extblas_sparse_vector x, + extblas_sparse_vector y ); /** @@ -397,8 +329,6 @@ int EXTBLAS_dusm_clear( blas_sparse_matrix A ); */ int EXTBLAS_free(); -/**@}*/ // ends the SparseBLAS doxygen group - #ifdef __cplusplus } // end extern "C" #endif diff --git a/include/transition/blas_sparse_vec.h b/include/transition/blas_sparse_vec.h index efbb8256a..dd44f0ec2 100644 --- a/include/transition/blas_sparse_vec.h +++ b/include/transition/blas_sparse_vec.h @@ -21,47 +21,17 @@ * This is an ALP-specific extension to the NIST Sparse BLAS standard, which * the ALP libsparseblas transition path also introduces to the de-facto spblas * standard. - * - * @author A. N. Yzelman - * @date 2023 */ #ifndef _H_ALP_SPARSEBLAS_EXT_VEC #define _H_ALP_SPARSEBLAS_EXT_VEC -/** - * \addtogroup SPARSEBLAS - * @{ - */ - -/**@{*/ -/** \internal Helper macros for #EXTBLAS_FUN and #EXTBLAS_TYPE */ -#define __SPBLAS_CONC( _a, _b ) _a ## _b -#define __SPBLAS_CONCAT( _a, _b ) __SPBLAS_CONC( _a, _b ) -#define SPCONCAT( _a, _b ) __SPBLAS_CONCAT( _a, _b ) -/**@}*/ - #ifdef __cplusplus extern "C" { #endif -/**@{*/ -/** - * \internal - * - * Allows renaming our non-standard functions with some other prefix. - * - * The default prefixes are EXTBLAS_ for functions and - * extblas_ for types. - * - * \endinternal - */ -#define EXTBLAS_FUN( name ) SPCONCAT( EXTBLAS_, name ) -#define EXTBLAS_TYPE( name ) SPCONCAT( extblas_, name ) -/**@}*/ - /** A sparse vector. 
This is an implementation-specific extension. */ -typedef void * EXTBLAS_TYPE( sparse_vector ); +typedef void * extblas_sparse_vector; /** * Creates a handle to a new sparse vector that holds no entries. @@ -72,7 +42,7 @@ typedef void * EXTBLAS_TYPE( sparse_vector ); * * @returns An #extblas_sparse_vector that is under construction. */ -EXTBLAS_TYPE( sparse_vector ) EXTBLAS_FUN( dusv_begin )( const int n ); +extblas_sparse_vector EXTBLAS_dusv_begin( const int n ); /** * Inserts a new nonzero entry into a sparse vector that is under construction. @@ -90,8 +60,8 @@ EXTBLAS_TYPE( sparse_vector ) EXTBLAS_FUN( dusv_begin )( const int n ); * * This is an implementation-specific extension. */ -int EXTBLAS_FUN( dusv_insert_entry )( - EXTBLAS_TYPE( sparse_vector ) x, +int EXTBLAS_dusv_insert_entry( + extblas_sparse_vector x, const double val, const int index ); @@ -108,7 +78,7 @@ int EXTBLAS_FUN( dusv_insert_entry )( * * This is an implementation-specific extension. */ -int EXTBLAS_FUN( dusv_end )( EXTBLAS_TYPE( sparse_vector ) x ); +int EXTBLAS_dusv_end( extblas_sparse_vector x ); /** * Destroys the given sparse vector. @@ -122,7 +92,7 @@ int EXTBLAS_FUN( dusv_end )( EXTBLAS_TYPE( sparse_vector ) x ); * * This is an implementation-specific extension. */ -int EXTBLAS_FUN( dusvds )( EXTBLAS_TYPE( sparse_vector ) x ); +int EXTBLAS_dusvds( extblas_sparse_vector x ); /** * Retrieves the number of nonzeroes in a given finalised sparse vector. @@ -137,7 +107,7 @@ int EXTBLAS_FUN( dusvds )( EXTBLAS_TYPE( sparse_vector ) x ); * * This is an implementation-specific extension. */ -int EXTBLAS_FUN( dusv_nz )( const EXTBLAS_TYPE( sparse_vector ) x, int * nz ); +int EXTBLAS_dusv_nz( const extblas_sparse_vector x, int * nz ); /** * Opens a sparse vector for read-out. @@ -154,7 +124,7 @@ int EXTBLAS_FUN( dusv_nz )( const EXTBLAS_TYPE( sparse_vector ) x, int * nz ); * * This is an implementation-specific extension. 
*/ -int EXTBLAS_FUN( dusv_open )( const EXTBLAS_TYPE( sparse_vector ) x ); +int EXTBLAS_dusv_open( const extblas_sparse_vector x ); /** * Retrieves a sparse vector entry. @@ -184,8 +154,8 @@ int EXTBLAS_FUN( dusv_open )( const EXTBLAS_TYPE( sparse_vector ) x ); * * This is an implementation-specific extension. */ -int EXTBLAS_FUN( dusv_get )( - const EXTBLAS_TYPE( sparse_vector ) x, +int EXTBLAS_dusv_get( + const extblas_sparse_vector x, double * const val, int * const ind ); @@ -200,7 +170,7 @@ int EXTBLAS_FUN( dusv_get )( * * This is an implementation-specific extension. */ -int EXTBLAS_FUN( dusv_close )( const EXTBLAS_TYPE( sparse_vector ) x ); +int EXTBLAS_dusv_close( const extblas_sparse_vector x ); /** * Removes all entries from a finalised sparse vector. @@ -213,9 +183,7 @@ int EXTBLAS_FUN( dusv_close )( const EXTBLAS_TYPE( sparse_vector ) x ); * * This is an implementation-specific extension. */ -int EXTBLAS_FUN( dusv_clear )( EXTBLAS_TYPE( sparse_vector ) x ); - -/**@}*/ // end doxygen grouping for SPARSEBLAS +int EXTBLAS_dusv_clear( extblas_sparse_vector x ); #ifdef __cplusplus } // end extern "C" diff --git a/include/transition/spblas.h b/include/transition/spblas.h new file mode 100644 index 000000000..4bcc41fc9 --- /dev/null +++ b/include/transition/spblas.h @@ -0,0 +1,211 @@ + +/* + * Copyright 2021 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * All functions defined have void return types. This has two
+ * important consequences:
+ * @param[in,out] c Pointer to the values of \f$ C \f$; read and overwritten.
+ * @param[in] b, jb, ib Similar for the nonzeroes in \a B.
+ * @param[out] c, jc, ic Similar for the nonzeroes in \a C. For these parameters
+ * @param[in] ia The row offset array of the nonzeroes in \f$ A \f$.
+ */ +void extspblas_dcsrmultsv( + const char * trans, const int * request, + const int * m, const int * n, + const double * a, const int * ja, const int * ia, + const extblas_sparse_vector x, + extblas_sparse_vector y +); + +/** + * An extension that frees any buffers the ALP/GraphBLAS-generated SparseBLAS + * library may have allocated. + */ +void extspblas_free(); + +#ifdef __cplusplus +} // end extern "C" +#endif + +#endif // end _H_ALP_SPBLAS + diff --git a/src/transition/CMakeLists.txt b/src/transition/CMakeLists.txt index 5c6abcc72..daf7252e0 100644 --- a/src/transition/CMakeLists.txt +++ b/src/transition/CMakeLists.txt @@ -18,79 +18,48 @@ # This file creates the basic target(s) needed by all backends # -assert_defined_variables( WITH_REFERENCE_BACKEND WITH_OMP_BACKEND WITH_NONBLOCKING_BACKEND ) +assert_defined_variables( WITH_REFERENCE_BACKEND WITH_OMP_BACKEND ) -function( add_transition_library target_name lib_type lib_name src1 ) - - set( multiValueArgs - "SOURCES" - "PUBLIC_LINK_LIBRARIES" - "PRIVATE_LINK_LIBRARIES" +if( WITH_REFERENCE_BACKEND ) + add_library( sparseblas_static STATIC + ${CMAKE_CURRENT_SOURCE_DIR}/sparseblas.cpp ) - cmake_parse_arguments( parsed "" "" "${multiValueArgs}" "SOURCES;${src1};${ARGN}" ) - add_library( ${target_name} ${lib_type} ${parsed_SOURCES} ) - set_target_properties( ${target_name} PROPERTIES - OUTPUT_NAME ${lib_name} + set_target_properties( sparseblas_static PROPERTIES + OUTPUT_NAME "sparseblas" ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/shmem" ) - target_link_libraries( ${target_name} PUBLIC transition_headers ${parsed_PUBLIC_LINK_LIBRARIES} ) - target_link_libraries( ${target_name} PRIVATE backend_flags ${parsed_PRIVATE_LINK_LIBRARIES} ) - add_dependencies( libs ${target_name} ) - install( TARGETS ${target_name} EXPORT GraphBLASTargets - ARCHIVE DESTINATION "${SHMEM_BACKEND_INSTALL_DIR}" - ) + target_link_libraries( sparseblas_static PUBLIC backend_reference transition ) -endfunction( 
add_transition_library ) + target_link_libraries( sparseblas_static PRIVATE backend_flags ) -if( WITH_REFERENCE_BACKEND ) - add_transition_library( sparseblas_sequential_static STATIC "sparseblas_sequential" ${CMAKE_CURRENT_SOURCE_DIR}/sparseblas.cpp - PUBLIC_LINK_LIBRARIES backend_reference - ) + add_dependencies( libs sparseblas_static ) - # this is the version for sequantial execution only - add_transition_library( ${_SPBLAS_PREFIX}sequential STATIC "${_SPBLAS_PREFIX}sequential" - ${CMAKE_CURRENT_SOURCE_DIR}/spblas.cpp PUBLIC_LINK_LIBRARIES backend_reference + install( TARGETS sparseblas_static + EXPORT GraphBLASTargets + ARCHIVE DESTINATION "${SHMEM_BACKEND_INSTALL_DIR}" ) - target_compile_definitions( ${_SPBLAS_PREFIX}sequential PUBLIC SPBLAS_PREFIX=${_SPBLAS_PREFIX} ) - - if( ENABLE_SOLVER_LIB ) - add_transition_library( spsolver_sequential STATIC "spsolver_sequential" ${CMAKE_CURRENT_SOURCE_DIR}/solver.cpp - PRIVATE_LINK_LIBRARIES backend_reference - ) - endif() -endif() +endif( WITH_REFERENCE_BACKEND ) if( WITH_OMP_BACKEND ) - add_transition_library( sparseblas_shmem_parallel_static STATIC "sparseblas_shmem_parallel" ${CMAKE_CURRENT_SOURCE_DIR}/sparseblas.cpp - PRIVATE_LINK_LIBRARIES backend_reference_omp + add_library( sparseblas_omp_static STATIC + ${CMAKE_CURRENT_SOURCE_DIR}/sparseblas.cpp ) - # this is the "default" version (parallel) - add_transition_library( ${_SPBLAS_PREFIX}shmem_parallel STATIC "${_SPBLAS_PREFIX}shmem_parallel" - ${CMAKE_CURRENT_SOURCE_DIR}/spblas.cpp PRIVATE_LINK_LIBRARIES backend_reference_omp + set_target_properties( sparseblas_omp_static PROPERTIES + OUTPUT_NAME "sparseblas_omp" + ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/shmem" ) - target_compile_definitions( ${_SPBLAS_PREFIX}shmem_parallel PUBLIC SPBLAS_PREFIX=${_SPBLAS_PREFIX} ) - if( ENABLE_EXTRA_SOLVER_LIBS ) - add_transition_library( spsolver_shmem_blocking STATIC "spsolver_shmem_blocking" ${CMAKE_CURRENT_SOURCE_DIR}/solver.cpp - PRIVATE_LINK_LIBRARIES 
backend_reference_omp - ) - endif() -endif() + target_link_libraries( sparseblas_omp_static PUBLIC backend_reference_omp transition ) -if( WITH_NONBLOCKING_BACKEND ) - if( ENABLE_SOLVER_LIB ) - add_transition_library( spsolver_shmem_parallel STATIC "spsolver_shmem_parallel" ${CMAKE_CURRENT_SOURCE_DIR}/solver.cpp - PRIVATE_LINK_LIBRARIES backend_nonblocking - ) + target_link_libraries( sparseblas_omp_static PRIVATE backend_flags ) - # same binary name of KML library - # https://www.hikunpeng.com/document/detail/en/kunpengaccel/math-lib/devg-kml/kunpengaccel_kml_16_0011.html - add_transition_library( ksolver STATIC "ksolver" ${CMAKE_CURRENT_SOURCE_DIR}/kml_iss.cpp - PRIVATE_LINK_LIBRARIES spsolver_nonblocking_static - ) - endif() -endif() + add_dependencies( libs sparseblas_omp_static ) + install( TARGETS sparseblas_omp_static + EXPORT GraphBLASTargets + ARCHIVE DESTINATION "${SHMEM_BACKEND_INSTALL_DIR}" + ) +endif() diff --git a/src/transition/sparseblas.cpp b/src/transition/sparseblas.cpp index 6d3471258..60fb1f055 100644 --- a/src/transition/sparseblas.cpp +++ b/src/transition/sparseblas.cpp @@ -15,17 +15,7 @@ * limitations under the License. */ -/** - * @file - * - * Implements the Sparse BLAS standard using ALP/GraphBLAS. - * - * @author A. N. Yzelman - * @date 2023 - */ - #include "blas_sparse.h" -#include "sparse_vector_impl.hpp" #include #include @@ -252,6 +242,76 @@ namespace sparseblas { }; + /** + * \internal A sparse vector that is either under construction, or finalized as + * an ALP/GraphBLAS vector. 
+ */ + template< typename T > + class SparseVector { + + public: + + int n; + bool finalized; + grb::Vector< T > * vector; + typename grb::Vector< T >::const_iterator start, end; + + private: + + std::vector< T > uc_vals; + std::vector< int > uc_inds; + + public: + + SparseVector( const int &_n ) : + n( _n ), finalized( false ), vector( nullptr ) + {} + + ~SparseVector() { + if( finalized ) { + assert( vector != nullptr ); + delete vector; + } else { + assert( vector == nullptr ); + } + } + + void add( const T &val, const int &index ) { + assert( !finalized ); + uc_vals.push_back( val ); + uc_inds.push_back( index ); + } + + void finalize() { + assert( uc_vals.size() == uc_inds.size() ); + const size_t nz = uc_vals.size(); + vector = new grb::Vector< T >( n, nz ); + if( vector == nullptr ) { + std::cerr << "Could not create ALP/GraphBLAS vector of size " << n + << " and capacity " << nz << "\n"; + throw std::runtime_error( "Could not create ALP/GraphBLAS vector" ); + } + if( grb::capacity( *vector ) < nz ) { + throw std::runtime_error( "ALP/GraphBLAS vector has insufficient " + "capacity" ); + } + const grb::RC rc = grb::buildVector( + *vector, + uc_inds.cbegin(), uc_inds.cend(), + uc_vals.cbegin(), uc_vals.cend(), + grb::SEQUENTIAL + ); + if( rc != grb::SUCCESS ) { + throw std::runtime_error( "Could not ingest nonzeroes into ALP/GraphBLAS " + "vector" ); + } + uc_vals.clear(); + uc_inds.clear(); + finalized = true; + } + + }; + /** * \internal SparseBLAS allows a matrix to be under construction or finalized. * This class matches that concept -- for non-finalized matrices, it @@ -331,20 +391,95 @@ namespace sparseblas { * \internal Utility function that converts a #extblas_sparse_vector to a * sparseblas::SparseVector. This is for vectors of doubles. 
*/ - static native::SparseVector< double > * getDoubleVector( - EXTBLAS_TYPE( sparse_vector ) x - ) { - return static_cast< native::SparseVector< double >* >( x ); + SparseVector< double > * getDoubleVector( extblas_sparse_vector x ) { + return static_cast< SparseVector< double >* >( x ); } /** * \internal Utility function that converts a #blas_sparse_matrix to a * sparseblas::SparseMatrix. This is for matrices of doubles. */ - static SparseMatrix< double > * getDoubleMatrix( blas_sparse_matrix A ) { + SparseMatrix< double > * getDoubleMatrix( blas_sparse_matrix A ) { return static_cast< SparseMatrix< double >* >( A ); } + /** + * \internal Internal buffer used for output matrix containers. + */ + char * buffer = nullptr; + + /** + * \internal The size of #buffer. + */ + size_t buffer_size = 0; + + /** + * @returns false if and only if buffer allocation failed. + * @returns true on success. + */ + template< typename T > + bool getBuffer( + char * &bitmask, char * &stack, T * &valbuf, + const size_t size + ) { + typedef typename grb::internal::Coordinates< grb::config::default_backend > + Coors; + constexpr const size_t b = grb::config::CACHE_LINE_SIZE::value(); + + // catch trivial case + if( size == 0 ) { + bitmask = stack = nullptr; + valbuf = nullptr; + return true; + } + + // compute required size + size_t reqSize = Coors::arraySize( size ) + Coors::stackSize( size ) + + (size * sizeof(T)) + 3 * b; + + // ensure buffer is at least the required size + if( buffer == nullptr ) { + assert( buffer_size == 0 ); + buffer_size = reqSize; + buffer = static_cast< char * >( malloc( buffer_size ) ); + if( buffer == nullptr ) { + buffer_size = 0; + return false; + } + } else if( buffer_size < reqSize ) { + free( buffer ); + buffer_size = std::max( reqSize, 2 * buffer_size ); + buffer = static_cast< char * >( malloc( buffer_size ) ); + if( buffer == nullptr ) { + buffer_size = 0; + return false; + } + } + + // set buffers and make sure they are aligned + char * walk = 
buffer; + uintptr_t cur_mod = reinterpret_cast< uintptr_t >(walk) % b; + if( cur_mod > 0 ) { + walk += (b - cur_mod); + } + bitmask = walk; + walk += Coors::arraySize( size ); + cur_mod = reinterpret_cast< uintptr_t >(walk) % b; + if( cur_mod > 0 ) { + walk += (b - cur_mod); + } + stack = walk; + walk += Coors::stackSize( size ); + cur_mod = reinterpret_cast< uintptr_t >(walk) % b; + if( cur_mod > 0 ) { + walk += (b - cur_mod); + } + valbuf = reinterpret_cast< T * >( walk); + + // done + return true; + } + } // end namespace sparseblas namespace std { @@ -374,536 +509,823 @@ namespace std { // implementation of the SparseBLAS API follows -EXTBLAS_TYPE( sparse_vector ) EXTBLAS_FUN( dusv_begin )( const int n ) { - return new native::SparseVector< double >( n ); -} - -int EXTBLAS_FUN( dusv_insert_entry )( - EXTBLAS_TYPE( sparse_vector ) x, - const double val, - const int index -) { - auto vector = sparseblas::getDoubleVector( x ); - assert( !(vector->finalized) ); - try { - vector->add( val, index ); - } catch( ... ) { - return 20; - } - return 0; -} - -int EXTBLAS_FUN( dusv_end )( EXTBLAS_TYPE( sparse_vector ) x ) { - auto vector = sparseblas::getDoubleVector( x ); - assert( !(vector->finalized) ); - try { - vector->finalize(); - } catch( ... 
) { - return 30; - } - return 0; -} - -int EXTBLAS_FUN( dusvds )( EXTBLAS_TYPE( sparse_vector ) x ) { - auto vector = sparseblas::getDoubleVector( x ); - delete vector; - return 0; -} - -int EXTBLAS_FUN( dusv_nz )( - const EXTBLAS_TYPE( sparse_vector ) x, - int * const nz -) { - auto vector = sparseblas::getDoubleVector( x ); - assert( vector->finalized ); - const size_t nnz = grb::nnz( *(vector->vector) ); - if( nnz > static_cast< size_t >( std::numeric_limits< int >::max() ) ) { - std::cerr << "Number of nonzeroes is larger than what can be represented by " - << "a SparseBLAS int!\n"; - return 10; - } - *nz = static_cast< int >(nnz); - return 0; -} - -int EXTBLAS_FUN( dusv_clear )( EXTBLAS_TYPE( sparse_vector ) x ) { - auto vector = sparseblas::getDoubleVector( x ); - assert( vector->finalized ); - const grb::RC rc = grb::clear( *(vector->vector) ); - if( rc != grb::SUCCESS ) { - return 10; - } - return 0; -} - -int EXTBLAS_FUN( dusv_open )( const EXTBLAS_TYPE( sparse_vector ) x ) { - auto vector = sparseblas::getDoubleVector( x ); - assert( vector->finalized ); - try { - vector->start = vector->vector->cbegin(); - vector->end = vector->vector->cend(); - } catch( ... ) { - return 10; - } - return 0; -} - -int EXTBLAS_FUN( dusv_get )( - const EXTBLAS_TYPE( sparse_vector ) x, - double * const val, int * const ind -) { - auto vector = sparseblas::getDoubleVector( x ); - assert( vector->finalized ); - assert( vector->start != vector->end ); - assert( val != nullptr ); - assert( ind != nullptr ); - *val = vector->start->second; - *ind = vector->start->first; - try { - (void) ++(vector->start); - } catch( ... 
) { - return 2; - } - if( vector->start == vector->end ) { +extern "C" { + + extblas_sparse_vector EXTBLAS_dusv_begin( const int n ) { + return new sparseblas::SparseVector< double >( n ); + } + + int EXTBLAS_dusv_insert_entry( + extblas_sparse_vector x, + const double val, + const int index + ) { + auto vector = sparseblas::getDoubleVector( x ); + assert( !(vector->finalized) ); + try { + vector->add( val, index ); + } catch( ... ) { + return 20; + } return 0; - } else { - return 1; - } -} - -int EXTBLAS_FUN( dusv_close )( const EXTBLAS_TYPE( sparse_vector ) x ) { - auto vector = sparseblas::getDoubleVector( x ); - assert( vector->finalized ); - vector->start = vector->end; - return 0; -} - -blas_sparse_matrix BLAS_duscr_begin( const int m, const int n ) { - return new sparseblas::SparseMatrix< double >( m, n ); -} - -int BLAS_duscr_insert_entry( - blas_sparse_matrix A, - const double val, const int row, const int col -) { - auto matrix = sparseblas::getDoubleMatrix( A ); - assert( matrix->finalized == false ); - assert( matrix->ingest != nullptr ); - try { - matrix->ingest->add( val, row, col ); - } catch( ... ) { - return 2; - } - return 0; -} - -int BLAS_duscr_insert_entries( - blas_sparse_matrix A, - const int nnz, - const double * vals, const int * rows, const int * cols -) { - auto matrix = sparseblas::getDoubleMatrix( A ); - assert( matrix->finalized == false ); - assert( matrix->ingest != nullptr ); - try { - for( int k = 0; k < nnz; ++k ) { - matrix->ingest->add( vals[ k ], rows[ k ], cols[ k ] ); - } - } catch( ... ) { - return 3; - } - return 0; -} - -int BLAS_duscr_insert_col( - blas_sparse_matrix A, - const int j, const int nnz, - const double * vals, const int * rows -) { - auto matrix = sparseblas::getDoubleMatrix( A ); - assert( matrix->finalized == false ); - assert( matrix->ingest != nullptr ); - try { - for( int k = 0; k < nnz; ++k ) { - matrix->ingest->add( vals[ k ], rows[ k ], j ); - } - } catch( ... 
) { - return 4; - } - return 0; -} - -int BLAS_duscr_insert_row( - blas_sparse_matrix A, - const int i, const int nnz, - const double * vals, const int * cols -) { - auto matrix = sparseblas::getDoubleMatrix( A ); - assert( matrix->finalized == false ); - assert( matrix->ingest != nullptr ); - try { - for( int k = 0; k < nnz; ++k ) { - matrix->ingest->add( vals[ k ], i, cols[ k ] ); - } - } catch( ... ) { - return 5; - } - return 0; -} - -int BLAS_duscr_end( blas_sparse_matrix A ) { - auto matrix = sparseblas::getDoubleMatrix( A ); - assert( matrix->finalized == false ); - assert( matrix->ingest != nullptr ); - try { - matrix->finalize(); - } catch( const std::runtime_error &e ) { - std::cerr << "Caught error: " << e.what() << "\n"; - return 1; - } - return 0; -} - -int EXTBLAS_dusm_clear( blas_sparse_matrix A ) { - auto matrix = sparseblas::getDoubleMatrix( A ); - assert( matrix->finalized ); - const grb::RC rc = grb::clear( *(matrix->A) ); - if( rc != grb::SUCCESS ) { - return 10; - } - return 0; -} - -int BLAS_usds( blas_sparse_matrix A ) { - delete sparseblas::getDoubleMatrix( A ); - return 0; -} - -int BLAS_dusmv( - const enum blas_trans_type transa, - const double alpha, const blas_sparse_matrix A, - const double * x, int incx, - double * const y, const int incy -) { - grb::Semiring< - grb::operators::add< double >, grb::operators::mul< double >, - grb::identities::zero, grb::identities::one - > ring; - auto matrix = sparseblas::getDoubleMatrix( A ); - if( alpha != 1.0 ) { - grb::Vector< double > output = grb::internal::template - wrapRawVector< double >( matrix->m, y ); - const grb::RC rc = grb::foldl< grb::descriptors::dense >( - output, 1.0 / alpha, ring.getMultiplicativeOperator() ); - if( rc != grb::SUCCESS ) { - std::cerr << "Error during pre-scaling during SpMV\n"; - return 50; + } + + int EXTBLAS_dusv_end( extblas_sparse_vector x ) { + auto vector = sparseblas::getDoubleVector( x ); + assert( !(vector->finalized) ); + try { + vector->finalize(); + } 
catch( ... ) { + return 30; } + return 0; } - if( incx != 1 || incy != 1 ) { - // TODO: requires ALP views - std::cerr << "Strided input and/or output vectors are not supported.\n"; - return 255; + + int EXTBLAS_dusvds( extblas_sparse_vector x ) { + auto vector = sparseblas::getDoubleVector( x ); + delete vector; + return 0; } - if( !(matrix->finalized) ) { - std::cerr << "Input matrix was not yet finalised; see BLAS_duscr_end.\n"; - return 100; + + int EXTBLAS_dusv_nz( const extblas_sparse_vector x, int * const nz ) { + auto vector = sparseblas::getDoubleVector( x ); + assert( vector->finalized ); + const size_t nnz = grb::nnz( *(vector->vector) ); + if( nnz > static_cast< size_t >( std::numeric_limits< int >::max() ) ) { + std::cerr << "Number of nonzeroes is larger than what can be represented by " + << "a SparseBLAS int!\n"; + return 10; + } + *nz = static_cast< int >(nnz); + return 0; } - assert( matrix->finalized ); - if( transa == blas_no_trans ) { - const grb::Vector< double > input = grb::internal::template - wrapRawVector< double >( matrix->n, x ); - grb::Vector< double > output = grb::internal::template - wrapRawVector< double >( matrix->m, y ); - const grb::RC rc = grb::mxv< grb::descriptors::dense >( - output, *(matrix->A), input, ring - ); + + int EXTBLAS_dusv_clear( extblas_sparse_vector x ) { + auto vector = sparseblas::getDoubleVector( x ); + assert( vector->finalized ); + const grb::RC rc = grb::clear( *(vector->vector) ); if( rc != grb::SUCCESS ) { - std::cerr << "ALP/GraphBLAS returns error during SpMV: " - << grb::toString( rc ) << ".\n"; - return 200; + return 10; } - } else { - const grb::Vector< double > input = grb::internal::template - wrapRawVector< double >( matrix->m, x ); - grb::Vector< double > output = grb::internal::template - wrapRawVector< double >( matrix->n, y ); - const grb::RC rc = grb::mxv< - grb::descriptors::dense | - grb::descriptors::transpose_matrix - >( - output, *(matrix->A), input, ring - ); + return 0; + } + + int 
EXTBLAS_dusv_open( const extblas_sparse_vector x ) { + auto vector = sparseblas::getDoubleVector( x ); + assert( vector->finalized ); + try { + vector->start = vector->vector->cbegin(); + vector->end = vector->vector->cend(); + } catch( ... ) { + return 10; + } + return 0; + } + + int EXTBLAS_dusv_get( + const extblas_sparse_vector x, + double * const val, int * const ind + ) { + auto vector = sparseblas::getDoubleVector( x ); + assert( vector->finalized ); + assert( vector->start != vector->end ); + assert( val != nullptr ); + assert( ind != nullptr ); + *val = vector->start->second; + *ind = vector->start->first; + try { + (void) ++(vector->start); + } catch( ... ) { + return 2; + } + if( vector->start == vector->end ) { + return 0; + } else { + return 1; + } + } + + int EXTBLAS_dusv_close( const extblas_sparse_vector x ) { + auto vector = sparseblas::getDoubleVector( x ); + assert( vector->finalized ); + vector->start = vector->end; + return 0; + } + + blas_sparse_matrix BLAS_duscr_begin( const int m, const int n ) { + return new sparseblas::SparseMatrix< double >( m, n ); + } + + int BLAS_duscr_insert_entry( + blas_sparse_matrix A, + const double val, const int row, const int col + ) { + auto matrix = sparseblas::getDoubleMatrix( A ); + assert( matrix->finalized == false ); + assert( matrix->ingest != nullptr ); + try { + matrix->ingest->add( val, row, col ); + } catch( ... ) { + return 2; + } + return 0; + } + + int BLAS_duscr_insert_entries( + blas_sparse_matrix A, + const int nnz, + const double * vals, const int * rows, const int * cols + ) { + auto matrix = sparseblas::getDoubleMatrix( A ); + assert( matrix->finalized == false ); + assert( matrix->ingest != nullptr ); + try { + for( int k = 0; k < nnz; ++k ) { + matrix->ingest->add( vals[ k ], rows[ k ], cols[ k ] ); + } + } catch( ... 
) { + return 3; + } + return 0; + } + + int BLAS_duscr_insert_col( + blas_sparse_matrix A, + const int j, const int nnz, + const double * vals, const int * rows + ) { + auto matrix = sparseblas::getDoubleMatrix( A ); + assert( matrix->finalized == false ); + assert( matrix->ingest != nullptr ); + try { + for( int k = 0; k < nnz; ++k ) { + matrix->ingest->add( vals[ k ], rows[ k ], j ); + } + } catch( ... ) { + return 4; + } + return 0; + } + + int BLAS_duscr_insert_row( + blas_sparse_matrix A, + const int i, const int nnz, + const double * vals, const int * cols + ) { + auto matrix = sparseblas::getDoubleMatrix( A ); + assert( matrix->finalized == false ); + assert( matrix->ingest != nullptr ); + try { + for( int k = 0; k < nnz; ++k ) { + matrix->ingest->add( vals[ k ], i, cols[ k ] ); + } + } catch( ... ) { + return 5; + } + return 0; + } + + int BLAS_duscr_end( blas_sparse_matrix A ) { + auto matrix = sparseblas::getDoubleMatrix( A ); + assert( matrix->finalized == false ); + assert( matrix->ingest != nullptr ); + try { + matrix->finalize(); + } catch( const std::runtime_error &e ) { + std::cerr << "Caught error: " << e.what() << "\n"; + return 1; + } + return 0; + } + + int EXTBLAS_dusm_clear( blas_sparse_matrix A ) { + auto matrix = sparseblas::getDoubleMatrix( A ); + assert( matrix->finalized ); + const grb::RC rc = grb::clear( *(matrix->A) ); if( rc != grb::SUCCESS ) { - std::cerr << "ALP/GraphBLAS returns error during transposed SpMV: " - << grb::toString( rc ) << ".\n"; - return 200; + return 10; } + return 0; } - if( alpha != 1.0 ) { - grb::Vector< double > output = grb::internal::template - wrapRawVector< double >( matrix->m, y ); - const grb::RC rc = grb::foldl< grb::descriptors::dense >( - output, alpha, ring.getMultiplicativeOperator() ); + + int BLAS_usds( blas_sparse_matrix A ) { + delete sparseblas::getDoubleMatrix( A ); + return 0; + } + + int BLAS_dusmv( + const enum blas_trans_type transa, + const double alpha, const blas_sparse_matrix A, + const 
double * x, int incx, + double * const y, const int incy + ) { + grb::Semiring< + grb::operators::add< double >, grb::operators::mul< double >, + grb::identities::zero, grb::identities::one + > ring; + auto matrix = sparseblas::getDoubleMatrix( A ); + if( alpha != 1.0 ) { + grb::Vector< double > output = grb::internal::template + wrapRawVector< double >( matrix->m, y ); + const grb::RC rc = grb::foldl< grb::descriptors::dense >( + output, 1.0 / alpha, ring.getMultiplicativeOperator() ); + if( rc != grb::SUCCESS ) { + std::cerr << "Error during pre-scaling during SpMV\n"; + return 50; + } + } + if( incx != 1 || incy != 1 ) { + // TODO: requires ALP views + std::cerr << "Strided input and/or output vectors are not supported.\n"; + return 255; + } + if( !(matrix->finalized) ) { + std::cerr << "Input matrix was not yet finalised; see BLAS_duscr_end.\n"; + return 100; + } + assert( matrix->finalized ); + if( transa == blas_no_trans ) { + const grb::Vector< double > input = grb::internal::template + wrapRawVector< double >( matrix->n, x ); + grb::Vector< double > output = grb::internal::template + wrapRawVector< double >( matrix->m, y ); + const grb::RC rc = grb::mxv< grb::descriptors::dense >( + output, *(matrix->A), input, ring + ); + if( rc != grb::SUCCESS ) { + std::cerr << "ALP/GraphBLAS returns error during SpMV: " + << grb::toString( rc ) << ".\n"; + return 200; + } + } else { + const grb::Vector< double > input = grb::internal::template + wrapRawVector< double >( matrix->m, x ); + grb::Vector< double > output = grb::internal::template + wrapRawVector< double >( matrix->n, y ); + const grb::RC rc = grb::mxv< + grb::descriptors::dense | + grb::descriptors::transpose_matrix + >( + output, *(matrix->A), input, ring + ); + if( rc != grb::SUCCESS ) { + std::cerr << "ALP/GraphBLAS returns error during transposed SpMV: " + << grb::toString( rc ) << ".\n"; + return 200; + } + } + if( alpha != 1.0 ) { + grb::Vector< double > output = grb::internal::template + 
wrapRawVector< double >( matrix->m, y ); + const grb::RC rc = grb::foldl< grb::descriptors::dense >( + output, alpha, ring.getMultiplicativeOperator() ); + if( rc != grb::SUCCESS ) { + std::cerr << "Error during post-scaling during SpMV\n"; + return 250; + } + } + return 0; + } + + void spblas_dcsrgemv( + const char * transa, + const int * m_p, + const double * a, const int * ia, const int * ja, + const double * x, + double * y + ) { + // declare algebraic structures + grb::Semiring< + grb::operators::add< double >, grb::operators::mul< double >, + grb::identities::zero, grb::identities::one + > ring; + grb::Monoid< + grb::operators::max< int >, grb::identities::negative_infinity + > maxMonoid; + + // declare minimum necessary descriptors + constexpr grb::Descriptor minDescr = grb::descriptors::dense | + grb::descriptors::force_row_major; + + // determine matrix size + const int m = *m_p; + const grb::Vector< int > columnIndices = + grb::internal::template wrapRawVector< int >( ia[ m ], ja ); + int n = 0; + grb::RC rc = foldl( n, columnIndices, maxMonoid ); if( rc != grb::SUCCESS ) { - std::cerr << "Error during post-scaling during SpMV\n"; - return 250; - } - } - return 0; -} - -int BLAS_dusmm( - const enum blas_order_type order, - const enum blas_trans_type transa, - const int nrhs, - const double alpha, const blas_sparse_matrix A, - const double * B, const int ldb, - const double * C, const int ldc -) { - (void) order; - (void) transa; - (void) nrhs; - (void) alpha; - (void) A; - (void) B; - (void) ldb; - (void) C; - (void) ldc; - // TODO requires dense ALP and mixed sparse/dense ALP operations - std::cerr << "BLAS_dusmm (sparse matrix times dense matrix) has not yet " - << "been implemented.\n"; - assert( false ); - return 255; -} - -int EXTBLAS_dusmsv( - const enum blas_trans_type transa, - const double alpha, const blas_sparse_matrix A, - const EXTBLAS_TYPE( sparse_vector ) x, - EXTBLAS_TYPE( sparse_vector ) y -) { - grb::Semiring< - grb::operators::add< 
double >, grb::operators::mul< double >, - grb::identities::zero, grb::identities::one - > ring; - auto matrix = sparseblas::getDoubleMatrix( A ); - auto input = sparseblas::getDoubleVector( x ); - auto output = sparseblas::getDoubleVector( y ); - if( !(matrix->finalized) ) { - std::cerr << "Uninitialised input matrix during SpMSpV\n"; - return 10; - } - if( !(input->finalized) ) { - std::cerr << "Uninitialised input vector during SpMSpV\n"; - return 20; - } - if( !(output->finalized) ) { - std::cerr << "Uninitialised output vector during SpMSpV\n"; - return 30; - } - grb::RC rc = grb::SUCCESS; - if( alpha != 1.0 ) { - rc = grb::foldl( *(output->vector), 1.0 / alpha, - ring.getMultiplicativeOperator() ); + std::cerr << "Could not determine matrix column size\n"; + assert( false ); + return; + } + + // retrieve buffers (only when A needs to be output also) + //char * const bitmask = sparseblas::getBitmask( n ); + //char * const stack = sparseblas::getStack( n ); + //double * const buffer = sparseblas::template getBuffer< double >( n ); + + // retrieve necessary ALP/GraphBLAS container wrappers + const grb::Matrix< double, grb::config::default_backend, int, int, int > A = + grb::internal::wrapCRSMatrix( a, ja, ia, m, n ); + const grb::Vector< double > input = grb::internal::template + wrapRawVector< double >( n, x ); + grb::Vector< double > output = grb::internal::template + wrapRawVector< double >( m, y ); + + // set output vector to zero + rc = grb::set( output, ring.template getZero< double >() ); if( rc != grb::SUCCESS ) { - std::cerr << "Error during pre-scaling of SpMSpV\n"; - return 40; + std::cerr << "Could not set output vector to zero\n"; + assert( false ); + return; } + + // do either y=Ax or y=A^Tx + if( transa[0] == 'N' ) { + rc = grb::mxv< minDescr >( + output, A, input, ring + ); + if( rc != grb::SUCCESS ) { + std::cerr << "ALP/GraphBLAS returns error during SpMV: " + << grb::toString( rc ) << ".\n"; + assert( false ); + return; + } + } else { + // 
Hermitian is not supported + assert( transa[0] == 'T' ); + rc = grb::mxv< + minDescr | + grb::descriptors::transpose_matrix + >( + output, A, input, ring + ); + if( rc != grb::SUCCESS ) { + std::cerr << "ALP/GraphBLAS returns error during transposed SpMV: " + << grb::toString( rc ) << ".\n"; + assert( false ); + return; + } + } + + // done } - if( transa == blas_no_trans ) { - rc = grb::mxv( *(output->vector), *(matrix->A), *(input->vector), ring ); - } else { - rc = grb::mxv< grb::descriptors::transpose_matrix >( - *(output->vector), *(matrix->A), *(input->vector), ring ); + + int BLAS_dusmm( + const enum blas_order_type order, + const enum blas_trans_type transa, + const int nrhs, + const double alpha, const blas_sparse_matrix A, + const double * B, const int ldb, + const double * C, const int ldc + ) { + (void) order; + (void) transa; + (void) nrhs; + (void) alpha; + (void) A; + (void) B; + (void) ldb; + (void) C; + (void) ldc; + // TODO requires dense ALP and mixed sparse/dense ALP operations + std::cerr << "BLAS_dusmm (sparse matrix times dense matrix) has not yet " + << "been implemented.\n"; + assert( false ); + return 255; } - if( rc != grb::SUCCESS ) { - std::cerr << "Error during call to grb::mxv (SpMSpV)\n"; - return 50; + + void spblas_dcsrmm( + const char * const transa, + const int * m, const int * n, const int * k, + const double * alpha, + const char * matdescra, const double * val, const int * indx, + const int * pntrb, const int * pntre, + const double * b, const int * ldb, + const double * beta, + double * c, const int * ldc + ) { + assert( transa[0] == 'N' || transa[0] == 'T' ); + assert( m != NULL ); + assert( n != NULL ); + assert( k != NULL ); + assert( alpha != NULL ); + // not sure yet what constraints if any on matdescra + if( *m > 0 && *k > 0 ) { + assert( pntrb != NULL ); + assert( pntre != NULL ); + } + // val and indx could potentially be NULL if there are no nonzeroes + assert( b != NULL ); + assert( ldb != NULL ); + assert( beta != 
NULL ); + assert( c != NULL ); + assert( ldc != NULL ); + (void) transa; + (void) m; (void) n; (void) k; + (void) alpha; + (void) matdescra; (void) val; (void) indx; (void) pntrb; (void) pntre; + (void) b; (void) ldb; + (void) beta; + (void) c; (void) ldc; + // requires dense ALP and mixed sparse/dense operations + assert( false ); } - if( alpha != 1.0 ) { - rc = grb::foldl( *(output->vector), alpha, - ring.getMultiplicativeOperator() ); + + int EXTBLAS_dusmsv( + const enum blas_trans_type transa, + const double alpha, const blas_sparse_matrix A, + const extblas_sparse_vector x, + extblas_sparse_vector y + ) { + grb::Semiring< + grb::operators::add< double >, grb::operators::mul< double >, + grb::identities::zero, grb::identities::one + > ring; + auto matrix = sparseblas::getDoubleMatrix( A ); + auto input = sparseblas::getDoubleVector( x ); + auto output = sparseblas::getDoubleVector( y ); + if( !(matrix->finalized) ) { + std::cerr << "Uninitialised input matrix during SpMSpV\n"; + return 10; + } + if( !(input->finalized) ) { + std::cerr << "Uninitialised input vector during SpMSpV\n"; + return 20; + } + if( !(output->finalized) ) { + std::cerr << "Uninitialised output vector during SpMSpV\n"; + return 30; + } + grb::RC rc = grb::SUCCESS; + if( alpha != 1.0 ) { + rc = grb::foldl( *(output->vector), 1.0 / alpha, + ring.getMultiplicativeOperator() ); + if( rc != grb::SUCCESS ) { + std::cerr << "Error during pre-scaling of SpMSpV\n"; + return 40; + } + } + if( transa == blas_no_trans ) { + rc = grb::mxv( *(output->vector), *(matrix->A), *(input->vector), ring ); + } else { + rc = grb::mxv< grb::descriptors::transpose_matrix >( + *(output->vector), *(matrix->A), *(input->vector), ring ); + } if( rc != grb::SUCCESS ) { - std::cerr << "Error during post-scaling of SpMSpV\n"; - return 60; + std::cerr << "Error during call to grb::mxv (SpMSpV)\n"; + return 50; } + if( alpha != 1.0 ) { + rc = grb::foldl( *(output->vector), alpha, + ring.getMultiplicativeOperator() ); + if( 
rc != grb::SUCCESS ) { + std::cerr << "Error during post-scaling of SpMSpV\n"; + return 60; + } + } + return 0; } - return 0; -} - -int EXTBLAS_dusmsm( - const enum blas_trans_type transa, - const double alpha, const blas_sparse_matrix A, - const enum blas_trans_type transb, const blas_sparse_matrix B, - blas_sparse_matrix C -) { - grb::Semiring< - grb::operators::add< double >, grb::operators::mul< double >, - grb::identities::zero, grb::identities::one - > ring; - auto matA = sparseblas::getDoubleMatrix( A ); - auto matB = sparseblas::getDoubleMatrix( B ); - auto matC = sparseblas::getDoubleMatrix( C ); - if( !(matA->finalized) ) { - std::cerr << "Uninitialised left-hand input matrix during SpMSpM\n"; - return 10; - } - if( !(matB->finalized) ) { - std::cerr << "Uninitialised right-hand input matrix during SpMSpM\n"; - return 20; - } - if( !(matC->finalized) ) { - std::cerr << "Uninitialised output matrix during SpMSpM\n"; - return 30; - } - - grb::RC rc = grb::SUCCESS; - if( alpha != 1.0 ) { - /*const grb::RC rc = grb::foldl( *(matC->A), 1.0 / alpha, - ring.getMultiplicativeOperator() ); + + void extspblas_dcsrmultsv( + const char * trans, const int * request, + const int * m, const int * n, + const double * a, const int * ja, const int * ia, + const extblas_sparse_vector x, + extblas_sparse_vector y + ) { + grb::Semiring< + grb::operators::add< double >, grb::operators::mul< double >, + grb::identities::zero, grb::identities::one + > ring; + const grb::Matrix< double, grb::config::default_backend, int, int, int > A = + grb::internal::wrapCRSMatrix( a, ja, ia, *m, *n ); + auto input = sparseblas::getDoubleVector( x ); + auto output = sparseblas::getDoubleVector( y ); + if( !(input->finalized) ) { + throw std::runtime_error( "Uninitialised input vector during SpMSpV\n" ); + } + if( !(output->finalized) ) { + throw std::runtime_error( "Uninitialised output vector during SpMSpV\n" ); + } + if( request[ 0 ] != 0 && request[ 1 ] != 1 ) { + throw std::runtime_error( 
"Illegal request during call to dcsrmultsv\n" ); + } + grb::Phase phase = grb::EXECUTE; + if( request[ 0 ] == 1 ) { + phase = grb::RESIZE; + } + grb::RC rc; + if( trans[0] == 'N' ) { + rc = grb::mxv< grb::descriptors::force_row_major >( *(output->vector), A, + *(input->vector), ring, phase ); + } else { + if( trans[1] != 'T' ) { + throw std::runtime_error( "Illegal trans argument to dcsrmultsv\n" ); + } + rc = grb::mxv< + grb::descriptors::force_row_major | + grb::descriptors::transpose_matrix + >( *(output->vector), A, *(input->vector), ring, phase ); + } if( rc != grb::SUCCESS ) { - std::cerr << "Error during pre-scaling for SpMSpM\n"; - return 40; + throw std::runtime_error( "ALP/GraphBLAS returns error during call to " + "SpMSpV: " + grb::toString( rc ) ); + } + } + + int EXTBLAS_dusmsm( + const enum blas_trans_type transa, + const double alpha, const blas_sparse_matrix A, + const enum blas_trans_type transb, const blas_sparse_matrix B, + blas_sparse_matrix C + ) { + grb::Semiring< + grb::operators::add< double >, grb::operators::mul< double >, + grb::identities::zero, grb::identities::one + > ring; + auto matA = sparseblas::getDoubleMatrix( A ); + auto matB = sparseblas::getDoubleMatrix( B ); + auto matC = sparseblas::getDoubleMatrix( C ); + if( !(matA->finalized) ) { + std::cerr << "Uninitialised left-hand input matrix during SpMSpM\n"; + return 10; + } + if( !(matB->finalized) ) { + std::cerr << "Uninitialised right-hand input matrix during SpMSpM\n"; + return 20; + } + if( !(matC->finalized) ) { + std::cerr << "Uninitialised output matrix during SpMSpM\n"; + return 30; + } + + grb::RC rc = grb::SUCCESS; + if( alpha != 1.0 ) { + /*const grb::RC rc = grb::foldl( *(matC->A), 1.0 / alpha, + ring.getMultiplicativeOperator() ); + if( rc != grb::SUCCESS ) { + std::cerr << "Error during pre-scaling for SpMSpM\n"; + return 40; + }*/ + // TODO requires level-3 fold in ALP/GraphBLAS + std::cerr << "Any other alpha from 1.0 is currently not supported for " + << "SpMSpM 
multiplication\n"; + return 255; + } + + // resize phase + if( transa == blas_no_trans && transb == blas_no_trans ) { + rc = grb::mxm( *(matC->A), *(matA->A), *(matB->A), ring, grb::RESIZE ); + } else if( transa != blas_no_trans && transb == blas_no_trans ) { + rc = grb::mxm< grb::descriptors::transpose_left >( + *(matC->A), *(matA->A), *(matB->A), ring, grb::RESIZE ); + } else if( transa == blas_no_trans && transb != blas_no_trans ) { + rc = grb::mxm< grb::descriptors::transpose_right >( + *(matC->A), *(matA->A), *(matB->A), ring, grb::RESIZE ); + } else { + assert( transa != blas_no_trans ); + assert( transb != blas_no_trans ); + rc = grb::mxm< + grb::descriptors::transpose_left | + grb::descriptors::transpose_right + >( *(matC->A), *(matA->A), *(matB->A), ring, grb::RESIZE ); + } + if( rc != grb::SUCCESS ) { + std::cerr << "Error during call to ALP/GraphBLAS mxm (RESIZE phase): " + << grb::toString( rc ) << "\n"; + return 50; + } + + // execute phase + if( transa == blas_no_trans && transb == blas_no_trans ) { + rc = grb::mxm( *(matC->A), *(matA->A), *(matB->A), ring ); + } else if( transa != blas_no_trans && transb == blas_no_trans ) { + rc = grb::mxm< grb::descriptors::transpose_left >( + *(matC->A), *(matA->A), *(matB->A), ring ); + } else if( transa == blas_no_trans && transb != blas_no_trans ) { + rc = grb::mxm< grb::descriptors::transpose_right >( + *(matC->A), *(matA->A), *(matB->A), ring ); + } else { + assert( transa != blas_no_trans ); + assert( transb != blas_no_trans ); + rc = grb::mxm< + grb::descriptors::transpose_left | + grb::descriptors::transpose_right + >( *(matC->A), *(matA->A), *(matB->A), ring ); + } + if( rc != grb::SUCCESS ) { + std::cerr << "Error during call to ALP/GraphBLAS mxm (EXECUTE phase): \n" + << grb::toString( rc ) << "\n"; + return 60; + } + + /*TODO see above + if( alpha != 1.0 ) { + rc = grb::foldl( *(matC->A), 1.0 / alpha, + ring.getMultiplicativeOperator() ); + if( rc != grb::SUCCESS ) { + std::cerr << "Error during 
post-scaling for SpMSpM\n"; + return 70; + } }*/ - // TODO requires level-3 fold in ALP/GraphBLAS - std::cerr << "Any other alpha from 1.0 is currently not supported for " - << "SpMSpM multiplication\n"; - return 255; + return 0; } - // resize phase - if( transa == blas_no_trans && transb == blas_no_trans ) { - rc = grb::mxm( *(matC->A), *(matA->A), *(matB->A), ring, grb::RESIZE ); - } else if( transa != blas_no_trans && transb == blas_no_trans ) { - rc = grb::mxm< grb::descriptors::transpose_left >( - *(matC->A), *(matA->A), *(matB->A), ring, grb::RESIZE ); - } else if( transa == blas_no_trans && transb != blas_no_trans ) { - rc = grb::mxm< grb::descriptors::transpose_right >( - *(matC->A), *(matA->A), *(matB->A), ring, grb::RESIZE ); - } else { - assert( transa != blas_no_trans ); - assert( transb != blas_no_trans ); - rc = grb::mxm< - grb::descriptors::transpose_left | - grb::descriptors::transpose_right - >( *(matC->A), *(matA->A), *(matB->A), ring, grb::RESIZE ); - } - if( rc != grb::SUCCESS ) { - std::cerr << "Error during call to ALP/GraphBLAS mxm (RESIZE phase): " - << grb::toString( rc ) << "\n"; - return 50; - } - - // execute phase - if( transa == blas_no_trans && transb == blas_no_trans ) { - rc = grb::mxm( *(matC->A), *(matA->A), *(matB->A), ring ); - } else if( transa != blas_no_trans && transb == blas_no_trans ) { - rc = grb::mxm< grb::descriptors::transpose_left >( - *(matC->A), *(matA->A), *(matB->A), ring ); - } else if( transa == blas_no_trans && transb != blas_no_trans ) { - rc = grb::mxm< grb::descriptors::transpose_right >( - *(matC->A), *(matA->A), *(matB->A), ring ); - } else { - assert( transa != blas_no_trans ); - assert( transb != blas_no_trans ); - rc = grb::mxm< - grb::descriptors::transpose_left | - grb::descriptors::transpose_right - >( *(matC->A), *(matA->A), *(matB->A), ring ); - } - if( rc != grb::SUCCESS ) { - std::cerr << "Error during call to ALP/GraphBLAS mxm (EXECUTE phase): \n" - << grb::toString( rc ) << "\n"; - return 60; - 
} - - /*TODO see above - if( alpha != 1.0 ) { - rc = grb::foldl( *(matC->A), 1.0 / alpha, - ring.getMultiplicativeOperator() ); + void spblas_dcsrmultcsr( + const char * trans, const int * request, const int * sort, + const int * m_p, const int * n_p, const int * k_p, + double * a, int * ja, int * ia, + double * b, int * jb, int * ib, + double * c, int * jc, int * ic, + const int * nzmax, int * info + ) { + assert( trans[0] == 'N' ); + assert( sort != NULL && sort[0] == 7 ); + assert( m_p != NULL ); + assert( n_p != NULL ); + assert( k_p != NULL ); + assert( a != NULL ); assert( ja != NULL ); assert( ia != NULL ); + assert( b != NULL ); assert( jb != NULL ); assert( ib != NULL ); + assert( c != NULL ); assert( jc != NULL ); assert( ic != NULL ); + assert( nzmax != NULL ); + assert( info != NULL ); + + // declare algebraic structures + grb::Semiring< + grb::operators::add< double >, grb::operators::mul< double >, + grb::identities::zero, grb::identities::one + > ring; + + // check support + if( trans[ 0 ] != 'N' ) { + std::cerr << "ALP/SparseBLAS, error: illegal trans argument to dcsrmultcsr\n"; + *info = 4; + } + if( sort[ 0 ] != 7 ) { + std::cerr << "ALP/SparseBLAS, error: illegal sort argument to dcsrmultcsr\n"; + *info = 5; + return; + } + + // declare minimum necessary descriptors + constexpr const grb::Descriptor minDescr = grb::descriptors::dense | + grb::descriptors::force_row_major; + + // determine matrix size + const int m = *m_p; + const int n = *n_p; + const int k = *k_p; + + // retrieve buffers (only when A needs to be output also) + char * bitmask = nullptr; + char * stack = nullptr; + double * valbuf = nullptr; + if( sparseblas::template getBuffer< double >( + bitmask, stack, valbuf, n + ) == false + ) { + std::cerr << "ALP/SparseBLAS, error: could not allocate buffer for " + << "computations on an output matrix\n"; + *info = 10; + return; + } + + // retrieve necessary ALP/GraphBLAS container wrappers + const grb::Matrix< double, 
grb::config::default_backend, int, int, int > A = + grb::internal::wrapCRSMatrix( a, ja, ia, m, k ); + const grb::Matrix< double, grb::config::default_backend, int, int, int > B = + grb::internal::wrapCRSMatrix( b, jb, ib, k, n ); + grb::Matrix< double, grb::config::default_backend, int, int, int > C = + grb::internal::wrapCRSMatrix( + c, jc, ic, + m, n, *nzmax, + bitmask, stack, valbuf + ); + + // set output vector to zero + grb::RC rc = grb::clear( C ); if( rc != grb::SUCCESS ) { - std::cerr << "Error during post-scaling for SpMSpM\n"; - return 70; - } - }*/ - return 0; -} - -int EXTBLAS_dusm_nz( const blas_sparse_matrix A, int * nz ) { - auto matA = sparseblas::getDoubleMatrix( A ); - if( !(matA->finalized) ) { - std::cerr << "Uninitialised left-hand input matrix during dusm_nz\n"; - return 10; - } - const size_t grb_nz = grb::nnz( *(matA->A) ); - if( grb_nz > static_cast< size_t >(std::numeric_limits< int >::max()) ) { - std::cerr << "Number of nonzeroes in given sparse matrix is larger than " - << "what can be represented by a SparseBLAS int\n"; - return 20; - } - *nz = static_cast< int >( grb_nz ); - return 0; -} - -int EXTBLAS_dusm_open( const blas_sparse_matrix A ) { - auto matA = sparseblas::getDoubleMatrix( A ); - if( !(matA->finalized) ) { - std::cerr << "Uninitialised left-hand input matrix during dusm_nz\n"; - return 10; - } - try{ - matA->start = matA->A->cbegin(); - matA->end = matA->A->cend(); - } catch( ... 
) { - std::cerr << "Could not retrieve matrix iterators\n"; - return 20; - } - return 0; -} - -int EXTBLAS_dusm_get( - const blas_sparse_matrix A, - double * value, int * row, int * col -) { - auto matA = sparseblas::getDoubleMatrix( A ); - if( !(matA->finalized) ) { - std::cerr << "Uninitialised left-hand input matrix during dusm_nz\n"; - return 10; - } - assert( matA->start != matA->end ); - const auto &triplet = *(matA->start); - *value = triplet.second; - *row = triplet.first.first; - *col = triplet.first.second; - try { - (void) ++(matA->start); - } catch( ... ) { - return 2; - } - if( matA->start == matA->end ) { + std::cerr << "ALP/SparseBLAS, error: Could not clear output matrix\n"; + assert( false ); + *info = 20; + return; + } + + // do either C=AB or C=A^TB + if( trans[0] == 'N' ) { + if( *request == 1 ) { + rc = grb::mxm< minDescr >( C, A, B, ring, grb::RESIZE ); + } else { + assert( *request == 0 || *request == 2 ); + rc = grb::mxm< minDescr >( C, A, B, ring ); + } + if( rc != grb::SUCCESS ) { + std::cerr << "ALP/SparseBLAS, error during call to SpMSpM: " + << grb::toString( rc ) << ".\n"; + assert( false ); + *info = 30; + return; + } + } else { + // this case is not supported + assert( false ); + } + + // done + if( *request == 1 ) { + *info = -1; + } else { + *info = 0; + } + } + + int EXTBLAS_dusm_nz( const blas_sparse_matrix A, int * nz ) { + auto matA = sparseblas::getDoubleMatrix( A ); + if( !(matA->finalized) ) { + std::cerr << "Uninitialised left-hand input matrix during dusm_nz\n"; + return 10; + } + const size_t grb_nz = grb::nnz( *(matA->A) ); + if( grb_nz > static_cast< size_t >(std::numeric_limits< int >::max()) ) { + std::cerr << "Number of nonzeroes in given sparse matrix is larger than " + << "what can be represented by a SparseBLAS int\n"; + return 20; + } + *nz = static_cast< int >( grb_nz ); + return 0; + } + + int EXTBLAS_dusm_open( const blas_sparse_matrix A ) { + auto matA = sparseblas::getDoubleMatrix( A ); + if( 
!(matA->finalized) ) { + std::cerr << "Uninitialised left-hand input matrix during dusm_nz\n"; + return 10; + } + try{ + matA->start = matA->A->cbegin(); + matA->end = matA->A->cend(); + } catch( ... ) { + std::cerr << "Could not retrieve matrix iterators\n"; + return 20; + } return 0; - } else { - return 1; } -} -int EXTBLAS_dusm_close( const blas_sparse_matrix A ) { - auto matA = sparseblas::getDoubleMatrix( A ); - if( !(matA->finalized) ) { - std::cerr << "Uninitialised left-hand input matrix during dusm_nz\n"; - return 10; + int EXTBLAS_dusm_get( + const blas_sparse_matrix A, + double * value, int * row, int * col + ) { + auto matA = sparseblas::getDoubleMatrix( A ); + if( !(matA->finalized) ) { + std::cerr << "Uninitialised left-hand input matrix during dusm_nz\n"; + return 10; + } + assert( matA->start != matA->end ); + const auto &triplet = *(matA->start); + *value = triplet.second; + *row = triplet.first.first; + *col = triplet.first.second; + try { + (void) ++(matA->start); + } catch( ... 
) { + return 2; + } + if( matA->start == matA->end ) { + return 0; + } else { + return 1; + } } - matA->start = matA->end; - return 0; -} -int EXTBLAS_free() { - const grb::RC rc = grb::finalize(); - if( rc != grb::SUCCESS ) { - std::cerr << "Error during call to EXTBLAS_free\n"; - return 10; + int EXTBLAS_dusm_close( const blas_sparse_matrix A ) { + auto matA = sparseblas::getDoubleMatrix( A ); + if( !(matA->finalized) ) { + std::cerr << "Uninitialised left-hand input matrix during dusm_nz\n"; + return 10; + } + matA->start = matA->end; + return 0; } - return 0; -} + + int EXTBLAS_free() { + if( sparseblas::buffer != nullptr || sparseblas::buffer_size > 0 ) { + assert( sparseblas::buffer != nullptr ); + assert( sparseblas::buffer_size > 0 ); + free( sparseblas::buffer ); + sparseblas::buffer_size = 0; + } + const grb::RC rc = grb::finalize(); + if( rc != grb::SUCCESS ) { + std::cerr << "Error during call to EXTBLAS_free\n"; + return 10; + } + return 0; + } + + void extspblas_free() { + (void) EXTBLAS_free(); + } + +} // end extern "C" diff --git a/tests/smoke/CMakeLists.txt b/tests/smoke/CMakeLists.txt index 1f99446ee..1bef2efe5 100644 --- a/tests/smoke/CMakeLists.txt +++ b/tests/smoke/CMakeLists.txt @@ -180,6 +180,11 @@ add_grb_executables( kcore_decomposition kcore_decomposition.cpp BACKENDS reference reference_omp hyperdags nonblocking bsp1d hybrid ) +# add_grb_executables( triangle_count triangle_count.cpp +# ADDITIONAL_LINK_LIBRARIES test_utils_headers +# BACKENDS reference reference_omp bsp1d hybrid hyperdags nonblocking +# ) + # targets to list and build the test for this category get_property( smoke_tests_list GLOBAL PROPERTY tests_category_smoke ) add_custom_target( "list_tests_category_smoke" diff --git a/tests/smoke/smoketests.sh b/tests/smoke/smoketests.sh index c069445d9..ed29d69c4 100755 --- a/tests/smoke/smoketests.sh +++ b/tests/smoke/smoketests.sh @@ -366,6 +366,34 @@ for BACKEND in ${BACKENDS[@]}; do fi echo " " + echo ">>> [x] [ ] Testing the 
Triangle counting algorithm on the dwt_59.mtx" + if [ -f ${INPUT_DIR}/dwt_59.mtx ]; then + $runner ${TEST_BIN_DIR}/triangle_count_${BACKEND} ${INPUT_DIR}/dwt_59.mtx direct 30 1 1 &> ${TEST_OUT_DIR}/triangle_count_dwt_59_${BACKEND}_${P}_${T}.log + head -1 ${TEST_OUT_DIR}/triangle_count_dwt_59_${BACKEND}_${P}_${T}.log + if ! grep -q 'Test OK' ${TEST_OUT_DIR}/triangle_count_dwt_59_${BACKEND}_${P}_${T}.log; then + echo "Test FAILED" + else + echo "Test OK" + fi + else + echo "Test DISABLED: dwt_59.mtx was not found. To enable, please provide ${INPUT_DIR}/dwt_59.mtx" + fi + echo " " + + echo ">>> [x] [ ] Testing the Triangle counting algorithm on the gyro_m.mtx" + if [ -f ${INPUT_DIR}/gyro_m.mtx ]; then + $runner ${TEST_BIN_DIR}/triangle_count_${BACKEND} ${INPUT_DIR}/gyro_m.mtx direct 598470 1 1 &> ${TEST_OUT_DIR}/triangle_count_gyro_m_${BACKEND}_${P}_${T}.log + head -1 ${TEST_OUT_DIR}/triangle_count_gyro_m_${BACKEND}_${P}_${T}.log + if ! grep -q 'Test OK' ${TEST_OUT_DIR}/triangle_count_gyro_m_${BACKEND}_${P}_${T}.log; then + echo "Test FAILED" + else + echo "Test OK" + fi + else + echo "Test DISABLED: gyro_m.mtx was not found. To enable, please provide ${INPUT_DIR}/gyro_m.mtx" + fi + echo " " + + if [ "$BACKEND" = "bsp1d" ] || [ "$BACKEND" = "hybrid" ]; then + echo "Additional standardised smoke tests not yet supported for the ${BACKEND} backend" + echo diff --git a/tests/smoke/triangle_count.cpp b/tests/smoke/triangle_count.cpp new file mode 100644 index 000000000..f7c8892db --- /dev/null +++ b/tests/smoke/triangle_count.cpp @@ -0,0 +1,293 @@ +/* + * Copyright 2021 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#ifdef _CG_COMPLEX +#include +#endif + +#include + +#include +#include +#include + +#include +#include + +/** Must be an integer type (int, long, unsigned, etc.) */ +using BaseScalarType = int; +#ifdef _CG_COMPLEX +using IntegerType = std::complex< BaseScalarType >; +#else +using IntegerType = BaseScalarType; +#endif + +constexpr BaseScalarType TOL = 0; +constexpr size_t MAX_ITERS = 10000; + +using namespace grb; +using namespace algorithms; + +struct input { + size_t inner_rep; + size_t outer_rep; + TriangleCountAlgorithm algorithm; + size_t expectedTriangleCount; + char filename[ 1024 ]; + bool direct; +}; + +struct output { + RC rc = RC::SUCCESS; + size_t inner_rep; + size_t outer_rep; + size_t iterations; + size_t triangleCount; + grb::utils::TimerResults times; +}; + +bool parse_arguments( int argc, char ** argv, input & in, int& err ); + +void grbProgram( const input & data_in, output & out ) { + // get user process ID + const size_t s = spmd<>::pid(); + assert( s < spmd<>::nprocs() ); + grb::utils::Timer timer; + + // Sanity checks on input + if( data_in.filename[ 0 ] == '\0' ) { + std::cerr << s << ": no file name given as input." 
<< std::endl; + out.rc = ILLEGAL; + return; + } + + + timer.reset(); + // Create a local parser + grb::utils::MatrixFileReader< + void, + std::conditional< + ( sizeof( grb::config::RowIndexType ) > sizeof( grb::config::ColIndexType ) ), + grb::config::RowIndexType, + grb::config::ColIndexType + >::type + > parser( data_in.filename, data_in.direct ); + assert( parser.m() == parser.n() ); + const size_t n = parser.n(); + // Load the matrix, first as a pattern, then copy it into a matrix with integer values + Matrix< IntegerType > A( n, n ); + { + Matrix< void > A_pattern( n, n ); + { + const RC rc = buildMatrixUnique( A_pattern, + parser.begin( SEQUENTIAL ), parser.end( SEQUENTIAL), + SEQUENTIAL + ); + /* Once internal issue #342 is resolved this can be re-enabled + const RC rc = buildMatrixUnique( A_pattern, + parser.begin( PARALLEL ), parser.end( PARALLEL), + PARALLEL + );*/ + if( rc != SUCCESS ) { + std::cerr << "Failure: call to buildMatrixUnique did not succeed " + << "(" << toString( rc ) << ")." << std::endl; + return; + } + } + // Check number of non-zero entries between the parser and the matrix A_pattern + try { + const size_t global_nnz = nnz( A_pattern ); + const size_t parser_nnz = parser.nz(); + if( global_nnz != parser_nnz ) { + std::cerr << "Failure: global nnz (" << global_nnz << ") does not equal " + << "parser nnz (" << parser_nnz << ")." << std::endl; + return; + } + } catch( const std::runtime_error & ) { + std::cout << "Info: nonzero check skipped as the number of nonzeroes " + << "cannot be derived from the matrix file header. 
The " + << "grb::Matrix reports " << nnz( A ) << " nonzeroes.\n"; + } + // Build A from A_pattern, filled with static_cast< IntegerType>( 1 ) + std::vector< size_t > rows, cols; + rows.reserve( nnz( A_pattern ) ); + cols.reserve( nnz( A_pattern ) ); + for( const std::pair< size_t, size_t > p : A_pattern ) { + // FIXME: this is a workaround while waiting for a masked version of mxm + if( p.first == p.second ) { + continue; + } + + rows.push_back( p.first ); + cols.push_back( p.second ); + } + std::vector< IntegerType > values( rows.size(), static_cast< IntegerType >( 1 ) ); + assert( SUCCESS == + buildMatrixUnique( A, rows.data(), cols.data(), values.data(), values.size(), IOMode::SEQUENTIAL ) + ); + } + out.times.io = timer.time(); + + // Check that the input matrix does not contain self-loops + for( const auto & p : A ) { + if( p.first.first == p.first.second ) { + std::cerr << "Failure: input matrix contains self-loops." << std::endl; + return; + } + } + + timer.reset(); + // Allocate the buffers + Matrix< IntegerType > buffer( n, n ); + Matrix< IntegerType > buffer2( n, n ); + Matrix< IntegerType > L( n, n ); + Matrix< IntegerType > U( n, n ); + // Split A into L and U + grb::tril( L, A, Phase::RESIZE ); + grb::triu( U, A, Phase::RESIZE ); + grb::tril( L, A, Phase::EXECUTE ); + grb::triu( U, A, Phase::EXECUTE ); + out.times.preamble = timer.time(); + + timer.reset(); + out.rc = triangle_count( data_in.algorithm, out.triangleCount, A, buffer, buffer2, L, U ); + out.times.useful = timer.time(); +} + +int main( int argc, char ** argv ) { + (void)argc; + (void)argv; + + std::cout << "Test executable: " << argv[ 0 ] << std::endl; + + // Input struct + struct input in; + int err; + if( !parse_arguments( argc, argv, in, err ) ) { + return err; + } + + std::cout << "Executable called with parameters " << in.filename << ", " + << "inner repetitions = " << in.inner_rep << ", and outer repetitions = " + << in.outer_rep << std::endl; + + // Run the test for all algorithms + 
RC all_algorithms_rc = RC::SUCCESS; + for( const std::pair< TriangleCountAlgorithm, std::string > & algo : TriangleCountAlgorithmNames ) { + in.algorithm = algo.first; + std::cout << " -- Running algorithm " << algo.second << std::endl; + + // Output struct + struct output out; + RC rc = RC::SUCCESS; + + // Launch the estimator (if requested) + if( in.inner_rep == 0 ) { + grb::Launcher< AUTOMATIC > launcher; + rc = launcher.exec( &grbProgram, in, out, true ); + if( rc == RC::SUCCESS ) { + in.inner_rep = out.inner_rep; + } + if( rc != RC::SUCCESS ) { + std::cerr << "launcher.exec returns with non-SUCCESS error code " + << (int)rc << std::endl; + return 6; + } + } + + // Launch the benchmarker + grb::Benchmarker< EXEC_MODE::AUTOMATIC > benchmarker; + rc = benchmarker.exec( &grbProgram, in, out, 1, in.outer_rep, true ); + if( rc != RC::SUCCESS ) { + std::cerr << "benchmarker.exec returns with non-SUCCESS error code " + << grb::toString( rc ) << std::endl; + return 8; + } + if( out.rc == RC::SUCCESS ) { + std::cout << "Benchmark completed successfully.\n"; + std::cout << "** Obtained " << out.triangleCount << " triangles.\n"; + std::cout << "** Expected " << in.expectedTriangleCount << " triangles.\n"; + if( out.triangleCount != in.expectedTriangleCount ) { + all_algorithms_rc = RC::FAILED; + } + } else { + std::cerr << "Benchmark failed with error code " + << grb::toString( out.rc ) << std::endl; + std::cerr << std::flush; + all_algorithms_rc = RC::FAILED; + } + std::cout << std::endl; + } + + if( all_algorithms_rc == RC::SUCCESS ) { + std::cout << "Test OK" << std::endl; + } else { + std::cout << "Test FAILED" << std::endl; + } + + return 0; +} + +bool parse_arguments( int argc, char ** argv, input & in, int& err ) { + // Check if we are testing on a file + if( argc < 4 || argc > 6 ) { + std::cerr << "Usage: \n\t" + << argv[ 0 ] << " <matrix file> <direct|indirect> <expected triangle count> (inner iterations) (outer iterations)" + << std::endl; + err = 1; + return false; + } + + // Get file name + (void)strncpy( 
in.filename, argv[ 1 ], 1023 ); + in.filename[ 1023 ] = '\0'; + + // Get direct or indirect addressing + in.direct = ( strncmp( argv[ 2 ], "direct", 6 ) == 0 ); + + // Get the expected number of triangles + in.expectedTriangleCount = std::stoul( argv[ 3 ] ); + + // Get the inner number of iterations + in.inner_rep = grb::config::BENCHMARKING::inner(); + char * end = nullptr; + if( argc > 4 ) { + in.inner_rep = strtoumax( argv[ 4 ], &end, 10 ); + if( argv[ 4 ] == end ) { + std::cerr << "Could not parse argument " << argv[ 4 ] << " " + << "for number of inner experiment repetitions." << std::endl; + err = 4; + return false; + } + } + + // Get the outer number of iterations + in.outer_rep = grb::config::BENCHMARKING::outer(); + if( argc > 5 ) { + in.outer_rep = strtoumax( argv[ 5 ], &end, 10 ); + if( argv[ 5 ] == end ) { + std::cerr << "Could not parse argument " << argv[ 5 ] << " " + << "for number of outer experiment repetitions." << std::endl; + err = 5; + return false; + } + } + return true; +} \ No newline at end of file diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt index ab0dac498..c24026aa3 100644 --- a/tests/unit/CMakeLists.txt +++ b/tests/unit/CMakeLists.txt @@ -251,10 +251,6 @@ add_grb_executables( eWiseApplyMatrixReference eWiseApplyMatrixReference.cpp ADDITIONAL_LINK_LIBRARIES test_utils_headers ) -add_grb_executables( eWiseLambda eWiseLambda.cpp - BACKENDS reference reference_omp hyperdags nonblocking -) - add_grb_executables( outer outer.cpp BACKENDS reference reference_omp hyperdags nonblocking ) @@ -300,10 +296,6 @@ add_grb_executables( adapterIterator adapterIterator.cpp BACKENDS reference reference_omp hyperdags nonblocking bsp1d hybrid ) -add_grb_executables( vectorFromListConstructor vectorFromListConstructor.cpp - BACKENDS reference reference_omp bsp1d hybrid hyperdags nonblocking -) - # the below targets test successfully when they compile -- they do not need to # be executed successfully as part of the unit test suite. 
diff --git a/tests/unit/unittests.sh b/tests/unit/unittests.sh index 5b7dfc16a..0089d2674 100755 --- a/tests/unit/unittests.sh +++ b/tests/unit/unittests.sh @@ -407,12 +407,6 @@ for MODE in ${MODES}; do grep 'Test OK' ${TEST_OUT_DIR}/buildVector_${MODE}_${BACKEND}_${P}_${T}.log || echo "Test FAILED" echo " " - echo ">>> [x] [ ] Testing grb::Vector( initializer_list ) constructor" - $runner ${TEST_BIN_DIR}/vectorFromListConstructor_${MODE}_${BACKEND} &> ${TEST_OUT_DIR}/vectorFromListConstructor_${MODE}_${BACKEND}_${P}_${T}.log - head -1 ${TEST_OUT_DIR}/vectorFromListConstructor_${MODE}_${BACKEND}_${P}_${T}.log - grep 'Test OK' ${TEST_OUT_DIR}/vectorFromListConstructor_${MODE}_${BACKEND}_${P}_${T}.log || echo "Test FAILED" - echo " " - echo ">>> [x] [ ] Testing grb::vectorToMatrixConverter" $runner ${TEST_BIN_DIR}/vectorToMatrix_${MODE}_${BACKEND} &> ${TEST_OUT_DIR}/vectorToMatrix_${MODE}_${BACKEND}_${P}_${T}.log head -1 ${TEST_OUT_DIR}/vectorToMatrix_${MODE}_${BACKEND}_${P}_${T}.log @@ -631,12 +625,6 @@ for MODE in ${MODES}; do grep 'Test OK' ${TEST_OUT_DIR}/mxm_${MODE}_${BACKEND}_${P}_${T}.log || echo "Test FAILED" echo " " - echo ">>> [x] [ ] Testing grb::eWiseLambda on a small matrix" - $runner ${TEST_BIN_DIR}/eWiseLambda_${MODE}_${BACKEND} &> ${TEST_OUT_DIR}/eWiseLambda_${MODE}_${BACKEND}_${P}_${T}.log - head -1 ${TEST_OUT_DIR}/eWiseLambda_${MODE}_${BACKEND}_${P}_${T}.log - grep 'Test OK' ${TEST_OUT_DIR}/eWiseLambda_${MODE}_${BACKEND}_${P}_${T}.log || echo "Test FAILED" - echo " " - echo ">>> [x] [ ] Testing grb::outer on a small matrix" $runner ${TEST_BIN_DIR}/outer_${MODE}_${BACKEND} &> ${TEST_OUT_DIR}/outer_${MODE}_${BACKEND}_${P}_${T}.log head -1 ${TEST_OUT_DIR}/outer_${MODE}_${BACKEND}_${P}_${T}.log