diff --git a/CMakeLists.txt b/CMakeLists.txt index 68299ad49..8020c64ee 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -52,6 +52,7 @@ endif() option( WITH_REFERENCE_BACKEND "With Reference backend" ON ) option( WITH_ALP_REFERENCE_BACKEND "With Reference Dense backend" ON ) option( WITH_OMP_BACKEND "With OMP backend" ON ) +option( WITH_ALP_OMP_BACKEND "With OMP Dense backend" ON ) option( WITH_NUMA "With NUMA support" ON ) option( LPF_INSTALL_PATH "Path to the LPF tools for the BSP1D and Hybrid backends" OFF ) # the following options depend on LPF_INSTALL_PATH being set @@ -190,6 +191,7 @@ endif() set( WITH_REFERENCE_BACKEND_HEADERS OFF ) set( WITH_ALP_REFERENCE_BACKEND_HEADERS OFF ) set( WITH_OMP_BACKEND_HEADERS OFF ) +set( WITH_ALP_OMP_BACKEND_HEADERS OFF ) # activate headers based on requested backends if( WITH_REFERENCE_BACKEND OR WITH_BSP1D_BACKEND ) @@ -206,6 +208,10 @@ if( WITH_ALP_REFERENCE_BACKEND ) set( WITH_ALP_REFERENCE_BACKEND_HEADERS ON ) endif() +if( WITH_ALP_OMP_BACKEND ) + set( WITH_ALP_OMP_BACKEND_HEADERS ON ) +endif() + add_subdirectory( include ) ### BACKEND IMPLEMENTATIONS diff --git a/bootstrap.sh b/bootstrap.sh index 030279452..2fad6b04e 100755 --- a/bootstrap.sh +++ b/bootstrap.sh @@ -94,6 +94,7 @@ reference=yes banshee=no lpf=no alp_reference=yes +alp_omp=yes show=no FLAGS=$'' LPF_INSTALL_PATH= @@ -297,6 +298,11 @@ the current directory before invocation or confirm the deletion of its content w else CMAKE_OPTS+=" -DWITH_ALP_REFERENCE_BACKEND=ON" fi + if [[ "${alp_omp}" == "no" ]]; then + CMAKE_OPTS+=" -DWITH_ALP_OMP_BACKEND=OFF" + else + CMAKE_OPTS+=" -DWITH_ALP_OMP_BACKEND=ON" + fi if [[ "${lpf}" == "yes" ]]; then CMAKE_OPTS+=" -DLPF_INSTALL_PATH='${ABSOLUTE_LPF_INSTALL_PATH}'" fi diff --git a/cmake/AddGRBInstall.cmake b/cmake/AddGRBInstall.cmake index 3d50788a0..2e57c6c81 100644 --- a/cmake/AddGRBInstall.cmake +++ b/cmake/AddGRBInstall.cmake @@ -45,6 +45,7 @@ install( EXPORT GraphBLASTargets set( ALP_UTILS_INSTALL_DIR "${BINARY_LIBRARIES_INSTALL_DIR}" ) set( SHMEM_BACKEND_INSTALL_DIR "${BINARY_LIBRARIES_INSTALL_DIR}/sequential" ) set( ALP_REFERENCE_BACKEND_INSTALL_DIR "${BINARY_LIBRARIES_INSTALL_DIR}/alp/reference" ) +set( ALP_OMP_BACKEND_INSTALL_DIR "${BINARY_LIBRARIES_INSTALL_DIR}/alp/omp" ) set( BSP1D_BACKEND_INSTALL_DIR "${BINARY_LIBRARIES_INSTALL_DIR}/spmd" ) set( HYBRID_BACKEND_INSTALL_DIR "${BINARY_LIBRARIES_INSTALL_DIR}/hybrid" ) @@ -136,6 +137,14 @@ if( WITH_OMP_BACKEND ) ) endif() +if( WITH_ALP_OMP_BACKEND ) + addBackendWrapperGenOptions( "alp_omp" + COMPILE_DEFINITIONS "${ALP_OMP_SELECTION_DEFS}" + LINK_FLAGS "'${ALP_OMP_BACKEND_INSTALL_DIR}/lib${BACKEND_LIBRARY_OUTPUT_NAME}.a'" + "'${ALP_UTILS_INSTALL_DIR}/lib${ALP_UTILS_LIBRARY_OUTPUT_NAME}.a'" "${NUMA_LFLAG}" + ) +endif() + # distributed memory backends if( WITH_BSP1D_BACKEND OR WITH_HYBRID_BACKEND ) assert_valid_variables( LPFRUN LPFCPP ) diff --git a/cmake/AddGRBVars.cmake b/cmake/AddGRBVars.cmake index 2d324b404..e10bd37f3 100644 --- a/cmake/AddGRBVars.cmake +++ b/cmake/AddGRBVars.cmake @@ -32,6 +32,7 @@ set( REFERENCE_OMP_BACKEND_DEFAULT_NAME "backend_reference_omp" ) set( BSP1D_BACKEND_DEFAULT_NAME "backend_bsp1d" ) set( HYBRID_BACKEND_DEFAULT_NAME "backend_hybrid" ) set( ALP_REFERENCE_BACKEND_DEFAULT_NAME "backend_alp_reference" ) +set( ALP_OMP_BACKEND_DEFAULT_NAME "backend_alp_omp" ) ### COMPILER DEFINITIONS FOR HEADERS INCLUSION AND FOR BACKEND SELECTION @@ -41,11 +42,16 @@ set( REFERENCE_INCLUDE_DEFS "_GRB_WITH_REFERENCE" ) set( REFERENCE_OMP_INCLUDE_DEFS "_GRB_WITH_OMP" ) set( 
LPF_INCLUDE_DEFS "_GRB_WITH_LPF" ) set( ALP_REFERENCE_INCLUDE_DEFS "_ALP_WITH_REFERENCE" ) +set( ALP_OMP_INCLUDE_DEFS "_ALP_WITH_OMP;_ALP_OMP_WITH_REFERENCE" ) # compiler definitions to select a backend set( REFERENCE_SELECTION_DEFS "_GRB_BACKEND=reference" ) set( REFERENCE_OMP_SELECTION_DEFS "_GRB_BACKEND=reference_omp" ) set( ALP_REFERENCE_SELECTION_DEFS "_ALP_BACKEND=reference" ) +set( ALP_OMP_SELECTION_DEFS + "_ALP_BACKEND=omp" + "_ALP_SECONDARY_BACKEND=reference" +) set( BSP1D_SELECTION_DEFS "_GRB_BACKEND=BSP1D" "_GRB_BSP1D_BACKEND=reference" @@ -59,7 +65,7 @@ set( HYBRID_SELECTION_DEFS set( NO_NUMA_DEF "_GRB_NO_LIBNUMA" ) ### **ALL** BACKENDS, EVEN IF NOT ENABLED BY USER -set( ALL_BACKENDS "reference" "reference_omp" "bsp1d" "hybrid" "alp_reference" ) +set( ALL_BACKENDS "reference" "reference_omp" "bsp1d" "hybrid" "alp_reference" "alp_omp" ) # list of user-enabled backends, for tests and wrapper scripts (do not change!) @@ -81,6 +87,10 @@ if( WITH_ALP_REFERENCE_BACKEND ) list( APPEND AVAILABLE_BACKENDS "alp_reference" ) endif() +if( WITH_ALP_OMP_BACKEND ) + list( APPEND AVAILABLE_BACKENDS "alp_omp" ) +endif() + # distributed memory backends if( WITH_BSP1D_BACKEND ) list( APPEND AVAILABLE_BACKENDS "bsp1d" ) diff --git a/include/CMakeLists.txt b/include/CMakeLists.txt index 4e2c17aa3..b9a92bff4 100644 --- a/include/CMakeLists.txt +++ b/include/CMakeLists.txt @@ -67,7 +67,9 @@ install( TARGETS alp_utils_headers EXPORT GraphBLASTargets INCLUDES DESTINATION "${INCLUDE_INSTALL_DIR}" ) -if( WITH_ALP_REFERENCE_BACKEND_HEADERS ) +if( WITH_ALP_REFERENCE_BACKEND_HEADERS OR + WITH_ALP_OMP_BACKEND_HEADERS +) add_subdirectory( alp ) endif() diff --git a/include/alp.hpp b/include/alp.hpp index 6e6646c78..3ebc36637 100644 --- a/include/alp.hpp +++ b/include/alp.hpp @@ -134,7 +134,7 @@ // clang-format re-ordering #if 1 // load active configuration -// #include "alp/config.hpp" //defines _ALP_BACKEND and _WITH_BSP + #include "alp/config.hpp" //defines _ALP_BACKEND #endif // #pragma message "Included ALP.hpp" diff --git a/include/alp/CMakeLists.txt b/include/alp/CMakeLists.txt index c2256749b..690a371b4 100644 --- a/include/alp/CMakeLists.txt +++ b/include/alp/CMakeLists.txt @@ -22,6 +22,7 @@ # set a default backend (if they want to do so).
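# As with alp_reference, user targets select the new backend purely via
# compile definitions. An illustrative sketch (my_program is a placeholder
# target name):
#
#   target_link_libraries( my_program PRIVATE backend_alp_omp_headers )
#
# which adds -D_ALP_WITH_OMP -D_ALP_OMP_WITH_REFERENCE and links
# OpenMP::OpenMP_CXX; the generated alp_omp compiler wrapper additionally
# passes -D_ALP_BACKEND=omp -D_ALP_SECONDARY_BACKEND=reference (see
# ALP_OMP_SELECTION_DEFS in AddGRBVars.cmake above).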
# assert_defined_variables( ALP_REFERENCE_INCLUDE_DEFS WITH_ALP_REFERENCE_BACKEND_HEADERS ) +assert_defined_variables( ALP_OMP_INCLUDE_DEFS WITH_ALP_OMP_BACKEND_HEADERS ) assert_valid_variables( INCLUDE_INSTALL_DIR ) # to avoid flaky acrobatics with regex or glob expressions, copy main files directly @@ -91,6 +92,22 @@ if( WITH_ALP_REFERENCE_BACKEND ) ) endif() +if( WITH_ALP_OMP_BACKEND_HEADERS ) + add_library( backend_alp_omp_headers INTERFACE ) + target_link_libraries( backend_alp_omp_headers INTERFACE backend_headers_nodefs ) + target_link_libraries( backend_alp_omp_headers INTERFACE OpenMP::OpenMP_CXX ) + target_compile_definitions( backend_alp_omp_headers INTERFACE "${ALP_OMP_INCLUDE_DEFS}" ) + + install( TARGETS backend_alp_omp_headers EXPORT GraphBLASTargets ) +endif() + +if( WITH_ALP_OMP_BACKEND ) + install( DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/alp/omp" + DESTINATION "${ALP_INCLUDE_INSTALL_DIR}/alp_omp" + FILES_MATCHING REGEX "${HEADERS_REGEX}" + ) +endif() + # this target lists the algorithms implemented on top of the generic functionalities, # hence it depends only on backend_headers_nodefs add_library( alp_algorithms INTERFACE ) diff --git a/include/alp/amf-based/functorbasedmatrix.hpp b/include/alp/amf-based/functorbasedmatrix.hpp index f48ecca4d..0e56a8688 100644 --- a/include/alp/amf-based/functorbasedmatrix.hpp +++ b/include/alp/amf-based/functorbasedmatrix.hpp @@ -84,6 +84,7 @@ namespace alp { typedef T value_type; /** Type returned by access function */ typedef T access_type; + typedef T const_access_type; /** Type of the index used to access the physical storage */ typedef std::pair< size_t, size_t > storage_index_type; diff --git a/include/alp/amf-based/matrix.hpp b/include/alp/amf-based/matrix.hpp index 13240bb6e..8fac4ec3f 100644 --- a/include/alp/amf-based/matrix.hpp +++ b/include/alp/amf-based/matrix.hpp @@ -90,7 +90,7 @@ namespace alp { typename MatrixType, std::enable_if_t< is_matrix< MatrixType >::value > * = nullptr > - const typename MatrixType::access_type access( const MatrixType &, const typename MatrixType::storage_index_type & ); + typename MatrixType::const_access_type access( const MatrixType &, const typename MatrixType::storage_index_type & ); template< typename MatrixType, @@ -146,7 +146,7 @@ namespace alp { typename MatrixType, std::enable_if_t< is_matrix< MatrixType >::value > * > - friend const typename MatrixType::access_type access( const MatrixType &A, const typename MatrixType::storage_index_type &storageIndex ); + friend typename MatrixType::const_access_type access( const MatrixType &A, const typename MatrixType::storage_index_type &storageIndex ); template< typename MatrixType, @@ -168,9 +168,9 @@ namespace alp { static_cast< DerivedMatrix & >( *this ).setInitialized( initialized ); } - template< typename AccessType, typename StorageIndexType > - const AccessType access( const StorageIndexType storageIndex ) const { - static_assert( std::is_same< AccessType, typename DerivedMatrix::access_type >::value ); + template< typename ConstAccessType, typename StorageIndexType > + ConstAccessType access( const StorageIndexType storageIndex ) const { + static_assert( std::is_same< ConstAccessType, typename DerivedMatrix::const_access_type >::value ); static_assert( std::is_same< StorageIndexType, typename DerivedMatrix::storage_index_type >::value ); return static_cast< const DerivedMatrix & >( *this ).access( storageIndex ); } @@ -348,8 +348,8 @@ namespace alp { > Matrix( const size_t rows, const size_t cols, const size_t cap = 0 ) : base_type( - 
storage::AMFFactory::FromPolynomial< - typename internal::determine_poly_factory< structure, ImfR, ImfC, backend >::factory_type + storage::AMFFactory< backend >::template FromPolynomial< + Structure, ImfR, ImfC >::Create( ImfR( rows ), ImfC( cols ) @@ -383,8 +383,8 @@ > Matrix( const size_t dim, const size_t cap = 0 ) : base_type( - storage::AMFFactory::FromPolynomial< - typename internal::determine_poly_factory< structure, ImfR, ImfC, backend >::factory_type + storage::AMFFactory< backend >::template FromPolynomial< + Structure, ImfR, ImfC >::Create( ImfR( dim ), ImfC( dim ) @@ -418,7 +418,7 @@ Matrix( SourceType &source_matrix, ImfR imf_r, ImfC imf_c ) : base_type( getContainer( source_matrix ), - storage::AMFFactory::Compose< + storage::AMFFactory< backend >::template Compose< ImfR, ImfC, typename SourceType::base_type::amf_type >::Create( imf_r, imf_c, internal::getAmf( source_matrix ) ) ) {} @@ -441,14 +441,43 @@ Matrix( SourceType &source_matrix ) : base_type( getContainer( source_matrix ), - storage::AMFFactory::Reshape< View::type_id, typename SourceType::amf_type >::Create( internal::getAmf( source_matrix ) ) + storage::AMFFactory< backend >::template Reshape< View::type_id, typename SourceType::amf_type >::Create( internal::getAmf( source_matrix ) ) ) {} /** - * @deprecated - * Constructor for a view over another storage-based matrix. + * Constructor for a view over an internal container of another matrix. * - * @tparam SourceType The type of the target matrix. + * @tparam BufferType The type of the raw buffer holding the container's + * elements. + * @tparam AmfType The type of the amf corresponding to the layout of + * the provided container. + * Used as a template parameter to avoid a hard + * compilation error in the case of FunctorBasedMatrix, + * when base_type::amf_type does not exist. + */ + template< + typename BufferType, + typename AmfType, + std::enable_if_t< + !is_container< BufferType >::value && + internal::is_view_over_storage< View >::value + > * = nullptr + > + Matrix( BufferType &&buffer, const size_t buffer_size, AmfType &&amf ) : + base_type( + buffer, + buffer_size, + std::forward< typename base_type::amf_type >( amf ) + ) { + static_assert( + std::is_same< typename base_type::amf_type, typename std::remove_reference< AmfType >::type >::value, + "The type of the provided AMF does not match the type of the constructed container's AMF" + ); + } + + /** + * Constructor for a view over another matrix's internal container. + * + * @tparam ContainerType The type of the internal container. * @tparam AmfType The type of the amf used to construct the matrix. * Used as a template parameter to benefit from * SFINAE for the case of FunctorBasedMatrix, when * base_type::amf_type does not exist, which would * result in a hard compilation error.
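 *
 * \par Example
 * A sketch of how the omp backend constructs such a view internally (cf.
 * internal::get_view in alp/omp/matrix.hpp later in this change; target_t
 * names the sequential matrix type):
 * \code
 * target_t blk_matrix( container, amf );
 * \endcode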
*/ template< - typename SourceType, + typename ContainerType, typename AmfType, std::enable_if_t< - std::is_same< SourceType, typename View::applied_to >::value && + internal::is_container< ContainerType >::value && internal::is_view_over_storage< View >::value && !internal::requires_allocation< View >::value > * = nullptr > - Matrix( SourceType &source_matrix, AmfType &&amf ) : + Matrix( ContainerType &container, AmfType &&amf ) : base_type( - getContainer( source_matrix ), + container, std::forward< typename base_type::amf_type >( amf ) ) { static_assert( - std::is_same< typename base_type::amf_type, AmfType >::value, + std::is_same< typename base_type::amf_type, typename std::remove_reference< AmfType >::type >::value, "The AMF type of the constructor parameter needs to match the AMF type of this container specialization." ); } @@ -1086,10 +1115,10 @@ namespace alp { typename MatrixType, std::enable_if_t< is_matrix< MatrixType >::value > * = nullptr > - const typename MatrixType::access_type access( const MatrixType &A, const typename MatrixType::storage_index_type &storageIndex ) { + typename MatrixType::const_access_type access( const MatrixType &A, const typename MatrixType::storage_index_type &storageIndex ) { return static_cast< const MatrixBase< typename MatrixType::base_type > & - >( A ).template access< typename MatrixType::access_type, typename MatrixType::storage_index_type >( storageIndex ); + >( A ).template access< typename MatrixType::const_access_type, typename MatrixType::storage_index_type >( storageIndex ); } /** Non-constant variant. **/ diff --git a/include/alp/amf-based/storage.hpp b/include/alp/amf-based/storage.hpp index 15a73572f..7d5561339 100644 --- a/include/alp/amf-based/storage.hpp +++ b/include/alp/amf-based/storage.hpp @@ -36,6 +36,27 @@ namespace alp { + namespace internal { + + /** + * Determines the mapping polynomial type and exposes a factory method + * to create instances of that polynomial. + * + * All specializations of this type trait should define the factory + * method following the same signature. The factory method shall + * return an object of the type exposed as \a type. + * + * @tparam Structure Matrix structure + * @tparam ImfR Row IMF type + * @tparam ImfC Column IMF type + * @tparam backend The backend + * + */ + template< typename Structure, typename ImfR, typename ImfC, enum Backend backend > + struct determine_poly_factory {}; + + } // namespace internal + namespace storage { enum StorageOrientation { @@ -581,6 +602,10 @@ namespace alp { }; // namespace polynomials + /** Forward declaration */ + template< enum Backend backend > + class AMFFactory; + /** * Access Mapping Function (AMF) maps logical matrix coordinates (i, j) * to the corresponding matrix element's location in the physical container. @@ -599,10 +624,13 @@ namespace alp { * All AMF specializations shall expose the effective types of the IMFs * and the mapping polynomial, since these may change after the fusion. */ - template< typename ImfR, typename ImfC, typename MappingPolynomial > + template< + typename ImfR, typename ImfC, typename MappingPolynomial, + enum Backend backend + > class AMF { - friend class AMFFactory; + friend class AMFFactory< backend >; public: @@ -747,8 +775,9 @@ namespace alp { }; // class AMF /** - * @brief Collects AMF factory classes + * Collects AMF factory classes. 
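+ *
+ * \par Example
+ * An illustrative sketch, mirroring the Matrix constructors above, of
+ * creating an AMF for an original rows-by-cols container:
+ * \code
+ * auto amf = storage::AMFFactory< backend >::template FromPolynomial<
+ *     Structure, ImfR, ImfC
+ * >::Create( ImfR( rows ), ImfC( cols ) );
+ * \endcode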
*/ + template< enum Backend backend > struct AMFFactory { /** @@ -811,9 +840,9 @@ namespace alp { public: - typedef AMF< final_imf_r_type, final_imf_c_type, final_polynomial_type > amf_type; + typedef AMF< final_imf_r_type, final_imf_c_type, final_polynomial_type, backend > amf_type; - static amf_type Create( ViewImfR imf_r, ViewImfC imf_c, const AMF< SourceImfR, SourceImfC, SourcePoly > &amf ) { + static amf_type Create( ViewImfR imf_r, ViewImfC imf_c, const AMF< SourceImfR, SourceImfC, SourcePoly, backend > &amf ) { composed_imf_r_type composed_imf_r = imf::ComposedFactory< SourceImfR, ViewImfR >::create( amf.imf_r, imf_r ); composed_imf_c_type composed_imf_c = imf::ComposedFactory< SourceImfC, ViewImfC >::create( amf.imf_c, imf_c ); return amf_type( @@ -841,10 +870,21 @@ namespace alp { * @tparam PolyType Type of the mapping polynomial. * */ - template< typename PolyFactory > + template< typename Structure, typename ImfR, typename ImfC > struct FromPolynomial { - typedef AMF< imf::Id, imf::Id, typename PolyFactory::poly_type > amf_type; + // Ensure compatibility of IMF types. + // Original Matrix has imf::Id as both IMFs. + // Original Vector has ImfR = imf::Id and ImfC = imf::Zero. + static_assert( + std::is_same< ImfR, imf::Id >::value && + ( std::is_same< ImfC, imf::Id >::value || std::is_same< ImfC, imf::Zero >::value ), + "AMF factory FromPolynomial can only be used for an original container." + ); + + typedef typename internal::determine_poly_factory< Structure, ImfR, ImfC, backend >::factory_type PolyFactory; + + typedef AMF< imf::Id, imf::Id, typename PolyFactory::poly_type, backend > amf_type; /** * Factory method used by 2D containers. @@ -890,13 +930,13 @@ namespace alp { static_assert( std::is_same< amf_type, - typename Compose< imf::Id, imf::Zero, AMF< imf::Id, imf::Id, typename PolyFactory::poly_type > >::amf_type + typename Compose< imf::Id, imf::Zero, AMF< imf::Id, imf::Id, typename PolyFactory::poly_type, backend > >::amf_type >::value, "The factory method returns the object of different type than declared. This is a bug." 
); - return Compose< imf::Id, imf::Zero, AMF< imf::Id, imf::Id, typename PolyFactory::poly_type > >::Create( + return Compose< imf::Id, imf::Zero, AMF< imf::Id, imf::Id, typename PolyFactory::poly_type, backend > >::Create( imf_r, imf_c, - FromPolynomial< PolyFactory >::Create( imf::Id( imf_r.N ), imf::Id( imf_c.N ) ) + FromPolynomial< Structure, imf::Id, imf::Zero >::Create( imf::Id( imf_r.N ), imf::Id( imf_c.N ) ) ); } @@ -950,7 +990,8 @@ namespace alp { typename polynomials::apply_view< view::transpose, typename SourceAMF::mapping_polynomial_type - >::type + >::type, + backend > amf_type; static amf_type Create( const SourceAMF &amf ) { @@ -958,7 +999,8 @@ namespace alp { return AMF< typename SourceAMF::imf_c_type, typename SourceAMF::imf_r_type, - new_mapping_polynomial_type + new_mapping_polynomial_type, + backend >( amf.imf_c, amf.imf_r, @@ -1004,7 +1046,7 @@ namespace alp { public: - typedef AMF< imf::Id, imf::Zero, new_poly_type > amf_type; + typedef AMF< imf::Id, imf::Zero, new_poly_type, backend > amf_type; static amf_type Create( const SourceAMF &amf ) { assert( amf.getLogicalDimensions().first == amf.getLogicalDimensions().second ); @@ -1034,10 +1076,10 @@ namespace alp { template< typename SourceAMF > struct Reshape< view::matrix, SourceAMF > { - typedef typename AMFFactory::Compose< imf::Id, imf::Id, SourceAMF >::amf_type amf_type; + typedef typename Compose< imf::Id, imf::Id, SourceAMF >::amf_type amf_type; static amf_type Create( const SourceAMF &amf ) { - return storage::AMFFactory::Compose< imf::Id, imf::Id, SourceAMF >::Create( + return Compose< imf::Id, imf::Id, SourceAMF >::Create( imf::Id( amf.getLogicalDimensions().first ), imf::Id( amf.getLogicalDimensions().second ), amf @@ -1054,23 +1096,6 @@ namespace alp { namespace internal { - /** - * Determines the mapping polynomial type and exposes a factory method - * to create instances of that polynomial. - * - * All specializations of this type trait should define the factory - * method following the same signature. The factory method shall - * return an object of the type exposed as \a type. - * - * @tparam Structure Matrix structure - * @tparam ImfR Row IMF type - * @tparam ImfC Column IMF type - * @tparam backend The backend - * - */ - template< typename Structure, typename ImfR, typename ImfC, enum Backend backend > - struct determine_poly_factory {}; - /** * Determines the AMF type for a matrix having the provided static properties. * @@ -1131,10 +1156,10 @@ namespace alp { typedef typename std::conditional< View::type_id == view::gather, - typename storage::AMFFactory::Compose< + typename storage::AMFFactory< backend >::template Compose< ImfR, ImfC, typename View::applied_to::amf_type >::amf_type, - typename storage::AMFFactory::Reshape< + typename storage::AMFFactory< backend >::template Reshape< View::type_id, typename View::applied_to::amf_type >::amf_type @@ -1142,7 +1167,7 @@ namespace alp { }; - /** Specialization for containers that allocate storage */ + /** Specialization for storage-based containers that allocate storage */ template< typename Structure, typename ImfC, enum Backend backend > struct determine_amf_type< Structure, view::Original< void >, imf::Id, ImfC, backend > { @@ -1151,12 +1176,12 @@ namespace alp { "Incompatible combination of parameters provided to determine_amf_type." 
); - typedef typename storage::AMFFactory::FromPolynomial< - typename determine_poly_factory< Structure, imf::Id, ImfC, backend >::factory_type + typedef typename storage::AMFFactory< backend >::template FromPolynomial< + Structure, imf::Id, ImfC >::amf_type type; }; - /** Specialization for containers that allocate storage */ + /** Specialization for functor-based containers that allocate storage */ template< typename Structure, typename ImfC, enum Backend backend, typename Lambda > struct determine_amf_type< Structure, view::Functor< Lambda >, imf::Id, ImfC, backend > { @@ -1165,10 +1190,10 @@ namespace alp { "Incompatible combination of parameters provided to determine_amf_type." ); - typedef typename storage::AMFFactory::FromPolynomial< - storage::polynomials::NoneFactory - >::amf_type type; + // A functor-based container does not have an AMF + typedef void type; }; + } // namespace internal } // namespace alp diff --git a/include/alp/amf-based/storagebasedmatrix.hpp b/include/alp/amf-based/storagebasedmatrix.hpp index d1c38c59f..71c112554 100644 --- a/include/alp/amf-based/storagebasedmatrix.hpp +++ b/include/alp/amf-based/storagebasedmatrix.hpp @@ -62,19 +62,6 @@ namespace alp { > void setInitialized( MatrixType &, const bool ) noexcept; - /** Forward declarations for access functions */ - template< - typename MatrixType, - std::enable_if_t< is_matrix< MatrixType >::value > * = nullptr - > - const typename MatrixType::access_type access( const MatrixType &, const typename MatrixType::storage_index_type & ); - - template< - typename MatrixType, - std::enable_if_t< is_matrix< MatrixType >::value > * = nullptr - > - typename MatrixType::access_type access( MatrixType &, const typename MatrixType::storage_index_type & ); - /** Forward declaration */ template< typename T, typename AmfType, bool requires_allocation, Backend backend > class StorageBasedMatrix; @@ -126,29 +113,6 @@ namespace alp { > const typename MatrixType::amf_type &getAmf( const MatrixType &A ) noexcept; - /** Forward declaration */ - template< typename DerivedMatrix > - class MatrixBase; - - template< typename DerivedMatrix > - std::pair< size_t, size_t > dims( const MatrixBase< DerivedMatrix > & A ) noexcept; - - template< - typename MatrixType, - std::enable_if_t< internal::is_storage_based< MatrixType >::value > * = nullptr - > - size_t getStorageDimensions( const MatrixType &A ) noexcept; - - template< typename MatrixType, - std::enable_if_t< is_matrix< MatrixType>::value > * = nullptr - > - bool getInitialized( const MatrixType &A ) noexcept; - - template< typename MatrixType, - std::enable_if_t< is_matrix< MatrixType>::value > * = nullptr - > - void setInitialized( MatrixType &, const bool ) noexcept; - /** * Matrix container specialization * Implements both original containers and views on containers. @@ -181,6 +145,7 @@ namespace alp { typedef typename AmfType::imf_c_type imf_c_type; /** Type returned by access function */ typedef T &access_type; + typedef const T &const_access_type; /** Type of the index used to access the physical storage */ typedef size_t storage_index_type; @@ -269,7 +234,7 @@ namespace alp { * * @return const reference or value of the element at given position. 
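 *
 * \par Example
 * An illustrative sketch (cf. buildMatrix in alp/omp/io.hpp later in this
 * change), where i and j are logical coordinates:
 * \code
 * const auto &elem = internal::access( A, internal::getStorageIndex( A, i, j ) );
 * \endcode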
*/ - const access_type access( const storage_index_type &storageIndex ) const { + const_access_type access( const storage_index_type &storageIndex ) const { return container[ storageIndex ]; } @@ -310,6 +275,11 @@ container( container ), amf( std::move( amf ) ) {} + /** View on another raw container */ + StorageBasedMatrix( T *buffer, const size_t buffer_size, AmfType &&amf ) : + container( buffer, buffer_size ), + amf( std::move( amf ) ) {} + }; // class StorageBasedMatrix diff --git a/include/alp/backends.hpp b/include/alp/backends.hpp index 1b7befaf0..4b1aa8b52 100644 --- a/include/alp/backends.hpp +++ b/include/alp/backends.hpp @@ -40,7 +40,12 @@ namespace alp { /** * The ALP reference backend. */ - reference + reference, + + /** + * The ALP OpenMP backend. + */ + omp, }; diff --git a/include/alp/base/config.hpp b/include/alp/base/config.hpp index 00455225b..5a5dcc0fe 100644 --- a/include/alp/base/config.hpp +++ b/include/alp/base/config.hpp @@ -41,6 +41,15 @@ #define _ALP_BACKEND reference #endif +// if the user did not define _ALP_SECONDARY_BACKEND, set it to the default +// sequential implementation. This setting may be used by other backends for +// backend-specific purposes. For example, a parallel backend may use this +// setting to control to which sequential backend it dispatches sequential +// work. +#ifndef _ALP_SECONDARY_BACKEND + #define _ALP_SECONDARY_BACKEND reference +#endif + /** * The main GraphBLAS namespace. * diff --git a/include/alp/blas3.hpp b/include/alp/blas3.hpp index e1f6d0baa..976d3ccb7 100644 --- a/include/alp/blas3.hpp +++ b/include/alp/blas3.hpp @@ -26,6 +26,9 @@ #ifdef _ALP_WITH_REFERENCE #include <alp/reference/blas3.hpp> #endif +#ifdef _ALP_WITH_OMP + #include <alp/omp/blas3.hpp> +#endif #endif // end _H_ALP_BLAS3 diff --git a/include/alp/config.hpp b/include/alp/config.hpp index 630c30de7..3913ee6da 100644 --- a/include/alp/config.hpp +++ b/include/alp/config.hpp @@ -27,6 +27,9 @@ #ifdef _ALP_WITH_REFERENCE #include "alp/reference/config.hpp" #endif +#ifdef _ALP_WITH_OMP + #include "alp/omp/config.hpp" +#endif #endif // end ``_H_ALP_CONFIG'' diff --git a/include/alp/exec.hpp b/include/alp/exec.hpp index 9fbadaee7..c20e21cfd 100644 --- a/include/alp/exec.hpp +++ b/include/alp/exec.hpp @@ -31,6 +31,9 @@ #ifdef _ALP_WITH_REFERENCE #include "alp/reference/exec.hpp" #endif +#ifdef _ALP_WITH_OMP + #include "alp/omp/exec.hpp" +#endif #ifdef _ALP_BACKEND namespace alp { diff --git a/include/alp/init.hpp b/include/alp/init.hpp index a07812bfe..d59782b24 100644 --- a/include/alp/init.hpp +++ b/include/alp/init.hpp @@ -27,6 +27,9 @@ #ifdef _ALP_WITH_REFERENCE #include "alp/reference/init.hpp" #endif +#ifdef _ALP_WITH_OMP + #include "alp/omp/init.hpp" +#endif #endif // end ``_H_ALP_INIT'' diff --git a/include/alp/io.hpp b/include/alp/io.hpp index de1536188..73bb7e082 100644 --- a/include/alp/io.hpp +++ b/include/alp/io.hpp @@ -27,6 +27,9 @@ #ifdef _ALP_WITH_REFERENCE #include <alp/reference/io.hpp> #endif +#ifdef _ALP_WITH_OMP + #include <alp/omp/io.hpp> +#endif #endif // end ``_H_ALP_IO'' diff --git a/include/alp/matrix.hpp b/include/alp/matrix.hpp index 22b63eff0..20031e3c7 100644 --- a/include/alp/matrix.hpp +++ b/include/alp/matrix.hpp @@ -31,6 +31,9 @@ #ifdef _ALP_WITH_REFERENCE #include <alp/reference/matrix.hpp> #endif +#ifdef _ALP_WITH_OMP + #include <alp/omp/matrix.hpp> +#endif // specify default only if requested during compilation #ifdef _ALP_BACKEND diff --git a/include/alp/omp/blas3.hpp b/include/alp/omp/blas3.hpp new file mode 100644 index 000000000..4954e00b5 --- /dev/null +++ b/include/alp/omp/blas3.hpp @@ -0,0 +1,511 @@ + +/* + * Copyright 2021 Huawei
Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * @author A. N. Yzelman + * @date 14th of January 2022 + */ + +#ifndef _H_ALP_OMP_BLAS3 +#define _H_ALP_OMP_BLAS3 + +#include <algorithm> // for std::min/max +#include <type_traits> // for std::enable_if + +#include +#include + +#include + +#include "matrix.hpp" +#include "storage.hpp" + +// Include backend to which sequential work is delegated +#ifdef _ALP_OMP_WITH_REFERENCE + #include + #include + #include +#endif + +#ifndef NDEBUG +#include "../../../tests/utils/print_alp_containers.hpp" +#endif + + +namespace alp { + + namespace internal { + + /** + * \internal general mxm implementation that all mxm variants using + * structured matrices refer to. + */ + template< + bool allow_void, + class MulMonoid, + typename OutputType, typename InputType1, typename InputType2, + class Operator, class Monoid, + typename OutputStructure, typename OutputView, + typename OutputImfR, typename OutputImfC, + typename InputStructure1, typename InputView1, + typename InputImfR1, typename InputImfC1, + typename InputStructure2, typename InputView2, + typename InputImfR2, typename InputImfC2 + > + RC mxm_generic( + alp::Matrix< OutputType, OutputStructure, + Density::Dense, OutputView, OutputImfR, OutputImfC, omp > &C, + alp::Matrix< InputType1, InputStructure1, + Density::Dense, InputView1, InputImfR1, InputImfC1, omp > &A, + alp::Matrix< InputType2, InputStructure2, + Density::Dense, InputView2, InputImfR2, InputImfC2, omp > &B, + const Operator &oper, + const Monoid &monoid, + const MulMonoid &mulMonoid, + const std::enable_if_t< !alp::is_object< OutputType >::value && + !alp::is_object< InputType1 >::value && ! + alp::is_object< InputType2 >::value && + alp::is_operator< Operator >::value && + alp::is_monoid< Monoid >::value, + void > * const = NULL + ) { + + static_assert( + !( + std::is_same< InputType1, void >::value || + std::is_same< InputType2, void >::value + ), + "alp::internal::mxm_generic: the operator-monoid version of mxm cannot be " + "used if either of the input matrices is a pattern matrix (of type " + "void)" + ); + +#ifndef NDEBUG + std::cout << "In alp::internal::mxm_generic (omp)\n"; +#endif + + // Early exit checks + if( ! internal::getInitialized( A ) || ! internal::getInitialized( B ) || !
internal::getInitialized( C ) + ) { + internal::setInitialized( C, false ); + return SUCCESS; + } + + const std::ptrdiff_t m { static_cast< std::ptrdiff_t >( nrows( C ) ) }; + const std::ptrdiff_t n { static_cast< std::ptrdiff_t >( ncols( C ) ) }; + const std::ptrdiff_t m_a { static_cast< std::ptrdiff_t >( nrows( A ) ) }; + const std::ptrdiff_t k { static_cast< std::ptrdiff_t >( ncols( A ) ) }; + const std::ptrdiff_t k_b { static_cast< std::ptrdiff_t >( nrows( B ) ) }; + const std::ptrdiff_t n_b { static_cast< std::ptrdiff_t >( ncols( B ) ) }; + + if( m != m_a || k != k_b || n != n_b ) { + return MISMATCH; + } + + const Distribution_2_5D &da = internal::getAmf( A ).getDistribution(); + const Distribution_2_5D &db = internal::getAmf( B ).getDistribution(); + const Distribution_2_5D &dc = internal::getAmf( C ).getDistribution(); + + const auto tg_a = da.getThreadGridDims(); + const auto tg_b = db.getThreadGridDims(); + const auto tg_c = dc.getThreadGridDims(); + + // 2.5D factor ranges within 3D limits + if( tg_c.rt != tg_a.rt || tg_a.rt != tg_b.rt || ( tg_a.tc % tg_a.rt ) != 0 ) { + return MISMATCH; + } + + if( tg_c.tr != tg_a.tr || tg_c.tc != tg_b.tc || tg_a.tc != tg_b.tr ) { + return MISMATCH; + } + + using th_coord_t = typename Distribution_2_5D::ThreadCoords; + + RC rc = SUCCESS; + +#ifndef NDEBUG + #pragma omp critical + std::cerr << "In alp::internal::mxm_generic (omp)\n" + "\tBeginning parallel region" << std::endl; +#endif + + #pragma omp parallel + { + const size_t thread = config::OMP::current_thread_ID(); + + const th_coord_t th_ijk_a = da.getThreadCoords( thread ); + const th_coord_t th_ijk_b = db.getThreadCoords( thread ); + const th_coord_t th_ijk_c = dc.getThreadCoords( thread ); + + RC local_rc = SUCCESS; + + // Broadcast A and B to all c-dimensional layers + if( local_rc == SUCCESS && da.isActiveThread( th_ijk_a ) && th_ijk_a.rt > 0 ) { + +#ifndef NDEBUG + #pragma omp critical + std::cerr << "Thread " << thread << " in alp::internal::mxm_generic (omp)\n" + "\tCopying A." << std::endl; +#endif + + const auto set_block_grid_dims_a = da.getLocalBlockGridDims( th_ijk_a ); + + th_coord_t th_ij0_a( th_ijk_a.tr, th_ijk_a.tc, 0 ); + + for( size_t br = 0; br < set_block_grid_dims_a.first; ++br ) { + for( size_t bc = 0; bc < set_block_grid_dims_a.second; ++bc ) { + + auto refAij0 = get_view( A, th_ij0_a, br, bc ); + auto refAijk = get_view( A, th_ijk_a, br, bc ); + + local_rc = local_rc ? local_rc : set( refAijk, refAij0 ); + + } + } + + } // End Broadcast of A + + if( local_rc == SUCCESS && db.isActiveThread( th_ijk_b ) && th_ijk_b.rt > 0 ) { + +#ifndef NDEBUG + #pragma omp critical + std::cerr << "Thread " << thread << " in alp::internal::mxm_generic (omp)\n" + "\tCopying B." << std::endl; +#endif + + const auto set_block_grid_dims_b = db.getLocalBlockGridDims( th_ijk_b ); + + th_coord_t th_ij0_b( th_ijk_b.tr, th_ijk_b.tc, 0 ); + + for( size_t br = 0; br < set_block_grid_dims_b.first; ++br ) { + for( size_t bc = 0; bc < set_block_grid_dims_b.second; ++bc ) { + + auto refBij0 = get_view( B, th_ij0_b, br, bc ); + auto refBijk = get_view( B, th_ijk_b, br, bc ); + + local_rc = local_rc ? local_rc : set( refBijk, refBij0 ); + } + } + } // End Broadcast of B + + if( local_rc == SUCCESS && dc.isActiveThread( th_ijk_c ) && th_ijk_c.rt > 0 ) { + +#ifndef NDEBUG + #pragma omp critical + std::cerr << "Thread " << thread << " in alp::internal::mxm_generic (omp)\n" + "\tZeroing C." 
<< std::endl; +#endif + + const auto block_grid_dims_c = dc.getLocalBlockGridDims( th_ijk_c ); + + alp::Scalar< + OutputType, alp::structures::General, config::default_sequential_backend + > zero( monoid.template getIdentity< OutputType >() ); + + for( size_t br = 0; br < block_grid_dims_c.first; ++br ) { + for( size_t bc = 0; bc < block_grid_dims_c.second; ++bc ) { + auto refCijk = get_view( C, th_ijk_c, br, bc ); + local_rc = local_rc ? local_rc : set( refCijk, zero ); + } + } + } // End Zero-ing of C + + // Different values for rc could converge here (eg, MISMATCH, FAILED). + if( local_rc != SUCCESS ) { +#ifndef NDEBUG + #pragma omp critical + std::cerr << "Thread " << thread << " in alp::internal::mxm_generic (omp)\n" + "\tIssues replicating input matrices." << std::endl; +#endif + rc = local_rc; + } + + // End Broadcast of A and B and zero-ing of C + #pragma omp barrier + +#ifndef NDEBUG + #pragma omp critical + std::cerr << "Thread " << thread << " in alp::internal::mxm_generic (omp)\n" + "\tPassing barrier" << std::endl; +#endif + + if( rc == SUCCESS && dc.isActiveThread( th_ijk_c ) ) { + + const auto block_grid_dims_c = dc.getLocalBlockGridDims( th_ijk_c ); + + // Initialize circular shifts at stride of Rt + size_t c_a = utils::modulus( th_ijk_a.tc + th_ijk_a.tr + th_ijk_a.rt * tg_a.tc / tg_a.rt, tg_a.tc ); + size_t r_b = utils::modulus( th_ijk_b.tr + th_ijk_b.tc + th_ijk_b.rt * tg_b.tr / tg_b.rt, tg_b.tr ); + + // Per-c-dimensional-layer partial computation + for( size_t r = 0; r < tg_a.tc / tg_a.rt; ++r ) { + + const th_coord_t th_isk_a( th_ijk_a.tr, c_a, th_ijk_a.rt ); + const th_coord_t th_sjk_b( r_b, th_ijk_b.tc, th_ijk_b.rt ); + +#ifndef NDEBUG + if( thread == 1 ) { + #pragma omp critical + { + std::cerr << "Thread " << thread << " in alp::internal::mxm_generic (omp)\n" + "\tCompute iteration r=" << r << "\n" + "\tStarting from A( " << th_ijk_a.tr << ", " << c_a << ", " << th_ijk_a.rt << " )\n" + "\tStarting from B( " << r_b << ", " << th_ijk_b.tc << ", " << th_ijk_b.rt << " )" << std::endl; + } + } +#endif + + if( da.isActiveThread( th_isk_a ) && db.isActiveThread( th_sjk_b ) ) { + + const auto block_grid_dims_a = da.getLocalBlockGridDims( th_isk_a ); + + for( size_t br = 0; br < block_grid_dims_c.first; ++br ) { + for( size_t bc = 0; bc < block_grid_dims_c.second; ++bc ) { + for( size_t bk = 0; bk < block_grid_dims_a.second; ++bk ) { + + auto refC_ijk = get_view( C, th_ijk_c, br, bc ); + auto refA_loc = get_view( A, th_isk_a, br, bk ); + auto refB_loc = get_view( B, th_sjk_b, bk, bc ); + + // Delegate the block-local multiply-accumulate to the sequential backend + local_rc = local_rc ? local_rc : internal::mxm_generic< allow_void >( refC_ijk, refA_loc, refB_loc, oper, monoid, mulMonoid ); + +#ifndef NDEBUG + if( thread == 1 ) { + #pragma omp critical + { + print_matrix("Cref post", refC_ijk ); + } + } +#endif + + } + } + } + + // Circular shift rightwards for A + c_a = utils::modulus( c_a + 1, tg_a.tc ); + // Circular shift downwards for B + r_b = utils::modulus( r_b + 1, tg_b.tr ); + + } + + } + + } // End computation per layer of c-dimension + + // Different values for rc could converge here (eg, MISMATCH, FAILED). + if( local_rc != SUCCESS ) { +#ifndef NDEBUG + #pragma omp critical + std::cerr << "Thread " << thread << " in alp::internal::mxm_generic (omp)\n" + "\tIssues with local mxm computations." << std::endl; +#endif + rc = local_rc; + } + + // End layer-by-layer partial computation + #pragma omp barrier + + // Final c-dimension reduction + if( rc == SUCCESS && dc.isActiveThread( th_ijk_c ) && th_ijk_c.rt == 0 ) { + +#ifndef NDEBUG + #pragma omp critical + std::cerr << "Thread " << thread << " in alp::internal::mxm_generic (omp)\n" + "\tEntering reduction."
<< std::endl; +#endif + + const auto block_grid_dims_c = dc.getLocalBlockGridDims( th_ijk_c ); + + for( size_t r = 1; r < tg_c.rt; ++r ) { + +#ifndef NDEBUG + #pragma omp critical + std::cerr << "Thread " << thread << " in alp::internal::mxm_generic (omp)\n" + "\tReduction iteration r=" << r << std::endl; +#endif + + const th_coord_t th_ijr_c( th_ijk_c.tr, th_ijk_c.tc, r ); + + for( size_t br = 0; br < block_grid_dims_c.first; ++br ) { + for( size_t bc = 0; bc < block_grid_dims_c.second; ++bc ) { + + auto refCij0 = internal::get_view( C, th_ijk_c, br, bc ); // k == 0 + auto refCijr = internal::get_view( C, th_ijr_c, br, bc ); + + // Final result in C at layer 0 + local_rc = local_rc ? local_rc : foldl( refCij0, refCijr, monoid ); + } + } + + } + + } // End of final reduction along c-dimension + + // Different values for rc could converge here (eg, MISMATCH, FAILED). + if( local_rc != SUCCESS ) { +#ifndef NDEBUG + #pragma omp critical + std::cerr << "Thread " << thread << " in alp::internal::mxm_generic (omp)\n" + "\tIssues with final reduction." << std::endl; +#endif + rc = local_rc; + } + + } + + return rc; + + } + + } // namespace internal + + /** + * Dense Matrix-Matrix multiply between structured matrices. + * Version with semiring parameter. + * + * @tparam descr The descriptors under which to perform the computation. + * @tparam OutputStructMatT The structured matrix type of the output matrix. + * @tparam InputStructMatT1 The structured matrix type of the left-hand side input + * matrix. + * @tparam InputStructMatT2 The structured matrix type of the right-hand side input + * matrix. + * @tparam Semiring The semiring under which to perform the + * multiplication. + * + * @returns SUCCESS If the computation completed as intended. + * @returns MISMATCH Whenever the structures or dimensions of \a A, \a B, and \a C do + * not match. All input data containers are left + * untouched if this exit code is returned; it will be + * as though this call was never made. + * + * @param[out] C The output matrix \f$ C = AB \f$ when the function returns + * #SUCCESS. + * @param[in] A The left-hand side input matrix \f$ A \f$. + * @param[in] B The right-hand side input matrix \f$ B \f$. + * @param[in] ring (Optional.) The semiring under which the computation should + * proceed. + * @param phase The execution phase.
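+ *
+ * \par Example
+ * An illustrative sketch (container definitions elided; the semiring below
+ * is the usual plus-times semiring over doubles):
+ * \code
+ * alp::Semiring<
+ *     alp::operators::add< double >, alp::operators::mul< double >,
+ *     alp::identities::zero, alp::identities::one
+ * > ring;
+ * alp::RC rc = alp::mxm( C, A, B, ring );
+ * \endcode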
+ */ + template< + typename OutputType, typename InputType1, typename InputType2, + typename OutputStructure, typename OutputView, + typename OutputImfR, typename OutputImfC, + typename InputStructure1, typename InputView1, + typename InputImfR1, typename InputImfC1, + typename InputStructure2, typename InputView2, + typename InputImfR2, typename InputImfC2, + class Semiring + > + RC mxm( + alp::Matrix< OutputType, OutputStructure, + Density::Dense, OutputView, OutputImfR, OutputImfC, omp > &C, + alp::Matrix< InputType1, InputStructure1, + Density::Dense, InputView1, InputImfR1, InputImfC1, omp > &A, + alp::Matrix< InputType2, InputStructure2, + Density::Dense, InputView2, InputImfR2, InputImfC2, omp > &B, + const Semiring & ring = Semiring(), + const PHASE &phase = NUMERICAL, + const std::enable_if_t< + !alp::is_object< OutputType >::value && + !alp::is_object< InputType1 >::value && + !alp::is_object< InputType2 >::value && + alp::is_semiring< Semiring >::value, + void + > * const = NULL + ) { + (void)phase; + + return internal::mxm_generic< false >( + C, A, B, + ring.getMultiplicativeOperator(), ring.getAdditiveMonoid(), ring.getMultiplicativeMonoid() + ); + + } + + /** + * Dense Matrix-Matrix multiply between structured matrices. + * Version with additive monoid and multiplicative operator + */ + template< + typename OutputType, typename InputType1, typename InputType2, + typename OutputStructure, typename OutputView, + typename OutputImfR, typename OutputImfC, + typename InputStructure1, typename InputView1, + typename InputImfR1, typename InputImfC1, + typename InputStructure2, typename InputView2, + typename InputImfR2, typename InputImfC2, + class Operator, class Monoid + > + RC mxm( + alp::Matrix< OutputType, OutputStructure, + Density::Dense, OutputView, OutputImfR, OutputImfC, omp > &C, + alp::Matrix< InputType1, InputStructure1, + Density::Dense, InputView1, InputImfR1, InputImfC1, omp > &A, + alp::Matrix< InputType2, InputStructure2, + Density::Dense, InputView2, InputImfR2, InputImfC2, omp > &B, + const Operator & mulOp, + const Monoid & addM, + const PHASE &phase = NUMERICAL, + const std::enable_if_t< + !alp::is_object< OutputType >::value && + !alp::is_object< InputType1 >::value && + !alp::is_object< InputType2 >::value && + alp::is_operator< Operator >::value && alp::is_monoid< Monoid >::value, + void + > * const = NULL + ) { + (void)phase; + + return internal::mxm_generic< false >( C, A, B, mulOp, addM, Monoid() ); + + } + +} // end namespace ``alp'' + +#endif // end ``_H_ALP_OMP_BLAS3'' + diff --git a/include/alp/omp/config.hpp b/include/alp/omp/config.hpp new file mode 100644 index 000000000..79200558f --- /dev/null +++ b/include/alp/omp/config.hpp @@ -0,0 +1,52 @@ + +/* + * Copyright 2021 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +#ifndef _H_ALP_OMP_CONFIG +#define _H_ALP_OMP_CONFIG + +#include + +#include + + +namespace alp { + + namespace config { + + /** The default sequential backend to be selected for this parallel backend. */ + static constexpr alp::Backend default_sequential_backend = _ALP_SECONDARY_BACKEND; + + class OMP : public grb::config::OMP {}; + + // Dimensions of blocks counted in number of elements per dimension + constexpr size_t BLOCK_ROW_DIM = 2; + constexpr size_t BLOCK_COL_DIM = 2; + + // Temporary square solution to accommodate the 2.5D mxm + constexpr size_t THREAD_ROW_DIM = 4; + constexpr size_t THREAD_COL_DIM = 4; + + constexpr size_t REPLICATION_FACTOR_THREADS = 1; + + + } // namespace config + +} // namespace alp + +#endif + diff --git a/include/alp/omp/exec.hpp b/include/alp/omp/exec.hpp new file mode 100644 index 000000000..ff62c14f4 --- /dev/null +++ b/include/alp/omp/exec.hpp @@ -0,0 +1,113 @@ + +/* + * Copyright 2021 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * @author A. N. Yzelman + * @date 14th of January 2022 + */ + +#ifndef _H_ALP_OMP_EXEC +#define _H_ALP_OMP_EXEC + +#include + +#include + +#ifdef _ALP_OMP_WITH_REFERENCE + #include +#endif +namespace alp { + + /** + * \internal No implementation notes. + */ + template< EXEC_MODE mode > + class Launcher< mode, omp > { + + public: + + /** \internal No implementation notes. */ + Launcher( + const size_t process_id = 0, + const size_t nprocs = 1, + const std::string hostname = "localhost", + const std::string port = "0" + ) { + (void) process_id; + (void) nprocs; + (void) hostname; + (void) port; + } + + /** \internal No implementation notes. */ + template< typename U > + RC exec( + void ( *alp_program )( const void *, const size_t, U & ), + const void *data_in, const size_t in_size, + U &data_out, + const bool broadcast = false + ) const { + (void)broadcast; // value doesn't matter for a single user process + std::cerr << "Entering exec().\n"; + // initialise GraphBLAS + RC ret = init(); + // call the GraphBLAS algorithm + if( ret == SUCCESS ) { + ( *alp_program )( data_in, in_size, data_out ); + } + // finalise the GraphBLAS + if( ret == SUCCESS ) { + ret = finalize(); + } + // and done + return ret; + } + + /** \internal No implementation notes. */ + template< typename T, typename U > + RC exec( + void ( *alp_program )( const T &, U & ), + const T &data_in, U &data_out, + const bool broadcast = false + ) { + (void)broadcast; // value doesn't matter for a single user process + std::cerr << "Entering exec().\n"; + // initialise GraphBLAS + RC ret = init(); + // call the GraphBLAS algorithm + if( ret == SUCCESS ) { + ( *alp_program )( data_in, data_out ); + } + // finalise the GraphBLAS + if( ret == SUCCESS ) { + ret = finalize(); + } + // and done + return ret; + } + + /** \internal No implementation notes.
*/ + static inline RC finalize() { + return SUCCESS; + } + + }; + +} // end namespace ``alp'' + +#endif // end ``_H_ALP_OMP_EXEC'' + diff --git a/include/alp/omp/init.hpp b/include/alp/omp/init.hpp new file mode 100644 index 000000000..4d63409f9 --- /dev/null +++ b/include/alp/omp/init.hpp @@ -0,0 +1,41 @@ + +/* + * Copyright 2021 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * @author A. N. Yzelman + * @date 14th of January 2022 + */ + +#ifndef _H_ALP_OMP_INIT +#define _H_ALP_OMP_INIT + +#include + +namespace alp { + + /** \internal No-op init */ + template<> + RC init< omp >( const size_t, const size_t, void * const ); + + /** \internal No-op finalize */ + template<> + RC finalize< omp >(); + +} // end namespace ``alp'' + +#endif // end ``_H_ALP_OMP_INIT'' + diff --git a/include/alp/omp/io.hpp b/include/alp/omp/io.hpp new file mode 100644 index 000000000..9fc66e815 --- /dev/null +++ b/include/alp/omp/io.hpp @@ -0,0 +1,189 @@ + +/* + * Copyright 2021 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * @author A. N. Yzelman + * @date 14th of January 2022 + */ + +#ifndef _H_ALP_OMP_IO +#define _H_ALP_OMP_IO + +#include + +#include + +#include "matrix.hpp" +#include "scalar.hpp" +#include "storage.hpp" + +// Include backend to which sequential work is delegated +#ifdef _ALP_OMP_WITH_REFERENCE + #include +#endif + +#define NO_CAST_ASSERT( x, y, z ) \ + static_assert( x, \ + "\n\n" \ + "********************************************************************" \ + "********************************************************************" \ + "******************************\n" \ + "* ERROR | " y " " z ".\n" \ + "********************************************************************" \ + "********************************************************************" \ + "******************************\n" \ + "* Possible fix 1 | Remove no_casting from the template parameters " \ + "in this call to " y ".\n" \ + "* Possible fix 2 | Provide a value that matches the expected type.\n" \ + "********************************************************************" \ + "********************************************************************" \ + "******************************\n" ); + +namespace alp { + + /** + * Sets all elements of the given matrix to the value of the given scalar.
+ * C = val + * + * @tparam descr + * @tparam OutputType Data type of the output matrix C + * @tparam OutputStructure Structure of the matrix C + * @tparam OutputView View type applied to the matrix C + * @tparam InputType Data type of the scalar a + * + * @param C Matrix whose values are to be set + * @param val The value to set the elements of the matrix C + * + * @return RC SUCCESS on the successful execution of the set + */ + template< Descriptor descr = descriptors::no_operation, + typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC, + typename InputType, typename InputStructure + > + RC set( + Matrix< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, omp > &C, + const Scalar< InputType, InputStructure, omp > &val + ) noexcept { + + static_assert( + !std::is_same< OutputType, void >::value, + "alp::set (set to matrix): cannot have a pattern matrix as output" + ); +#ifdef _DEBUG + std::cout << "Called alp::set (matrix-to-value, omp)" << std::endl; +#endif + // static checks + NO_CAST_ASSERT( + ( !( descr & descriptors::no_casting ) || std::is_same< InputType, OutputType >::value ), + "alp::set", "called with non-matching value types" + ); + + static_assert( + !internal::is_functor_based< + Matrix< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, omp > + >::value, + "alp::set cannot be called with a functor-based matrix as a destination." + ); + + if( !internal::getInitialized( val ) ) { + internal::setInitialized( C, false ); + return SUCCESS; + } + + const Distribution_2_5D &d = internal::getAmf( C ).getDistribution(); + + RC rc = SUCCESS; + + #pragma omp parallel for + for( size_t thread = 0; thread < config::OMP::current_threads(); ++thread ) { + const auto t_coords = d.getThreadCoords( thread ); + const auto block_grid_dims = d.getLocalBlockGridDims( t_coords ); + + RC local_rc = SUCCESS; + + for( size_t br = 0; br < block_grid_dims.first; ++br ) { + for( size_t bc = 0; bc < block_grid_dims.second; ++bc ) { + + // Get a sequential matrix view over the block + auto refC = internal::get_view( C, t_coords, br, bc ); + + // Construct a sequential Scalar container from the input Scalar + Scalar< InputType, InputStructure, config::default_sequential_backend > ref_val( *val ); + + // Delegate the call to the sequential set implementation + local_rc = local_rc ? local_rc : set( refC, ref_val ); + + if( local_rc != SUCCESS ) { + rc = local_rc; + } + } + } + } + + internal::setInitialized( C, true ); + return rc; + } + + /** + * Temporarily assuming 1-1, row-major mapping with user container. + */ + template< + typename InputType, typename Structure, typename View, + typename ImfR, typename ImfC, typename fwd_iterator + > + RC buildMatrix( + Matrix< InputType, Structure, Density::Dense, View, ImfR, ImfC, omp > &A, + const fwd_iterator &start, + const fwd_iterator &end + ) noexcept { + + static_assert( + std::is_same< InputType, typename fwd_iterator::value_type >::value, + "alp::buildMatrix (omp): Mismatching type between user-provided input " + "container and output ALP container." + ); + + const size_t m = nrows( A ); + const size_t n = ncols( A ); + + if( ( end - start ) != static_cast< std::ptrdiff_t >( m * n ) ) { + + std::cerr << "alp::buildMatrix (omp): Mismatching sizes between " + "user-provided input container and output ALP container." 
<< std::endl; + + return MISMATCH; + } + + internal::setInitialized(A, true); + + fwd_iterator it = start; + + for( size_t i = 0; i < m; ++i ) { + for( size_t j = 0; j < n; ++j ) { + internal::access( A, internal::getStorageIndex( A, i, j ) ) = *( it++ ); + } + } + + return SUCCESS; + } + +} // end namespace ``alp'' + +#undef NO_CAST_ASSERT + +#endif // end ``_H_ALP_OMP_IO'' + diff --git a/include/alp/omp/matrix.hpp b/include/alp/omp/matrix.hpp new file mode 100644 index 000000000..94e600979 --- /dev/null +++ b/include/alp/omp/matrix.hpp @@ -0,0 +1,111 @@ + +/* + * Copyright 2021 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * @author A. N. Yzelman + * @date 14th of January 2022 + */ + +#ifndef _H_ALP_OMP_MATRIX +#define _H_ALP_OMP_MATRIX + +#include + +#include +#include + +#include +#include + +#include "storage.hpp" +#include "storagebasedmatrix.hpp" +#include "vector.hpp" + +#ifdef _ALP_OMP_WITH_REFERENCE + #include // For AMFFactory +#endif + + +namespace alp { + + // Currently no backend specific implementation + + namespace internal { + + /** + * Exposes a block of the parallel matrix as a sequential ALP matrix. + * + * The underlying container (buffer/block) is obtained from the parallel + * container, while the AMF is constructed based on the properties of the + * block and the applied gather view (i.e., the IMFs associated to it). 
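+ *
+ * \par Example
+ * An illustrative sketch of the calling pattern used by the set and mxm
+ * implementations in this backend:
+ * \code
+ * const auto t_coords = d.getThreadCoords( thread );
+ * auto refC = internal::get_view( C, t_coords, br, bc );
+ * // refC is a sequential ALP matrix view; delegate to sequential primitives
+ * local_rc = set( refC, ref_val );
+ * \endcode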
+ * + */ + template< + enum view::Views target_view = view::original, + typename SourceMatrix, + typename ThreadCoords, + std::enable_if_t< + alp::is_matrix< SourceMatrix >::value + > * = nullptr + > + typename new_container_type_from< + typename SourceMatrix::template view_type< view::gather >::type + >::template change_backend< config::default_sequential_backend >::type + get_view( SourceMatrix &source, const ThreadCoords t, const size_t br, const size_t bc ) { + + // get the container + const auto &distribution = getAmf( source ).getDistribution(); + const size_t thread_id = distribution.getThreadId( t ); + const size_t block_id = distribution.getLocalBlockId( t, br, bc ); + auto &container = getLocalContainer( getContainer( source ), thread_id, block_id ); + + // make an AMF + // note: When making a view over a vector, the second imf must be imf::Zero + + // Type of AMF factory corresponding to the full local block's AMF + using original_amf_factory = alp::storage::AMFFactory< config::default_sequential_backend >::FromPolynomial< + structures::General, imf::Id, imf::Id + >; + + // AMF factory after applying the global view + using amf_factory = alp::storage::AMFFactory< config::default_sequential_backend >::Compose< + imf::Strided, imf::Strided, typename original_amf_factory::amf_type + >; + + const auto block_dims = distribution.getBlockDimensions(); + + typename amf_factory::amf_type amf = amf_factory::Create( + imf::Id( block_dims.first ), + imf::Id( block_dims.second ), + original_amf_factory::Create( imf::Id( block_dims.first ), imf::Id( block_dims.second ) ) + ); + + // create a sequential container with the container and AMF above + using target_t = typename new_container_type_from< + typename SourceMatrix::template view_type< view::gather >::type + >::template change_backend< config::default_sequential_backend >::type; + + target_t blk_matrix( container, amf ); + internal::setInitialized( blk_matrix, internal::getInitialized( source ) ); + return blk_matrix; + } + + } // namespace internal + +} // namespace alp + +#endif // end ``_H_ALP_OMP_MATRIX'' diff --git a/include/alp/omp/scalar.hpp b/include/alp/omp/scalar.hpp new file mode 100644 index 000000000..881e43139 --- /dev/null +++ b/include/alp/omp/scalar.hpp @@ -0,0 +1,204 @@ + +/* + * Copyright 2021 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _H_ALP_OMP_SCALAR +#define _H_ALP_OMP_SCALAR + + +#include +#include + +#include + +#include +#include +#include +#include +#include + +#include + + +namespace alp { + + namespace internal { + template< typename T, typename Structure > + bool getInitialized( const Scalar< T, Structure, omp > & ) noexcept; + + template< typename T, typename Structure > + void setInitialized( Scalar< T, Structure, omp > &, bool ) noexcept; + } // end namespace ``alp::internal'' + + /** + * An ALP scalar. + * + * This is an opaque data type for scalars. + * + * @tparam T The type of the scalar element. \a T shall not + * be an ALP type.
+	 * @tparam Structure One of the structures.
+	 *
+	 * \warning Creating an alp::Scalar of other ALP types is
+	 *          not allowed.
+	 *          Passing an ALP type as template parameter will lead to
+	 *          undefined behaviour.
+	 *
+	 */
+	template< typename T, typename Structure >
+	class Scalar< T, Structure, omp > {
+
+		private:
+
+			typedef Scalar< T, Structure, omp > self_type;
+
+			friend bool internal::getInitialized<>( const self_type & ) noexcept;
+
+			friend void internal::setInitialized<>( self_type &, bool ) noexcept;
+
+			// Scalar value
+			T value;
+
+			/** Whether the scalar value is currently initialized */
+			bool initialized;
+
+		public:
+			/** @see Vector::value_type. */
+			typedef T value_type;
+
+			/** @see Vector::lambda_reference */
+			typedef T& lambda_reference;
+			typedef const T& const_lambda_reference;
+
+			/**
+			 * The main ALP scalar constructor.
+			 *
+			 * The constructed object will be uninitialised after successful construction.
+			 *
+			 * @return SUCCESS This function never fails.
+			 *
+			 * \parblock
+			 * \par Performance semantics.
+			 *        -# This constructor entails \f$ \Theta(1) \f$ amount of work.
+			 *        -# This constructor may allocate \f$ \Theta(1) \f$ bytes
+			 *           of dynamic memory.
+			 *        -# This constructor will use \f$ \Theta(1) \f$ extra bytes of
+			 *           memory beyond that at constructor entry.
+			 *        -# This constructor incurs \f$ \Theta(1) \f$ data movement.
+			 *        -# This constructor \em may make system calls.
+			 * \endparblock
+			 *
+			 */
+			Scalar() : initialized( false ) {}
+
+			/**
+			 * The ALP scalar constructor for converting a C/C++ scalar to an ALP scalar.
+			 *
+			 * The constructed object will be initialized after successful construction.
+			 *
+			 * \parblock
+			 * \par Performance semantics.
+			 *        -# This constructor entails \f$ \Theta(1) \f$ amount of work.
+			 *        -# This constructor may allocate \f$ \Theta(1) \f$ bytes
+			 *           of dynamic memory.
+			 *        -# This constructor will use \f$ \Theta(1) \f$ extra bytes of
+			 *           memory beyond that at constructor entry.
+			 *        -# This constructor incurs \f$ \Theta(1) \f$ data movement.
+			 *        -# This constructor \em may make system calls.
+			 * \endparblock
+			 *
+			 */
+			explicit Scalar( const T &value ) : value( value ), initialized( true ) {}
+
+			/**
+			 * Copy constructor.
+			 *
+			 * @param other The scalar to copy. The initialization state of the copy
+			 *              reflects the state of \a other.
+			 *
+			 * \parblock
+			 * \par Performance semantics.
+			 *        -# This constructor entails \f$ \Theta(1) \f$ amount of work.
+			 *        -# This constructor allocates \f$ \Theta(1) \f$ bytes
+			 *           of dynamic memory.
+			 *        -# This constructor incurs \f$ \Theta(1) \f$ of data
+			 *           movement.
+			 *        -# This constructor \em may make system calls.
+			 * \endparblock
+			 *
+			 */
+			Scalar( const Scalar &other ) : value( other.value ), initialized( other.initialized ) {
+				// const RC rc = set( *this, other ); // note: initialized will be set as part of this call
+				// if( rc != SUCCESS ) {
+				// 	throw std::runtime_error( "alp::Scalar< T, Structure, Density::Dense, View::Original< void >, omp > (copy constructor): error during call to alp::set (" + toString( rc ) + ")" );
+				// }
+			}
+
+			/**
+			 * Move constructor. The new scalar equals the given
+			 * scalar. Invalidates the use of the input scalar.
+			 *
+			 * @param[in] other The ALP scalar to move to this new instance.
+			 *
+			 * \parblock
+			 * \par Performance semantics.
+			 *        -# This constructor entails \f$ \Theta(1) \f$ amount of work.
+			 *        -# This constructor will not allocate any new dynamic memory.
+			 *        -# This constructor will use \f$ \Theta(1) \f$ extra bytes of
+			 *           memory beyond that at constructor entry.
+			 *        -# This constructor will move \f$ \Theta(1) \f$ bytes of data.
+			 * \endparblock
+			 */
+			Scalar( Scalar &&other ) : value( other.value ), initialized( other.initialized ) {
+				other.initialized = false;
+			}
+
+			/** \internal No implementation notes. */
+			lambda_reference operator*() noexcept {
+				assert( internal::getInitialized( *this ) );
+				return value;
+			}
+
+			/** \internal No implementation notes. */
+			const_lambda_reference operator*() const noexcept {
+				assert( internal::getInitialized( *this ) );
+				return value;
+			}
+
+	}; // class Scalar with physical container
+
+	/** Identifies any omp scalar as an ALP scalar. */
+	template< typename T, typename Structure >
+	struct is_scalar< Scalar< T, Structure, omp > > : std::true_type {};
+
+	namespace internal {
+		template< typename T, typename Structure >
+		bool getInitialized( const Scalar< T, Structure, omp > &s ) noexcept {
+			return s.initialized;
+		}
+
+		template< typename T, typename Structure >
+		void setInitialized( Scalar< T, Structure, omp > &s, bool initialized ) noexcept {
+			s.initialized = initialized;
+		}
+	} // end namespace ``alp::internal''
+
+} // end namespace ``alp''
+
+#endif // end ``_H_ALP_OMP_SCALAR''
+
diff --git a/include/alp/omp/storage.hpp b/include/alp/omp/storage.hpp
new file mode 100644
index 000000000..6e3e23cf6
--- /dev/null
+++ b/include/alp/omp/storage.hpp
@@ -0,0 +1,772 @@
+/*
+ * Copyright 2021 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ *
+ * @file
+ *
+ * This file registers mechanisms for coordinate mapping between
+ * logical and physical iteration spaces.
+ *
+ */
+
+#ifndef _H_ALP_OMP_STORAGE
+#define _H_ALP_OMP_STORAGE
+
+#include
+
+#include
+
+namespace alp {
+
+	namespace internal {
+
+		/** Specialization for matrices */
+		template< typename Structure >
+		struct determine_poly_factory< Structure, imf::Id, imf::Id, omp > {
+
+			typedef storage::polynomials::FullFactory<> factory_type;
+		};
+
+		/** Specialization for vectors */
+		template< typename Structure >
+		struct determine_poly_factory< Structure, imf::Id, imf::Zero, omp > {
+
+			typedef storage::polynomials::ArrayFactory factory_type;
+		};
+
+	} // namespace internal
+
+
+	/**
+	 * Implements the mapping between global and local iteration spaces
+	 * for the shared-memory parallel backend.
+	 * The logical coordinates are represented as a pair (i, j) of row and
+	 * column positions within the matrix.
+	 * The local coordinates are represented as (tr, tc, rt, br, bc, i, j),
+	 * where:
+	 *  - tr is the thread row-coordinate
+	 *  - tc is the thread column-coordinate
+	 *  - rt is the replication factor for thread coordinates
+	 *  - br is the block row-coordinate
+	 *  - bc is the block column-coordinate
+	 *  - i is the element's row-coordinate within its block
+	 *  - j is the element's column-coordinate within its block
+	 *
+	 * This implementation assumes a block-cyclic distribution of blocks
+	 * among threads.
+	 *
+	 */
+	class Distribution_2_5D {
+
+		public:
+
+			/** Type encapsulating thread coordinates within the thread grid.
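+			 *
+			 * A sketch of the intended id-to-coordinates round trip (values
+			 * assume an illustrative configuration with
+			 * config::THREAD_ROW_DIM == 2, config::THREAD_COL_DIM == 2, and
+			 * config::REPLICATION_FACTOR_THREADS == 2, i.e., eight threads):
+			 *
+			 * \code
+			 * Distribution_2_5D d( 64, 64, 8 );
+			 * const auto t = d.getThreadCoords( 5 ); // tr = 0, tc = 1, rt = 1
+			 * assert( d.getThreadId( t ) == 5 );
+			 * \endcode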
*/ + struct ThreadCoords { + const size_t tr; + const size_t tc; + const size_t rt; + + ThreadCoords( const size_t tr, const size_t tc, const size_t rt ) : tr( tr ), tc( tc ), rt( rt ) {} + }; + + /** Type encapsulating the global element coordinate. */ + struct GlobalCoord { + + const size_t i; + const size_t j; + + GlobalCoord( const size_t i, const size_t j ) : i( i ), j( j ) {} + + }; + + /** Type encapsulating the local element coordinate. */ + struct LocalCoord { + + const ThreadCoords t; + const size_t br; + const size_t bc; + const size_t i; + const size_t j; + + LocalCoord( + const size_t tr, const size_t tc, + const size_t rt, + const size_t br, const size_t bc, + const size_t i, const size_t j + ) : + t( tr, tc, rt ), + br( br ), bc( bc ), + i( i ), j( j ) {} + + const ThreadCoords &getThreadCoords() const { + return t; + } + + }; + + /** Type encapsulating the global block coordinate. */ + struct GlobalBlockCoord { + + const size_t br; + const size_t bc; + + GlobalBlockCoord( + const size_t br, const size_t bc + ) : + br( br ), bc( bc ) {} + + }; + + // /** Type encapsulating the local block coordinate. */ + // struct LocalBlockCoord { + + // const size_t tr; + // const size_t tc; + // const size_t rt; + // const size_t br; + // const size_t bc; + + // LocalBlockCoord( + // const size_t tr, const size_t tc, + // const size_t rt, + // const size_t br, const size_t bc + // ) : + // tr( tr ), tc( tc ), + // rt( rt ), + // br( br ), bc( bc ) {} + + // }; + + private: + + /** Row and column dimensions of the associated container */ + const size_t m; + const size_t n; + /** Replication factor in thread-coordinate space */ + static constexpr size_t Rt = config::REPLICATION_FACTOR_THREADS; + /** The row and column dimensions of the global block grid */ + const size_t Br; + const size_t Bc; + /** The row and column dimensions of the thread grid */ + const size_t Tr; + const size_t Tc; + + public: + + Distribution_2_5D( + const size_t m, const size_t n, + const size_t num_threads + ) : + m( m ), n( n ), + Br( static_cast< size_t >( std::ceil( static_cast< double >( m ) / config::BLOCK_ROW_DIM ) ) ), + Bc( static_cast< size_t >( std::ceil( static_cast< double >( n ) / config::BLOCK_COL_DIM ) ) ), + Tr( ( Br > config::THREAD_ROW_DIM ) ? config::THREAD_ROW_DIM : Br ), + Tc( ( Bc > config::THREAD_COL_DIM ) ? 
config::THREAD_COL_DIM : Bc )
+			{
+				if( num_threads != config::THREAD_ROW_DIM * config::THREAD_COL_DIM * config::REPLICATION_FACTOR_THREADS ) {
+					std::cerr << "Warning: Provided number of threads cannot be factorized into a 2.5D grid:\n"
+						"\t" << num_threads << " != " << config::THREAD_ROW_DIM << " x " << config::THREAD_COL_DIM << " x " << config::REPLICATION_FACTOR_THREADS << std::endl;
+				}
+			}
+
+			// LocalBlockCoord mapBlockGlobalToLocal( const GlobalBlockCoord &g ) const {
+			// 	(void) g;
+			// 	return LocalBlockCoord( 0, 0, 0, 0, 0 );
+			// }
+
+			// GlobalBlockCoord mapBlockLocalToGlobal( const LocalBlockCoord &l ) const {
+			// 	const size_t block_id_r = l.br * Tr + l.tr;
+			// 	const size_t block_id_c = l.bc * Tc + l.tc;
+			// 	return GlobalBlockCoord( block_id_r, block_id_c ); // Temporary
+			// }
+
+			LocalCoord mapGlobalToLocal( const GlobalCoord &g ) const {
+				const size_t global_br = g.i / config::BLOCK_ROW_DIM;
+				const size_t local_br = global_br / Tr;
+				const size_t tr = global_br % Tr;
+				const size_t local_i = g.i % config::BLOCK_ROW_DIM;
+
+				const size_t global_bc = g.j / config::BLOCK_COL_DIM;
+				const size_t local_bc = global_bc / Tc;
+				const size_t tc = global_bc % Tc;
+				const size_t local_j = g.j % config::BLOCK_COL_DIM;
+
+				return LocalCoord(
+					tr, tc,
+					0, // Rt always maps to the front layer
+					local_br, local_bc,
+					local_i, local_j
+				);
+			}
+
+			// /**
+			//  * Maps coordinates from local to global space.
+			//  *
+			//  * \todo Add implementation
+			//  */
+			// GlobalCoord mapLocalToGlobal( const LocalCoord &l ) const {
+			// 	(void) l;
+			// 	return GlobalCoord( 0, 0 );
+			// }
+
+			/**
+			 * Returns the thread ID corresponding to the given thread coordinates.
+			 * The fixed thread grid also makes it possible to map left-over threads.
+			 */
+			size_t getThreadId( const ThreadCoords t ) const {
+				return t.rt * config::THREAD_ROW_DIM * config::THREAD_COL_DIM + t.tr * config::THREAD_COL_DIM + t.tc;
+			}
+
+			size_t getNumberOfThreads() const {
+				return config::THREAD_ROW_DIM * config::THREAD_COL_DIM * config::REPLICATION_FACTOR_THREADS;
+			}
+
+			/** Returns the thread grid size */
+			ThreadCoords getThreadGridDims() const {
+				return { Tr, Tc, Rt };
+			}
+
+			/** Returns the dimensions of the global block grid */
+			std::pair< size_t, size_t > getGlobalBlockGridDims() const {
+				return { Br, Bc };
+			}
+
+			/** Returns the dimensions of the block grid associated to the given thread */
+			std::pair< size_t, size_t > getLocalBlockGridDims( const ThreadCoords t ) const {
+				// the second summand accounts for the case where the blocks
+				// do not divide evenly among the threads
+				const size_t blocks_r = Br / Tr + ( t.tr < Br % Tr ? 1 : 0 );
+				const size_t blocks_c = Bc / Tc + ( t.tc < Bc % Tc ? 1 : 0 );
+				return { blocks_r, blocks_c };
+			}
+
+			// /** Returns the global block coordinates based on the thread and local block coordinates */
+			// std::pair< size_t, size_t > getGlobalBlockCoords( const size_t tr, const size_t tc, const size_t br, const size_t bc ) const {
+			// 	const size_t global_br = br * Tr + tr % Tr;
+			// 	const size_t global_bc = bc * Tc + tc % Tc;
+			// 	return { global_br, global_bc };
+			// }
+
+			// size_t getGlobalBlockId( const size_t tr, const size_t tc, const size_t br, const size_t bc ) const {
+			// 	const auto global_coords = getGlobalBlockCoords( tr, tc, br, bc );
+			// 	return global_coords.first * Bc + global_coords.second;
+			// }
+
+			size_t getLocalBlockId( const LocalCoord &local ) const {
+				return local.br * getLocalBlockGridDims( local.getThreadCoords() ).second + local.bc;
+			}
+
+			size_t getLocalBlockId( const ThreadCoords &t, const size_t br, const size_t bc ) const {
+				return br * getLocalBlockGridDims( t ).second + bc;
+			}
+
+			///**
+			// * Returns the dimensions of the block given by the thread and
+			// * local block coordinates
+			// */
+			//std::pair< size_t, size_t > getBlockDimensions( const size_t tr, const size_t tc, const size_t br, const size_t bc ) const {
+			//	const auto global_block_coords = getGlobalBlockCoords( tr, tc, br, bc );
+			//	const size_t block_height = ( global_block_coords.first < Br - 1 ) ?
+			//		( config::BLOCK_ROW_DIM ) :
+			//		( m - config::BLOCK_ROW_DIM * ( Br - 1 ) );
+			//	const size_t block_width = ( global_block_coords.second < Bc - 1 ) ?
+			//		( config::BLOCK_COL_DIM ) :
+			//		( n - config::BLOCK_COL_DIM * ( Bc - 1 ) );
+			//	return { block_height, block_width };
+			//}
+
+			/** Returns the dimensions of a block; all blocks share the same dimensions */
+			constexpr std::pair< size_t, size_t > getBlockDimensions() const {
+				return { config::BLOCK_ROW_DIM, config::BLOCK_COL_DIM };
+			}
+
+			/** Returns the size (in number of elements) of a block */
+			size_t getBlockSize() const {
+				const auto dims = getBlockDimensions();
+				return dims.first * dims.second;
+			}
+
+			/** For a given block, returns its offset from the beginning of the buffer in which it is stored */
+			size_t getBlocksOffset( const ThreadCoords &t, const size_t br, const size_t bc ) const {
+				// The offset is calculated as the sum of sizes of all previous blocks
+				const size_t block_coord_1D = br * getLocalBlockGridDims( t ).second + bc;
+				return block_coord_1D * getBlockSize();
+			}
+
+			ThreadCoords getThreadCoords( const size_t thread_id ) const {
+				const size_t rt = thread_id / ( config::THREAD_ROW_DIM * config::THREAD_COL_DIM );
+				const size_t tr = ( thread_id % ( config::THREAD_ROW_DIM * config::THREAD_COL_DIM ) ) / config::THREAD_COL_DIM;
+				const size_t tc = ( thread_id % ( config::THREAD_ROW_DIM * config::THREAD_COL_DIM ) ) % config::THREAD_COL_DIM;
+				return { tr, tc, rt };
+			}
+
+			bool isActiveThread( const ThreadCoords &t ) const {
+				return ( t.tr < Tr ) && ( t.tc < Tc ) && ( t.rt < Rt );
+			}
+
+			bool isActiveThread( const size_t thread_id ) const {
+				const auto th_coords = getThreadCoords( thread_id );
+				return isActiveThread( th_coords );
+			}
+
+	};
+
+	namespace storage {
+
+		/**
+		 * AMF for the shared-memory parallel backend.
+		 *
+		 * This implementation makes the following assumptions:
+		 *  - all blocks use the same storage scheme, independent of their
+		 *    non-zero structure
+		 *
+		 * @tparam ImfR Index-Mapping Function associated to the row dimension.
+		 * @tparam ImfC Index-Mapping Function associated to the column dimension.
+		 * @tparam PolyFactory The type of factory for storage polynomials
+		 *                     used to construct polynomials for all blocks.
+		 */
+		template< typename ImfR, typename ImfC, typename PolyFactory >
+		class AMF< ImfR, ImfC, PolyFactory, omp > {
+
+			friend class AMFFactory< omp >;
+
+			public:
+
+				/** Expose static properties */
+				typedef ImfR imf_r_type;
+				typedef ImfC imf_c_type;
+				typedef PolyFactory poly_factory_type;
+				typedef typename PolyFactory::poly_type mapping_polynomial_type;
+
+				/** Expose types defined within the class */
+				typedef struct StorageIndexType {
+
+					size_t buffer_id;
+					size_t block_id;
+					size_t offset;
+
+					StorageIndexType( const size_t buffer_id, const size_t block_id, const size_t offset ) :
+						buffer_id( buffer_id ), block_id( block_id ), offset( offset ) {}
+
+				} storage_index_type;
+
+			private:
+
+				const imf_r_type imf_r;
+				const imf_c_type imf_c;
+
+				constexpr static bool is_matrix = std::is_same< ImfC, imf::Id >::value;
+
+				/**
+				 * Number of threads used to initialize the associated container.
+				 * This impacts the number of allocated blocks.
+				 */
+				const size_t num_threads;
+
+				const Distribution_2_5D distribution;
+
+				AMF(
+					ImfR imf_r,
+					ImfC imf_c,
+					size_t num_threads = config::OMP::threads()
+				) :
+					imf_r( imf_r ), imf_c( imf_c ),
+					num_threads( num_threads ),
+					distribution( imf_r.n, imf_c.n, num_threads ) {
+#ifdef DEBUG
+					std::cout << "Entering AMF normal constructor\n";
+#endif
+				}
+
+				AMF( const AMF & ) = delete;
+				AMF &operator=( const AMF & ) = delete;
+
+			public:
+
+				AMF( AMF &&amf ) :
+					imf_r( std::move( amf.imf_r ) ),
+					imf_c( std::move( amf.imf_c ) ),
+					num_threads( amf.num_threads ),
+					distribution( std::move( amf.distribution ) ) {
+#ifdef DEBUG
+					std::cout << "Entering OMP AMF move constructor\n";
+#endif
+				}
+
+				const Distribution_2_5D &getDistribution() const {
+					return distribution;
+				}
+
+				/**
+				 * Returns the dimensions of the logical layout of the associated container.
+				 *
+				 * @return A pair of two values, number of rows and columns, respectively.
+				 */
+				std::pair< size_t, size_t > getLogicalDimensions() const {
+					return std::make_pair( imf_r.n, imf_c.n );
+				}
+
+				/**
+				 * @brief Returns a storage index based on the coordinates in the
+				 *        logical iteration space.
+				 *
+				 * @param[in] i row-coordinate
+				 * @param[in] j column-coordinate
+				 * @param[in] s current process ID
+				 * @param[in] P total number of processes
+				 *
+				 * @return storage index corresponding to the provided logical
+				 *         coordinates and parameters \a s and \a P.
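+				 *
+				 * \par Example
+				 * A worked illustration, assuming 2 x 2 blocks on a 2 x 2
+				 * thread grid: element (0, 3) falls into global block (0, 1),
+				 * which the block-cyclic distribution assigns to thread
+				 * column 1. The resulting storage index then consists of that
+				 * thread's buffer id, local block id 0, and in-block offset
+				 * 0 * BLOCK_COL_DIM + 1 = 1.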
+				 *
+				 */
+				storage_index_type getStorageIndex( const size_t i, const size_t j, const size_t s, const size_t P ) const {
+					(void) s;
+					(void) P;
+					const typename Distribution_2_5D::GlobalCoord global( imf_r.map( i ), imf_c.map( j ) );
+					const typename Distribution_2_5D::LocalCoord local = distribution.mapGlobalToLocal( global );
+
+					const size_t thread = distribution.getThreadId( local.getThreadCoords() );
+					const size_t local_block = distribution.getLocalBlockId( local );
+					// row-major offset within a block of BLOCK_ROW_DIM x BLOCK_COL_DIM elements
+					const size_t local_element = local.i * config::BLOCK_COL_DIM + local.j;
+
+					return storage_index_type( thread, local_block, local_element );
+				}
+
+				/**
+				 * Returns coordinates in the logical iteration space based on
+				 * the storage index.
+				 *
+				 * @param[in] storageIndex storage index in the physical
+				 *                         iteration space
+				 * @param[in] s            current process ID
+				 * @param[in] P            total number of processes
+				 *
+				 * @return a pair of row- and column-coordinates in the
+				 *         logical iteration space.
+				 */
+				std::pair< size_t, size_t > getCoords( const size_t storageIndex, const size_t s, const size_t P ) const;
+
+		}; // class AMF
+
+		/**
+		 * Collects AMF factory classes.
+		 */
+		template<>
+		struct AMFFactory< omp > {
+
+			/**
+			 * Specialization of AMFFactory for the shared-memory parallel backend.
+			 *
+			 * @tparam ViewImfR  The type of the IMF applied to the row coordinate.
+			 * @tparam ViewImfC  The type of the IMF applied to the column coordinate.
+			 * @tparam SourceAMF The type of the source AMF.
+			 *
+			 */
+			template< typename ViewImfR, typename ViewImfC, typename SourceAMF >
+			struct Compose {
+
+				private:
+
+					/** Extract source IMF and polynomial types */
+					typedef typename SourceAMF::imf_r_type SourceImfR;
+					typedef typename SourceAMF::imf_c_type SourceImfC;
+					typedef typename SourceAMF::mapping_polynomial_type SourcePoly;
+
+					/** Compose row and column IMFs */
+					typedef typename imf::ComposedFactory< SourceImfR, ViewImfR >::type composed_imf_r_type;
+					typedef typename imf::ComposedFactory< SourceImfC, ViewImfC >::type composed_imf_c_type;
+
+					/** Fuse composed row IMF into the target polynomial */
+					typedef typename polynomials::fuse_on_i<
+						composed_imf_r_type,
+						SourcePoly
+					> fused_row;
+
+					/** Fuse composed column IMF into the intermediate polynomial obtained above */
+					typedef typename polynomials::fuse_on_j<
+						composed_imf_c_type,
+						typename fused_row::resulting_polynomial_type
+					> fused_row_col;
+
+					typedef typename fused_row::resulting_imf_type final_imf_r_type;
+					typedef typename fused_row_col::resulting_imf_type final_imf_c_type;
+					typedef typename fused_row_col::resulting_polynomial_type final_polynomial_type;
+
+				public:
+
+					typedef AMF< final_imf_r_type, final_imf_c_type, final_polynomial_type, omp > amf_type;
+
+					static amf_type Create( ViewImfR imf_r, ViewImfC imf_c, const AMF< SourceImfR, SourceImfC, SourcePoly, omp > &amf ) {
+						composed_imf_r_type composed_imf_r = imf::ComposedFactory< SourceImfR, ViewImfR >::create( amf.imf_r, imf_r );
+						composed_imf_c_type composed_imf_c = imf::ComposedFactory< SourceImfC, ViewImfC >::create( amf.imf_c, imf_c );
+						return amf_type(
+							fused_row::CreateImf( composed_imf_r ),
+							fused_row_col::CreateImf( composed_imf_c ),
+							fused_row_col::CreatePolynomial(
+								composed_imf_c,
+								fused_row::CreatePolynomial( composed_imf_r, amf.map_poly )
+							),
+							amf.storage_dimensions
+						);
+					}
+
+					Compose() = delete;
+
+			}; // class Compose
+
+			/**
+			 * @brief Describes an AMF for a container that requires allocation
+			 *        and exposes the AMF's type and a factory method to create it.
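+			 *
+			 * A construction sketch (illustrative; assumes an m x n general
+			 * matrix, i.e., Id IMFs in both dimensions):
+			 *
+			 * \code
+			 * using factory = AMFFactory< omp >::FromPolynomial<
+			 * 	structures::General, imf::Id, imf::Id
+			 * >;
+			 * auto amf = factory::Create( imf::Id( m ), imf::Id( n ) );
+			 * \endcode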
+			 *
+			 * A container that requires allocation is accompanied by Id IMFs for
+			 * both row and column dimensions and the provided mapping polynomial.
+			 *
+			 * @tparam Structure The structure of the container.
+			 * @tparam ImfR      The row IMF type of the original container.
+			 * @tparam ImfC      The column IMF type of the original container.
+			 *
+			 */
+			template< typename Structure, typename ImfR, typename ImfC >
+			struct FromPolynomial {
+
+				// Ensure compatibility of IMF types.
+				// Original Matrix has imf::Id as both IMFs.
+				// Original Vector has ImfR = imf::Id and ImfC = imf::Zero.
+				static_assert(
+					std::is_same< ImfR, imf::Id >::value &&
+					( std::is_same< ImfC, imf::Id >::value || std::is_same< ImfC, imf::Zero >::value ),
+					"AMF factory FromPolynomial can only be used for an original container."
+				);
+
+				typedef typename internal::determine_poly_factory< Structure, ImfR, ImfC, omp >::factory_type PolyFactory;
+
+				typedef AMF< imf::Id, imf::Id, PolyFactory, omp > amf_type;
+
+				/**
+				 * Factory method used by 2D containers.
+				 *
+				 * @param[in] imf_r Row IMF
+				 * @param[in] imf_c Column IMF
+				 *
+				 * @return An AMF object of the type \a amf_type
+				 *
+				 */
+				static amf_type Create( imf::Id imf_r, imf::Id imf_c ) {
+					return amf_type( imf_r, imf_c );
+				}
+
+				/**
+				 * Factory method used by 1D containers.
+				 *
+				 * Exploits the fact that fusion of strided IMFs into the polynomial
+				 * always succeeds and results in Id IMFs. As a result, the
+				 * constructed AMF is of the type \a amf_type.
+				 *
+				 * @param[in] imf_r Row IMF
+				 * @param[in] imf_c Column IMF
+				 *
+				 * @return An AMF object of the type \a amf_type
+				 *
+				 * \note \internal To exploit the existing mechanism for IMF fusion
+				 *                 into the polynomial, this method creates a
+				 *                 dummy AMF out of two Id IMFs and the provided
+				 *                 polynomial and composes the provided Strided
+				 *                 IMFs with the dummy AMF.
+				 */
+				static amf_type Create( imf::Id imf_r, imf::Zero imf_c ) {
+
+					/**
+					 * Ensure that the assumptions do not break upon potential
+					 * future changes to AMFFactory::Compose.
+					 */
+					static_assert(
+						std::is_same<
+							amf_type,
+							typename Compose< imf::Id, imf::Zero, AMF< imf::Id, imf::Id, typename PolyFactory::poly_type, omp > >::amf_type
+						>::value,
+						"The factory method returns the object of different type than declared. This is a bug."
+					);
+					return Compose< imf::Id, imf::Zero, AMF< imf::Id, imf::Id, typename PolyFactory::poly_type, omp > >::Create(
+						imf_r, imf_c,
+						FromPolynomial< structures::General, imf::Id, imf::Zero >::Create( imf::Id( imf_r.N ), imf::Id( imf_c.N ) )
+					);
+				}
+
+				FromPolynomial() = delete;
+
+			}; // class FromPolynomial
+
+			/**
+			 * @brief Transforms the provided AMF by applying the provided View type.
+			 *
+			 * Exposes the type of the resulting AMF and implements a factory
+			 * method that creates objects of such type.
+			 *
+			 * @tparam view      The enum value of the desired view type.
+			 * @tparam SourceAMF The type of the source AMF.
+			 *
+			 */
+			template< enum view::Views view, typename SourceAMF >
+			struct Reshape {
+
+				typedef SourceAMF amf_type;
+
+				static amf_type Create( const SourceAMF &amf ) {
+					throw std::invalid_argument( "Not implemented for the provided view type." );
+					return amf; // unreachable; silences missing-return warnings
+				}
+
+				Reshape() = delete;
+
+			}; // class Reshape
+
+			template< typename SourceAMF >
+			struct Reshape< view::original, SourceAMF > {
+
+				typedef SourceAMF amf_type;
+
+				static amf_type Create( const SourceAMF &amf ) {
+					return amf_type( amf.imf_r, amf.imf_c, amf.map_poly, amf.storage_dimensions );
+				}
+
+				Reshape() = delete;
+
+			}; // class Reshape< original, ... >
+
+			template< typename SourceAMF >
+			struct Reshape< view::transpose, SourceAMF > {
+
+				typedef AMF<
+					typename SourceAMF::imf_c_type,
+					typename SourceAMF::imf_r_type,
+					typename polynomials::apply_view<
+						view::transpose,
+						typename SourceAMF::mapping_polynomial_type
+					>::type,
+					omp
+				> amf_type;
+
+				static amf_type Create( const SourceAMF &amf ) {
+					typedef typename polynomials::apply_view< view::transpose, typename SourceAMF::mapping_polynomial_type >::type new_mapping_polynomial_type;
+					return AMF<
+						typename SourceAMF::imf_c_type,
+						typename SourceAMF::imf_r_type,
+						new_mapping_polynomial_type,
+						omp
+					>(
+						amf.imf_c,
+						amf.imf_r,
+						new_mapping_polynomial_type(
+							amf.map_poly.ay2, amf.map_poly.ax2, amf.map_poly.axy,
+							amf.map_poly.ay, amf.map_poly.ax,
+							amf.map_poly.a0
+						),
+						amf.storage_dimensions
+					);
+				}
+
+				Reshape() = delete;
+
+			}; // class Reshape< transpose, ... >
+
+			/**
+			 * Specialization for diagonal views
+			 *
+			 * A diagonal view is implemented by taking a square view over the matrix.
+			 *
+			 * \note \internal Converts a mapping polynomial from bivariate-quadratic
+			 *                 to univariate-quadratic by summing j-factors into the
+			 *                 corresponding i-factors.
+			 *                 Implicitly applies the largest possible square view by
+			 *                 using Strided IMFs.
+			 *
+			 */
+			template< typename SourceAMF >
+			struct Reshape< view::diagonal, SourceAMF > {
+
+				private:
+
+					/** Short name of the original mapping polynomial type */
+					typedef typename SourceAMF::mapping_polynomial_type orig_p;
+
+					/** The type of the resulting polynomial */
+					typedef polynomials::BivariateQuadratic<
+						orig_p::Ax2 || orig_p::Ay2 || orig_p::Axy, 0, 0,
+						orig_p::Ax || orig_p::Ay, 0,
+						orig_p::A0, orig_p::D
+					> new_poly_type;
+
+				public:
+
+					typedef AMF< imf::Id, imf::Zero, new_poly_type, omp > amf_type;
+
+					static amf_type Create( const SourceAMF &amf ) {
+						assert( amf.getLogicalDimensions().first == amf.getLogicalDimensions().second );
+						return amf_type(
+							imf::Id( amf.getLogicalDimensions().first ),
+							imf::Zero( 1 ),
+							new_poly_type(
+								orig_p::Ax2 * amf.map_poly.ax2 + orig_p::Ay2 * amf.map_poly.ay2 + orig_p::Axy * amf.map_poly.axy, 0, 0,
+								orig_p::Ax * amf.map_poly.ax + orig_p::Ay * amf.map_poly.ay, 0,
+								amf.map_poly.a0
+							),
+							amf.storage_dimensions
+						);
+					}
+
+					Reshape() = delete;
+
+			}; // class Reshape< diagonal, ... >
+
+			/**
+			 * Specialization for matrix views over vectors
+			 *
+			 * \note \internal The resulting AMF is equivalent to applying
+			 *                 a composition with two Id IMFs.
+			 *
+			 */
+			template< typename SourceAMF >
+			struct Reshape< view::matrix, SourceAMF > {
+
+				typedef typename Compose< imf::Id, imf::Id, SourceAMF >::amf_type amf_type;
+
+				static amf_type Create( const SourceAMF &amf ) {
+					return Compose< imf::Id, imf::Id, SourceAMF >::Create(
+						imf::Id( amf.getLogicalDimensions().first ),
+						imf::Id( amf.getLogicalDimensions().second ),
+						amf
+					);
+				}
+
+				Reshape() = delete;
+
+			}; // class Reshape< matrix, ...
> + + }; // class AMFFactory + + } // namespace storage + +} // namespace alp + +#endif // _H_ALP_OMP_STORAGE diff --git a/include/alp/omp/storagebasedmatrix.hpp b/include/alp/omp/storagebasedmatrix.hpp new file mode 100644 index 000000000..50b333300 --- /dev/null +++ b/include/alp/omp/storagebasedmatrix.hpp @@ -0,0 +1,178 @@ + +/* + * Copyright 2021 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _H_ALP_OMP_STORAGEBASEDMATRIX +#define _H_ALP_OMP_STORAGEBASEDMATRIX + +#include +#include +#include +#include + +#include + +#include "config.hpp" +#include "storage.hpp" +#include "vector.hpp" + + +namespace alp { + + namespace internal { + + /** + * Matrix container specialization + * Implements both original containers and views on containers. + * @tparam requires_allocation True if the class is an original container + * False if the class is a view of another matrix + */ + template< typename T, typename AmfType, bool requires_allocation > + class StorageBasedMatrix< T, AmfType, requires_allocation, omp > : + public MatrixBase< StorageBasedMatrix< T, AmfType, requires_allocation, omp > > { + + template< + typename MatrixType, + std::enable_if_t< internal::is_storage_based< MatrixType >::value > * + > + friend size_t getStorageDimensions( const MatrixType &A ) noexcept; + + /** Get the reference to the AMF of a storage-based matrix */ + template< + typename MatrixType, + std::enable_if_t< internal::is_storage_based< MatrixType >::value > * + > + friend const typename MatrixType::amf_type &getAmf( const MatrixType &A ) noexcept; + + public: + + /** Expose static properties */ + + typedef T value_type; + typedef AmfType amf_type; + typedef typename AmfType::imf_r_type imf_r_type; + typedef typename AmfType::imf_c_type imf_c_type; + /** Type returned by access function */ + typedef T &access_type; + typedef const T &const_access_type; + /** Type of the index used to access the physical storage */ + typedef typename AmfType::storage_index_type storage_index_type; + + protected: + typedef StorageBasedMatrix< T, AmfType, requires_allocation, omp > self_type; + friend MatrixBase< self_type >; + + typedef typename std::conditional< + requires_allocation, + internal::Vector< T, omp >, + internal::Vector< T, omp > & + >::type container_type; + + /** A container-type view is characterized by its association with a physical container */ + container_type container; + + /** + * Access mapping function maps a pair of logical coordinates + * into the concrete coordinate inside the actual container. + * \see AMF + */ + AmfType amf; + + /** + * determines the size of the matrix via the domain of + * the index mapping functions. + * + * @return A pair of dimensions. 
+				 */
+				std::pair< size_t, size_t > dims() const noexcept {
+					return amf.getLogicalDimensions();
+				}
+
+				size_t getStorageDimensions() const noexcept {
+					return amf.getStorageDimensions();
+				}
+
+				friend const Vector< T, omp > &getContainer( const self_type &A ) {
+					return A.container;
+				}
+
+				friend Vector< T, omp > &getContainer( self_type &A ) {
+					return A.container;
+				}
+
+				bool getInitialized() const noexcept {
+					return internal::getInitialized( container );
+				}
+
+				void setInitialized( const bool initialized ) noexcept {
+					internal::setInitialized( container, initialized );
+				}
+
+				const AmfType &getAmf() const noexcept {
+					return amf;
+				}
+
+				/**
+				 * Returns a constant reference to the element corresponding to
+				 * the provided storage index.
+				 *
+				 * @param si storage index in the physical iteration space.
+				 *
+				 * @return const reference or value of the element at the given position.
+				 *
+				 * \note This function may result in accessing memory belonging to
+				 *       another thread, which may incur a performance penalty.
+				 */
+				const_access_type access( const storage_index_type &si ) const {
+					return getRaw( getLocalContainer( container, si.buffer_id, si.block_id ) )[ si.offset ];
+				}
+
+				access_type access( const storage_index_type &si ) {
+					return getRaw( getLocalContainer( container, si.buffer_id, si.block_id ) )[ si.offset ];
+				}
+
+				storage_index_type getStorageIndex( const size_t i, const size_t j, const size_t s, const size_t P ) const {
+					return amf.getStorageIndex( i, j, s, P );
+				}
+
+				/**
+				 * Constructs a new structured matrix container.
+				 *
+				 * @param amf The access mapping function assigned to this matrix.
+				 */
+				StorageBasedMatrix( AmfType &&amf ) :
+					// \todo enable only if ImfR and ImfC are imf::Id
+					container( amf.getDistribution() ),
+					amf( std::move( amf ) ) {
+#ifdef DEBUG
+					std::cout << "Entering OMP StorageBasedMatrix constructor\n";
+#endif
+				}
+
+				/** View on another container */
+				StorageBasedMatrix( Vector< T, omp > &container, AmfType &&amf ) :
+					container( container ),
+					amf( std::move( amf ) ) {}
+
+		}; // class StorageBasedMatrix
+
+	} // namespace internal
+
+} // namespace alp
+
+#endif // end ``_H_ALP_OMP_STORAGEBASEDMATRIX''
diff --git a/include/alp/omp/vector.hpp b/include/alp/omp/vector.hpp
new file mode 100644
index 000000000..b653de004
--- /dev/null
+++ b/include/alp/omp/vector.hpp
@@ -0,0 +1,355 @@
+
+/*
+ * Copyright 2021 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * @author A. N. Yzelman
+ * @date 14th of January 2022
+ */
+
+#ifndef _H_ALP_OMP_VECTOR
+#define _H_ALP_OMP_VECTOR
+
+
+#include
+#include
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#include "config.hpp"
+#include "matrix.hpp"
+#include "storage.hpp"
+
+#ifdef _ALP_OMP_WITH_REFERENCE
+	#include
+#endif
+
+
+namespace alp {
+
+	namespace internal {
+
+		template< typename T >
+		size_t getLength( const Vector< T, omp > & ) noexcept;
+
+		template< typename T >
+		const bool & getInitialized( const Vector< T, omp > & v ) noexcept;
+
+		template< typename T >
+		void setInitialized( Vector< T, omp > & v, const bool initialized ) noexcept;
+
+		template< typename T >
+		const Vector< T, config::default_sequential_backend > &getLocalContainer(
+			const Vector< T, omp > &v, const size_t thread, const size_t block
+		) noexcept;
+
+		template< typename T >
+		Vector< T, config::default_sequential_backend > &getLocalContainer(
+			Vector< T, omp > &v, const size_t thread, const size_t block
+		) noexcept;
+
+		/**
+		 * The parallel shared memory implementation of the ALP/Dense vector.
+		 *
+		 * @tparam T The type of an element of this vector. \a T shall not be a
+		 *           GraphBLAS type.
+		 *
+		 * \warning Creating an alp::Vector of other GraphBLAS types is
+		 *          not allowed.
+		 *          Passing a GraphBLAS type as template parameter will lead to
+		 *          undefined behaviour.
+		 */
+		template< typename T >
+		class Vector< T, omp > {
+
+			/* ********************
+				IO friends
+			   ******************** */
+
+			friend size_t internal::getLength< T >( const Vector< T, omp > & ) noexcept;
+
+			friend const bool & internal::getInitialized< T >( const Vector< T, omp > & ) noexcept;
+
+			friend void internal::setInitialized< T >( Vector< T, omp > & , bool ) noexcept;
+
+			friend const Vector< T, config::default_sequential_backend > &getLocalContainer<>(
+				const Vector< T, omp > &v, const size_t thread, const size_t block
+			) noexcept;
+
+			friend Vector< T, config::default_sequential_backend > &getLocalContainer<>(
+				Vector< T, omp > &v, const size_t thread, const size_t block
+			) noexcept;
+
+			private:
+
+				/** The distribution of the vector among threads. */
+				Distribution_2_5D distr;
+
+				/** The number of buffers. */
+				size_t num_buffers;
+
+				/** The array of buffers. */
+				T **buffers;
+
+				/** Containers constructed around portions of buffers. */
+				using container_type = Vector< T, config::default_sequential_backend >;
+				std::vector< std::vector< container_type > > containers;
+
+				/** Whether the container is presently initialized. */
+				bool initialized;
+
+			public:
+
+				/** Exposes the element type. */
+				typedef T value_type;
+
+				/**
+				 * The main ALP/Dense vector constructor.
+				 *
+				 * The constructed object will be uninitialised after successful construction.
+				 *
+				 * @param d   The distribution that determines the number of
+				 *            buffers and blocks to allocate.
+				 * @param cap The requested capacity (currently unused).
+				 *
+				 * @return SUCCESS This function never fails.
+				 *
+				 * \parblock
+				 * \par Performance semantics.
+				 *        -# This constructor entails \f$ \Theta(1) \f$ amount of work.
+				 *        -# This constructor may allocate \f$ \Theta( mn ) \f$ bytes
+				 *           of dynamic memory.
+				 *        -# This constructor will use \f$ \Theta(1) \f$ extra bytes of
+				 *           memory beyond that at constructor entry.
+				 *        -# This constructor incurs \f$ \Theta(1) \f$ data movement.
+				 *        -# This constructor \em may make system calls.
+				 * \endparblock
+				 *
+				 * \warning Avoid the use of this constructor within performance critical
+				 *          code sections.
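+				 *
+				 * A usage sketch (illustrative; \a m and \a n are assumed to
+				 * be defined by the caller):
+				 *
+				 * \code
+				 * const alp::Distribution_2_5D d( m, n, alp::config::OMP::threads() );
+				 * alp::internal::Vector< double, alp::omp > x( d );
+				 * \endcode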
+				 */
+				Vector(
+					const Distribution_2_5D &d,
+					const size_t cap = 0
+				) : distr( d ),
+					num_buffers( d.getNumberOfThreads() ),
+					containers( num_buffers ),
+					initialized( false ) {
+
+					(void) cap;
+
+#ifdef DEBUG
+					std::cout << "Entered OMP internal::Vector constructor\n";
+#endif
+
+					// TODO: Implement allocation properly
+					buffers = new ( std::nothrow ) value_type*[ num_buffers ];
+					if( ( num_buffers > 0 ) && ( buffers == nullptr ) ) {
+						throw std::runtime_error( "Could not allocate memory during alp::Vector construction." );
+					}
+
+					// exceptions may not escape a parallel region; record
+					// allocation failures and throw after the region instead
+					bool alloc_failed = false;
+
+					#pragma omp parallel
+					{
+						const size_t thread = config::OMP::current_thread_ID();
+
+						const auto t_coords = d.getThreadCoords( thread );
+
+						if( d.isActiveThread( t_coords ) ) {
+
+							const auto block_grid_dims = d.getLocalBlockGridDims( t_coords );
+
+							// Assuming that all blocks are of the same size
+							const size_t alloc_size = block_grid_dims.first * block_grid_dims.second * d.getBlockSize();
+
+#ifdef DEBUG
+							#pragma omp critical
+							{
+								if( thread != config::OMP::current_thread_ID() ) {
+									std::cout << "Warning: thread != OMP::current_thread_id()\n";
+								}
+								std::cout << "Thread with global coordinates tr = " << t_coords.tr << " tc = " << t_coords.tc
+									<< " on OpenMP thread " << config::OMP::current_thread_ID()
+									<< " allocating buffer of " << alloc_size << " elements "
+									<< " holding " << block_grid_dims.first << " x " << block_grid_dims.second << " blocks.\n";
+							}
+#endif
+
+							// TODO: Implement allocation properly
+							buffers[ thread ] = new ( std::nothrow ) value_type[ alloc_size ];
+
+							if( buffers[ thread ] == nullptr ) {
+								#pragma omp atomic write
+								alloc_failed = true;
+							} else {
+
+								// Reserve space for all internal container wrappers to avoid re-allocation
+								containers[ thread ].reserve( block_grid_dims.first * block_grid_dims.second );
+
+								// Populate the array of internal container wrappers
+								for( size_t br = 0; br < block_grid_dims.first; ++br ) {
+									for( size_t bc = 0; bc < block_grid_dims.second; ++bc ) {
+										const size_t offset = d.getBlocksOffset( t_coords, br, bc );
+										containers[ thread ].emplace_back( &( buffers[ thread ][ offset ] ), d.getBlockSize() );
+									}
+								}
+
+								// Ensure that the array contains the expected number of containers
+								assert( containers[ thread ].size() == block_grid_dims.first * block_grid_dims.second );
+
+							}
+						} // End active threads allocation
+					}
+					if( alloc_failed ) {
+						throw std::runtime_error( "Could not allocate memory during alp::Vector construction." );
+					}
+				}
+
+				/**
+				 * Copy constructor.
+				 *
+				 * @param other The vector to copy. The initialization state of the copy
+				 *              reflects the state of \a other.
+				 *
+				 * \parblock
+				 * \par Performance semantics.
+				 *      Allocates the same capacity as the \a other vector, even if the
+				 *      actual number of elements contained in \a other is less.
+				 *        -# This constructor entails \f$ \Theta(1) \f$ amount of work.
+				 *        -# This constructor allocates \f$ \Theta(\max{mn, cap} ) \f$ bytes
+				 *           of dynamic memory.
+				 *        -# This constructor incurs \f$ \Theta(mn) \f$ of data
+				 *           movement.
+				 *        -# This constructor \em may make system calls.
+				 * \endparblock
+				 *
+				 * \warning Avoid the use of this constructor within performance critical
+				 *          code sections.
+				 */
+				Vector( const Vector< T, omp > &other ) : Vector( other.distr ) {
+					initialized = other.initialized;
+					// const RC rc = set( *this, other ); // note: initialized will be set as part of this call
+					// if( rc != SUCCESS ) {
+					// 	throw std::runtime_error( "alp::Vector< T, omp > (copy constructor): error during call to alp::set (" + toString( rc ) + ")" );
+					// }
+				}
+
+				/**
+				 * Move constructor.
+				 * The new vector equals the given
+				 * vector. Invalidates the use of the input vector.
+				 *
+				 * @param[in] other The GraphBLAS vector to move to this new instance.
+				 *
+				 * \parblock
+				 * \par Performance semantics.
+				 *        -# This constructor entails \f$ \Theta(1) \f$ amount of work.
+				 *        -# This constructor will not allocate any new dynamic memory.
+				 *        -# This constructor will use \f$ \Theta(1) \f$ extra bytes of
+				 *           memory beyond that at constructor entry.
+				 *        -# This constructor will move \f$ \Theta(1) \f$ bytes of data.
+				 * \endparblock
+				 */
+				Vector( Vector< T, omp > &&other ) :
+					distr( other.distr ),
+					num_buffers( other.num_buffers ),
+					buffers( other.buffers ),
+					containers( std::move( other.containers ) ),
+					initialized( other.initialized ) {
+					other.buffers = nullptr;
+					other.initialized = false;
+				}
+
+				/**
+				 * Vector destructor.
+				 *
+				 * \parblock
+				 * \par Performance semantics.
+				 *        -# This destructor entails \f$ \Theta(1) \f$ amount of work.
+				 *        -# This destructor will not perform any memory allocations.
+				 *        -# This destructor will use \f$ \mathcal{O}(1) \f$ extra bytes of
+				 *           memory beyond that at constructor entry.
+				 *        -# This destructor will move \f$ \Theta(1) \f$ bytes of data.
+				 *        -# This destructor makes system calls.
+				 * \endparblock
+				 *
+				 * \warning Avoid calling destructors from within performance critical
+				 *          code sections.
+				 */
+				~Vector() {
+					if( buffers != nullptr ) {
+						#pragma omp parallel
+						{
+							const size_t thread = config::OMP::current_thread_ID();
+
+							if( distr.isActiveThread( thread ) ) {
+								if( buffers[ thread ] != nullptr ) {
+									delete [] buffers[ thread ];
+								}
+							}
+						}
+						delete [] buffers;
+					}
+				}
+
+		};
+
+		/** Identifies any omp internal vector as an internal container. */
+		template< typename T >
+		struct is_container< internal::Vector< T, omp > > : std::true_type {};
+
+	} // end namespace ``alp::internal''
+
+	namespace internal {
+
+		template< typename T >
+		size_t getLength( const Vector< T, omp > &v ) noexcept {
+			return v.n;
+		}
+
+		template< typename T >
+		const bool & getInitialized( const Vector< T, omp > & v ) noexcept {
+			return v.initialized;
+		}
+
+		template< typename T >
+		void setInitialized( Vector< T, omp > & v, bool initialized ) noexcept {
+			v.initialized = initialized;
+		}
+
+		template< typename T >
+		const Vector< T, config::default_sequential_backend > &getLocalContainer(
+			const Vector< T, omp > &v,
+			const size_t thread, const size_t block
+		) noexcept {
+			assert( thread < v.num_buffers );
+			assert( block < v.containers[ thread ].size() );
+			return v.containers[ thread ][ block ];
+		}
+
+		template< typename T >
+		Vector< T, config::default_sequential_backend > &getLocalContainer(
+			Vector< T, omp > &v,
+			const size_t thread, const size_t block
+		) noexcept {
+			assert( thread < v.num_buffers );
+			assert( block < v.containers[ thread ].size() );
+			return v.containers[ thread ][ block ];
+		}
+
+	} // end namespace ``alp::internal''
+
+} // end namespace ``alp''
+
+#endif // end ``_H_ALP_OMP_VECTOR''
+
diff --git a/include/alp/reference/blas2.hpp b/include/alp/reference/blas2.hpp
index e29d6457e..09a4820dc 100644
--- a/include/alp/reference/blas2.hpp
+++ b/include/alp/reference/blas2.hpp
@@ -950,6 +950,9 @@ namespace alp {
 			!alp::is_object< IOType >::value &&
 			!alp::is_object< InputType >::value &&
 			alp::is_monoid< Monoid >::value
 		> * const = nullptr
 	) {
+#ifdef _DEBUG
+		std::cout << "Called alp::foldl (scalar-to-matrix, monoid, reference)" << std::endl;
+#endif
 		// static sanity checks
 		NO_CAST_OP_ASSERT( ( !( descr & descriptors::no_casting ) ||
 				std::is_same< typename Monoid::D1, IOType >::value ),
@@
-992,6 +995,9 @@ namespace alp { !alp::is_object< IOType >::value && !alp::is_object< InputType >::value && alp::is_operator< Operator >::value > * const = nullptr ) { +#ifdef _DEBUG + std::cout << "Called alp::foldl (scalar-to-matrix, operator, reference)" << std::endl; +#endif // static sanity checks NO_CAST_OP_ASSERT( ( !( descr & descriptors::no_casting ) || std::is_same< typename Operator::D1, IOType >::value ), diff --git a/include/alp/reference/blas3.hpp b/include/alp/reference/blas3.hpp index b0ad5ef3d..8a76ee6e3 100644 --- a/include/alp/reference/blas3.hpp +++ b/include/alp/reference/blas3.hpp @@ -490,46 +490,71 @@ namespace alp { * @param phase The execution phase. */ template< - typename OutputStructMatT, - typename InputStructMatT1, - typename InputStructMatT2, + typename OutputType, typename InputType1, typename InputType2, + typename OutputStructure, typename OutputView, + typename OutputImfR, typename OutputImfC, + typename InputStructure1, typename InputView1, + typename InputImfR1, typename InputImfC1, + typename InputStructure2, typename InputView2, + typename InputImfR2, typename InputImfC2, class Semiring > RC mxm( - OutputStructMatT & C, - const InputStructMatT1 & A, - const InputStructMatT2 & B, + alp::Matrix< OutputType, OutputStructure, + Density::Dense, OutputView, OutputImfR, OutputImfC, reference > &C, + const alp::Matrix< InputType1, InputStructure1, + Density::Dense, InputView1, InputImfR1, InputImfC1, reference > &A, + const alp::Matrix< InputType2, InputStructure2, + Density::Dense, InputView2, InputImfR2, InputImfC2, reference > &B, const Semiring & ring = Semiring(), const PHASE &phase = NUMERICAL, - const std::enable_if_t< - ! alp::is_object< typename OutputStructMatT::value_type >::value && ! alp::is_object< typename InputStructMatT1::value_type >::value && ! alp::is_object< typename InputStructMatT2::value_type >::value && alp::is_semiring< Semiring >::value - > * const = NULL + const typename std::enable_if< + !alp::is_object< OutputType >::value && + !alp::is_object< InputType1 >::value && + !alp::is_object< InputType2 >::value && + alp::is_semiring< Semiring >::value, + void + >::type * const = NULL ) { (void)phase; - return internal::mxm_generic< false >( C, A, B, ring.getMultiplicativeOperator(), ring.getAdditiveMonoid(), ring.getMultiplicativeMonoid() ); + return internal::mxm_generic< false >( + C, A, B, + ring.getMultiplicativeOperator(), ring.getAdditiveMonoid(), ring.getMultiplicativeMonoid() + ); } /** * Dense Matrix-Matrix multiply between structured matrices. 
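 * Version with additive monoid and multiplicative operator
 *
 * A call sketch (illustrative; assumes C, A, and B are dense general
 * matrices over doubles, and that alp::Monoid composes an operator with
 * an identity as in the reference headers):
 * \code
 * alp::operators::mul< double > mulOp;
 * alp::Monoid< alp::operators::add< double >, alp::identities::zero > addM;
 * const alp::RC rc = alp::mxm( C, A, B, mulOp, addM );
 * \endcode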
 */
-	template< typename OutputStructMatT,
-		typename InputStructMatT1,
-		typename InputStructMatT2,
+	template<
+		typename OutputType, typename InputType1, typename InputType2,
+		typename OutputStructure, typename OutputView,
+		typename OutputImfR, typename OutputImfC,
+		typename InputStructure1, typename InputView1,
+		typename InputImfR1, typename InputImfC1,
+		typename InputStructure2, typename InputView2,
+		typename InputImfR2, typename InputImfC2,
 		class Operator, class Monoid
 	>
 	RC mxm(
-		OutputStructMatT & C,
-		const InputStructMatT1 & A,
-		const InputStructMatT2 & B,
+		alp::Matrix< OutputType, OutputStructure,
+			Density::Dense, OutputView, OutputImfR, OutputImfC, reference > &C,
+		const alp::Matrix< InputType1, InputStructure1,
+			Density::Dense, InputView1, InputImfR1, InputImfC1, reference > &A,
+		const alp::Matrix< InputType2, InputStructure2,
+			Density::Dense, InputView2, InputImfR2, InputImfC2, reference > &B,
 		const Operator & mulOp,
 		const Monoid & addM,
 		const PHASE &phase = NUMERICAL,
-		const std::enable_if_t<
-			! alp::is_object< typename OutputStructMatT::value_type >::value && ! alp::is_object< typename InputStructMatT1::value_type >::value && ! alp::is_object< typename InputStructMatT2::value_type >::value &&
-			alp::is_operator< Operator >::value && alp::is_monoid< Monoid >::value
-		> * const = NULL
+		const typename std::enable_if<
+			!alp::is_object< OutputType >::value &&
+			!alp::is_object< InputType1 >::value &&
+			!alp::is_object< InputType2 >::value &&
+			alp::is_operator< Operator >::value && alp::is_monoid< Monoid >::value,
+			void
+		>::type * const = NULL
 	) {
 		(void)phase;
diff --git a/include/alp/reference/io.hpp b/include/alp/reference/io.hpp
index c6de63f5d..da1e287c5 100644
--- a/include/alp/reference/io.hpp
+++ b/include/alp/reference/io.hpp
@@ -878,9 +878,6 @@ namespace alp {
 		const fwd_iterator &start,
 		const fwd_iterator &end
 	) noexcept {
-		(void)A;
-		(void)start;
-		(void)end;
 		// Temporarily assuming 1-1 mapping with user container
 		internal::setInitialized(A, true);
diff --git a/include/alp/reference/vector.hpp b/include/alp/reference/vector.hpp
index d84e6f986..e1b035442 100644
--- a/include/alp/reference/vector.hpp
+++ b/include/alp/reference/vector.hpp
@@ -107,6 +107,8 @@ namespace alp {
 			/** Whether the container presently is uninitialized. */
 			bool initialized;
+			/** Whether the destructor should free the vector data. True when the vector data is allocated by this class. */
+			bool should_free_data;
 
 		public:
 
@@ -140,7 +142,8 @@ namespace alp {
 			 * \warning Avoid the use of this constructor within performance critical
 			 *          code sections.
 			 */
-			Vector( const size_t length, const size_t cap = 0 ) : n( length ), cap( std::max( length, cap ) ), initialized( false ) {
+			Vector( const size_t length, const size_t cap = 0 ) :
+				n( length ), cap( std::max( length, cap ) ), initialized( false ), should_free_data( true ) {
 				// TODO: Implement allocation properly
 				if( n > 0) {
 					data = new (std::nothrow) T[ n ];
@@ -153,6 +156,34 @@
 				}
 			}
 
+			/**
+			 * The ALP/Dense vector constructor wrapping already-allocated data.
+			 *
+			 * The constructed object will be uninitialised after successful construction.
+			 *
+			 * @param data   The pointer to an already-allocated array of
+			 *               elements of type T.
+			 * @param length The number of elements the array above guarantees
+			 *               to hold.
+			 * @param cap    The capacity (not used, kept for compatibility with the sparse backend)
+			 *
+			 * \parblock
+			 * \par Performance semantics.
+			 *        -# This constructor entails \f$ \Theta(1) \f$ amount of work.
+			 *        -# This constructor may allocate \f$ \Theta( 1 ) \f$ bytes
+			 *           of dynamic memory.
+			 *        -# This constructor will use \f$ \Theta(1) \f$ extra bytes of
+			 *           memory beyond that at constructor entry.
+			 *        -# This constructor incurs \f$ \Theta(1) \f$ data movement.
+			 *        -# This constructor does \em not make system calls.
+			 * \endparblock
+			 *
+			 * \warning Avoid the use of this constructor within performance critical
+			 *          code sections.
+			 */
+			Vector( T *data, const size_t length, const size_t cap = 0 ) :
+				n( length ), cap( std::max( length, cap ) ), data( data ), initialized( false ), should_free_data( false ) {}
+
 			/**
 			 * Copy constructor.
 			 *
@@ -222,7 +253,7 @@
 			 *          code sections.
 			 */
 			~Vector() {
-				if( data != nullptr ) {
+				if( data != nullptr && should_free_data ) {
 					delete [] data;
 				}
 			}
diff --git a/include/alp/scalar.hpp b/include/alp/scalar.hpp
index 5cd5f30b3..33210aba2 100644
--- a/include/alp/scalar.hpp
+++ b/include/alp/scalar.hpp
@@ -28,6 +28,9 @@
 #ifdef _ALP_WITH_REFERENCE
  #include
 #endif
+#ifdef _ALP_WITH_OMP
+ #include
+#endif
 
 // specify default only if requested during compilation
 // #ifdef _ALP_BACKEND
diff --git a/include/alp/storage.hpp b/include/alp/storage.hpp
index d13957fd6..2fd3f8816 100644
--- a/include/alp/storage.hpp
+++ b/include/alp/storage.hpp
@@ -30,4 +30,8 @@
 #include
 #endif
 
+#ifdef _ALP_WITH_OMP
+ #include
+#endif
+
 #endif // _H_ALP_STORAGE
diff --git a/include/alp/type_traits.hpp b/include/alp/type_traits.hpp
index 39f3ebf6a..975a8a14b 100644
--- a/include/alp/type_traits.hpp
+++ b/include/alp/type_traits.hpp
@@ -409,6 +409,12 @@ namespace alp {
 				typedef new_container_type_from< type > _and_;
 			};
 
+			template< enum Backend new_backend >
+			struct change_backend {
+				typedef Container< T, Structure, density, View, ImfR, ImfC, new_backend > type;
+				typedef new_container_type_from< type > _and_;
+			};
+
 		private:
 			new_container_type_from() = delete;
 	};
diff --git a/include/alp/utils.hpp b/include/alp/utils.hpp
index 330c69b3b..d053f998d 100644
--- a/include/alp/utils.hpp
+++ b/include/alp/utils.hpp
@@ -26,6 +26,7 @@
 #include
 #include //fabs
+#include // modulus
 #include //numeric_limits
 #include
@@ -295,7 +296,21 @@
 		}
 	};
-
+
+
+	/**
+	 * Computes the modulus \f$ mod( x, n ) \f$ from the remainder of integer
+	 * division, returning a value within the range [0, n).
+	 * Assumes \a T implements: \a operator%, \a operator<, and \a operator+.
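+	 *
+	 * For example (illustrative), the built-in remainder of -3 and 5 is -3,
+	 * whereas this function returns the non-negative representative:
+	 * \code
+	 * assert( alp::utils::modulus( -3, 5 ) == 2 );
+	 * \endcode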
rem + n : rem; + }; + } // namespace utils } // namespace alp diff --git a/include/alp/vector.hpp b/include/alp/vector.hpp index a5b8a8c41..78d19a323 100644 --- a/include/alp/vector.hpp +++ b/include/alp/vector.hpp @@ -30,6 +30,9 @@ #ifdef _ALP_WITH_REFERENCE #include #endif +#ifdef _ALP_WITH_OMP + #include +#endif // specify default only if requested during compilation #ifdef _ALP_BACKEND diff --git a/src/alp/CMakeLists.txt b/src/alp/CMakeLists.txt index 29cc55f2c..18f4f4b9b 100644 --- a/src/alp/CMakeLists.txt +++ b/src/alp/CMakeLists.txt @@ -36,3 +36,7 @@ set( backend_alp_reference_srcs if( WITH_ALP_REFERENCE_BACKEND ) add_subdirectory( reference ) endif() + +if( WITH_ALP_OMP_BACKEND ) + add_subdirectory( omp ) +endif() diff --git a/src/alp/omp/CMakeLists.txt b/src/alp/omp/CMakeLists.txt new file mode 100644 index 000000000..9cf4ff7c1 --- /dev/null +++ b/src/alp/omp/CMakeLists.txt @@ -0,0 +1,54 @@ +# +# Copyright 2021 Huawei Technologies Co., Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +assert_valid_variables( BACKEND_LIBRARY_OUTPUT_NAME VERSION + ALP_OMP_BACKEND_INSTALL_DIR INCLUDE_INSTALL_DIR + ALP_OMP_BACKEND_DEFAULT_NAME + ALP_OMP_SELECTION_DEFS +) + +# sources for ALP OMP backend +set( backend_alp_omp_srcs + ${CMAKE_CURRENT_SOURCE_DIR}/../descriptors.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../rc.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/init.cpp +) + +add_library( backend_alp_omp_static STATIC "${backend_alp_omp_srcs}" ) + +target_link_libraries( backend_alp_omp_static PUBLIC backend_alp_omp_headers ) + +set_target_properties( backend_alp_omp_static PROPERTIES + OUTPUT_NAME "${BACKEND_LIBRARY_OUTPUT_NAME}" +) + +set_target_properties( backend_alp_omp_static PROPERTIES + ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/omp" +) + +target_compile_definitions( backend_alp_omp_static PUBLIC "${ALP_OMP_SELECTION_DEFS}" ) +target_link_libraries( backend_alp_omp_static PRIVATE backend_flags ) + +add_dependencies( libs backend_alp_omp_static ) + +install( TARGETS backend_alp_omp_static + EXPORT GraphBLASTargets + ARCHIVE DESTINATION "${ALP_OMP_BACKEND_INSTALL_DIR}" + ) + +# this is an alias for add_grb_executables() to select the backend to link against +# DO NOT CHANGE THE ALIAS NAME! +add_library( "${ALP_OMP_BACKEND_DEFAULT_NAME}" ALIAS backend_alp_omp_static ) diff --git a/src/alp/omp/init.cpp b/src/alp/omp/init.cpp new file mode 100644 index 000000000..f57351285 --- /dev/null +++ b/src/alp/omp/init.cpp @@ -0,0 +1,60 @@ + +/* + * Copyright 2021 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
diff --git a/include/alp/vector.hpp b/include/alp/vector.hpp
index a5b8a8c41..78d19a323 100644
--- a/include/alp/vector.hpp
+++ b/include/alp/vector.hpp
@@ -30,6 +30,9 @@
 #ifdef _ALP_WITH_REFERENCE
 	#include <alp/reference/vector.hpp>
 #endif
+#ifdef _ALP_WITH_OMP
+	#include <alp/omp/vector.hpp>
+#endif
 
 // specify default only if requested during compilation
 #ifdef _ALP_BACKEND
diff --git a/src/alp/CMakeLists.txt b/src/alp/CMakeLists.txt
index 29cc55f2c..18f4f4b9b 100644
--- a/src/alp/CMakeLists.txt
+++ b/src/alp/CMakeLists.txt
@@ -36,3 +36,7 @@ set( backend_alp_reference_srcs
 if( WITH_ALP_REFERENCE_BACKEND )
 	add_subdirectory( reference )
 endif()
+
+if( WITH_ALP_OMP_BACKEND )
+	add_subdirectory( omp )
+endif()
diff --git a/src/alp/omp/CMakeLists.txt b/src/alp/omp/CMakeLists.txt
new file mode 100644
index 000000000..9cf4ff7c1
--- /dev/null
+++ b/src/alp/omp/CMakeLists.txt
@@ -0,0 +1,54 @@
+#
+# Copyright 2021 Huawei Technologies Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+assert_valid_variables( BACKEND_LIBRARY_OUTPUT_NAME VERSION
+	ALP_OMP_BACKEND_INSTALL_DIR INCLUDE_INSTALL_DIR
+	ALP_OMP_BACKEND_DEFAULT_NAME
+	ALP_OMP_SELECTION_DEFS
+)
+
+# sources for the ALP OMP backend
+set( backend_alp_omp_srcs
+	${CMAKE_CURRENT_SOURCE_DIR}/../descriptors.cpp
+	${CMAKE_CURRENT_SOURCE_DIR}/../rc.cpp
+	${CMAKE_CURRENT_SOURCE_DIR}/init.cpp
+)
+
+add_library( backend_alp_omp_static STATIC "${backend_alp_omp_srcs}" )
+
+target_link_libraries( backend_alp_omp_static PUBLIC backend_alp_omp_headers )
+
+set_target_properties( backend_alp_omp_static PROPERTIES
+	OUTPUT_NAME "${BACKEND_LIBRARY_OUTPUT_NAME}"
+)
+
+set_target_properties( backend_alp_omp_static PROPERTIES
+	ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/omp"
+)
+
+target_compile_definitions( backend_alp_omp_static PUBLIC "${ALP_OMP_SELECTION_DEFS}" )
+target_link_libraries( backend_alp_omp_static PRIVATE backend_flags )
+
+add_dependencies( libs backend_alp_omp_static )
+
+install( TARGETS backend_alp_omp_static
+	EXPORT GraphBLASTargets
+	ARCHIVE DESTINATION "${ALP_OMP_BACKEND_INSTALL_DIR}"
+)
+
+# this is an alias for add_grb_executables() to select the backend to link against
+# DO NOT CHANGE THE ALIAS NAME!
+add_library( "${ALP_OMP_BACKEND_DEFAULT_NAME}" ALIAS backend_alp_omp_static )
diff --git a/src/alp/omp/init.cpp b/src/alp/omp/init.cpp
new file mode 100644
index 000000000..f57351285
--- /dev/null
+++ b/src/alp/omp/init.cpp
@@ -0,0 +1,60 @@
+
+/*
+ * Copyright 2021 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * @author A. N. Yzelman
+ * @date 2nd of February, 2017
+ */
+
+#include <iostream>
+#include
+#include
+
+#ifndef _GRB_NO_LIBNUMA
+	#include <numa.h> //numa_set_localalloc
+#endif
+#include <omp.h> //omp_get_num_threads
+
+
+template<>
+alp::RC alp::init< alp::omp >( const size_t s, const size_t P, void * const data ) {
+	(void) data;
+	RC rc = alp::SUCCESS;
+	// report the thread configuration
+	const auto T = config::OMP::threads();
+	std::cout << "Info: alp::init (omp) called. OpenMP is set to utilise " << T << " threads.\n";
+
+	// sanity checks: this is a shared-memory backend
+	if( P > 1 ) {
+		return alp::UNSUPPORTED;
+	}
+	if( s > 0 ) {
+		return alp::PANIC;
+	}
+#ifndef _GRB_NO_LIBNUMA
+	// set the memory policy to allocate on the local NUMA node
+	numa_set_localalloc();
+#endif
+	return rc;
+}
+
+template<>
+alp::RC alp::finalize< alp::omp >() {
+	std::cout << "Info: alp::finalize (omp) called.\n";
+	return alp::SUCCESS;
+}
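For reference, a direct, single-process use of the two specialisations defined above would look as follows. This is a sketch only: in practice the unit tests further below go through alp::Launcher rather than calling init and finalize by hand.

	#include <alp.hpp>

	int main() {
		// s = 0, P = 1: alp_omp is shared-memory only, so P > 1 yields
		// alp::UNSUPPORTED and a nonzero process id s yields alp::PANIC
		if( alp::init< alp::omp >( 0, 1, nullptr ) != alp::SUCCESS ) {
			return 1;
		}
		// ... ALP/Dense computations executed by the OpenMP backend ...
		return alp::finalize< alp::omp >() == alp::SUCCESS ? 0 : 1;
	}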
diff --git a/tests/smoke/smoketests.sh b/tests/smoke/smoketests.sh
index 9838353c2..883b68797 100755
--- a/tests/smoke/smoketests.sh
+++ b/tests/smoke/smoketests.sh
@@ -511,7 +511,9 @@ for BACKEND in ${BACKENDS[@]}; do
 done
 
 for BACKEND in ${BACKENDS[@]}; do
-	if [ "${BACKEND:0:4}" != "alp_" ]; then
+	# temporarily execute these tests only for the alp_reference backend,
+	# until all ALP backends support all smoke tests
+	if [ "${BACKEND}" != "alp_reference" ]; then
 		continue
 	fi
diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt
index 3674ad618..f16504294 100644
--- a/tests/unit/CMakeLists.txt
+++ b/tests/unit/CMakeLists.txt
@@ -265,6 +265,14 @@ add_grb_executables( dense_mxm dense_mxm.cpp
 	BACKENDS alp_reference
 )
 
+add_grb_executables( dense_omp_matrix dense_omp_matrix.cpp
+	BACKENDS alp_omp
+)
+
+add_grb_executables( dense_omp_mxm dense_omp_mxm.cpp
+	BACKENDS alp_omp
+)
+
 add_grb_executables( dense_outer dense_outer.cpp
 	BACKENDS alp_reference
 )
diff --git a/tests/unit/dense_omp_matrix.cpp b/tests/unit/dense_omp_matrix.cpp
new file mode 100644
index 000000000..bf93e808b
--- /dev/null
+++ b/tests/unit/dense_omp_matrix.cpp
@@ -0,0 +1,95 @@
+
+/*
+ * Copyright 2021 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define _DEBUG
+
+#include <iostream>
+#include <sstream>
+
+#include <alp.hpp>
+#include "../utils/print_alp_containers.hpp"
+
+void alp_program( const size_t &n, alp::RC &rc ) {
+
+	typedef double T;
+
+	rc = alp::SUCCESS;
+	alp::Semiring<
+		alp::operators::add< T >, alp::operators::mul< T >,
+		alp::identities::zero, alp::identities::one
+	> ring;
+
+	std::cout << "\tStarting dense OMP matrix test with size: " << n << " x " << n << "\n";
+
+	// create the matrix
+	alp::Matrix< T, alp::structures::General > M( n, n );
+
+	// set all matrix elements to one
+	rc = rc ? rc : alp::set( M, alp::Scalar< T >( ring.template getOne< T >() ) );
+
+	print_matrix( "M", M );
+}
+
+int main( int argc, char **argv ) {
+	// defaults
+	bool printUsage = false;
+	size_t in = 5;
+
+	// error checking
+	if( argc > 2 ) {
+		printUsage = true;
+	}
+	if( argc == 2 ) {
+		size_t read;
+		std::istringstream ss( argv[ 1 ] );
+		if( !( ss >> read ) ) {
+			std::cerr << "Error parsing first argument\n";
+			printUsage = true;
+		} else if( !ss.eof() ) {
+			std::cerr << "Error parsing first argument\n";
+			printUsage = true;
+		} else if( read % 2 != 0 ) {
+			std::cerr << "Given value for n is odd\n";
+			printUsage = true;
+		} else {
+			// all OK
+			in = read;
+		}
+	}
+	if( printUsage ) {
+		std::cerr << "Usage: " << argv[ 0 ] << " [n]\n";
+		std::cerr << "  n (optional, default is 5): an even integer, the test size.\n";
+		return 1;
+	}
+
+	std::cout << "This is functional test " << argv[ 0 ] << "\n";
+	alp::Launcher< alp::AUTOMATIC > launcher;
+	alp::RC out;
+	if( launcher.exec( &alp_program, in, out, true ) != alp::SUCCESS ) {
+		std::cerr << "Launching test FAILED\n";
+		return 255;
+	}
+	if( out != alp::SUCCESS ) {
+		std::cerr << "Test FAILED (" << alp::toString( out ) << ")" << std::endl;
+	} else {
+		std::cout << "Test OK" << std::endl;
+	}
+	return 0;
+}
diff --git a/tests/unit/dense_omp_mxm.cpp b/tests/unit/dense_omp_mxm.cpp
new file mode 100644
index 000000000..3628d5d45
--- /dev/null
+++ b/tests/unit/dense_omp_mxm.cpp
@@ -0,0 +1,348 @@
+
+/*
+ * Copyright 2021 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <algorithm>
+#include <cmath>
+#include <cstddef>
+#include <iostream>
+#include <sstream>
+#include <string>
+#include <type_traits>
+#include <vector>
+
+#include <alp.hpp>
+#ifndef NDEBUG
+	#include "../utils/print_alp_containers.hpp"
+#endif
+
+using namespace alp;
+
+
+template< typename T >
+void print_stdvec_as_matrix( std::string name, const std::vector< T > &vA, const size_t m, const size_t n, const size_t lda ) {
+
+	std::cout << "Vec " << name << ":" << std::endl;
+	for( size_t row = 0; row < m; ++row ) {
+		std::cout << "[\t";
+		for( size_t col = 0; col < n; ++col ) {
+			std::cout << vA[ row * lda + col ] << "\t";
+		}
+		std::cout << "]" << std::endl;
+	}
+}
+
+template< typename T, typename Operator, typename Monoid >
+void mxm_stdvec_as_matrix(
+	std::vector< T > &vC, const size_t ldc,
+	const std::vector< T > &vA, const size_t lda,
+	const std::vector< T > &vB, const size_t ldb,
+	const size_t m, const size_t k, const size_t n,
+	const Operator oper,
+	const Monoid monoid
+) {
+
+	T temp;
+
+#ifndef NDEBUG
+	print_stdvec_as_matrix( "vA", vA, m, k, lda );
+	print_stdvec_as_matrix( "vB", vB, k, n, ldb );
+	print_stdvec_as_matrix( "vC - PRE", vC, m, n, ldc );
+#endif
+
+	for( size_t i = 0; i < m; ++i ) {
+		for( size_t j = 0; j < n; ++j ) {
+			T &c_val { vC[ i * ldc + j ] };
+			for( size_t l = 0; l < k; ++l ) {
+				const T &a_val { vA[ i * lda + l ] };
+				const T &b_val { vB[ l * ldb + j ] };
+				// in semiring terms: c_val = c_val (+) ( a_val (*) b_val )
+				(void) internal::apply( temp, a_val, b_val, oper );
+				(void) internal::foldl( c_val, temp, monoid.getOperator() );
+			}
+		}
+	}
+
+#ifndef NDEBUG
+	print_stdvec_as_matrix( "vC - POST", vC, m, n, ldc );
+#endif
+}
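In semiring notation, the reference kernel above computes, with \f$ \otimes \f$ the multiplicative operator and \f$ \oplus \f$ the additive monoid's operator,

	\f$ C_{ij} \leftarrow C_{ij} \oplus \bigoplus_{l=0}^{k-1} \left( A_{il} \otimes B_{lj} \right) \f$, for \f$ 0 \le i < m \f$ and \f$ 0 \le j < n \f$,

i.e. products are folded into the existing contents of vC, which is why the callers below pre-initialise C_vec with the semiring's additive identity.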
+template< typename Structure, typename T >
+void stdvec_build_matrix( std::vector< T > &vA, const size_t m, const size_t n, const size_t lda, const T zero, const T one ) {
+
+	if( std::is_same< Structure, structures::General >::value ) {
+		std::fill( vA.begin(), vA.end(), one );
+	} else if( std::is_same< Structure, structures::Symmetric >::value ) {
+		std::fill( vA.begin(), vA.end(), one );
+	} else if( std::is_same< Structure, structures::UpperTriangular >::value ) {
+		for( size_t row = 0; row < m; ++row ) {
+			for( size_t col = 0; col < row; ++col ) {
+				vA[ row * lda + col ] = zero;
+			}
+			for( size_t col = row; col < n; ++col ) {
+				vA[ row * lda + col ] = one;
+			}
+		}
+	}
+}
+
+template< typename Structure, typename T >
+void stdvec_build_matrix( std::vector< T > &vA, const size_t m, const size_t n, const size_t lda, const T zero, const T one, const T inc ) {
+
+	T val = one;
+	if( std::is_same< Structure, structures::General >::value ) {
+		for( size_t row = 0; row < m; ++row ) {
+			for( size_t col = 0; col < n; ++col ) {
+				vA[ row * lda + col ] = val;
+				val += inc;
+			}
+		}
+	} else if( std::is_same< Structure, structures::Symmetric >::value ) {
+		for( size_t row = 0; row < m; ++row ) {
+			for( size_t col = row; col < n; ++col ) {
+				vA[ row * lda + col ] = vA[ col * lda + row ] = val;
+				val += inc;
+			}
+		}
+	} else if( std::is_same< Structure, structures::UpperTriangular >::value ) {
+		for( size_t row = 0; row < m; ++row ) {
+			for( size_t col = 0; col < row; ++col ) {
+				vA[ row * lda + col ] = zero;
+			}
+			for( size_t col = row; col < n; ++col ) {
+				vA[ row * lda + col ] = val;
+				val += inc;
+			}
+		}
+	}
+}
+
+template< typename Structure, typename T >
+void stdvec_build_matrix_packed( std::vector< T > &vA, const T one ) {
+
+	std::fill( vA.begin(), vA.end(), one );
+}
+
+template< typename Structure, typename T >
+void stdvec_build_matrix_packed( std::vector< T > &vA, const T one, const T inc ) {
+
+	// both structures assume a packed row-major, upper storage,
+	// hence the packed buffer is filled identically
+	if( std::is_same< Structure, structures::Symmetric >::value ||
+		std::is_same< Structure, structures::UpperTriangular >::value
+	) {
+		T val = one;
+		for( auto &elem : vA ) {
+			elem = val;
+			val += inc;
+		}
+	}
+}
+
+template< typename MatType, typename T >
+void diff_stdvec_matrix( const std::vector< T > &vA, const size_t m, const size_t n, const size_t lda,
+	const MatType &mA, double threshold = 1e-7 ) {
+
+	if( std::is_same< typename MatType::structure, structures::General >::value ) {
+		for( size_t row = 0; row < m; ++row ) {
+			for( size_t col = 0; col < n; ++col ) {
+				double va = ( double )( vA[ row * lda + col ] );
+				double vm = ( double )( internal::access( mA, internal::getStorageIndex( mA, row, col ) ) );
+				double re = std::abs( ( va - vm ) / va );
+				if( re > threshold ) {
+					std::cout << "Error ( " << row << ", " << col << " ): " << va << " v " << vm << std::endl;
+				}
+			}
+		}
+	} else if( std::is_same< typename MatType::structure, structures::Symmetric >::value ) {
+		for( size_t row = 0; row < m; ++row ) {
+			for( size_t col = row; col < n; ++col ) {
+				double va = ( double )( vA[ row * lda + col ] );
+				double vm = ( double )( internal::access( mA, internal::getStorageIndex( mA, row, col ) ) );
+				double re = std::abs( ( va - vm ) / va );
+				if( re > threshold ) {
+					std::cout << "Error ( " << row << ", " << col << " ): " << va << " v " << vm << std::endl;
+				}
+			}
+		}
+	} else if( std::is_same< typename MatType::structure, structures::UpperTriangular >::value ) {
+		for( size_t row = 0; row < m; ++row ) {
+			for( size_t col = row; col < n; ++col ) {
+				double va = ( double )( vA[ row * lda + col ] );
+				double vm = ( double )( internal::access( mA, internal::getStorageIndex( mA, row, col ) ) );
+				double re = std::abs( ( va - vm ) / va );
+				if( re > threshold ) {
+					std::cout << "Error ( " << row << ", " << col << " ): " << va << " v " << vm << std::endl;
+				}
+			}
+		}
+	}
+}
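The three branches of diff_stdvec_matrix differ only in the starting column of the inner loop (0 for General, the diagonal for Symmetric and UpperTriangular). A possible deduplication, sketched against the same internal accessors the patch already uses:

	template< typename MatType, typename T >
	void diff_stdvec_matrix( const std::vector< T > &vA,
		const size_t m, const size_t n, const size_t lda,
		const MatType &mA, const double threshold = 1e-7
	) {
		// only General compares the strictly-lower triangle
		const bool full = std::is_same< typename MatType::structure, structures::General >::value;
		for( size_t row = 0; row < m; ++row ) {
			for( size_t col = ( full ? 0 : row ); col < n; ++col ) {
				const double va = static_cast< double >( vA[ row * lda + col ] );
				const double vm = static_cast< double >( internal::access( mA, internal::getStorageIndex( mA, row, col ) ) );
				if( std::abs( ( va - vm ) / va ) > threshold ) {
					std::cout << "Error ( " << row << ", " << col << " ): " << va << " v " << vm << std::endl;
				}
			}
		}
	}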
+template< typename T, typename SemiringT >
+void run_mxm( const size_t m, const size_t k, const size_t n, alp::RC &rc ) {
+
+	const SemiringT ring;
+	const T one = ring.template getOne< T >();
+	const T zero = ring.template getZero< T >();
+
+	std::vector< T > A_data( m * k );
+	std::vector< T > B_data( k * n );
+	std::vector< T > C_data( m * n, zero );
+
+	std::vector< T > A_vec( m * k );
+	std::vector< T > B_vec( k * n );
+	std::vector< T > C_vec( m * n, zero );
+
+	std::cout << "\tTesting dense General mxm " << m << " " << k << " " << n << std::endl;
+
+	stdvec_build_matrix< structures::General >( A_data, m, k, k, zero, one, one );
+	stdvec_build_matrix< structures::General >( B_data, k, n, n, zero, one, one );
+
+	// initialize test
+	alp::Matrix< T, structures::General > A( m, k );
+	alp::Matrix< T, structures::General > B( k, n );
+	alp::Matrix< T, structures::General > C( m, n );
+
+	// initialize input matrices
+	rc = rc ? rc : alp::buildMatrix( A, A_data.begin(), A_data.end() );
+	if( rc != alp::SUCCESS ) {
+		std::cerr << "\tIssues building A" << std::endl;
+		return;
+	}
+	rc = rc ? rc : alp::buildMatrix( B, B_data.begin(), B_data.end() );
+	rc = rc ? rc : alp::buildMatrix( C, C_data.begin(), C_data.end() );
+	if( rc != alp::SUCCESS ) {
+		std::cerr << "\tIssues building matrices" << std::endl;
+		return;
+	}
+
+#ifndef NDEBUG
+	print_matrix( "A", A );
+	print_matrix( "B", B );
+	print_matrix( "C - PRE", C );
+#endif
+
+	rc = rc ? rc : alp::mxm( C, A, B, ring );
+
+#ifndef NDEBUG
+	print_matrix( "C - POST", C );
+#endif
+
+	if( rc != alp::SUCCESS ) {
+		return;
+	}
+
+	// compute the reference result and compare
+	stdvec_build_matrix< structures::General >( A_vec, m, k, k, zero, one, one );
+	stdvec_build_matrix< structures::General >( B_vec, k, n, n, zero, one, one );
+	mxm_stdvec_as_matrix( C_vec, n, A_vec, k, B_vec, n, m, k, n, ring.getMultiplicativeOperator(), ring.getAdditiveMonoid() );
+	diff_stdvec_matrix( C_vec, m, n, n, C );
+
+	std::cout << "\tDone." << std::endl;
+}
+
+// test dimensions derived from the backend's block sizes;
+// n is the run-time parameter of alp_program below
+#define M ( alp::config::BLOCK_ROW_DIM * n )
+#define K ( alp::config::BLOCK_COL_DIM * 2 * n )
+#define N ( alp::config::BLOCK_COL_DIM * 3 * n )
+
+void alp_program( const size_t &n, alp::RC &rc ) {
+
+	using T = double;
+	using SemiringT = alp::Semiring<
+		alp::operators::add< T >, alp::operators::mul< T >,
+		alp::identities::zero, alp::identities::one
+	>;
+
+	rc = alp::SUCCESS;
+
+	// testing cubic mxm (all dimensions equal)
+	run_mxm< T, SemiringT >( M, M, M, rc );
+
+	// testing rectangular mxm
+	run_mxm< T, SemiringT >( M, K, N, rc );
+
+	// testing outer product of blocks mxm
+	run_mxm< T, SemiringT >( M, alp::config::BLOCK_COL_DIM, N, rc );
+
+	// testing dot product of blocks mxm
+	run_mxm< T, SemiringT >( alp::config::BLOCK_ROW_DIM, M, alp::config::BLOCK_COL_DIM, rc );
+}
+
+int main( int argc, char **argv ) {
+	// defaults
+	bool printUsage = false;
+	size_t in = 4;
+
+	// error checking
+	if( argc > 2 ) {
+		printUsage = true;
+	}
+	if( argc == 2 ) {
+		size_t read;
+		std::istringstream ss( argv[ 1 ] );
+		if( !( ss >> read ) ) {
+			std::cerr << "Error parsing first argument\n";
+			printUsage = true;
+		} else if( !ss.eof() ) {
+			std::cerr << "Error parsing first argument\n";
+			printUsage = true;
+		} else if( read % 2 != 0 ) {
+			std::cerr << "Given value for n is odd\n";
+			printUsage = true;
+		} else {
+			// all OK
+			in = read;
+		}
+	}
+	if( printUsage ) {
+		std::cerr << "Usage: " << argv[ 0 ] << " [n]\n";
+		std::cerr << "  n (optional, default is 4): an even integer, the test size.\n";
+		return 1;
+	}
+
+	std::cout << "This is functional test " << argv[ 0 ] << " " << in << "\n";
+	alp::Launcher< AUTOMATIC > launcher;
+	alp::RC out;
+	if( launcher.exec( &alp_program, in, out, true ) != SUCCESS ) {
+		std::cerr << "Launching test FAILED\n";
+		return 255;
+	}
+	if( out != SUCCESS ) {
+		std::cerr << "Test FAILED (" << alp::toString( out ) << ")" << std::endl;
+	} else {
+		std::cout << "Test OK" << std::endl;
+	}
+	return 0;
+}