diff --git a/include/graphblas/base/io.hpp b/include/graphblas/base/io.hpp
index 493dfcfea..c904570fc 100644
--- a/include/graphblas/base/io.hpp
+++ b/include/graphblas/base/io.hpp
@@ -1218,15 +1218,17 @@ namespace grb {
* -# #grb::descriptors::invert_mask, and
* -# #grb::descriptors::structural.
*
- * However, and differently from most ALP primtivies, the
+ * However, and differently from most ALP/GraphBLAS primitives, the
* #grb::descriptors::invert_mask and #grb::descriptors::structural are
* mutually exclusive for this primitive.
* \endparblock
*
* @tparam OutputType The type of each element in the given matrix.
* @tparam MaskType The type of each element in the given mask.
- * @tparam ValueType The type of the given value. Should be convertible
- * to \a OutputType.
+ * @tparam ValueType The type of the given value.
+ *
+ * The given \a ValueType must be convertible to \a OutputType.
+ *
* @tparam RIT The integer type for encoding row indices.
* @tparam CIT The integer type for encoding column indices.
* @tparam NIT The integer type for encoding nonzero indices.
@@ -1244,51 +1246,50 @@ namespace grb {
* the default is #grb::EXECUTE.
*
* In #grb::RESIZE mode:
- * @returns #grb::SUCCESS When the capacity of \a C has been (made) sufficient
- * to store the requested output.
+ *
+ * @returns #grb::SUCCESS When the capacity of \a C has been made, or already
+ * was, sufficient to store the requested output.
* @returns #grb::OUTOFMEM When out-of-memory conditions have been met while
- * executing. If this error code is returned, \a C
+ * resizing \a C. If this error code is returned, \a C
* shall be unmodified compared to its state at
* function entry.
*
* In #grb::EXECUTE mode:
- * @returns #grb::SUCCESS When the call completes successfully.
+ *
+ * @returns #grb::SUCCESS When the computation has completed or will execute
+ * successfully.
* @returns #grb::ILLEGAL When \a C did not have enough capacity to store the
* output of the requested computation.
*
* Either mode may additionally return:
- * @returns #grb::ILLEGAL In case the given \a mask was empty.
+ *
* @returns #grb::MISMATCH In case \a C and \a mask have mismatching sizes.
* @returns #grb::PANIC In case an unmitigable error was encountered. The
- * caller is suggested to exit gracefully, and in any
- * case to not make any further calls to ALP.
+ * caller, when encountering this return code, is
+ * suggested to exit gracefully and to not make any
+ * further calls to ALP.
*
* When \a descr includes #grb::descriptors::no_casting then code shall not
* compile if one of the following conditions are met:
* -# \a ValueType does not match \a OutputType; or
* -# \a MaskType does not match bool.
*
- * In these cases, the code shall not compile: implementations must throw
- * a static assertion failure in this case.
- *
* Similarly, it is forbidden to call this function with both following
* descriptors simultaneously:
* - #grb::descriptors::invert_mask \em and #grb::descriptors::structural.
*
* The use of the #grb::descriptors::structural_complement descriptor hence is
- * is forbidden also. Implementations shall throw a static assertion failure
- * if the user nonetheless asks for structural mask inversion.
+ * forbidden also. These conditions, when encountered, must likewise lead to
+ * compile-time errors.
+ *
+ * \note One vehicle to ensure compilation does not occur in these cases is via
+ * static_assert.
*
* \parblock
* \par Performance semantics
* Each backend must define performance semantics for this primitive.
*
* @see perfSemantics
- *
- * \warning Generally, if \a mask equals \a C and the mask is non-structural,
- * then optimised implementations will assign higher costs than when
- * \a mask does not equal \a C. This is because the nonzero structure
- * update cannot be done in-place.
* \endparblock
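+ *
+ * \par Example
+ * \parblock
+ * A minimal usage sketch (illustrative only; the container sizes, types, and
+ * value are hypothetical):
+ * \code
+ * grb::Matrix< double > C( 10, 10 );
+ * grb::Matrix< bool > mask( 10, 10 );
+ * // ... populate mask (and optionally C) ...
+ * grb::RC rc = grb::set( C, mask, 3.14, grb::RESIZE );
+ * if( rc == grb::SUCCESS ) {
+ *     rc = grb::set( C, mask, 3.14, grb::EXECUTE );
+ * }
+ * \endcode
+ * \endparblock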
*/
template<
@@ -1320,6 +1321,128 @@ namespace grb {
return UNSUPPORTED;
}
+ /**
+ * Sets all values of a matrix to those of a given source matrix, if and only
+ * if the corresponding coordinates evaluate true under the given mask
+ * matrix.
+ *
+ * @tparam descr The descriptor used for this operation.
+ *
+ * \parblock
+ * \par Accepted descriptors
+ * -# #grb::descriptors::no_operation,
+ * -# #grb::descriptors::no_casting,
+ * -# #grb::descriptors::invert_mask,
+ * -# #grb::descriptors::structural
+ *
+ * However, and differently from most ALP primitives, the
+ * #grb::descriptors::invert_mask and #grb::descriptors::structural are
+ * mutually exclusive for this primitive.
+ * \endparblock
+ *
+ * @tparam OutputType The type of each element in the destination matrix.
+ * @tparam MaskType The type of each element in the output mask.
+ * @tparam ValueType The type of each element in the source matrix.
+ *
+ * The given \a ValueType must be convertible to \a OutputType.
+ *
+ * \internal
+ * @tparam RIT1 The integer type for encoding row indices in \a C.
+ * @tparam CIT1 The integer type for encoding column indices in \a C.
+ * @tparam NIT1 The integer type for encoding nonzero indices in \a C.
+ * @tparam RIT2 The integer type for encoding row indices in \a mask.
+ * @tparam CIT2 The integer type for encoding column indices in \a mask.
+ * @tparam NIT2 The integer type for encoding nonzero indices in \a mask.
+ * @tparam RIT3 The integer type for encoding row indices in \a A.
+ * @tparam CIT3 The integer type for encoding column indices in \a A.
+ * @tparam NIT3 The integer type for encoding nonzero indices in \a A.
+ * @tparam backend The backend selected for executing this primitive.
+ * \endinternal
+ *
+ * @param[out] C The matrix that will be a masked copy of \a A.
+ * @param[in] mask Matrix that acts as output mask on \a C.
+ * @param[in] A The source matrix which will (partially) be copied to
+ * \a C.
+ * @param[in] phase Which #grb::Phase of the operation is requested. Optional;
+ * the default is #grb::EXECUTE.
+ *
+ * In #grb::RESIZE mode:
+ *
+ * @returns #grb::SUCCESS When the capacity of \a C has been made, or already
+ * was, sufficient to store the requested output.
+ * @returns #grb::OUTOFMEM When out-of-memory conditions were met while
+ * resizing \a C. If this error code is returned, \a C
+ * shall be left unmodified compared to its state at
+ * function entry.
+ *
+ * In #grb::EXECUTE mode:
+ *
+ * @returns #grb::SUCCESS When the computation has completed or will execute
+ * successfully.
+ * @returns #grb::ILLEGAL When \a C did not have enough capacity to store the
+ * output of the requested computation.
+ *
+ * Either mode may additionally return:
+ *
+ * @returns #grb::MISMATCH When \a A and \a C have mismatching sizes.
+ * @returns #grb::MISMATCH When \a C and \a mask have mismatching sizes.
+ * @returns #grb::PANIC In case an unmitigable error was encountered. The
+ * caller, when encountering this return code, is
+ * suggested to exit the program gracefully and to not
+ * make any further calls to ALP.
+ *
+ * When \a descr includes #grb::descriptors::no_casting, then code shall not
+ * compile if one of the following conditions is met:
+ * -# \a ValueType does not match \a OutputType; or
+ * -# \a MaskType does not match bool.
+ *
+ * Similarly, it is forbidden to call this function with both following
+ * descriptors simultaneously:
+ * - #grb::descriptors::invert_mask \em and #grb::descriptors::structural.
+ *
+ * The use of the #grb::descriptors::structural_complement descriptor hence is
+ * forbidden also. These conditions must likewise lead to compile-time errors.
+ *
+ * \note One vehicle to ensure compilation does not occur in these cases is via
+ * static_assert.
+ *
+ * \parblock
+ * \par Performance semantics
+ * Each backend must define performance semantics for this primitive.
+ *
+ * @see perfSemantics
+ * \endparblock
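+ *
+ * \par Example
+ * \parblock
+ * An illustrative sketch of a masked matrix copy (assumes square containers
+ * of matching size \a n):
+ * \code
+ * grb::Matrix< double > C( n, n ), A( n, n );
+ * grb::Matrix< bool > mask( n, n );
+ * // copy entries of A into C wherever the mask evaluates true
+ * grb::RC rc = grb::set( C, mask, A, grb::RESIZE );
+ * if( rc == grb::SUCCESS ) {
+ *     rc = grb::set( C, mask, A, grb::EXECUTE );
+ * }
+ * \endcode
+ * \endparblock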
+ */
+ template<
+ Descriptor descr = descriptors::no_operation,
+ typename OutputType, typename MaskType, typename ValueType,
+ typename RIT1, typename CIT1, typename NIT1,
+ typename RIT2, typename CIT2, typename NIT2,
+ typename RIT3, typename CIT3, typename NIT3,
+ Backend backend
+ >
+ RC set(
+ Matrix< OutputType, backend, RIT1, CIT1, NIT1 > &C,
+ const Matrix< MaskType, backend, RIT2, CIT2, NIT2 > &mask,
+ const Matrix< ValueType, backend, RIT3, CIT3, NIT3 > &A,
+ const Phase &phase = EXECUTE,
+ const typename std::enable_if<
+ !grb::is_object< OutputType >::value &&
+ !grb::is_object< ValueType >::value &&
+ !grb::is_object< MaskType >::value,
+ void >::type * const = nullptr
+ ) noexcept {
+#ifndef NDEBUG
+ const bool should_not_call_base_matrix_masked_matrix_set = false;
+ assert( should_not_call_base_matrix_masked_matrix_set );
+#endif
+ (void) C;
+ (void) mask;
+ (void) A;
+ (void) phase;
+ return UNSUPPORTED;
+ }
+
/**
* Sets the element of a given vector at a given position to a given value.
*
@@ -1763,7 +1886,7 @@ namespace grb {
* successfully, then a call to this function shall return #grb::SUCCESS.
*
- * The are several other cases in which the computation of nonblocking
- * primtives is forced:
+ * There are several other cases in which the computation of nonblocking
+ * primitives is forced:
* -# whenever an output iterator of an output container of any of the non-
* blocking primitives is requested; and
* -# whenever an output container of any of the non-blocking primitives is
diff --git a/include/graphblas/bsp1d/init.hpp b/include/graphblas/bsp1d/init.hpp
index 582479bf1..d5798d894 100644
--- a/include/graphblas/bsp1d/init.hpp
+++ b/include/graphblas/bsp1d/init.hpp
@@ -34,6 +34,7 @@
#include
#include
+#include <cstdint> //uintptr_t
 #include <cassert> //assertions
#include
diff --git a/include/graphblas/bsp1d/io.hpp b/include/graphblas/bsp1d/io.hpp
index edada4191..b8181041a 100644
--- a/include/graphblas/bsp1d/io.hpp
+++ b/include/graphblas/bsp1d/io.hpp
@@ -32,7 +32,6 @@
#include
#endif
-#include "graphblas/blas1.hpp" // for grb::size
#include "graphblas/nonzeroStorage.hpp"
// the below transforms an std::vector iterator into an ALP/GraphBLAS-compatible
@@ -976,20 +975,166 @@ namespace grb {
#ifdef _BSP1D_IO_DEBUG
std::cout << "Called grb::set( matrix, mask, value ) (BSP1D)\n";
#endif
+ // catch trivial cases
const size_t m = nrows( C );
const size_t n = ncols( C );
+ if( m == 0 || n == 0 ) { return SUCCESS; }
- // dynamic checks (I)
+ // dynamic checks
+ if( nrows( mask ) == 0 || ncols( mask ) == 0 ) {
+ return ILLEGAL;
+ }
if( m != nrows( mask ) || n != ncols( mask ) ) {
return MISMATCH;
}
- // catch trivial case
+#ifdef _BSP1D_IO_DEBUG
+ std::cout << "\t delegating to final backend\n";
+#endif
+ RC ret = SUCCESS;
+ // Take care that local matrices may be empty, even if the global matrix is
+ // not. Processes with empty local matrices will not delegate (no-op).
+ {
+ auto &local_C = internal::getLocal( C );
+ const auto &local_mask = internal::getLocal( mask );
+ const size_t local_m = nrows( local_C );
+ const size_t local_n = ncols( local_C );
+ assert( local_m == nrows( local_mask ) );
+ assert( local_n == ncols( local_mask ) );
+ if( local_m > 0 && local_n > 0 ) {
+ ret = set< descr >( local_C, local_mask, val, phase );
+ }
+ }
+
+ // in the structurally self-masked case, there is no way an error could occur
+ if( (descr & descriptors::structural) && getID( C ) == getID( mask ) ) {
+#ifdef _BSP1D_IO_DEBUG
+ std::cout << "\t structural self-masking detected, which allows trivial "
+ "exit\n"; // since the nnz nor capacity would never change
+#endif
+ assert( ret == SUCCESS );
+ return ret;
+ }
+
+ // in all other cases, in either mode (resize or execute), we must check for
+ // errors
+#ifdef _BSP1D_IO_DEBUG
+ std::cout << "\t all-reducing error code\n";
+#endif
+ if( collectives< BSP1D >::allreduce( ret, operators::any_or< RC >() )
+ != SUCCESS
+ ) {
+ return PANIC;
+ }
+
+#ifdef _BSP1D_IO_DEBUG
+ std::cout << "\t all-reduced error code is " << toString( ret ) << "\n";
+#endif
+ if( phase == RESIZE ) {
+ if( ret == SUCCESS ) {
+#ifdef _BSP1D_IO_DEBUG
+ std::cout << "\t resize phase detected -- synchronising capacity\n";
+#endif
+ ret = internal::updateCap( C );
+ if( ret != SUCCESS ) {
+ std::cerr << "Error updating capacity: " << toString( ret ) << "\n";
+ }
+ }
+ } else {
+ assert( phase == EXECUTE );
+ if( ret == SUCCESS ) {
+#ifdef _BSP1D_IO_DEBUG
+ std::cout << "\t execute phase detected -- synchronising nnz count\n";
+#endif
+ ret = internal::updateNnz( C );
+ if( ret != SUCCESS ) {
+ std::cerr << "Error updating output number of nonzeroes: "
+ << toString( ret ) << "\n";
+ }
+ } else if( ret == ILLEGAL ) {
+#ifdef _BSP1D_IO_DEBUG
+ std::cout << "\t delegate returns ILLEGAL, clearing output\n";
+#endif
+ const RC clear_rc = clear( C );
+ if( clear_rc != SUCCESS ) {
+ ret = PANIC;
+ }
+ } else {
+ if( ret != PANIC ) {
+ std::cerr << "Warning: unexpected error code in grb::set( matrix, mask, "
+ << "value ) (BSP1D). Please submit a bug report.\n";
+ }
+ assert( ret == PANIC );
+ }
+ }
+
+#ifdef _BSP1D_IO_DEBUG
+ std::cout << "\t done; returning " << toString( ret ) << "\n";
+#endif
+
+ // done
+ return ret;
+ }
+
+ /**
+ * The implementation can trivially rely on the final backend; however, the
+ * capacity or nonzero count of the output can in some cases differ. The
+ * implementation below mostly deals with that logic.
+ */
+ template<
+ Descriptor descr = descriptors::no_operation,
+ typename DataType, typename RIT1, typename CIT1, typename NIT1,
+ typename MaskType, typename RIT2, typename CIT2, typename NIT2,
+ typename ValueType = DataType, typename RIT3, typename CIT3, typename NIT3
+ >
+ RC set(
+ Matrix< DataType, BSP1D, RIT1, CIT1, NIT1 > &C,
+ const Matrix< MaskType, BSP1D, RIT2, CIT2, NIT2 > &mask,
+ const Matrix< ValueType, BSP1D, RIT3, CIT3, NIT3 > &A,
+ const Phase &phase = EXECUTE,
+ const typename std::enable_if<
+ !grb::is_object< DataType >::value &&
+ !grb::is_object< ValueType >::value &&
+ !grb::is_object< MaskType >::value
+ >::type * const = nullptr
+ ) noexcept {
+ // static checks
+ NO_CAST_ASSERT( ( !(descr & descriptors::no_casting) ||
+ std::is_same< ValueType, DataType >::value
+ ), "grb::set( matrix, mask, matrix ) (BSP1D)",
+ "called with non-matching value types"
+ );
+ NO_CAST_ASSERT(
+ ( !(descr & descriptors::no_casting) ||
+ std::is_same< MaskType, bool >::value ),
+ "grb::set( matrix, mask, matrix ) (BSP1D)",
+ "called with non-Boolean mask value type"
+ );
+ static_assert( !( (descr & descriptors::structural) &&
+ (descr & descriptors::invert_mask)
+ ), "grb::set( matrix, mask, matrix ) (BSP1D): Primitives with matrix "
+ "outputs may not employ structurally inverted masking"
+ );
+#ifdef _BSP1D_IO_DEBUG
+ std::cout << "Called grb::set( matrix, mask, matrix ) (BSP1D)\n";
+#endif
+ const size_t m = nrows( C );
+ const size_t n = ncols( C );
+
+ // dynamic checks (I)
+ if( m != nrows( A ) || n != ncols( A ) ) {
+ return MISMATCH;
+ }
+
+ // catch trivial cases
if( m == 0 || n == 0 ) { return SUCCESS; }
+ if( nrows( mask ) == 0 || ncols( mask ) == 0 ) {
+ return set< descr >( C, A, phase );
+ }
// dynamic checks (II)
- if( nrows( mask ) == 0 || ncols( mask ) == 0 ) {
- return ILLEGAL;
+ if( m != nrows( mask ) || n != ncols( mask ) ) {
+ return MISMATCH;
}
#ifdef _BSP1D_IO_DEBUG
@@ -1000,13 +1145,16 @@ namespace grb {
// not. Processes with empty local matrices will not delegate (no-op).
{
auto &local_C = internal::getLocal( C );
+ const auto &local_A = internal::getLocal( A );
const auto &local_mask = internal::getLocal( mask );
const size_t local_m = nrows( local_C );
const size_t local_n = ncols( local_C );
assert( local_m == nrows( local_mask ) );
assert( local_n == ncols( local_mask ) );
+ assert( local_m == nrows( local_A ) );
+ assert( local_n == ncols( local_A ) );
if( local_m > 0 && local_n > 0 ) {
- ret = set< descr >( local_C, local_mask, val, phase );
+ ret = set< descr >( local_C, local_mask, local_A, phase );
}
}
@@ -1625,8 +1773,9 @@ namespace grb {
// a pipeline depth of 2 is sufficient.
constexpr size_t iteration_overlaps = 2;
- const std::unique_ptr< size_t > first_nnz_per_thread(
- new size_t[ num_threads * iteration_overlaps ]()
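+ // note: a custom array deleter is required since the allocation below uses
+ // new[]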
+ const std::unique_ptr< size_t, void(*)(size_t * const) > first_nnz_per_thread(
+ new size_t[ num_threads * iteration_overlaps ](),
+ [](size_t * const array){ delete [] array; }
);
size_t * const first_nnz_per_thread_ptr = first_nnz_per_thread.get();
outgoing.resize( data.P );
diff --git a/include/graphblas/descriptors.hpp b/include/graphblas/descriptors.hpp
index cd0a5ab25..2c0a793ae 100644
--- a/include/graphblas/descriptors.hpp
+++ b/include/graphblas/descriptors.hpp
@@ -62,7 +62,20 @@ namespace grb {
*/
static constexpr Descriptor no_operation = 0;
- /** Inverts the mask prior to applying it. */
+ /**
+ * Inverts the mask prior to applying it.
+ *
+ * Even with this descriptor applied, a sparse mask may still only generate
+ * output where the mask has elements. Whether output is generated at a given
+ * mask element is decided by inverting that element's value.
+ *
+ * If instead the structural complement is to be taken as a mask, this
+ * descriptor must be combined with #grb::descriptors::structural.
+ * ALP/GraphBLAS forbids taking the structural inverse of matrix masks
+ * (because then either the output matrix or the mask matrix has
+ * \f$ mn \f$ values, which defeats any useful application of
+ * GraphBLAS as this signifies one of the containers is, in fact, not sparse).
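+ *
+ * \par Example
+ * \parblock
+ * An illustrative sketch (the vector operands \a y and \a mask and the value
+ * \a val are hypothetical):
+ * \code
+ * // copies val into y only at coordinates where mask has an entry that
+ * // evaluates false
+ * grb::set< grb::descriptors::invert_mask >( y, mask, val );
+ * \endcode
+ * \endparblock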
+ */
static constexpr Descriptor invert_mask = 1;
/**
@@ -98,6 +111,11 @@ namespace grb {
* i-th index, regardless of how that value evaluates. It evaluates false
* if there was no value assigned.
*
+ * These semantics are inverted when this descriptor is combined with
+ * #grb::descriptors::invert_mask: in that case, the mask evaluates true at
+ * index \f$ i \f$ if the mask had no value at that index; and evaluates false
+ * otherwise.
+ *
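+ * \par Example
+ * \parblock
+ * An illustrative sketch (the vector operands are hypothetical; recall that
+ * matrix masks may not be structurally inverted):
+ * \code
+ * // writes val to y only at coordinates where mask has no entry
+ * grb::set<
+ *     grb::descriptors::structural | grb::descriptors::invert_mask
+ * >( y, mask, val );
+ * \endcode
+ * \endparblock
+ *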
* @see structural_complement
*/
static constexpr Descriptor structural = 8;
@@ -113,6 +131,10 @@ namespace grb {
* This ignores the actual values of the mask argument. The i-th element of
* the mask now evaluates true if the mask has \em no value assigned to its
* i-th index, and evaluates false otherwise.
+ *
+ * The application of this descriptor is forbidden for matrix mask arguments,
+ * as otherwise either the output or the mask is dense or is (too) close to
+ * being dense.
*/
static constexpr Descriptor structural_complement = structural | invert_mask;
diff --git a/include/graphblas/hyperdags/hyperdags.hpp b/include/graphblas/hyperdags/hyperdags.hpp
index 2ae951565..b4a650c0e 100644
--- a/include/graphblas/hyperdags/hyperdags.hpp
+++ b/include/graphblas/hyperdags/hyperdags.hpp
@@ -381,6 +381,8 @@ namespace grb {
SET_MATRIX_MATRIX_INPUT2,
+ SET_MATRIX_MATRIX_MASKED,
+
MXM_MATRIX_MATRIX_MATRIX_SEMIRING,
MXM_MATRIX_MATRIX_MATRIX_MONOID,
@@ -503,7 +505,7 @@ namespace grb {
};
/** \internal How many operation vertex types exist. */
- const constexpr size_t numOperationVertexTypes = 111;
+ const constexpr size_t numOperationVertexTypes = 112;
/** \internal An array of all operation vertex types. */
const constexpr enum OperationVertexType
diff --git a/include/graphblas/hyperdags/io.hpp b/include/graphblas/hyperdags/io.hpp
index 4ec81917b..99711f0bb 100644
--- a/include/graphblas/hyperdags/io.hpp
+++ b/include/graphblas/hyperdags/io.hpp
@@ -28,6 +28,10 @@
#include
+#ifdef _DEBUG
+ #define _DEBUG_HYPERDAGS_IO
+#endif
+
namespace grb {
@@ -440,6 +444,110 @@ namespace grb {
return ret;
}
+ /**
+ * This function inherits the performance semantics of the underlying backend.
+ */
+ template<
+ Descriptor descr = descriptors::no_operation,
+ typename OutputType, typename MaskType, typename InputType,
+ typename RIT1, typename CIT1, typename NIT1,
+ typename RIT2, typename CIT2, typename NIT2,
+ typename RIT3, typename CIT3, typename NIT3
+ >
+ RC set(
+ Matrix< OutputType, hyperdags, RIT1, CIT1, NIT1 > &C,
+ const Matrix< MaskType, hyperdags, RIT2, CIT2, NIT2 > &M,
+ const Matrix< InputType, hyperdags, RIT3, CIT3, NIT3 > &A,
+ const Phase &phase = EXECUTE
+ ) {
+#ifdef _DEBUG_HYPERDAGS_IO
+ std::cout << "Entering set(matrix, matrix, matrix), hyperdags backend\n";
+#endif
+ // first, check for dynamic errors
+ const size_t m = nrows( C );
+ const size_t n = ncols( C );
+ if( m != nrows( A ) || n != ncols( A ) ) {
+#ifdef _DEBUG_HYPERDAGS_IO
+ std::cerr << "\t set( matrix, matrix, matrix ): dimension mismatch (I)\n";
+#endif
+ return MISMATCH;
+ }
+ if( m != nrows( M ) && nrows( M ) != 0 ) {
+#ifdef _DEBUG_HYPERDAGS_IO
+ std::cerr << "\t set( matrix, matrix, matrix ): dimension mismatch (II)\n";
+#endif
+ return MISMATCH;
+ }
+ if( n != ncols( M ) && ncols( M ) != 0 ) {
+#ifdef _DEBUG_HYPERDAGS_IO
+ std::cerr << "\t set( matrix, matrix, matrix ): dimension mismatch (III)\n";
+#endif
+ return MISMATCH;
+ }
+ // second, check for trivial op
+ if( m == 0 || n == 0 ) {
+#ifdef _DEBUG_HYPERDAGS_IO
+ std::cerr << "\t WARNING set( matrix, matrix, matrix ), hyperdags: "
+ << "trivial op detected (all containers empty). No operation will be "
+ << "recorded\n";
+#endif
+ return SUCCESS;
+ }
+ // third, execute
+#ifdef _DEBUG_HYPERDAGS_IO
+ std::cerr << "\t set( matrix, matrix, matrix ), hyperdags: forwarding to "
+ << "execution backend\n";
+#endif
+ const RC ret = set< descr >(
+ internal::getMatrix( C ), internal::getMatrix( M ),
+ internal::getMatrix( A ), phase
+ );
+ // fourth, forward any errors
+ if( ret != SUCCESS ) { return ret; }
+ if( phase != EXECUTE ) { return ret; }
+ // fifth, record operation
+#ifdef _DEBUG_HYPERDAGS_IO
+ std::cerr << "\t set( matrix, matrix, matrix ), hyperdags: execution had no "
+ << "error; recording operation\n";
+#endif
+ std::array< const void *, 0 > sourcesP{};
+ std::array< uintptr_t, 1 > destinations{ getID( internal::getMatrix(C) ) };
+ if( nrows( M ) == 0 || ncols( M ) == 0 ) {
+#ifdef _DEBUG_HYPERDAGS_IO
+ std::cerr << "\t WARNING set( matrix, matrix, matrix ), hyperdags: "
+ << "empty mask detected; hyperDAG will not (cannot) record it\n";
+#endif
+ std::array< uintptr_t, 2 > sourcesC{
+ getID( internal::getMatrix(A) ),
+ getID( internal::getMatrix(C) )
+ };
+ internal::hyperdags::generator.addOperation(
+ internal::hyperdags::SET_MATRIX_MATRIX_MASKED,
+ sourcesP.begin(), sourcesP.end(),
+ sourcesC.begin(), sourcesC.end(),
+ destinations.begin(), destinations.end()
+ );
+ } else {
+ std::array< uintptr_t, 3 > sourcesC{
+ getID( internal::getMatrix(A) ),
+ getID( internal::getMatrix(M) ),
+ getID( internal::getMatrix(C) )
+ };
+ internal::hyperdags::generator.addOperation(
+ internal::hyperdags::SET_MATRIX_MATRIX_MASKED,
+ sourcesP.begin(), sourcesP.end(),
+ sourcesC.begin(), sourcesC.end(),
+ destinations.begin(), destinations.end()
+ );
+ }
+
+ // done
+#ifdef _DEBUG_HYPERDAGS_IO
+ std::cerr << "\t set( matrix, matrix, matrix ), hyperdags: exiting\n";
+#endif
+ return ret;
+ }
+
/**
* This function inherits the performance semantics of the underlying backend.
*/
diff --git a/include/graphblas/nonblocking/io.hpp b/include/graphblas/nonblocking/io.hpp
index 0831fc564..963846251 100644
--- a/include/graphblas/nonblocking/io.hpp
+++ b/include/graphblas/nonblocking/io.hpp
@@ -1227,6 +1227,42 @@ namespace grb {
internal::getRefMatrix( C ), internal::getRefMatrix( A ), val, phase );
}
+
+ template<
+ Descriptor descr = descriptors::no_operation,
+ typename OutputType, typename MaskType, typename InputType,
+ typename RIT1, typename CIT1, typename NIT1,
+ typename RIT2, typename CIT2, typename NIT2,
+ typename RIT3, typename CIT3, typename NIT3
+ >
+ RC set(
+ Matrix< OutputType, nonblocking, RIT1, CIT1, NIT1 > &C,
+ const Matrix< MaskType, nonblocking, RIT2, CIT2, NIT2 > &M,
+ const Matrix< InputType, nonblocking, RIT3, CIT3, NIT3 > &A,
+ const Phase &phase = EXECUTE
+ ) noexcept {
+ if( internal::NONBLOCKING::warn_if_not_native &&
+ config::PIPELINE::warn_if_not_native
+ ) {
+ std::cerr << "Warning: masked set (nonblocking) currently delegates "
+ << "to a blocking implementation.\n"
+ << " Further similar such warnings will be suppressed.\n";
+ internal::NONBLOCKING::warn_if_not_native = false;
+ }
+
+ // nonblocking execution is not supported
+ // first, execute any computation that is not completed
+ internal::le.execution();
+
+ // second, delegate to the reference backend
+ return set< descr >(
+ internal::getRefMatrix( C ),
+ internal::getRefMatrix( M ),
+ internal::getRefMatrix( A ),
+ phase
+ );
+ }
+
template<
Descriptor descr = descriptors::no_operation,
typename InputType,
diff --git a/include/graphblas/phase.hpp b/include/graphblas/phase.hpp
index 1f12b8f05..044395ac9 100644
--- a/include/graphblas/phase.hpp
+++ b/include/graphblas/phase.hpp
@@ -236,7 +236,7 @@ namespace grb {
* execute phase.
*
* If, instead, the output container capacity was found to be insufficient,
- * then the requested operation may return #grb::FAILED, in which case the
+ * then the requested operation may return #grb::ILLEGAL, in which case the
* contents of output containers shall be cleared.
*
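+ *
+ * \par Example
+ * \parblock
+ * An illustrative sketch (the operands and the choice of #grb::set are
+ * hypothetical; any primitive with container output behaves likewise):
+ * \code
+ * grb::RC rc = grb::set( C, mask, A ); // execute phase (default)
+ * if( rc == grb::ILLEGAL ) {
+ *     // C was cleared; retry after an explicit resize phase
+ *     rc = grb::set( C, mask, A, grb::RESIZE );
+ *     if( rc == grb::SUCCESS ) {
+ *         rc = grb::set( C, mask, A, grb::EXECUTE );
+ *     }
+ * }
+ * \endcode
+ * \endparblock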
* \note That on failure a primitive called using the execute phase may
diff --git a/include/graphblas/reference/blas3.hpp b/include/graphblas/reference/blas3.hpp
index 054d945dd..3cffb588a 100644
--- a/include/graphblas/reference/blas3.hpp
+++ b/include/graphblas/reference/blas3.hpp
@@ -579,432 +579,6 @@ namespace grb {
return SUCCESS;
}
-#ifndef _H_GRB_REFERENCE_OMP_BLAS3
- /**
- * Meta-data for global buffer management for use with #grb::mxm.
- *
- * This meta-data is the same for both the sequential (reference) and shared-
- * memory parallel (reference_omp) backends.
- *
- * This class contains all meta-data necessary to interpret the global buffer
- * as an array of sparse accumulators (SPAs). The length of the array is given
- * by a call to #threads(), minus one. It is called that since a call to
- * #threads() retrieves how many threads can be used to process the call to
- * #grb::mxm.
- *
- * Each SPA has the layout (bitarray, stack, valueArray). These are packed in a
- * padded byte array, such that each bit array, stack, and value array is
- * aligned on sizeof(int) bytes.
- *
- * @tparam NIT The nonzero index type.
- * @tparam ValueType The output matrix value type.
- */
- template< typename NIT, typename ValueType >
- class MXM_BufferMetaData {
-
- static_assert( sizeof(NIT) % sizeof(int) == 0, "Unsupported type for NIT; "
- "please submit a bug report!" );
-
- private:
-
- /** The size of the offset array */
- size_t m;
-
- /** The size of the SPA */
- size_t n;
-
- /** The number of threads supported during a call to #grb::mxm */
- size_t nthreads;
-
- /** The initial buffer offset */
- size_t bufferOffset;
-
- /** The size of a single SPA, including bytes needed for padding */
- size_t paddedSPASize;
-
- /** The number of bytes to pad the SPA array with */
- size_t arrayShift;
-
- /** The number of bytes to pad the SPA stack with */
- size_t stackShift;
-
- /**
- * Given a number of used bytes of the buffer, calculate the available
- * remainder buffer and return it.
- *
- * @param[in] osize The size of the buffer (in bytes) that is already
- * in use.
- * @param[out] remainder Pointer to any remainder buffer.
- * @param[out] rsize The size of the remainder buffer.
- *
- * If no buffer space is left, \a remainder will be set to nullptr
- * and \a size to 0.
- */
- void retrieveRemainderBuffer(
- const size_t osize,
- void * &remainder, size_t &rsize
- ) const noexcept {
- const size_t size = internal::template getCurrentBufferSize< char >();
- char * rem = internal::template getReferenceBuffer< char >( size );
- size_t rsize_calc = size - osize;
- rem += osize;
- const size_t mod = reinterpret_cast< uintptr_t >(rem) % sizeof(int);
- if( mod ) {
- const size_t shift = sizeof(int) - mod;
- if( rsize_calc >= shift ) {
- rsize_calc -= shift;
- rem += rsize;
- } else {
- rsize_calc = 0;
- rem = nullptr;
- }
- }
- assert( !(reinterpret_cast< uintptr_t >(rem) % sizeof(int)) );
- // write out
- remainder = rem;
- rsize = rsize_calc;
- }
-
-
- public:
-
- /**
- * Base constructor.
- *
- * @param[in] _m The length of the offset array.
- * @param[in] _n The length of the SPA.
- * @param[in] max_threads The maximum number of threads.
- *
- * \note \a max_threads is a separate input since there might be a need to
- * cap the maximum number of threads used based on some analytic
- * performance model. Rather than putting such a performance model
- * within this class, we make it an obligatory input parameter
- * instead.
- *
- * \note It is always valid to pass config::OMP::threads().
- *
- * \note This class \em will, however, cap the number of threads returned
- * to \a _n.
- */
- MXM_BufferMetaData(
- const size_t _m, const size_t _n,
- const size_t max_threads
- ) : m( _m ), n( _n ), arrayShift( 0 ), stackShift( 0 ) {
- #ifdef _DEBUG_REFERENCE_BLAS3
- #pragma omp critical
- std::cout << "\t\t\t computing padded buffer size for a SPA of length "
- << n << " while leaving space for an additional offset buffer of length "
- << std::max( m, n ) << "...\n";
- #endif
- // compute bufferOffset
- bufferOffset = (std::max( m, n ) + 1) * sizeof( NIT );
-
- // compute value buffer size
- const size_t valBufSize = n * sizeof( ValueType );
-
- #ifdef _DEBUG_REFERENCE_BLAS3
- std::cout << "\t\t\t\t bit-array size has byte-size " <<
- internal::Coordinates< reference >::arraySize( n ) << "\n";
- std::cout << "\t\t\t\t stack has byte-size " <<
- internal::Coordinates< reference >::stackSize( n ) << "\n";
- std::cout << "\t\t\t\t value buffer has byte-size " << valBufSize << "\n";
- #endif
-
- // compute paddedSPASize
- paddedSPASize =
- internal::Coordinates< reference >::arraySize( n ) +
- internal::Coordinates< reference >::stackSize( n ) +
- valBufSize;
- size_t shift =
- internal::Coordinates< reference >::arraySize( n ) % sizeof(int);
- if( shift != 0 ) {
- arrayShift = sizeof(int) - shift;
- paddedSPASize += arrayShift;
- }
- shift = internal::Coordinates< reference >::stackSize( n ) % sizeof(int);
- if( shift != 0 ) {
- stackShift = sizeof(int) - shift;
- paddedSPASize += stackShift;
- }
- shift = valBufSize % sizeof(int);
- if( shift != 0 ) {
- paddedSPASize += (sizeof(int) - shift);
- }
-
- // pad bufferOffset
- shift = bufferOffset % sizeof(int);
- if( shift != 0 ) {
- bufferOffset += (sizeof(int) - shift);
- }
-
- // compute free buffer size
- const size_t freeBufferSize = internal::getCurrentBufferSize< char >() -
- bufferOffset;
-
- // compute max number of threads
- nthreads = 1 + freeBufferSize / paddedSPASize;
- #ifdef _DEBUG_REFERENCE_BLAS3
- #pragma omp critical
- std::cout << "\t\t\t free buffer size: " << freeBufferSize
- << ", (padded) SPA size: " << paddedSPASize
- << " -> supported #threads: " << nthreads << ". "
- << " The shifts for the bit-array and the stack are " << arrayShift
- << ", respectively, " << stackShift << "."
- << "\n";
- #endif
- // cap the final number of selected threads
- if( nthreads > max_threads ) {
- nthreads = max_threads;
- }
- if( nthreads > n ) {
- nthreads = n;
- }
- }
-
- /** @returns The maximum number of supported threads during #grb::mxm */
- size_t threads() const noexcept {
- return nthreads;
- }
-
- /**
- * Requests and returns a global buffer required for a thread-local SPA.
- *
- * @param[in] t The thread ID. Must be larger than 0.
- *
- * \note Thread 0 employs the SPA allocated with the output matrix.
- *
- * @returns Pointer into the global buffer starting at the area reserved for
- * the SPA of thread \a t.
- */
- char * getSPABuffers( size_t t ) const noexcept {
- assert( t > 0 );
- (void) --t;
- char * raw = internal::template getReferenceBuffer< char >(
- bufferOffset + nthreads * paddedSPASize );
- assert( reinterpret_cast< uintptr_t >(raw) % sizeof(int) == 0 );
- raw += bufferOffset;
- assert( reinterpret_cast< uintptr_t >(raw) % sizeof(int) == 0 );
- raw += t * paddedSPASize;
- return raw;
- }
-
- /**
- * Retrieves the column offset buffer.
- *
- * @param[out] remainder Returns any remainder buffer beyond that of the row
- * offset buffer.
- * @param[out] rsize The remainder buffer size \a remainder points to.
- *
- * If \a remainder is not a nullptr then neither should \a rsize,
- * and vice versa.
- *
- * Retrieving any remainder buffer is optional. The default is to not ask
- * for them.
- *
- * \warning If all buffer memory is used for the column offsets, it may be
- * that \a remainder equals nullptr and rsize
- * zero.
- *
- * \warning This buffer is only guaranteed exclusive if only the retrieved
- * column buffer is used. In particular, if also requesting (and
- * using) SPA buffers, the remainder buffer area is shared with
- * those SPA buffers, and data races are likely to occur. In other
- * words: be very careful with any use of these remainder buffers.
- *
- * @returns The column offset buffer.
- *
- * \warning This buffer overlaps with the CRS offset buffer. The caller
- * must ensure to only ever use one at a time.
- */
- NIT * getColOffsetBuffer(
- void * * const remainder = nullptr,
- size_t * const rsize = nullptr
- ) const noexcept {
- NIT * const ret = internal::template getReferenceBuffer< NIT >( n + 1 );
- if( remainder != nullptr || rsize != nullptr ) {
- assert( remainder != nullptr && rsize != nullptr );
- retrieveRemainderBuffer( (m + 1) * sizeof(NIT), *remainder, *rsize );
- }
- return ret;
- }
-
- /**
- * Retrieves the row offset buffer.
- *
- * @param[out] remainder Returns any remainder buffer beyond that of the row
- * offset buffer.
- * @param[out] rsize The remainder buffer size \a remainder points to.
- *
- * If \a remainder is not a nullptr then neither should \a rsize,
- * and vice versa.
- *
- * Retrieving any remainder buffer is optional. The default is to not ask
- * for them.
- *
- * \warning If all buffer memory is used for the row offsets, it may be that
- * \a remainder equals nullptr and rsize zero.
- *
- * \warning This buffer is only guaranteed exclusive if only the retrieved
- * row buffer is used. In particular, if also requesting (and
- * using) SPA buffers, the remainder buffer area is shared with
- * those SPA buffers, and data races are likely to occur. In other
- * words: be very careful with any use of these remainder buffers.
- *
- * @returns The row offset buffer.
- *
- * \warning This buffer overlaps with the CCS offset buffer. The caller
- * must ensure to only ever use one at a time.
- */
- NIT * getRowOffsetBuffer(
- void * * const remainder = nullptr,
- size_t * const rsize = nullptr
- ) const noexcept {
- NIT * const ret = internal::template getReferenceBuffer< NIT >( m + 1 );
- if( remainder != nullptr || rsize != nullptr ) {
- assert( remainder != nullptr && rsize != nullptr );
- retrieveRemainderBuffer( (m + 1) * sizeof(NIT), *remainder, *rsize );
- }
- return ret;
- }
-
- /**
- * Shifts a pointer into the global buffer by the bit-array size and its
- * padding.
- *
- * @param[in,out] raw On input: an aligned pointer into the global buffer.
- * On output: an aligned pointer past the bit-array
- * position.
- */
- void applyArrayShift( char * &raw ) const noexcept {
- const size_t totalShift =
- internal::Coordinates< reference >::arraySize( n ) +
- arrayShift;
- #ifdef _DEBUG_REFERENCE_BLAS3
- std::cout << "\t\t\t shifting input pointer with "
- << internal::Coordinates< reference >::arraySize( n ) << " + "
- << arrayShift << " = " << totalShift << "bytes \n";
- #endif
- raw += totalShift;
- }
-
- /**
- * Shifts a pointer into the global buffer by the stack size and its
- * padding.
- *
- * @param[in,out] raw On input: an aligned pointer into the global buffer.
- * On output: an aligned pointer past the stack position.
- */
- void applyStackShift( char * &raw ) const noexcept {
- const size_t totalShift =
- internal::Coordinates< reference >::stackSize( n ) +
- stackShift;
- #ifdef _DEBUG_REFERENCE_BLAS3
- std::cout << "\t\t\t shifting input pointer with "
- << internal::Coordinates< reference >::arraySize( n ) << " + "
- << stackShift << " = " << totalShift << "bytes \n";
- #endif
- raw += totalShift;
- }
-
- };
-#endif
-
- /**
- * Retrieves the SPA buffers for the calling thread.
- *
- * \warning This function must be called from within an OpenMP parallel
- * section.
- *
- * @param[out] arr Where the bit-array may be located.
- * @param[out] buf Where the stack may be located.
- * @param[out] valbuf Where the value buffer may be located.
- *
- * All above pointers are aligned on sizeof(int) bytes.
- *
- * @param[in] md Meta-data for global buffer management.
- * @param[in] C The output matrix.
- *
- * One thread uses the buffers pre-allocated with the matrix \a C, thus
- * ensuring at least one thread may perform the #grb::mxm. Any remainder
- * threads can only help process the #grb::mxm if there is enough global
- * buffer memory available.
- *
- *
- * \note The global memory has size \f$ \Omega( \mathit{nz} ) \f$, which may
- * be several factors (or even asymptotically greater than)
- * \f$ \max\{ m, n \} \f$.
- *
- * \note In case the application stores multiple matrices, the global buffer
- * may additionally be greater than the above note indicates if at least
- * one of the other matrices is significantly (or asymptotically) larger
- * than the one involved with the #grb::mxm.
- */
- template<
- typename OutputType,
- typename RIT, typename CIT, typename NIT
- >
- void mxm_ompPar_getSPABuffers(
- char * &arr, char * &buf, OutputType * &valbuf,
- const struct MXM_BufferMetaData< NIT, OutputType > &md,
- Matrix< OutputType, reference, RIT, CIT, NIT > &C
- ) {
-#ifdef _H_GRB_REFERENCE_OMP_BLAS3
- // other threads use the global buffer to create additional SPAs
- {
- const size_t t = config::OMP::current_thread_ID();
- #ifndef NDEBUG
- const size_t T = config::OMP::current_threads();
- assert( t < T );
- #endif
- if( t > 0 ) {
- #ifdef _DEBUG_REFERENCE_BLAS3
- #pragma omp critical
- std::cout << "\t Thread " << t << " gets buffers from global buffer\n";
- #endif
- char * rawBuffer = md.getSPABuffers( t );
- assert( reinterpret_cast< uintptr_t >(rawBuffer) % sizeof(int) == 0 );
- arr = rawBuffer;
- #ifdef _DEBUG_REFERENCE_BLAS3
- #pragma omp critical
- #endif
- md.applyArrayShift( rawBuffer );
- assert( reinterpret_cast< uintptr_t >(rawBuffer) % sizeof(int) == 0 );
- buf = rawBuffer;
- #ifdef _DEBUG_REFERENCE_BLAS3
- #pragma omp critical
- #endif
- md.applyStackShift( rawBuffer );
- assert( reinterpret_cast< uintptr_t >(rawBuffer) % sizeof(int) == 0 );
- assert( buf != arr );
- valbuf = reinterpret_cast< OutputType * >(rawBuffer);
- assert( static_cast< void * >(valbuf) != static_cast< void * >(buf) );
- } else {
- #ifdef _DEBUG_REFERENCE_BLAS3
- #pragma omp critical
- std::cout << "\t Thread " << t << " gets buffers from matrix storage\n";
- #endif
- // one thread uses the standard matrix buffer
- internal::getMatrixBuffers( arr, buf, valbuf, 1, C );
- }
- #ifdef _DEBUG_REFERENCE_BLAS3
- #pragma omp critical
- {
- std::cout << "\t Thread " << t << " has SPA array @ "
- << static_cast< void * >( arr ) << " and SPA stack @ "
- << static_cast< void * >( buf ) << " and SPA values @ "
- << static_cast< void * >( valbuf ) << "\n";
- }
- #endif
- }
-#else
- #ifdef _DEBUG_REFERENCE_BLAS3
- std::cout << "\t Reference backend gets buffers from global buffer\n";
- #endif
- internal::getMatrixBuffers( arr, buf, valbuf, 1, C );
- (void) md;
-#endif
- }
-
/**
* Given a computed new row_start array, moves the old index and value arrays
* to new offsets. This leaves precisely enough space for the mxm algorithm to
@@ -1215,6 +789,7 @@ namespace grb {
// a basic analytic model based on the number of nonzeroes
size_t max_threads = config::OMP::threads();
+ assert( max_threads > 0 );
{
size_t target_nnz = 0;
if( phase == EXECUTE ) {
@@ -1226,7 +801,7 @@ namespace grb {
const size_t nnz_based_nthreads =
target_nnz / config::CACHE_LINE_SIZE::value();
if( nnz_based_nthreads < max_threads ) {
- max_threads = nnz_based_nthreads;
+ max_threads = nnz_based_nthreads > 0 ? nnz_based_nthreads : 1;
}
#ifdef _DEBUG_REFERENCE_BLAS3
std::cout << "\t simple analytic model selects max threads of "
@@ -1234,7 +809,7 @@ namespace grb {
#endif
}
- MXM_BufferMetaData< NIT, OutputType > bufferMD( m, n, max_threads );
+ SPA_BufferMetaData< NIT, OutputType > bufferMD( m, n, max_threads );
#ifdef _H_GRB_REFERENCE_OMP_BLAS3
// derive number of threads
@@ -1255,7 +830,7 @@ namespace grb {
char * arr = nullptr;
char * buf = nullptr;
OutputType * valbuf = nullptr;
- mxm_ompPar_getSPABuffers( arr, buf, valbuf, bufferMD, C );
+ spa_ompPar_getBuffers( arr, buf, valbuf, bufferMD, C );
// do count
size_t local_nzc;
@@ -1296,7 +871,7 @@ namespace grb {
char * arr = nullptr;
char * buf = nullptr;
OutputType * valbuf = nullptr;
- mxm_ompPar_getSPABuffers( arr, buf, valbuf, bufferMD, C );
+ spa_ompPar_getBuffers( arr, buf, valbuf, bufferMD, C );
#ifdef _DEBUG_REFERENCE_BLAS3
#ifdef _H_GRB_REFERENCE_OMP_BLAS3
#pragma omp critical
diff --git a/include/graphblas/reference/compressed_storage.hpp b/include/graphblas/reference/compressed_storage.hpp
index 95b67a903..2d40d18cc 100644
--- a/include/graphblas/reference/compressed_storage.hpp
+++ b/include/graphblas/reference/compressed_storage.hpp
@@ -1156,7 +1156,7 @@ namespace grb {
{}
/** Move constructor. */
- Compressed_Storage< void, IND, SIZE >( SelfType &&other ) {
+ Compressed_Storage( SelfType &&other ) {
moveFromOther( other );
}
diff --git a/include/graphblas/reference/io.hpp b/include/graphblas/reference/io.hpp
index f4f1b0709..b4af46eea 100644
--- a/include/graphblas/reference/io.hpp
+++ b/include/graphblas/reference/io.hpp
@@ -951,14 +951,15 @@ namespace grb {
);
static_assert(
( !(descr & descriptors::no_casting) ||
- ( A_is_mask && std::is_same< InputType2, OutputType >::value ) ),
+ ( A_is_mask && std::is_same< InputType2, OutputType >::value ) ||
+ ( !A_is_mask && std::is_same< InputType1, OutputType >::value ) ),
"grb::internal::set_copy called with non-matching value types. This is an "
"internal error. Please submit a bug report."
);
static_assert(
- !(descr & descriptors::invert_mask), "internal::grb::set_copy called with "
- "the invert_mask descriptor. This is an internal error; please submit a "
- "bug report."
+ !A_is_mask || !(descr & descriptors::invert_mask),
+ "internal::grb::set_copy called with the invert_mask descriptor. This is "
+ "an internal error; please submit a bug report."
);
// run-time checks
@@ -2028,6 +2029,274 @@ namespace grb {
}
}
+ template<
+ Descriptor descr = descriptors::no_operation,
+ typename OutputType, typename MaskType, typename InputType,
+ typename RIT1, typename CIT1, typename NIT1,
+ typename RIT2, typename CIT2, typename NIT2,
+ typename RIT3, typename CIT3, typename NIT3
+ >
+ RC set(
+ Matrix< OutputType, reference, RIT1, CIT1, NIT1 > &C,
+ const Matrix< MaskType, reference, RIT2, CIT2, NIT2 > &M,
+ const Matrix< InputType, reference, RIT3, CIT3, NIT3 > &A,
+ const Phase &phase = EXECUTE
+ ) noexcept {
+ // static checks
+ static_assert(
+ !std::is_void< InputType >::value ||
+ std::is_void< OutputType >::value, "grb::set( masked set to matrix ): "
+ "cannot have a pattern matrix as input unless the output is also a pattern "
+ "matrix"
+ );
+ static_assert(
+ std::is_convertible< InputType, OutputType >::value ||
+ std::is_void< OutputType >::value,
+ "grb::set (masked set to matrix): input type cannot be "
+ "converted to output type"
+ );
+ static_assert(
+ !((descr & descriptors::structural) && (descr & descriptors::invert_mask)),
+ "grb::set (masked set to matrix) may not be called with both the structural "
+ "and invert_mask descriptors set"
+ );
+ NO_CAST_ASSERT( ( !(descr & descriptors::no_casting) ||
+ std::is_same< InputType, OutputType >::value
+ ), "grb::set",
+ "called with non-matching value types"
+ );
+
+ // dynamic checks
+#ifdef _DEBUG_REFERENCE_IO
+ std::cout << "Called grb::set (matrix-to-matrix-masked, reference)\n";
+#endif
+ assert( phase != TRY );
+ const size_t nrows = grb::nrows( C );
+ const size_t ncols = grb::ncols( C );
+ const size_t m = grb::nrows( M );
+ const size_t n = grb::ncols( M );
+
+ // check for trivial dispatch first (otherwise the below checks fail when they
+ // should not)
+ if( m == 0 || n == 0 ) {
+#ifdef _DEBUG_REFERENCE_IO
+ std::cout << "\t delegating to unmasked matrix-to-matrix set, reference\n";
+#endif
+ // If the mask is empty, ignore it
+ return set< descr >( C, A, phase );
+ }
+
+ // dynamic checks, continued
+ if( nrows != grb::nrows( A ) || nrows != m ) {
+ return MISMATCH;
+ }
+
+ if( ncols != grb::ncols( A ) || ncols != n ) {
+ return MISMATCH;
+ }
+
+ // go for implementation, preliminaries:
+ size_t nzc = 0;
+ const auto &A_raw = internal::getCRS( A );
+ const auto &mask_raw = internal::getCRS( M );
+ // we now have one guaranteed SPA (the one allocated with C). We now check
+ // how many more SPAs we would ideally like (for reference_omp), and then
+ // go about trying to get those. If, finally, we get just this one SPA, we
+ // will go into the below mostly-sequential code (big-Omega( nrows ) time):
+#ifdef _H_GRB_REFERENCE_OMP_IO
+ const size_t nnz_based_nthreads = std::max( config::OMP::threads(),
+ grb::nnz( A ) / config::CACHE_LINE_SIZE::value() );
+ grb::internal::SPA_BufferMetaData< NIT1, OutputType > bufferMD( m, n,
+ nnz_based_nthreads );
+ const size_t nthreads = bufferMD.threads();
+ std::cout << "\t set( matrix, matrix, matrix ) will use " << nthreads
+ << " threads\n";
+#else
+ const size_t nthreads = 1;
+#endif
+ if( nthreads == 1 ) {
+ char * arr = nullptr;
+ char * buf = nullptr;
+ OutputType * valbuf = nullptr;
+ internal::Coordinates< reference > coors;
+ internal::getMatrixBuffers( arr, buf, valbuf, 1, C );
+ coors.set( arr, false, buf, ncols );
+ for( size_t i = 0; i < nrows; ++i ) {
+ coors.clear();
+ for( auto k = mask_raw.col_start[ i ]; k < mask_raw.col_start[ i + 1 ]; ++k ) {
+ const auto k_col = mask_raw.row_index[ k ];
+ if( utils::interpretMatrixMask< descr, MaskType >( true, mask_raw.getValues(), k ) ) {
+ coors.assign( k_col );
+ }
+ }
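+ // note: the SPA (coors) is only read in the below loop, hence the count
+ // may safely be parallelised using a reduction on nzc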
+#ifdef _H_GRB_REFERENCE_OMP_IO
+ #pragma omp parallel for reduction( +: nzc ) \
+ schedule( dynamic, config::CACHE_LINE_SIZE::value() )
+#endif
+ for( auto k = A_raw.col_start[ i ]; k < A_raw.col_start[ i + 1 ]; ++k ) {
+ const auto k_col = A_raw.row_index[ k ];
+ if( coors.assigned( k_col ) ) {
+ (void) ++nzc;
+ }
+ }
+ }
+ } else {
+#ifdef _H_GRB_REFERENCE_OMP_IO
+ #pragma omp parallel num_threads( nthreads ) reduction( +: nzc )
+ {
+ // get thread-local buffers
+ size_t local_nz = 0;
+ char * arr = nullptr;
+ char * buf = nullptr;
+ OutputType * valbuf = nullptr;
+ internal::spa_ompPar_getBuffers( arr, buf, valbuf, bufferMD, C );
+ internal::Coordinates< reference > coors;
+ coors.set_seq( arr, false, buf, n );
+ // follow dynamic schedule since we cannot predict sparsity structure
+ #pragma omp for schedule( dynamic, config::CACHE_LINE_SIZE::value() )
+ for( size_t i = 0; i < nrows; ++i ) {
+ coors.clear();
+ for( auto k = mask_raw.col_start[ i ]; k < mask_raw.col_start[ i + 1 ]; ++k ) {
+ const auto k_col = mask_raw.row_index[ k ];
+ if( utils::interpretMatrixMask< descr, MaskType >( true, mask_raw.getValues(), k ) ) {
+ coors.assign( k_col );
+ }
+ }
+ for( auto k = A_raw.col_start[ i ]; k < A_raw.col_start[ i + 1 ]; ++k ) {
+ const auto k_col = A_raw.row_index[ k ];
+ if( coors.assigned( k_col ) ) {
+ (void) ++local_nz;
+ }
+ }
+ }
+ nzc += local_nz;
+ }
+#else
+ const bool code_path_should_not_be_reached = false;
+ std::cerr << "\t logic error in grb::set( matrix, matrix, matrix ): "
+ << "code path should not reach here. Please submit a bug report\n";
+ assert( code_path_should_not_be_reached );
+ #ifdef NDEBUG
+ (void) code_path_should_not_be_reached;
+ #endif
+#endif
+ }
+
+ // we now have a count. If we're in the resize phase that means we're done:
+ if( phase == RESIZE ) {
+ return resize( C, nzc );
+ }
+
+ // otherwise, we now compute the output. We start with checking capacity
+ assert( phase == EXECUTE );
+ if( capacity( C ) < nzc ) {
+#ifdef _DEBUG
+ std::cout << "\t insufficient capacity to complete "
+ "requested masked set matrix to matrix computation\n";
+#endif
+ const RC clear_rc = clear( C );
+ if( clear_rc != SUCCESS ) {
+ return PANIC;
+ } else {
+ return ILLEGAL;
+ }
+ }
+
+ // get output CRS and CCS structures
+ // TODO: check for crs_only descriptor
+ auto &CRS_raw = internal::getCRS( C );
+ auto &CCS_raw = internal::getCCS( C );
+ config::NonzeroIndexType * C_col_index = internal::template
+ getReferenceBuffer< typename config::NonzeroIndexType >( ncols + 1 );
+ CRS_raw.col_start[ 0 ] = 0;
+
+#ifdef _H_GRB_REFERENCE_OMP_IO
+ // TODO ALPify the below
+ #pragma omp parallel for simd
+#endif
+ for( size_t j = 0; j <= ncols; ++j ) {
+ CCS_raw.col_start[ j ] = 0;
+ C_col_index[ j ] = 0;
+ }
+
+ //TODO: revise the below, WIP
+ char * arr = nullptr;
+ char * buf = nullptr;
+ MaskType * valbuf = nullptr;
+ internal::Coordinates< reference > coors;
+ internal::getMatrixBuffers( arr, buf, valbuf, 1, M );
+ coors.set( arr, false, buf, ncols );
+
+ // do counting sort, phase 1 -- also this loop should employ the same
+ // parallelisation strategy during counting
+ nzc = 0;
+ for( size_t i = 0; i < nrows; ++i ) {
+ coors.clear();
+ for( auto k = mask_raw.col_start[ i ]; k < mask_raw.col_start[ i + 1 ]; ++k ) {
+ const auto k_col = mask_raw.row_index[ k ];
+ if( utils::interpretMatrixMask< descr, MaskType >( true, mask_raw.getValues(), k ) ) {
+ coors.assign( k_col );
+ }
+ }
+ for( auto k = A_raw.col_start[ i ]; k < A_raw.col_start[ i + 1 ]; ++k ) {
+ const auto k_col = A_raw.row_index[ k ];
+ if( coors.assigned( k_col ) ) {
+ (void) nzc++;
+ (void) (CCS_raw.col_start[ k_col + 1 ])++;
+ }
+ }
+ CRS_raw.col_start[ i + 1 ] = nzc;
+ }
+
+ // TODO this is a prefix sum -- use the OMP utility function here to
+ // parallelise it
+ for( size_t j = 1; j < ncols; ++j ) {
+ CCS_raw.col_start[ j + 1 ] += CCS_raw.col_start[ j ];
+ }
+
+ // do counting sort, phase 2 -- use previously computed CCS offset array to
+ // update CCS during the computational phase. Also this loop should employ
+ // the same (multiple-SPA) parallelisation strategy as above
+ nzc = 0;
+ for( size_t i = 0; i < nrows; ++i ) {
+ coors.clear();
+ for( auto k = mask_raw.col_start[ i ]; k < mask_raw.col_start[ i + 1 ]; ++k ) {
+ const auto k_col = mask_raw.row_index[ k ];
+ if( utils::interpretMatrixMask< descr, MaskType >(
+ true, mask_raw.getValues(), k )
+ ) {
+ coors.assign( k_col );
+ }
+ }
+ for( auto k = A_raw.col_start[ i ]; k < A_raw.col_start[ i + 1 ]; ++k ) {
+ const auto k_col = A_raw.row_index[ k ];
+ if( coors.assigned( k_col ) ) {
+ constexpr int zero = 0;
+ CRS_raw.row_index[ nzc ] = k_col;
+ CRS_raw.setValue( nzc, A_raw.getValue( k, zero ) );
+ const size_t CCS_index = C_col_index[ k_col ] + CCS_raw.col_start[ k_col ];
+ (void) C_col_index[ k_col ]++;
+ CCS_raw.row_index[ CCS_index ] = i;
+ CCS_raw.setValue( CCS_index, A_raw.getValue( k, zero ) );
+ (void) nzc++;
+ }
+ }
+ }
+#ifndef NDEBUG
+ #ifdef _H_GRB_REFERENCE_OMP_IO
+ #pragma omp parallel for schedule( static, config::CACHE_LINE_SIZE::value() )
+ #endif
+ for( size_t j = 0; j < ncols; ++j ) {
+ assert( CCS_raw.col_start[ j + 1 ] - CCS_raw.col_start[ j ] ==
+ C_col_index[ j ] );
+ }
+#endif
+ internal::setCurrentNonzeroes( C, nzc );
+
+ // done
+ return SUCCESS;
+ }
+
/**
* Ingests raw data into a GraphBLAS vector.
*
diff --git a/include/graphblas/reference/matrix.hpp b/include/graphblas/reference/matrix.hpp
index c1b3a365d..84873a22c 100644
--- a/include/graphblas/reference/matrix.hpp
+++ b/include/graphblas/reference/matrix.hpp
@@ -1283,7 +1283,9 @@ namespace grb {
"We hit here a configuration border case which the implementation does not "
"handle at present. Please submit a bug report."
);
- // compute and return
+ // compute and return the maximum of
+ // - row- and column-wise buffers. The added factor two is for padding
+ // - minimal buffer requirement for parallel buildMatrixUnique
return std::max( (m + n + 2) * globalBufferUnitSize,
#ifdef _H_GRB_REFERENCE_OMP_MATRIX
config::OMP::threads() * config::CACHE_LINE_SIZE::value() *
@@ -2364,7 +2366,434 @@ namespace grb {
);
return ret;
}
+
+ /**
+ * Meta-data for global buffer management for use with #grb::mxm and the
+ * matrix-matrix-matrix variant of #grb::set.
+ *
+ * This class contains all meta-data necessary to interpret the global buffer
+ * as an array of sparse accumulators (SPAs). The length of the array equals
+ * #threads() minus one, since #threads() retrieves how many threads can be
+ * used to process the call, while thread zero employs the SPA allocated with
+ * the output matrix rather than one from the global buffer.
+ *
+ * Each SPA has the layout (bitarray, stack, valueArray). These are packed in a
+ * padded byte array, such that each bit array, stack, and value array is
+ * aligned on sizeof(int) bytes.
+ *
+ * @tparam NIT The nonzero index type.
+ * @tparam ValueType The output matrix value type.
+ *
+ * This meta-data class applies to both the sequential (reference) and shared-
+ * memory parallel (reference_omp) backends.
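+ *
+ * \par Usage sketch
+ * \parblock
+ * Illustrative only; mirrors how callers such as #grb::mxm employ this
+ * class:
+ * \code
+ * SPA_BufferMetaData< NIT, OutputType > md( m, n, config::OMP::threads() );
+ * const size_t T = md.threads();
+ * // thread 0 uses the SPA allocated with the output matrix, while each
+ * // thread t in 1, ..., T-1 obtains a private SPA via md.getSPABuffers( t )
+ * \endcode
+ * \endparblock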
+ */
+ template< typename NIT, typename ValueType >
+ class SPA_BufferMetaData {
+
+ static_assert( sizeof(NIT) % sizeof(int) == 0, "Unsupported type for NIT; "
+ "please submit a bug report!" );
+
+ private:
+
+ /** The size of the offset array */
+ size_t m;
+
+ /** The size of the SPA */
+ size_t n;
+
+ /** The number of threads supported during a call to #grb::mxm */
+ size_t nthreads;
+
+ /** The initial buffer offset */
+ size_t bufferOffset;
+
+ /** The size of a single SPA, including bytes needed for padding */
+ size_t paddedSPASize;
+
+ /** The number of bytes to pad the SPA array with */
+ size_t arrayShift;
+
+ /** The number of bytes to pad the SPA stack with */
+ size_t stackShift;
+
+ /**
+ * Given a number of used bytes of the buffer, calculate the available
+ * remainder buffer and return it.
+ *
+ * @param[in] osize The size of the buffer (in bytes) that is already
+ * in use.
+ * @param[out] remainder Pointer to any remainder buffer.
+ * @param[out] rsize The size of the remainder buffer.
+ *
+ * If no buffer space is left, \a remainder will be set to nullptr
+ * and \a size to 0.
+ */
+ void retrieveRemainderBuffer(
+ const size_t osize,
+ void * &remainder, size_t &rsize
+ ) const noexcept {
+ const size_t size = internal::template getCurrentBufferSize< char >();
+ char * rem = internal::template getReferenceBuffer< char >( size );
+ size_t rsize_calc = size - osize;
+ rem += osize;
+ const size_t mod = reinterpret_cast< uintptr_t >(rem) % sizeof(int);
+ if( mod ) {
+ const size_t shift = sizeof(int) - mod;
+ if( rsize_calc >= shift ) {
+ rsize_calc -= shift;
+ rem += shift;
+ } else {
+ rsize_calc = 0;
+ rem = nullptr;
+ }
+ }
+ assert( !(reinterpret_cast< uintptr_t >(rem) % sizeof(int)) );
+ // write out
+ remainder = rem;
+ rsize = rsize_calc;
+ }
+
+
+ public:
+
+ /**
+ * Base constructor.
+ *
+ * @param[in] _m The length of the offset array.
+ * @param[in] _n The length of the SPA.
+ * @param[in] max_threads The maximum number of threads.
+ *
+ * \note \a max_threads is a separate input since there might be a need to
+ * cap the maximum number of threads used based on some analytic
+ * performance model. Rather than putting such a performance model
+ * within this class, we make it an obligatory input parameter
+ * instead.
+ *
+ * \note It is always valid to pass config::OMP::threads().
+ *
+ * \note This class \em will, however, cap the number of threads returned
+ * to \a _n.
+ */
+ SPA_BufferMetaData(
+ const size_t _m, const size_t _n,
+ const size_t max_threads
+ ) : m( _m ), n( _n ), arrayShift( 0 ), stackShift( 0 ) {
+ #ifdef _DEBUG_REFERENCE_MATRIX
+ #pragma omp critical
+ std::cout << "\t\t\t computing padded buffer size for a SPA of length "
+ << n << " while leaving space for an additional offset buffer of length "
+ << std::max( m, n ) << "...\n";
+ #endif
+ // compute bufferOffset
+ bufferOffset = (std::max( m, n ) + 1) * sizeof( NIT );
+
+ // compute value buffer size
+ const size_t valBufSize = n * utils::SizeOf< ValueType >::value;
+
+ #ifdef _DEBUG_REFERENCE_MATRIX
+ std::cout << "\t\t\t\t bit-array size has byte-size " <<
+ internal::Coordinates< reference >::arraySize( n ) << "\n";
+ std::cout << "\t\t\t\t stack has byte-size " <<
+ internal::Coordinates< reference >::stackSize( n ) << "\n";
+ std::cout << "\t\t\t\t value buffer has byte-size " << valBufSize << "\n";
+ #endif
+
+ // compute paddedSPASize
+ paddedSPASize =
+ internal::Coordinates< reference >::arraySize( n ) +
+ internal::Coordinates< reference >::stackSize( n ) +
+ valBufSize;
+ size_t shift =
+ internal::Coordinates< reference >::arraySize( n ) % sizeof(int);
+ if( shift != 0 ) {
+ arrayShift = sizeof(int) - shift;
+ paddedSPASize += arrayShift;
+ }
+ shift = internal::Coordinates< reference >::stackSize( n ) % sizeof(int);
+ if( shift != 0 ) {
+ stackShift = sizeof(int) - shift;
+ paddedSPASize += stackShift;
+ }
+ shift = valBufSize % sizeof(int);
+ if( shift != 0 ) {
+ paddedSPASize += (sizeof(int) - shift);
+ }
+
+ // pad bufferOffset
+ shift = bufferOffset % sizeof(int);
+ if( shift != 0 ) {
+ bufferOffset += (sizeof(int) - shift);
+ }
+
+ // compute free buffer size
+ const size_t freeBufferSize = internal::getCurrentBufferSize< char >() -
+ bufferOffset;
+
+ // compute max number of threads
+ nthreads = 1 + freeBufferSize / paddedSPASize;
+ #ifdef _DEBUG_REFERENCE_MATRIX
+ #pragma omp critical
+ std::cout << "\t\t\t free buffer size: " << freeBufferSize
+ << ", (padded) SPA size: " << paddedSPASize
+ << ", bufferOffset: " << bufferOffset
+ << " -> supported #threads: " << nthreads << ". "
+ << " The shifts for the bit-array and the stack are " << arrayShift
+ << ", respectively, " << stackShift << "."
+ << "\n";
+ #endif
+ // cap the final number of selected threads
+ if( nthreads > max_threads ) {
+ nthreads = max_threads;
+ }
+ if( nthreads > n ) {
+ nthreads = n;
+ }
+ }
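+
+ /*
+ * Worked example (hypothetical numbers): for n = 10, ValueType = double,
+ * and sizeof(int) == 4, suppose arraySize( 10 ) returns 10 bytes and
+ * stackSize( 10 ) returns 84 bytes. The bit-array then receives an
+ * arrayShift of 4 - (10 % 4) == 2 bytes, while the stack and the 80-byte
+ * value buffer are already multiples of 4 and need no padding; hence
+ * paddedSPASize == 10 + 2 + 84 + 80 == 176 bytes.
+ */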
+
+ /** @returns The maximum number of supported threads during #grb::mxm */
+ size_t threads() const noexcept {
+ return nthreads;
+ }
+
+ /**
+ * Requests and returns a global buffer required for a thread-local SPA.
+ *
+ * @param[in] t The thread ID. Must be larger than 0.
+ *
+ * \note Thread 0 employs the SPA allocated with the output matrix.
+ *
+ * @returns Pointer into the global buffer starting at the area reserved for
+ * the SPA of thread \a t.
+ */
+ char * getSPABuffers( size_t t ) const noexcept {
+ assert( t > 0 );
+ assert( nthreads > 1 );
+ --t;
+ char * raw = internal::template getReferenceBuffer< char >(
+ bufferOffset + (nthreads - 1) * paddedSPASize );
+ assert( reinterpret_cast< uintptr_t >(raw) % sizeof(int) == 0 );
+ raw += bufferOffset;
+ assert( reinterpret_cast< uintptr_t >(raw) % sizeof(int) == 0 );
+ raw += t * paddedSPASize;
+ return raw;
+ }
+
+ /**
+ * Retrieves the column offset buffer.
+ *
+ * @param[out] remainder Returns any remainder buffer beyond that of the
+ * column offset buffer.
+ * @param[out] rsize The remainder buffer size \a remainder points to.
+ *
+ * If \a remainder is not a nullptr then neither may \a rsize be one,
+ * and vice versa.
+ *
+ * Retrieving any remainder buffer is optional. The default is to not ask
+ * for one.
+ *
+ * \warning If all buffer memory is used for the column offsets, it may be
+ * that \a remainder equals nullptr and \a rsize equals zero.
+ *
+ * \warning This buffer is only guaranteed exclusive if only the retrieved
+ * column buffer is used. In particular, if also requesting (and
+ * using) SPA buffers, the remainder buffer area is shared with
+ * those SPA buffers, and data races are likely to occur. In other
+ * words: be very careful with any use of these remainder buffers.
+ *
+ * @returns The column offset buffer.
+ *
+ * \warning This buffer overlaps with the CRS offset buffer. The caller
+ * must ensure to only ever use one at a time.
+ */
+ NIT * getColOffsetBuffer(
+ void * * const remainder = nullptr,
+ size_t * const rsize = nullptr
+ ) const noexcept {
+ NIT * const ret = internal::template getReferenceBuffer< NIT >( n + 1 );
+ if( remainder != nullptr || rsize != nullptr ) {
+ assert( remainder != nullptr && rsize != nullptr );
+ retrieveRemainderBuffer( (n + 1) * sizeof(NIT), *remainder, *rsize );
+ }
+ return ret;
+ }
+
+ /**
+ * Retrieves the row offset buffer.
+ *
+ * @param[out] remainder Returns any remainder buffer beyond that of the row
+ * offset buffer.
+ * @param[out] rsize The remainder buffer size \a remainder points to.
+ *
+ * If \a remainder is not a nullptr then neither may \a rsize be one,
+ * and vice versa.
+ *
+ * Retrieving any remainder buffer is optional. The default is to not ask
+ * for one.
+ *
+ * \warning If all buffer memory is used for the row offsets, it may be
+ * that \a remainder equals nullptr and \a rsize equals zero.
+ *
+ * \warning This buffer is only guaranteed exclusive if only the retrieved
+ * row buffer is used. In particular, if also requesting (and
+ * using) SPA buffers, the remainder buffer area is shared with
+ * those SPA buffers, and data races are likely to occur. In other
+ * words: be very careful with any use of these remainder buffers.
+ *
+ * @returns The row offset buffer.
+ *
+ * \warning This buffer overlaps with the CCS offset buffer. The caller
+ * must ensure to only ever use one at a time.
+ */
+ NIT * getRowOffsetBuffer(
+ void * * const remainder = nullptr,
+ size_t * const rsize = nullptr
+ ) const noexcept {
+ NIT * const ret = internal::template getReferenceBuffer< NIT >( m + 1 );
+ if( remainder != nullptr || rsize != nullptr ) {
+ assert( remainder != nullptr && rsize != nullptr );
+ retrieveRemainderBuffer( (m + 1) * sizeof(NIT), *remainder, *rsize );
+ }
+ return ret;
+ }
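+
+ /*
+ * Usage sketch (illustrative only; md denotes an instance of this class):
+ *
+ *   void * rem = nullptr;
+ *   size_t rsize = 0;
+ *   NIT * const rowOffsets = md.getRowOffsetBuffer( &rem, &rsize );
+ *   // rowOffsets has room for m + 1 entries; rem, if non-null, points to
+ *   // rsize spare bytes that overlap with the SPA buffer area
+ */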
+
+ /**
+ * Shifts a pointer into the global buffer by the bit-array size and its
+ * padding.
+ *
+ * @param[in,out] raw On input: an aligned pointer into the global buffer.
+ * On output: an aligned pointer past the bit-array
+ * position.
+ */
+ void applyArrayShift( char * &raw ) const noexcept {
+ const size_t totalShift =
+ internal::Coordinates< reference >::arraySize( n ) +
+ arrayShift;
+ #ifdef _DEBUG_REFERENCE_MATRIX
+ std::cout << "\t\t\t shifting input pointer with "
+ << internal::Coordinates< reference >::arraySize( n ) << " + "
+ << arrayShift << " = " << totalShift << " bytes\n";
+ #endif
+ raw += totalShift;
+ }
+
+ /**
+ * Shifts a pointer into the global buffer by the stack size and its
+ * padding.
+ *
+ * @param[in,out] raw On input: an aligned pointer into the global buffer.
+ * On output: an aligned pointer past the stack position.
+ */
+ void applyStackShift( char * &raw ) const noexcept {
+ const size_t totalShift =
+ internal::Coordinates< reference >::stackSize( n ) +
+ stackShift;
+ #ifdef _DEBUG_REFERENCE_MATRIX
+ std::cout << "\t\t\t shifting input pointer with "
+ << internal::Coordinates< reference >::arraySize( n ) << " + "
+ << stackShift << " = " << totalShift << "bytes \n";
+ #endif
+ raw += totalShift;
+ }
+
+ };
+#endif
+
+ /**
+ * Retrieves the SPA buffers for the calling thread.
+ *
+ * \warning This function must be called from within an OpenMP parallel
+ * section.
+ *
+ * @param[out] arr Where the bit-array will be located.
+ * @param[out] buf Where the stack will be located.
+ * @param[out] valbuf Where the value buffer will be located.
+ *
+ * All above pointers are aligned on sizeof(int) bytes.
+ *
+ * @param[in] md Meta-data for global buffer management.
+ * @param[in] C The output matrix.
+ *
+ * One thread uses the buffers pre-allocated with the matrix \a C, thus
+ * ensuring at least one thread may perform the #grb::mxm. Any remaining
+ * threads can only help process the #grb::mxm if there is enough global
+ * buffer memory available.
+ *
+ * \note The global memory has size \f$ \Omega( \mathit{nz} ) \f$, which may
+ * be several factors larger than (or even asymptotically greater than)
+ * \f$ \max\{ m, n \} \f$.
+ *
+ * \note In case the application stores multiple matrices, the global buffer
+ * may be larger still than the above note indicates, namely when at
+ * least one of the other matrices is significantly (or asymptotically)
+ * larger than the one involved with the #grb::mxm.
+ */
+ template<
+ typename OutputType,
+ typename RIT, typename CIT, typename NIT
+ >
+ void spa_ompPar_getBuffers(
+ char * &arr, char * &buf, OutputType * &valbuf,
+ const struct SPA_BufferMetaData< NIT, OutputType > &md,
+ Matrix< OutputType, reference, RIT, CIT, NIT > &C
+ ) {
+#ifdef _H_GRB_REFERENCE_OMP_MATRIX
+ // other threads use the global buffer to create additional SPAs
+ {
+ const size_t t = config::OMP::current_thread_ID();
+ #ifndef NDEBUG
+ const size_t T = config::OMP::current_threads();
+ assert( t < T );
+ #endif
+ if( t > 0 ) {
+ #ifdef _DEBUG_REFERENCE_MATRIX
+ #pragma omp critical
+ std::cout << "\t Thread " << t << " gets buffers from global buffer\n";
+ #endif
+ char * rawBuffer = md.getSPABuffers( t );
+ assert( reinterpret_cast< uintptr_t >(rawBuffer) % sizeof(int) == 0 );
+ arr = rawBuffer;
+ #ifdef _DEBUG_REFERENCE_MATRIX
+ #pragma omp critical
+ #endif
+ md.applyArrayShift( rawBuffer );
+ assert( reinterpret_cast< uintptr_t >(rawBuffer) % sizeof(int) == 0 );
+ buf = rawBuffer;
+ #ifdef _DEBUG_REFERENCE_MATRIX
+ #pragma omp critical
+ #endif
+ md.applyStackShift( rawBuffer );
+ assert( reinterpret_cast< uintptr_t >(rawBuffer) % sizeof(int) == 0 );
+ assert( buf != arr );
+ valbuf = reinterpret_cast< OutputType * >(rawBuffer);
+ assert( static_cast< void * >(valbuf) != static_cast< void * >(buf) );
+ } else {
+ #ifdef _DEBUG_REFERENCE_MATRIX
+ #pragma omp critical
+ std::cout << "\t Thread " << t << " gets buffers from matrix storage\n";
+ #endif
+ // one thread uses the standard matrix buffer
+ internal::getMatrixBuffers( arr, buf, valbuf, 1, C );
+ }
+ #ifdef _DEBUG_REFERENCE_MATRIX
+ #pragma omp critical
+ {
+ std::cout << "\t Thread " << t << " has SPA array @ "
+ << static_cast< void * >( arr ) << " and SPA stack @ "
+ << static_cast< void * >( buf ) << " and SPA values @ "
+ << static_cast< void * >( valbuf ) << "\n";
+ }
+ #endif
+ }
+#else
+ #ifdef _DEBUG_REFERENCE_MATRIX
+ std::cout << "\t Reference backend gets buffers from global buffer\n";
+ #endif
+ internal::getMatrixBuffers( arr, buf, valbuf, 1, C );
+ (void) md;
#endif
+ }
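+
+ /*
+ * Usage sketch (illustrative only; m, n, and the output matrix C are assumed
+ * to be in scope):
+ *
+ *   SPA_BufferMetaData< NIT, OutputType > md( m, n, config::OMP::threads() );
+ *   #pragma omp parallel num_threads( md.threads() )
+ *   {
+ *       char * arr = nullptr, * buf = nullptr;
+ *       OutputType * valbuf = nullptr;
+ *       spa_ompPar_getBuffers( arr, buf, valbuf, md, C );
+ *       // arr, buf, and valbuf now point to thread-local SPA storage
+ *   }
+ */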
} // end namespace grb::internal
diff --git a/include/graphblas/utils.hpp b/include/graphblas/utils.hpp
index 3ab508eec..4a2235501 100644
--- a/include/graphblas/utils.hpp
+++ b/include/graphblas/utils.hpp
@@ -384,9 +384,11 @@ namespace grb {
return ret;
}
}
-
- /** Specialisation for void-valued matrice's masks */
- template< Descriptor descriptor, typename MatrixDataType, typename ValuesType >
+ /** Specialisation for void-valued matrix masks */
+ template<
+ Descriptor descriptor,
+ typename MatrixDataType, typename ValuesType
+ >
static bool interpretMatrixMask(
const bool &assigned,
const ValuesType * const values,
@@ -398,8 +400,11 @@ namespace grb {
return interpretMask< descriptor, ValuesType >( assigned, values, k );
}
- /** Specialisation for void-valued matrice's masks */
- template< Descriptor descriptor, typename MatrixDataType, typename ValuesType >
+ /** Specialisation for void-valued matrix masks */
+ template<
+ Descriptor descriptor,
+ typename MatrixDataType, typename ValuesType
+ >
static bool interpretMatrixMask(
const bool &assigned,
const ValuesType * const,
diff --git a/include/graphblas/utils/parser/matrixFileReaderBase.hpp b/include/graphblas/utils/parser/matrixFileReaderBase.hpp
index c5cdf8b7a..15aa3e4f8 100644
--- a/include/graphblas/utils/parser/matrixFileReaderBase.hpp
+++ b/include/graphblas/utils/parser/matrixFileReaderBase.hpp
@@ -280,7 +280,7 @@ namespace grb {
properties._nz = nz;
properties._entries = entries;
properties._pattern = pattern;
- properties._symmetric = symmetric;
+ properties._symmetric = symmetric ? Symmetric : General;
properties._direct = direct;
properties._symmetricmap = symmetricmap;
// check for existance of file
diff --git a/src/graphblas/hyperdags/hyperdags.cpp b/src/graphblas/hyperdags/hyperdags.cpp
index 256b31fba..320b522f5 100644
--- a/src/graphblas/hyperdags/hyperdags.cpp
+++ b/src/graphblas/hyperdags/hyperdags.cpp
@@ -215,6 +215,9 @@ std::string grb::internal::hyperdags::toString(
case SET_MATRIX_MATRIX_INPUT2:
return "set( matrix, matrix, scalar )";
+ case SET_MATRIX_MATRIX_MASKED:
+ return "set( matrix, matrix, matrix )";
+
case MXM_MATRIX_MATRIX_MATRIX_MONOID:
return "mxm( matrix, matrix, matrix, monoid, scalar, scalar )";
diff --git a/tests/unit/id.cpp b/tests/unit/id.cpp
index a7a7cdb0b..8711369ad 100644
--- a/tests/unit/id.cpp
+++ b/tests/unit/id.cpp
@@ -216,7 +216,7 @@ void grb_program2( const struct input &in, struct output &out ) {
}
/**
- * Test for move assignement id cleanup.
+ * Test for move assignment id cleanup.
*
* Creating and performing move assignment on multiple new objects and check
* for collisions.
diff --git a/tests/unit/matrixSet.cpp b/tests/unit/matrixSet.cpp
index c6ab6739d..da3d48f64 100644
--- a/tests/unit/matrixSet.cpp
+++ b/tests/unit/matrixSet.cpp
@@ -19,6 +19,7 @@
#include <iostream>
#include <graphblas.hpp>
+#include <graphblas/algorithms/matrix_factory.hpp>
+#include <new>
using namespace grb;
@@ -27,8 +28,356 @@ static const int data1[ 15 ] = { 4, 7, 4, 6, 4, 7, 1, 7, 3, 6, 7, 5, 1, 8, 7 };
static const size_t I[ 15 ] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 9, 8, 7, 6 };
static const size_t J[ 15 ] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 5, 7, 5, 1 };
+/** Generic implementation of masked tests */
+template<
+ Descriptor descr = descriptors::no_operation,
+ typename Tout, typename Tmask, typename Tin
+>
+RC masked_tests_generic_impl(
+ RC &rc, grb::Matrix< Tout > &output,
+ const grb::Matrix< Tmask > &mask, const grb::Matrix< Tin > &input,
+ const size_t n
+) {
+ const bool emptyMask = grb::nrows( mask ) == 0 || grb::ncols( mask ) == 0;
+
+ std::cout << "\t\t with structural descriptor\n";
+ rc = grb::set< descr | descriptors::structural >( output, mask, input );
+ if( rc != SUCCESS ) {
+ std::cerr << "\t grb::set structural (matrix to matrix masked) FAILED\n";
+ return rc;
+ }
+ if( grb::nnz( output ) != 2 * n - 1 ) {
+ std::cerr << "\t unexpected number of output elements ( "
+ << grb::nnz( output ) << " ), expected " << 2 * n - 1 <<".\n";
+ rc = FAILED;
+ }
+ for( const auto &triplet : output ) {
+ if(
+ triplet.first.first != triplet.first.second &&
+ triplet.first.first != triplet.first.second - 1
+ ) {
+ std::cerr << "\t unexpected entry at ( " << triplet.first.first << ", "
+ << triplet.first.second << " ), value " << triplet.second << ".\n";
+ rc = FAILED;
+ }
+ if( triplet.first.first != static_cast< size_t >(triplet.second) ) {
+ std::cerr << "\t unexpected entry at ( " << triplet.first.first << ", "
+ << triplet.first.second << " ) with value " << triplet.second;
+ std::cerr << ", expected value "<< triplet.first.first <<".\n";
+ rc = FAILED;
+ }
+ }
+ if( rc != SUCCESS ) { return rc; }
+
+ std::cout << "\t\t without descriptor\n";
+ rc = grb::set< descr >( output, mask, input );
+ if( rc != SUCCESS ) {
+ std::cerr << "\t grb::set (matrix to matrix masked) FAILED\n";
+ return rc;
+ }
+ if( emptyMask ) {
+ if( grb::nnz( output ) != 2 * n - 1 ) {
+ std::cerr << "\t unexpected number of output elements ( "
+ << grb::nnz( output ) << " ), expected " << 2 * n - 1 <<".\n";
+ rc = FAILED;
+ }
+ } else {
+ if( grb::nnz( output ) != n ) {
+ std::cerr << "\t unexpected number of output elements ( "
+ << grb::nnz( output ) << " ), expected " << n <<".\n";
+ rc = FAILED;
+ }
+ }
+ for( const auto &triplet : output ) {
+ if( emptyMask ) {
+ if( triplet.first.first != triplet.first.second &&
+ triplet.first.first != triplet.first.second - 1
+ ) {
+ std::cerr << "\t unexpected entry at ( " << triplet.first.first << ", "
+ << triplet.first.second << " ), value " << triplet.second << ".\n";
+ rc = FAILED;
+ }
+ } else {
+ if( triplet.first.first != triplet.first.second ) {
+ std::cerr << "\t unexpected entry at ( " << triplet.first.first << ", "
+ << triplet.first.second << " ), value " << triplet.second << ".\n";
+ rc = FAILED;
+ }
+ }
+ if( triplet.first.first != static_cast< size_t >(triplet.second) ) {
+ std::cerr << "\t unexpected entry at ( " << triplet.first.first << ", "
+ << triplet.first.second << " ) with value " << triplet.second;
+ std::cerr << ", expected value "<< triplet.first.first <<".\n";
+ rc = FAILED;
+ }
+ }
+ if( rc != SUCCESS ) { return rc; }
+
+ // done
+ return rc;
+}
+
+/** Specialisation for void output */
+template<
+ Descriptor descr = descriptors::no_operation,
+ typename Tmask, typename Tin
+>
+RC masked_tests_generic_impl(
+ RC &rc, grb::Matrix< void > &output,
+ const grb::Matrix< Tmask > &mask, const grb::Matrix< Tin > &input,
+ const size_t n
+) {
+ const bool emptyMask = grb::nrows( mask ) == 0 || grb::ncols( mask ) == 0;
+
+ std::cout << "\t\t with structural descriptor\n";
+ rc = grb::set< descr | descriptors::structural >( output, mask, input );
+ if( rc != SUCCESS ) {
+ std::cerr << "\t grb::set structural (matrix to matrix masked) FAILED\n";
+ return rc;
+ }
+ if( grb::nnz( output ) != 2 * n - 1 ) {
+ std::cerr << "\t unexpected number of output elements ( "
+ << grb::nnz( output ) << " ), expected " << 2 * n - 1 <<".\n";
+ rc = FAILED;
+ }
+ for( const auto &triplet : output ) {
+ if(
+ triplet.first != triplet.second &&
+ triplet.first != triplet.second - 1
+ ) {
+ std::cerr << "\t unexpected entry at ( " << triplet.first << ", "
+ << triplet.second << " ), no value (pattern matrix).\n";
+ rc = FAILED;
+ }
+ }
+ if( rc != SUCCESS ) { return rc; }
+
+ std::cout << "\t\t without descriptor\n";
+ rc = grb::set< descr >( output, mask, input );
+ if( rc != SUCCESS ) {
+ std::cerr << "\t grb::set (matrix to matrix masked) FAILED\n";
+ return rc;
+ }
+ if( emptyMask ) {
+ if( grb::nnz( output ) != 2 * n - 1 ) {
+ std::cerr << "\t unexpected number of output elements ( "
+ << grb::nnz( output ) << " ), expected " << 2 * n - 1 <<".\n";
+ rc = FAILED;
+ }
+ } else {
+ if( grb::nnz( output ) != n ) {
+ std::cerr << "\t unexpected number of output elements ( "
+ << grb::nnz( output ) << " ), expected " << n <<".\n";
+ rc = FAILED;
+ }
+ }
+ for( const auto &triplet : output ) {
+ if( emptyMask ) {
+ if( triplet.first != triplet.second &&
+ triplet.first != triplet.second - 1
+ ) {
+ std::cerr << "\t unexpected entry at ( " << triplet.first << ", "
+ << triplet.second << " ), no value (pattern matrix).\n";
+ rc = FAILED;
+ }
+ } else {
+ if( triplet.first != triplet.second ) {
+ std::cerr << "\t unexpected entry at ( " << triplet.first << ", "
+ << triplet.second << " ), no value (pattern matrix).\n";
+ rc = FAILED;
+ }
+ }
+ }
+ if( rc != SUCCESS ) { return rc; }
+
+ // done
+ return rc;
+}
+
+/** Implementation of masked tests for non-void masks (nvm). */
+template<
+ Descriptor descr = descriptors::no_operation,
+ typename Tout, typename Tmask, typename Tin
+>
+RC masked_tests_nvm_impl(
+ RC &rc, grb::Matrix< Tout > &output,
+ const grb::Matrix< Tmask > &mask, const grb::Matrix< Tin > &input,
+ const size_t n
+) {
+ const bool emptyMask = grb::nrows( mask ) == 0 || grb::ncols( mask ) == 0;
+
+ std::cout << "\t\t with invert_mask descriptor\n";
+ rc = grb::set< descr | descriptors::invert_mask >( output, mask, input );
+ if( rc != SUCCESS ) {
+ std::cerr << "\t grb::set invert mask (matrix to matrix masked) FAILED\n";
+ return rc;
+ }
+ if( emptyMask ) {
+ if( grb::nnz( output ) != 2 * n - 1 ) {
+ std::cerr << "\t unexpected number of output elements ( "
+ << grb::nnz( output ) << " ), expected " << 2 * n - 1 <<".\n";
+ rc = FAILED;
+ }
+ } else {
+ if( grb::nnz( output ) != n - 1 ) {
+ std::cerr << "\t unexpected number of output elements ( "
+ << grb::nnz( output ) << " ), expected " << n - 1 <<".\n";
+ rc = FAILED;
+ }
+ }
+ for( const auto &triplet : output ) {
+ if( emptyMask ) {
+ if( triplet.first.first != triplet.first.second &&
+ triplet.first.first != triplet.first.second - 1
+ ) {
+ std::cerr << "\t unexpected entry at ( " << triplet.first.first << ", "
+ << triplet.first.second << " ), value " << triplet.second << ".\n";
+ rc = FAILED;
+ }
+ } else {
+ if( triplet.first.first != triplet.first.second - 1 ) {
+ std::cerr << "\t unexpected entry at ( " << triplet.first.first << ", "
+ << triplet.first.second << " ), value " << triplet.second << ".\n";
+ rc = FAILED;
+ }
+ }
+ if( triplet.first.first != static_cast< size_t >(triplet.second) ) {
+ std::cerr << "\t unexpected entry at ( " << triplet.first.first << ", "
+ << triplet.first.second << " ) with value " << triplet.second;
+ std::cerr << ", expected value "<< triplet.first.first <<".\n";
+ rc = FAILED;
+ }
+ }
+ if( rc != SUCCESS ) { return rc; }
+
+ // done
+ return rc;
+}
+
+/** Specialisation for void output */
+template<
+ Descriptor descr = descriptors::no_operation,
+ typename Tmask, typename Tin
+>
+RC masked_tests_nvm_impl(
+ RC &rc, grb::Matrix< void > &output,
+ const grb::Matrix< Tmask > &mask, const grb::Matrix< Tin > &input,
+ const size_t n
+) {
+ const bool emptyMask = grb::nrows( mask ) == 0 || grb::ncols( mask ) == 0;
+
+ std::cout << "\t\t with invert_mask descriptor\n";
+ rc = grb::set< descr | descriptors::invert_mask >( output, mask, input );
+ if( rc != SUCCESS ) {
+ std::cerr << "\t grb::set invert mask (matrix to matrix masked) FAILED\n";
+ return rc;
+ }
+ if( emptyMask ) {
+ if( grb::nnz( output ) != 2 * n - 1 ) {
+ std::cerr << "\t unexpected number of output elements ( "
+ << grb::nnz( output ) << " ), expected " << 2 * n - 1 <<".\n";
+ rc = FAILED;
+ }
+ } else {
+ if( grb::nnz( output ) != n - 1 ) {
+ std::cerr << "\t unexpected number of output elements ( "
+ << grb::nnz( output ) << " ), expected " << n - 1 <<".\n";
+ rc = FAILED;
+ }
+ }
+ for( const auto &triplet : output ) {
+ if( emptyMask ) {
+ if( triplet.first != triplet.second &&
+ triplet.first != triplet.second - 1
+ ) {
+ std::cerr << "\t unexpected entry at ( " << triplet.first << ", "
+ << triplet.second << " ), no value (pattern matrix).\n";
+ rc = FAILED;
+ }
+ } else {
+ if( triplet.first != triplet.second - 1 ) {
+ std::cerr << "\t unexpected entry at ( " << triplet.first << ", "
+ << triplet.second << " ), no value (pattern matrix).\n";
+ rc = FAILED;
+ }
+ }
+ }
+ if( rc != SUCCESS ) { return rc; }
+
+ // done
+ return rc;
+}
+
+/** Dispatch for generic masked tests */
+template< typename Tout, typename Tmask, typename Tin >
+RC masked_tests(
+ RC &rc, grb::Matrix< Tout > &output,
+ const grb::Matrix< Tmask > &mask, const grb::Matrix< Tin > &input,
+ const size_t n
+) {
+ if( masked_tests_generic_impl( rc, output, mask, input, n ) != SUCCESS ) {
+ return rc;
+ }
+ return masked_tests_nvm_impl( rc, output, mask, input, n );
+}
+
+/** Specialised dispatch for masked tests with void masks */
+template< typename Tout, typename Tin >
+RC masked_tests(
+ RC &rc, grb::Matrix< Tout > &output,
+ const grb::Matrix< void > &mask, const grb::Matrix< Tin > &input,
+ const size_t n
+) {
+ const grb::RC ret = masked_tests_generic_impl( rc, output, mask, input, n );
+ std::cout << "\t\t invert_mask descriptor SKIPPED\n";
+ return ret;
+}
+
+/** Specialised dispatch for masked tests with no-cast domains */
+template< typename T >
+RC masked_tests(
+ RC &rc, grb::Matrix< T > &output,
+ const grb::Matrix< bool > &mask, const grb::Matrix< T > &input,
+ const size_t n
+) {
+ if( masked_tests_generic_impl( rc, output, mask, input, n ) != SUCCESS ) {
+ return rc;
+ }
+ if( masked_tests_nvm_impl( rc, output, mask, input, n ) != SUCCESS ) {
+ return rc;
+ }
+ std::cout << "\t re-running previous tests with no_casting descriptor\n";
+ if(
+ masked_tests_generic_impl< descriptors::no_casting >(
+ rc, output, mask, input, n
+ ) != SUCCESS
+ ) {
+ return rc;
+ }
+ return masked_tests_nvm_impl< descriptors::no_casting >(
+ rc, output, mask, input, n );
+}
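+
+/*
+ * Summary of the above dispatch (for readability): a Matrix< bool > mask with
+ * matching input and output types re-runs all tests under the no_casting
+ * descriptor, a void mask skips the invert_mask tests, and every other
+ * combination takes the generic dispatch.
+ */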
+
void grb_program( const size_t &n, grb::RC &rc ) {
- // initialize test
+ // initialise non-masked test containers
+ grb::Matrix< double > A( n, n );
+ grb::Matrix< double > B( n, n );
+ grb::Matrix< void > C( n, n );
+ grb::Matrix< void > D( n, n );
+ grb::Matrix< unsigned int > E( n, n );
+
+ // initialise masked test containers
+ grb::Matrix< int > mask( n, n );
+ grb::Matrix< bool > maskBool( n, n );
+ grb::Matrix< void > maskVoid( n, n );
+ grb::Matrix< double > maskEmpty( 0, 0 );
+ grb::Matrix< int > input( n, n );
+ grb::Matrix< void > inputVoid( n, n );
+ grb::Matrix< float > inputFloat( n, n );
+ grb::Matrix< int > output( n, n );
+ grb::Matrix< void > outputVoid( n, n );
+
+ // initialise non-masked test data
int chk[ 10 ][ 10 ];
for( size_t i = 0; i < 10; ++i ) {
for( size_t j = 0; j < 10; ++j ) {
@@ -38,21 +387,17 @@ void grb_program( const size_t &n, grb::RC &rc ) {
for( size_t k = 0; k < 15; ++k ) {
chk[ I[ k ] ][ J[ k ] ] = data1[ k ];
}
- grb::Matrix< double > A( n, n );
- grb::Matrix< double > B( n, n );
- grb::Matrix< void > C( n, n );
- grb::Matrix< void > D( n, n );
- grb::Matrix< unsigned int > E( n, n );
+
rc = grb::resize( A, 15 );
if( rc == SUCCESS ) {
rc = grb::buildMatrixUnique( A, I, J, data1, 15, SEQUENTIAL );
for( const auto &triplet : A ) {
if( triplet.first.first >= 10 || triplet.first.second >= 10 ) {
- std::cerr << "\tunexpected entry at A( " << triplet.first.first << ", "
+ std::cerr << "\t unexpected entry at A( " << triplet.first.first << ", "
<< triplet.first.second << " ).\n";
rc = FAILED;
} else if( chk[ triplet.first.first ][ triplet.first.second ] != triplet.second ) {
- std::cerr << "\tunexpected entry at A( " << triplet.first.first << ", "
+ std::cerr << "\t unexpected entry at A( " << triplet.first.first << ", "
<< triplet.first.second << " ) with value " << triplet.second;
if( chk[ triplet.first.first ][ triplet.first.second ] == 0 ) {
std::cerr << ", expected no entry here.\n";
@@ -64,22 +409,86 @@ void grb_program( const size_t &n, grb::RC &rc ) {
}
}
}
- if( rc == SUCCESS ) {
- rc = grb::resize( B, 15 );
+ rc = rc ? rc : grb::resize( B, 15 );
+ rc = rc ? rc : grb::resize( C, 15 );
+ rc = rc ? rc : grb::resize( D, 15 );
+ rc = rc ? rc : grb::resize( E, 15 );
+ rc = rc ? rc : grb::resize( output, 15 );
+ if( rc != SUCCESS || grb::nnz( A ) != 15 ) {
+ std::cerr << "\tinitialisation FAILED\n";
+ return;
}
- if( rc == SUCCESS ) {
- rc = grb::resize( C, 15 );
+
+ // initialise data for masked-set tests
+ // - mask will be an n by n matrix with its diagonal set to 1 and its
+ // superdiagonal set to 0.
+ // - input will be an n by n matrix with each element on its diagonal and
+ // superdiagonal set to its row index (meaning the entries on row 0 have
+ // value 0).
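+ // For illustration, with n = 3 the mask and input read as follows, where
+ // a dot denotes an absent entry:
+ //
+ //      mask:            input:
+ //     ( 1 0 . )        ( 0 0 . )
+ //     ( . 1 0 )        ( . 1 1 )
+ //     ( . . 1 )        ( . . 2 )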
+ size_t * const I_mask = new (std::nothrow) size_t[ 2 * n - 1 ];
+ size_t * const J_mask = new (std::nothrow) size_t[ 2 * n - 1 ];
+ int * const mask_vals = new (std::nothrow) int[ 2 * n - 1 ];
+ int * const input_vals = new (std::nothrow) int[ 2 * n - 1 ];
+ if( I_mask == nullptr || J_mask == nullptr || mask_vals == nullptr ||
+ input_vals == nullptr
+ ) {
+ std::cerr << "\t initialisation FAILED\n";
+ return;
}
- if( rc == SUCCESS ) {
- rc = grb::resize( D, 15 );
+ for( size_t k = 0; k < n; ++k ) {
+ I_mask[ k ] = J_mask[ k ] = k;
+ mask_vals[ k ] = 1;
+ input_vals[ k ] = static_cast< int >( k );
+ if( k < n - 1 ) {
+ I_mask[ n + k ] = k;
+ J_mask[ n + k ] = k + 1;
+ mask_vals[ n + k ] = 0;
+ input_vals[ n + k ] = static_cast< int >( k );
+ }
}
- if( rc == SUCCESS ) {
- rc = grb::resize( E, 15 );
+ rc = grb::buildMatrixUnique( mask, I_mask, J_mask, mask_vals, 2 * n - 1,
+ SEQUENTIAL );
+ if( rc != SUCCESS ) {
+ std::cerr << "\t buildMatrixUnique of mask matrix FAILED\n";
+ return;
}
- if( rc != SUCCESS || grb::nnz( A ) != 15 ) {
- std::cerr << "\tinitialisation FAILED\n";
+ rc = grb::buildMatrixUnique( maskBool, I_mask, J_mask, mask_vals, 2 * n - 1,
+ SEQUENTIAL );
+ if( rc != SUCCESS ) {
+ std::cerr << "\t buildMatrixUnique of maskBool matrix FAILED\n";
+ return;
+ }
+ try {
+ maskVoid = grb::algorithms::matrices< void >::identity( n );
+ } catch( ... ) {
+ std::cerr << "\t constructing maskVoid FAILED\n";
+ return;
+ }
+ rc = grb::buildMatrixUnique( input, I_mask, J_mask, input_vals, 2 * n - 1,
+ SEQUENTIAL );
+ if( rc != SUCCESS ) {
+ std::cerr << "\t buildMatrixUnique of input matrix FAILED\n";
+ return;
+ }
+ rc = grb::buildMatrixUnique( inputFloat, I_mask, J_mask, input_vals, 2 * n - 1,
+ SEQUENTIAL );
+ if( rc != SUCCESS ) {
+ std::cerr << "\t buildMatrixUnique of inputFloat matrix FAILED\n";
+ return;
+ }
+ rc = grb::resize( inputVoid, 2 * n - 1 );
+ rc = rc ? rc : grb::resize( outputVoid, 2 * n - 1 );
+ if( rc != SUCCESS ) {
+ std::cerr << "\t error resizing matrices for masked tests\n";
return;
}
+ // postpone materialisation of inputVoid since it relies on unmasked grb::set
+ // (which is itself unit-tested later)
+
+ delete [] I_mask;
+ delete [] J_mask;
+ delete [] mask_vals;
+ delete [] input_vals;
std::cout << "\t test initialisation complete\n";
// check grb::set for non-voids
@@ -96,11 +505,11 @@ void grb_program( const size_t &n, grb::RC &rc ) {
}
for( const auto &triplet : B ) {
if( triplet.first.first >= 10 || triplet.first.second >= 10 ) {
- std::cerr << "\tunexpected entry at B( " << triplet.first.first << ", "
+ std::cerr << "\t unexpected entry at B( " << triplet.first.first << ", "
<< triplet.first.second << " ).\n";
rc = FAILED;
} else if( chk[ triplet.first.first ][ triplet.first.second ] != triplet.second ) {
- std::cerr << "\tunexpected entry at B( " << triplet.first.first << ", "
+ std::cerr << "\t unexpected entry at B( " << triplet.first.first << ", "
<< triplet.first.second << " ) with value " << triplet.second;
if( chk[ triplet.first.first ][ triplet.first.second ] == 0 ) {
std::cerr << ", expected no entry here.\n";
@@ -175,14 +584,14 @@ void grb_program( const size_t &n, grb::RC &rc ) {
}
for( const auto &triplet : E ) {
if( triplet.first.first >= 10 || triplet.first.second >= 10 ) {
- std::cerr << "\tunexpected entry at E( " << triplet.first.first << ", "
+ std::cerr << "\t unexpected entry at E( " << triplet.first.first << ", "
<< triplet.first.second << " ), value " << triplet.second << ".\n";
rc = FAILED;
} else if( static_cast< unsigned int >(
chk[ triplet.first.first ][ triplet.first.second ]
) != triplet.second
) {
- std::cerr << "\tunexpected entry at E( " << triplet.first.first << ", "
+ std::cerr << "\t unexpected entry at E( " << triplet.first.first << ", "
<< triplet.first.second << " ) with value " << triplet.second;
if( chk[ triplet.first.first ][ triplet.first.second ] == 0 ) {
std::cerr << ", expected no entry here.\n";
@@ -209,11 +618,11 @@ void grb_program( const size_t &n, grb::RC &rc ) {
}
for( const auto &triplet : E ) {
if( triplet.first.first >= 10 || triplet.first.second >= 10 ) {
- std::cerr << "\tunexpected entry at ( " << triplet.first.first << ", "
+ std::cerr << "\t unexpected entry at ( " << triplet.first.first << ", "
<< triplet.first.second << " ), value " << triplet.second << ".\n";
rc = FAILED;
} else if( 117 != triplet.second ) {
- std::cerr << "\tunexpected entry at ( " << triplet.first.first << ", "
+ std::cerr << "\t unexpected entry at ( " << triplet.first.first << ", "
<< triplet.first.second << " ) with value " << triplet.second;
if( chk[ triplet.first.first ][ triplet.first.second ] == 0 ) {
std::cerr << ", expected no entry here.\n";
@@ -224,6 +633,109 @@ void grb_program( const size_t &n, grb::RC &rc ) {
}
}
if( rc != SUCCESS ) { return; }
+
+ // check masked matrix set
+ // first, finish initialisation
+ rc = grb::set( inputVoid, input );
+ rc = rc ? rc : grb::resize( output, 2 * n - 1 );
+ if( rc != SUCCESS || grb::nnz( inputVoid ) != 2 * n - 1 ) {
+ std::cerr << "\t error in inputVoid (an earlier test likely failed)\n";
+ if( rc == SUCCESS ) { rc = FAILED; }
+ return;
+ }
+
+ std::cout << "\t testing set( matrix, mask, matrix ), non-void, no-cast, "
+ << "empty mask\n";
+ if( masked_tests( rc, output, maskEmpty, input, n ) != grb::SUCCESS ) {
+ return;
+ }
+
+ std::cout << "\t testing set( matrix, mask, matrix ), non-void, no-cast, "
+ << "non-empty mask\n";
+ if( masked_tests( rc, output, mask, input, n ) != grb::SUCCESS ) { return; }
+
+ std::cout << "\t testing set( matrix, mask, matrix ), non-void, no-cast, "
+ << "non-empty Boolean mask\n";
+ if( masked_tests( rc, output, maskBool, input, n ) != grb::SUCCESS ) {
+ return;
+ }
+
+ std::cout << "\t testing set( matrix, mask, matrix ), non-void, casting from "
+ << "float to int, empty mask\n";
+ if( masked_tests( rc, output, maskEmpty, inputFloat, n ) != grb::SUCCESS ) {
+ return;
+ }
+
+ std::cout << "\t testing set( matrix, mask, matrix ), non-void, casting from "
+ << "float to int, non-empty mask\n";
+ if( masked_tests( rc, output, mask, inputFloat, n ) != grb::SUCCESS ) {
+ return;
+ }
+
+ std::cout << "\t testing set( matrix, mask, matrix ), non-void, casting from "
+ << "float to int, non-empty Boolean mask\n";
+ if( masked_tests( rc, output, maskBool, inputFloat, n ) != grb::SUCCESS ) {
+ return;
+ }
+
+ std::cout << "\t testing set( matrix, mask, matrix ), void-to-void (no cast), "
+ << "empty mask\n";
+ if( masked_tests( rc, outputVoid, maskEmpty, inputVoid, n ) != grb::SUCCESS ) {
+ return;
+ }
+
+ std::cout << "\t testing set( matrix, mask, matrix ), void-to-void (no cast), "
+ << "non-empty mask\n";
+ if( masked_tests( rc, outputVoid, mask, inputVoid, n ) != grb::SUCCESS ) {
+ return;
+ }
+
+ std::cout << "\t testing set( matrix, mask, matrix ), void-to-void (no cast), "
+ << "non-empty Boolean mask\n";
+ if( masked_tests( rc, outputVoid, maskBool, inputVoid, n ) != grb::SUCCESS ) {
+ return;
+ }
+
+ std::cout << "\t testing set( matrix, mask, matrix ), int-to-void, casting "
+ << "(sort of), empty mask\n";
+ if( masked_tests( rc, outputVoid, maskEmpty, input, n ) != grb::SUCCESS ) {
+ return;
+ }
+
+ std::cout << "\t testing set( matrix, mask, matrix ), int-to-void, casting "
+ << "(sort of), non-empty mask\n";
+ if( masked_tests( rc, outputVoid, mask, input, n ) != grb::SUCCESS ) {
+ return;
+ }
+
+ std::cout << "\t testing set( matrix, mask, matrix ), int-to-void, casting "
+ << "(sort of), Boolean mask\n";
+ if( masked_tests( rc, outputVoid, maskBool, input, n ) != grb::SUCCESS ) {
+ return;
+ }
+
+ std::cout << "\t testing set( matrix, mask, matrix ), float-to-void, casting "
+ << "(sort of), empty mask\n";
+ if(
+ masked_tests( rc, outputVoid, maskEmpty, inputFloat, n ) !=
+ grb::SUCCESS
+ ) {
+ return;
+ }
+
+ std::cout << "\t testing set( matrix, mask, matrix ), float-to-void, casting "
+ << "(sort of), non-empty mask\n";
+ if( masked_tests( rc, outputVoid, mask, inputFloat, n ) != grb::SUCCESS ) {
+ return;
+ }
+
+ std::cout << "\t testing set( matrix, mask, matrix ), float-to-void, casting "
+ << "(sort of), Boolean mask\n";
+ if( masked_tests( rc, outputVoid, maskBool, inputFloat, n ) != grb::SUCCESS ) {
+ return;
+ }
+
+ // done
}
int main( int argc, char ** argv ) {
diff --git a/tests/unit/unittests.sh b/tests/unit/unittests.sh
index 0b49d2421..6abb78e21 100755
--- a/tests/unit/unittests.sh
+++ b/tests/unit/unittests.sh
@@ -484,6 +484,12 @@ for MODE in ${MODES}; do
grep "Test OK" ${TEST_OUT_DIR}/matrixSet_${MODE}_${BACKEND}_${P}_${T}.log || echo "Test FAILED"
echo " "
+ echo ">>> [x] [ ] Testing grb::set (matrices), size 10 000"
+ $runner ${TEST_BIN_DIR}/matrixSet_${MODE}_${BACKEND} 10000 2> ${TEST_OUT_DIR}/matrixSet_large_${MODE}_${BACKEND}_${P}_${T}.err 1> ${TEST_OUT_DIR}/matrixSet_large_${MODE}_${BACKEND}_${P}_${T}.log
+ head -1 ${TEST_OUT_DIR}/matrixSet_large_${MODE}_${BACKEND}_${P}_${T}.log
+ grep "Test OK" ${TEST_OUT_DIR}/matrixSet_large_${MODE}_${BACKEND}_${P}_${T}.log || echo "Test FAILED"
+ echo " "
+
echo ">>> [x] [ ] Testing grb::set (matrix, value)"
$runner ${TEST_BIN_DIR}/setMatrixValue_${MODE}_${BACKEND} 2> ${TEST_OUT_DIR}/setMatrixValue_${MODE}_${BACKEND}_${P}_${T}.err 1> ${TEST_OUT_DIR}/setMatrixValue_${MODE}_${BACKEND}_${P}_${T}.log
head -1 ${TEST_OUT_DIR}/setMatrixValue_${MODE}_${BACKEND}_${P}_${T}.log