diff --git a/include/CMakeLists.txt b/include/CMakeLists.txt index 714e6034b..5b48b9e55 100644 --- a/include/CMakeLists.txt +++ b/include/CMakeLists.txt @@ -219,30 +219,20 @@ install( DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/graphblas/interfaces/" install( TARGETS algorithms EXPORT GraphBLASTargets ) -# generate the spblas header with the library prefix -configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/transition/spblas.h.in - ${CMAKE_CURRENT_BINARY_DIR}/transition/spblas.h @ONLY -) - # this target lists the transition path headers # these are plain C headers and do not have any dependences -add_library( transition_headers INTERFACE ) +add_library( transition INTERFACE ) target_include_directories( - transition_headers INTERFACE + transition INTERFACE $ - $ $ ) -install( FILES ${CMAKE_CURRENT_BINARY_DIR}/transition/spblas.h - DESTINATION "${INCLUDE_INSTALL_DIR}/transition" -) - install( DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/transition/" - DESTINATION "${INCLUDE_INSTALL_DIR}/transition" + DESTINATION "${GRB_INCLUDE_INSTALL_DIR}/../transition/" FILES_MATCHING REGEX "${HEADERS_REGEX}" ) -install( TARGETS transition_headers EXPORT GraphBLASTargets ) +install( TARGETS transition EXPORT GraphBLASTargets ) diff --git a/include/graphblas.hpp b/include/graphblas.hpp index 7a61f9599..a539a5c0d 100644 --- a/include/graphblas.hpp +++ b/include/graphblas.hpp @@ -38,11 +38,6 @@ * -# generalised sparse linear algebra, \ref GraphBLAS; * -# vertex-centric programming, \ref Pregel. * - * Additionally, to ease integration with existing software, ALP defines - * so-called \ref TRANS libraries, which presently includes (partial) - * implementations of the \ref SPARSEBLAS and \ref SPBLAS (de-facto) standards, - * as well as an interface for numerical \ref TRANS_SOLVERS. - * * Several other programming interfaces are under design at present. * * For authors who contributed to ALP, please see the NOTICE file. 
diff --git a/include/graphblas/algorithms/conjugate_gradient.hpp b/include/graphblas/algorithms/conjugate_gradient.hpp index 27616ec77..b87cfa080 100644 --- a/include/graphblas/algorithms/conjugate_gradient.hpp +++ b/include/graphblas/algorithms/conjugate_gradient.hpp @@ -27,7 +27,7 @@ #define _H_GRB_ALGORITHMS_CONJUGATE_GRADIENT #include -#include +#include #include #include @@ -144,8 +144,7 @@ namespace grb { * performance semantics, with the exception of getters such as #grb::nnz, are * specific to the backend selected during compilation. */ - template< - Descriptor descr = descriptors::no_operation, + template< Descriptor descr = descriptors::no_operation, typename IOType, typename ResidualType, typename NonzeroType, @@ -155,20 +154,19 @@ namespace grb { grb::identities::zero, grb::identities::one >, class Minus = operators::subtract< IOType >, - class Divide = operators::divide< IOType >, - typename RSI, typename NZI, Backend backend + class Divide = operators::divide< IOType > > grb::RC conjugate_gradient( - grb::Vector< IOType, backend > &x, - const grb::Matrix< NonzeroType, backend, RSI, RSI, NZI > &A, - const grb::Vector< InputType, backend > &b, + grb::Vector< IOType > &x, + const grb::Matrix< NonzeroType > &A, + const grb::Vector< InputType > &b, const size_t max_iterations, ResidualType tol, size_t &iterations, ResidualType &residual, - grb::Vector< IOType, backend > &r, - grb::Vector< IOType, backend > &u, - grb::Vector< IOType, backend > &temp, + grb::Vector< IOType > &r, + grb::Vector< IOType > &u, + grb::Vector< IOType > &temp, const Ring &ring = Ring(), const Minus &minus = Minus(), const Divide ÷ = Divide() @@ -326,7 +324,7 @@ namespace grb { assert( ret == SUCCESS ); if( ret == SUCCESS ) { - tol *= std::sqrt( grb::utils::is_complex< IOType >::modulus( bnorm ) ); + tol *= sqrt( grb::utils::is_complex< IOType >::modulus( bnorm ) ); } size_t iter = 0; @@ -419,7 +417,7 @@ namespace grb { // return correct error code if( ret == SUCCESS ) { - if( 
std::sqrt( residual ) >= tol ) { + if( sqrt( residual ) >= tol ) { // did not converge within iterations return FAILED; } diff --git a/include/graphblas/algorithms/triangle_count.hpp b/include/graphblas/algorithms/triangle_count.hpp new file mode 100644 index 000000000..6cb9f2ca7 --- /dev/null +++ b/include/graphblas/algorithms/triangle_count.hpp @@ -0,0 +1,249 @@ + +/* + * Copyright 2023 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file + * + * Implements the triangle counting algorithm, using different methods. 
+ * + * @author Benjamin Lozes + * @date: May 10th, 2023 + */ + +#ifndef _H_GRB_TRIANGLE_COUNT +#define _H_GRB_TRIANGLE_COUNT + +#include +#include +#include + +#include + +#include + +namespace grb { + + namespace algorithms { + + enum class TriangleCountAlgorithm { Burkhardt, Cohen, Sandia_TT }; + + std::map< TriangleCountAlgorithm, std::string > TriangleCountAlgorithmNames = { + { TriangleCountAlgorithm::Burkhardt, "Burkhardt" }, + { TriangleCountAlgorithm::Cohen, "Cohen" }, + { TriangleCountAlgorithm::Sandia_TT, "Sandia_TT" } + }; + + template< + class Semiring, class MulMonoid, class SumMonoid, + Descriptor descr_mxm = descriptors::no_operation, + Descriptor descr_ewa = descriptors::no_operation, + Descriptor descr_reduce = descriptors::no_operation, + typename D1, typename RIT1, typename CIT1, typename NIT1, + typename D2, typename RIT2, typename CIT2, typename NIT2, + typename D3, typename RIT3, typename CIT3, typename NIT3, + typename D4, typename RIT4, typename CIT4, typename NIT4, + typename D5, typename RIT5, typename CIT5, typename NIT5, + typename D6 + > + RC triangle_count_generic( + size_t & count, + Matrix< D1, config::default_backend, RIT1, CIT1, NIT1 > & MXM_out, + const Matrix< D2, config::default_backend, RIT2, CIT2, NIT2 > & MXM_lhs, + const Matrix< D3, config::default_backend, RIT3, CIT3, NIT3 > & MXM_rhs, + Matrix< D4, config::default_backend, RIT4, CIT4, NIT4 > & EWA_out, + const Matrix< D5, config::default_backend, RIT5, CIT5, NIT5 > & EWA_rhs, + const D6 div_factor, + const Semiring mxm_semiring = Semiring(), + const MulMonoid ewiseapply_monoid = MulMonoid(), + const SumMonoid sumreduce_monoid = SumMonoid() + ) { + if( ( &MXM_out == &MXM_lhs ) || ( &MXM_out == &MXM_rhs ) ) { + return ILLEGAL; + } + + RC rc = SUCCESS; + + // Compute MXM_out = Mlhs * Mrhs + rc = rc ? rc : mxm< descr_mxm >( MXM_out, MXM_lhs, MXM_rhs, mxm_semiring, Phase::RESIZE ); + rc = rc ? 
rc : mxm< descr_mxm >( MXM_out, MXM_lhs, MXM_rhs, mxm_semiring, Phase::EXECUTE ); + + // Compute MXM_out .*= EWA_rhs + // FIXME: Replace by a foldl( Matrix[in,out], Matrix[in], Monoid ) - not implemented yet + // Will then become: + // rc = rc ? rc : eWiseApply< descr_ewa >( MXM_out, MXM_out, EWA_rhs, ewiseapply_monoid, Phase::RESIZE ); + // rc = rc ? rc : eWiseApply< descr_ewa >( MXM_out, MXM_out, EWA_rhs, ewiseapply_monoid, Phase::EXECUTE ); + // Instead of: + rc = rc ? rc : eWiseApply< descr_ewa >( EWA_out, MXM_out, EWA_rhs, ewiseapply_monoid, Phase::RESIZE ); + rc = rc ? rc : eWiseApply< descr_ewa >( EWA_out, MXM_out, EWA_rhs, ewiseapply_monoid, Phase::EXECUTE ); + + // Compute a sum reduction over into + count = static_cast< size_t >( 0 ); + rc = rc ? rc : foldl< descr_reduce >( count, EWA_out, sumreduce_monoid ); + + // Apply the div_factor to the reduction result + count /= div_factor; + + return rc; + } + + /** + * Given a graph, indicates how many triangles are contained within. + * + * @tparam D The type of the matrix non-zero values. + * + * @param[out] count The number of triangles. + * Any prior contents will be ignored. + * @param[in] A The input graph. + * @param[in,out] MXM_out Buffer matrix with the same dimensions as the input + * graph. Any prior contents will be ignored. + * @param[in] L Lower triangular matrix of the input graph (optional) + * @param[in] U Lower triangular matrix of the input graph (optional) + * + * + * @returns #grb::SUCCESS When the computation completes successfully. + * @returns #grb::MISMATCH If the dimensions of the input matrices/buffers + * are incompatible. + * @returns #grb::ILLEGAL If the given algorithm does not exist. + * @returns #grb::PANIC If an unrecoverable error has been encountered. The + * output as well as the state of ALP/GraphBLAS is + * undefined. + * + * \par Performance semantics + * + * -# This function does not allocate nor free dynamic memory, nor shall it + * make any system calls. 
+ * + * For performance semantics regarding work, inter-process data movement, + * intra-process data movement, synchronisations, and memory use, please see + * the specification of the ALP primitives this function relies on. These + * performance semantics, with the exception of getters such as #grb::nnz, are + * specific to the backend selected during compilation. + */ + template< + Descriptor descr = descriptors::no_operation, + typename D1, typename RIT1, typename CIT1, typename NIT1, + typename D2, typename RIT2, typename CIT2, typename NIT2, + typename D3, typename RIT3, typename CIT3, typename NIT3, + typename D4, typename RIT4, typename CIT4, typename NIT4, + class Semiring = Semiring< operators::add< D1 >, + operators::mul< D1 >, + identities::zero, + identities::one >, + class MulMonoid = Monoid< operators::mul< D1 >, + identities::one >, + class SumMonoid = Monoid< operators::add< size_t, D1, size_t >, + identities::zero > + > + RC triangle_count( + const TriangleCountAlgorithm algo, + size_t & count, + const Matrix< D1, config::default_backend, RIT1, CIT1, NIT1 > & A, + Matrix< D2, config::default_backend, RIT2, CIT2, NIT2 > & MXM_out, + Matrix< D3, config::default_backend, RIT3, CIT3, NIT3 > & EWA_out, + Matrix< D4, config::default_backend, RIT4, CIT4, NIT4 > & L = { 0, 0 }, + Matrix< D4, config::default_backend, RIT4, CIT4, NIT4 > & U = { 0, 0 } + ) { + // Static assertions + static_assert( std::is_integral< D1 >::value, "Type D1 must be integral" ); + + // Sanity checks + if( nrows( A ) != ncols( A ) ) { + std::cerr << "Matrix A must be square" << std::endl; + return MISMATCH; + } + if( ncols( L ) != nrows( L ) ) { + std::cerr << "Matrix L must be square" << std::endl; + return MISMATCH; + } + if( nrows( A ) != ncols( L ) ) { + std::cerr << "Matrices A and L must have the same dimensions" << std::endl; + return MISMATCH; + } + if( ncols( U ) != nrows( U ) ) { + std::cerr << "Matrix U must be square" << std::endl; + return MISMATCH; + } + if( nrows( A 
) != ncols( U ) ) { + std::cerr << "Matrices A and U must have the same dimensions" << std::endl; + return MISMATCH; + } + if( ncols( MXM_out ) != nrows( MXM_out ) ) { + std::cerr << "Matrix MXM_out must be square" << std::endl; + return MISMATCH; + } + if( nrows( A ) != ncols( MXM_out ) ) { + std::cerr << "Matrices A and MXM_out must have the same dimensions" << std::endl; + return MISMATCH; + } + if( ncols( EWA_out ) != nrows( EWA_out ) ) { + std::cerr << "Matrix EWA_out must be square" << std::endl; + return MISMATCH; + } + if( nrows( A ) != ncols( EWA_out ) ) { + std::cerr << "Matrices A and EWA_out must have the same dimensions" << std::endl; + return MISMATCH; + } + + // Dispatch to the appropriate algorithm + switch( algo ) { + case TriangleCountAlgorithm::Burkhardt: { + return triangle_count_generic< + Semiring, MulMonoid, SumMonoid, + descr | descriptors::transpose_right + >( count, MXM_out, A, A, EWA_out, A, 6UL ); + } + + case TriangleCountAlgorithm::Cohen: { + if( nrows( L ) == 0 || ncols( L ) == 0 ) { + std::cerr << "Matrix L must be provided for the Cohen algorithm" << std::endl; + return MISMATCH; + } + if( nrows( U ) == 0 || ncols( U ) == 0 ) { + std::cerr << "Matrix U must be provided for the Cohen algorithm" << std::endl; + return MISMATCH; + } + + return triangle_count_generic< + Semiring, MulMonoid, SumMonoid + >( count, MXM_out, L, U, EWA_out, A, 2UL ); + } + + case TriangleCountAlgorithm::Sandia_TT: { + if( ( nrows( U ) == 0 || ncols( U ) == 0 ) && ( nrows( L ) == 0 || ncols( L ) == 0 ) ) { + std::cerr << "Matrix L or U must be provided for the Sandia_TT algorithm" << std::endl; + return MISMATCH; + } + + const Matrix< D4, config::default_backend, RIT4, CIT4, NIT4 > & T = ( nrows( U ) == 0 || ncols( U ) == 0 ) ? 
L : U; + return triangle_count_generic< + Semiring, MulMonoid, SumMonoid + >( count, MXM_out, T, T, EWA_out, T, 1UL ); + } + + default: + std::cerr << "Unknown TriangleCountAlgorithm enum value" << std::endl; + return ILLEGAL; + } + + return SUCCESS; + } + + } // namespace algorithms + +} // namespace grb + +#endif // _H_GRB_TRIANGLE_COUNT diff --git a/include/graphblas/backends.hpp b/include/graphblas/backends.hpp index 653348112..3fd2f0ec1 100644 --- a/include/graphblas/backends.hpp +++ b/include/graphblas/backends.hpp @@ -29,9 +29,6 @@ #ifndef _H_GRB_BACKENDS #define _H_GRB_BACKENDS -#include -#include - namespace grb { @@ -221,42 +218,6 @@ namespace grb { }; - /** - * Converts a backend identifier to a human-readable string. - * - * @param[in] backend The backend whose string to return. - * - * @return The name of the given \a backend as a C++ string. - */ - static inline std::string toString( const enum grb::Backend backend ) { - switch( backend ) { - case grb::Backend::reference: return "reference"; - case grb::Backend::reference_omp: return "reference_omp"; - case grb::Backend::hyperdags: return "hyperdags"; - case grb::Backend::nonblocking: return "nonblocking"; - case grb::Backend::shmem1D: return "shmem1D"; - case grb::Backend::NUMA1D: return "NUMA1D"; - case grb::Backend::GENERIC_BSP: return "GENERIC_BSP"; - case grb::Backend::BSP1D: return "BSP1D"; - case grb::Backend::doublyBSP1D: return "doublyBSP1D"; - case grb::Backend::BSP2D: return "BSP2D"; - case grb::Backend::autoBSP: return "autoBSP"; - case grb::Backend::optBSP: return "optBSP"; - case grb::Backend::hybrid: return "hybrid"; - case grb::Backend::hybridSmall: return "hybridSmall"; - case grb::Backend::hybridMid: return "hybridMid"; - case grb::Backend::hybridLarge: return "hybridLarge"; - case grb::Backend::minFootprint: return "minFootprint"; - case grb::Backend::banshee: return "banshee"; - case grb::Backend::banshee_ssr: return "banshee_ssr"; - default: - const int backend_id = static_cast< 
int >( backend ); - std::cerr << "Warning, std::string( const grb::Backend ): unknown backend " - << backend_id << " encountered, please submit a bug report.\n"; - return "unknown_backend(id=" + std::to_string( backend_id ) + ")"; - } - } - } // namespace grb #endif diff --git a/include/graphblas/base/vector.hpp b/include/graphblas/base/vector.hpp index 5eb4ad83a..c00ca6e53 100644 --- a/include/graphblas/base/vector.hpp +++ b/include/graphblas/base/vector.hpp @@ -236,28 +236,6 @@ namespace grb { (void) n; } - /** - * Creates a dense ALP/GraphBLAS vector. - * - * This constructor takes an initialiser list of values that will be copied - * into this vector. The size of the vector will be equal to the number of - * elements in the initialiser list. - * - * For backends with more than one user process, the size of \a vals is the - * global vector size, and the contents of \a vals are processed using - * sequential I/O semantics. - * - * @see #grb::IOMode For the difference between sequential and parallel I/O - * modes. - * - * \note There is only a difference if there are more than one user process. - * - * @param[in] vals The values to be copied into this vector. - */ - Vector( const std::initializer_list< D > &vals ) { - (void) vals; - } - /** * Move constructor. * diff --git a/include/graphblas/bsp1d/vector.hpp b/include/graphblas/bsp1d/vector.hpp index a0d44829f..1e85db74e 100644 --- a/include/graphblas/bsp1d/vector.hpp +++ b/include/graphblas/bsp1d/vector.hpp @@ -2377,67 +2377,6 @@ namespace grb { #endif } - /** - * Constructs a BSP1D vector. - * - * @see Full description in base backend. - * - * \internal - * This constructor initialises the local vector and synchronises the global - * vector once. 
- * - * TODO rewrite below logic using an iterator filter (GitHub PR 233, issue - * 228) - * \endinternal - */ - Vector( const std::initializer_list< D > &vals ) - : Vector( vals.size(), vals.size() ) - { -#ifdef _DEBUG - std::cerr << "In Vector< BSP1D >::Vector( initializer_list ) constructor\n"; -#endif - RC ret = SUCCESS; - const size_t n = vals.size(); - const internal::BSP1D_Data &data = internal::grb_BSP1D.cload(); - - // Set all the local values - for( size_t i = 0; i < vals.size(); i++ ) { - const D val = *( vals.begin() + i ); - - // check if local - // if( (i / x._b) % data.P != data.s ) { - if( data.s != - internal::Distribution< BSP1D >::global_index_to_process_id( - i, n, data.P - ) - ) { - continue; - } - - // local, so translate index and perform requested operation - const size_t local_index = - internal::Distribution< BSP1D >::global_index_to_local( i, n, data.P ); -#ifdef _DEBUG - std::cout << data.s << ", grb::setElement translates global index " - << i << " to " << local_index << "\n"; -#endif - ret = ret - ? ret - : setElement( _local, val, local_index, EXECUTE ); - } - - // Synchronise once between all processes - if( SUCCESS != - collectives< BSP1D >::allreduce( ret, operators::any_or< RC >() ) - ) { - throw std::runtime_error( "grb::Vector< BSP1D >::Vector( initializer_list ): " - "collective::allreduce failed." ); - } - - // on successful execute, sync new nnz count - updateNnz(); - } - /** * Copy constructor. 
* diff --git a/include/graphblas/hyperdags/io.hpp b/include/graphblas/hyperdags/io.hpp index 09b16634a..db1f09e54 100644 --- a/include/graphblas/hyperdags/io.hpp +++ b/include/graphblas/hyperdags/io.hpp @@ -180,8 +180,7 @@ namespace grb { typename T > RC set( - Vector< DataType, hyperdags, Coords > &x, - const T val, + Vector< DataType, hyperdags, Coords > &x, const T val, const Phase &phase = EXECUTE, const typename std::enable_if< !grb::is_object< DataType >::value && diff --git a/include/graphblas/hyperdags/vector.hpp b/include/graphblas/hyperdags/vector.hpp index 02c0f7734..5f422399e 100644 --- a/include/graphblas/hyperdags/vector.hpp +++ b/include/graphblas/hyperdags/vector.hpp @@ -161,15 +161,6 @@ namespace grb { register_vector(); } - Vector( const std::initializer_list< T > vals ) : vector( vals ) - { -#ifdef _DEBUG - std::cout << "In Vector< hyperdags >::Vector( initializer_list )" - << " constructor\n"; -#endif - register_vector(); - } - ~Vector() { #ifdef _DEBUG std::cout << "Vector (hyperdags) destructor\n"; diff --git a/include/graphblas/nonblocking/blas1.hpp b/include/graphblas/nonblocking/blas1.hpp index f970da41a..f9f14cafc 100644 --- a/include/graphblas/nonblocking/blas1.hpp +++ b/include/graphblas/nonblocking/blas1.hpp @@ -548,7 +548,7 @@ namespace grb { typename Monoid::D3 global = monoid.template getIdentity< typename Monoid::D3 >(); - size_t local_reduced_size = NONBLOCKING::numThreads() * + size_t local_reduced_size = sysconf( _SC_NPROCESSORS_ONLN ) * config::CACHE_LINE_SIZE::value(); IOType local_reduced[ local_reduced_size ]; @@ -10550,7 +10550,7 @@ namespace grb { typename AddMonoid::D3 reduced = addMonoid.template getIdentity< typename AddMonoid::D3 >(); - size_t reduced_size = NONBLOCKING::numThreads() * + size_t reduced_size = sysconf( _SC_NPROCESSORS_ONLN ) * config::CACHE_LINE_SIZE::value(); typename AddMonoid::D3 array_reduced[ reduced_size ]; diff --git a/include/graphblas/nonblocking/coordinates.hpp 
b/include/graphblas/nonblocking/coordinates.hpp index ad9e3c670..bcb4cf42a 100644 --- a/include/graphblas/nonblocking/coordinates.hpp +++ b/include/graphblas/nonblocking/coordinates.hpp @@ -232,21 +232,6 @@ namespace grb { } } - /** - * Sets this data structure to a dummy placeholder for a dense structure. - * - * This structure will be immutable, and does not support the majority of - * operations this class defines; use dense coordinates with care. - */ - void setDense( const size_t dim ) noexcept { - _assigned = nullptr; - _stack = nullptr; - _buffer = nullptr; - _n = dim; - _cap = dim; - _buf = 0; - } - inline bool assign( const size_t i ) noexcept { if( _n == _cap ) { return true; @@ -265,7 +250,7 @@ namespace grb { } template< bool maybe_invalid = false > - inline void local_assignAll() noexcept { + inline void local_assignAll( ) noexcept { if( maybe_invalid || _n != _cap ) { if( _assigned != nullptr ) { assert( _stack != nullptr ); @@ -318,21 +303,8 @@ namespace grb { } } - inline void assignAll() noexcept { - // this operates on the global coordinates, not on a local view of it - #pragma omp parallel - { - size_t start, end; - config::OMP::localRange( start, end, 0, _cap ); - for( size_t i = start; i < end; ++i ) { - _assigned[ i ] = true; - _stack[ i ] = i; - } - } - _n = _cap; - } - inline void clear() noexcept { + if( _n == _cap ) { #ifndef NDEBUG if( _assigned == nullptr && _cap > 0 ) { @@ -340,6 +312,7 @@ namespace grb { assert( dense_coordinates_may_not_call_clear ); } #endif + #pragma omp parallel for schedule( dynamic, config::CACHE_LINE_SIZE::value() ) for( size_t i = 0; i < _cap; ++i ) { _assigned[ i ] = false; diff --git a/include/graphblas/nonblocking/matrix.hpp b/include/graphblas/nonblocking/matrix.hpp index 00d0e5ebb..251e2037d 100644 --- a/include/graphblas/nonblocking/matrix.hpp +++ b/include/graphblas/nonblocking/matrix.hpp @@ -419,32 +419,6 @@ namespace grb { const Matrix< InputType, nonblocking, RIT, CIT, NIT > & ); - // Native interface 
friends - - friend const grb::Matrix< - D, nonblocking, - ColIndexType, ColIndexType, NonzeroIndexType - > - internal::wrapCRSMatrix< D, ColIndexType, NonzeroIndexType, nonblocking >( - const D *__restrict__ const, - const ColIndexType *__restrict__ const, - const NonzeroIndexType *__restrict__ const, - const size_t, const size_t - ); - - friend grb::Matrix< - D, nonblocking, - ColIndexType, ColIndexType, NonzeroIndexType - > - internal::wrapCRSMatrix< D, ColIndexType, NonzeroIndexType, nonblocking >( - D *__restrict__ const, - ColIndexType *__restrict__ const, - NonzeroIndexType *__restrict__ const, - const size_t, const size_t, const size_t, - char * const, char * const, - D *__restrict__ const - ); - private: diff --git a/include/graphblas/nonblocking/vector.hpp b/include/graphblas/nonblocking/vector.hpp index a70c48521..9b57d8c24 100644 --- a/include/graphblas/nonblocking/vector.hpp +++ b/include/graphblas/nonblocking/vector.hpp @@ -197,46 +197,12 @@ namespace grb { friend class PinnedVector< D, nonblocking >; - // Native interface friends - - template< typename ValueType, Backend backend > - friend Vector< - ValueType, backend, - internal::Coordinates< - config::IMPLEMENTATION< backend >::coordinatesBackend() - > - > internal::wrapRawVector( const size_t n, ValueType *__restrict__ const - raw ); - - template< typename ValueType, Backend backend > - friend const Vector< - ValueType, backend, - internal::Coordinates< - config::IMPLEMENTATION< backend >::coordinatesBackend() - > - > internal::wrapRawVector( const size_t n, const ValueType *__restrict__ const raw ); - private: Vector< D, reference, MyCoordinates > ref; - protected: - - /** - * \internal Internal constructor that wraps around an existing raw dense - * vector. This constructor results in a dense vector whose - * structure is immutable. Any invalid use incurs UB; use with care. 
- */ - Vector( const size_t n, D *__restrict__ const raw ) : ref( n, raw ) { -#ifdef _DEBUG - std::cerr << "In Vector< nonblocking > constructor that wraps around an " - << "external raw array.\n"; -#endif - } - - public: /** @see Vector::value_type. */ @@ -255,14 +221,8 @@ namespace grb { typedef typename Vector< D, reference, MyCoordinates >::const_iterator const_iterator; - Vector( const size_t n, const size_t nz ) : ref( n, nz ) { - // pipeline execution is not required here as this is a grb::Vector - // declaration -#ifdef _DEBUG - std::cerr << "In Vector< nonblocking >::Vector( size_t, size_t )" - << " constructor\n"; -#endif - } + + Vector( const size_t n, const size_t nz ) : ref( n, nz ) {} Vector( const size_t n ) : Vector( n, n ) { @@ -273,15 +233,6 @@ namespace grb { #endif } - Vector( const std::initializer_list< D > vals ) : ref( vals ) { - // pipeline execution is not required here as this is a grb::Vector - // declaration -#ifdef _DEBUG - std::cerr << "In Vector< nonblocking >::Vector( initializer_list )" - << " constructor\n"; -#endif - } - Vector() : Vector( 0 ) {} Vector( const Vector< D, nonblocking, MyCoordinates > &x ) : diff --git a/include/graphblas/reference/blas2.hpp b/include/graphblas/reference/blas2.hpp index 865f7bc51..40acf6531 100644 --- a/include/graphblas/reference/blas2.hpp +++ b/include/graphblas/reference/blas2.hpp @@ -2573,10 +2573,9 @@ namespace grb { j_start = A.n / 2; assert( A.n > 0 ); while( j_start < A.n && !( - A.CCS.col_start[ j_start ] <= start && - start < A.CCS.col_start[ j_start + 1 ] - ) - ) { + A.CCS.col_start[ j_start ] <= + start && start < A.CCS.col_start[ j_start + 1 ] + ) ) { #ifdef _DEBUG #ifdef _H_GRB_REFERENCE_OMP_BLAS2 #pragma omp critical @@ -2609,32 +2608,33 @@ namespace grb { j_left_range = 0; j_right_range = A.n; j_end = A.n / 2; - while( j_end < A.n && !( + if( j_end < A.CCS.col_start[ A.n ] ) { + while( j_end < A.n && !( A.CCS.col_start[ j_end ] <= end && end < A.CCS.col_start[ j_end + 1 ] - ) - ) { 
+ ) ) { #ifdef _DEBUG #ifdef _H_GRB_REFERENCE_OMP_BLAS2 - #pragma omp critical + #pragma omp critical #endif - std::cout << "\t binary search for " << end << " in [ " << j_left_range - << ", " << j_right_range << " ) = [ " << A.CCS.col_start[ j_left_range ] - << ", " << A.CCS.col_start[ j_right_range ] << " ). " - << "Currently tried and failed at " << j_end << "\n"; -#endif - if( j_right_range == j_left_range ) { - assert( false ); - break; - } else if( A.CCS.col_start[ j_end ] > end ) { - j_right_range = j_end; - } else { - j_left_range = j_end + 1; + std::cout << "\t binary search for " << end << " in [ " << j_left_range + << ", " << j_right_range << " ) = [ " << A.CCS.col_start[ j_left_range ] + << ", " << A.CCS.col_start[ j_right_range ] << " ). " + << "Currently tried and failed at " << j_end << "\n"; +#endif + if( j_right_range == j_left_range ) { + assert( false ); + break; + } else if( A.CCS.col_start[ j_end ] > end ) { + j_right_range = j_end; + } else { + j_left_range = j_end + 1; + } + assert( j_right_range >= j_left_range ); + j_end = j_right_range - j_left_range; + j_end /= 2; + j_end += j_left_range; } - assert( j_right_range >= j_left_range ); - j_end = j_right_range - j_left_range; - j_end /= 2; - j_end += j_left_range; } if( j_start > j_end ) { j_start = j_end; diff --git a/include/graphblas/reference/vector.hpp b/include/graphblas/reference/vector.hpp index fcd2516d8..f0db908b2 100644 --- a/include/graphblas/reference/vector.hpp +++ b/include/graphblas/reference/vector.hpp @@ -244,8 +244,6 @@ namespace grb { friend class PinnedVector< D, BSP1D >; - friend class Vector< D, nonblocking, internal::Coordinates< nonblocking > >; - template< typename ValueType, Backend backend > friend Vector< ValueType, backend, @@ -857,28 +855,6 @@ namespace grb { #endif } - /** - * Constructs a reference vector. - * - * @see Full description in base backend. 
- */ - Vector( const std::initializer_list< D > &vals ) - : Vector( vals.size(), vals.size() ) - { -#ifdef _DEBUG - std::cerr << "In Vector< reference >::Vector( initializer_list )" - << " constructor\n"; -#endif - -#ifdef _H_GRB_REFERENCE_OMP_VECTOR - #pragma omp parallel for simd -#endif - for( size_t i = 0; i < vals.size(); ++i ) { - _raw[ i ] = *( vals.begin() + i ); - } - _coordinates.assignAll(); - } - /** * The default constructor creates an empty vector and should never be * used explicitly. diff --git a/include/transition/blas_sparse.h b/include/transition/blas_sparse.h index 83e186f8e..63839723e 100644 --- a/include/transition/blas_sparse.h +++ b/include/transition/blas_sparse.h @@ -21,39 +21,6 @@ * This is the ALP implementation of a subset of the NIST Sparse BLAS standard. * While the API is standardised, this header makes some implementation-specific * extensions. - * - * @author A. N. Yzelman - * @date 2023 - */ - -/** - * \defgroup TRANS Transition path - * - * The transition path libraries enable integrating ALP with existing software. - * It operates by exposing several of its functionalities via established C - * interfaces and established data formats in order to facilitate the transition - * of legacy software to ALP. Ideally, users of transition interfaces need only - * re-compile and link their software; in some cases, trivial modifications - * might be required to migrate to transition interfaces, e.g., changing the - * prefix of called functions. - * - * The currently exposed interfaces are: - * - \ref SPARSEBLAS; - * - \ref SPBLAS; and - * - \ref TRANS_SOLVERS. - * - * All of these transition libraries show-case ALP's ability to quickly wrap - * around external APIs, thus simplifying integration of ALP-backed code with - * existing software. 
We do note, however, that the direct use of the native C++ - * ALP API may lead to higher performance than the use of these transition path - * interfaces, and that in some cases the legacy interface itself is what makes - * achieving such higher performance impossible. - * - * The current transition path interfaces are at an *experimental prototype - * phase*; in particular, not all primitives in a given standard API are - * currently implemented. For \ref SPARSEBLAS in particular, additional support - * or coverage may freely be requested in GitHub issue #14. For other - * interfaces, feel free to open new issues or to contact the maintainers. */ #ifndef _H_ALP_SPARSEBLAS_NIST @@ -65,41 +32,6 @@ extern "C" { #endif -/** - * \defgroup SPARSEBLAS SparseBLAS - * \ingroup TRANS - * - * A SparseBLAS implementation enabled by ALP/GraphBLAS - * - * ALP provides a (presently partial) implementation of the Sparse BLAS standard - * as defined by the BLAS forum and in the following paper: - * - Duff, Iain S., Michael A. Heroux, and Roldan Pozo. "An overview of the - * sparse basic linear algebra subprograms: The new standard from the BLAS - * technical forum." ACM Transactions on Mathematical Software (TOMS) 28(2), - * 2002, pp. 239-267. - * - * We also provide a couple of extensions over this standard, in particular to - * add support for sparse vectors. Such extensions are prefixed by - * EXTBLAS_ and extblas_, such as, for example, - * - #EXTBLAS_dusv_begin and - * - #extblas_sparse_vector. - * This prefix can be configured differently, please refer to the developer - * documentation if looking for this option. - * - * The functionalities defined by the standard of course retain the prefix - * defined by the standard: BLAS_ and blas_, such as, e.g., - * - #BLAS_duscr_begin and - * - #blas_sparse_matrix. - * - * The implementation of this standard is done by mapping back to the equivalent - * ALP/GraphBLAS primitives. 
By default, ALP builds both sequential and shared- - * memory parallel SparseBLAS libraries. It does so simply by compiling the same - * ALP-based SparseBLAS implementation with a sequential and a shared-memory ALP - * backend, respectively. - * - * @{ - */ - /** * The possible transposition types. * @@ -261,8 +193,8 @@ int BLAS_dusmm( int EXTBLAS_dusmsv( const enum blas_trans_type transa, const double alpha, const blas_sparse_matrix A, - const EXTBLAS_TYPE( sparse_vector ) x, - EXTBLAS_TYPE( sparse_vector ) y + const extblas_sparse_vector x, + extblas_sparse_vector y ); /** @@ -397,8 +329,6 @@ int EXTBLAS_dusm_clear( blas_sparse_matrix A ); */ int EXTBLAS_free(); -/**@}*/ // ends the SparseBLAS doxygen group - #ifdef __cplusplus } // end extern "C" #endif diff --git a/include/transition/blas_sparse_vec.h b/include/transition/blas_sparse_vec.h index efbb8256a..dd44f0ec2 100644 --- a/include/transition/blas_sparse_vec.h +++ b/include/transition/blas_sparse_vec.h @@ -21,47 +21,17 @@ * This is an ALP-specific extension to the NIST Sparse BLAS standard, which * the ALP libsparseblas transition path also introduces to the de-facto spblas * standard. - * - * @author A. N. Yzelman - * @date 2023 */ #ifndef _H_ALP_SPARSEBLAS_EXT_VEC #define _H_ALP_SPARSEBLAS_EXT_VEC -/** - * \addtogroup SPARSEBLAS - * @{ - */ - -/**@{*/ -/** \internal Helper macros for #EXTBLAS_FUN and #EXTBLAS_TYPE */ -#define __SPBLAS_CONC( _a, _b ) _a ## _b -#define __SPBLAS_CONCAT( _a, _b ) __SPBLAS_CONC( _a, _b ) -#define SPCONCAT( _a, _b ) __SPBLAS_CONCAT( _a, _b ) -/**@}*/ - #ifdef __cplusplus extern "C" { #endif -/**@{*/ -/** - * \internal - * - * Allows renaming our non-standard functions with some other prefix. - * - * The default prefixes are EXTBLAS_ for functions and - * extblas_ for types. - * - * \endinternal - */ -#define EXTBLAS_FUN( name ) SPCONCAT( EXTBLAS_, name ) -#define EXTBLAS_TYPE( name ) SPCONCAT( extblas_, name ) -/**@}*/ - /** A sparse vector. 
This is an implementation-specific extension. */ -typedef void * EXTBLAS_TYPE( sparse_vector ); +typedef void * extblas_sparse_vector; /** * Creates a handle to a new sparse vector that holds no entries. @@ -72,7 +42,7 @@ typedef void * EXTBLAS_TYPE( sparse_vector ); * * @returns An #extblas_sparse_vector that is under construction. */ -EXTBLAS_TYPE( sparse_vector ) EXTBLAS_FUN( dusv_begin )( const int n ); +extblas_sparse_vector EXTBLAS_dusv_begin( const int n ); /** * Inserts a new nonzero entry into a sparse vector that is under construction. @@ -90,8 +60,8 @@ EXTBLAS_TYPE( sparse_vector ) EXTBLAS_FUN( dusv_begin )( const int n ); * * This is an implementation-specific extension. */ -int EXTBLAS_FUN( dusv_insert_entry )( - EXTBLAS_TYPE( sparse_vector ) x, +int EXTBLAS_dusv_insert_entry( + extblas_sparse_vector x, const double val, const int index ); @@ -108,7 +78,7 @@ int EXTBLAS_FUN( dusv_insert_entry )( * * This is an implementation-specific extension. */ -int EXTBLAS_FUN( dusv_end )( EXTBLAS_TYPE( sparse_vector ) x ); +int EXTBLAS_dusv_end( extblas_sparse_vector x ); /** * Destroys the given sparse vector. @@ -122,7 +92,7 @@ int EXTBLAS_FUN( dusv_end )( EXTBLAS_TYPE( sparse_vector ) x ); * * This is an implementation-specific extension. */ -int EXTBLAS_FUN( dusvds )( EXTBLAS_TYPE( sparse_vector ) x ); +int EXTBLAS_dusvds( extblas_sparse_vector x ); /** * Retrieves the number of nonzeroes in a given finalised sparse vector. @@ -137,7 +107,7 @@ int EXTBLAS_FUN( dusvds )( EXTBLAS_TYPE( sparse_vector ) x ); * * This is an implementation-specific extension. */ -int EXTBLAS_FUN( dusv_nz )( const EXTBLAS_TYPE( sparse_vector ) x, int * nz ); +int EXTBLAS_dusv_nz( const extblas_sparse_vector x, int * nz ); /** * Opens a sparse vector for read-out. @@ -154,7 +124,7 @@ int EXTBLAS_FUN( dusv_nz )( const EXTBLAS_TYPE( sparse_vector ) x, int * nz ); * * This is an implementation-specific extension. 
*/ -int EXTBLAS_FUN( dusv_open )( const EXTBLAS_TYPE( sparse_vector ) x ); +int EXTBLAS_dusv_open( const extblas_sparse_vector x ); /** * Retrieves a sparse vector entry. @@ -184,8 +154,8 @@ int EXTBLAS_FUN( dusv_open )( const EXTBLAS_TYPE( sparse_vector ) x ); * * This is an implementation-specific extension. */ -int EXTBLAS_FUN( dusv_get )( - const EXTBLAS_TYPE( sparse_vector ) x, +int EXTBLAS_dusv_get( + const extblas_sparse_vector x, double * const val, int * const ind ); @@ -200,7 +170,7 @@ int EXTBLAS_FUN( dusv_get )( * * This is an implementation-specific extension. */ -int EXTBLAS_FUN( dusv_close )( const EXTBLAS_TYPE( sparse_vector ) x ); +int EXTBLAS_dusv_close( const extblas_sparse_vector x ); /** * Removes all entries from a finalised sparse vector. @@ -213,9 +183,7 @@ int EXTBLAS_FUN( dusv_close )( const EXTBLAS_TYPE( sparse_vector ) x ); * * This is an implementation-specific extension. */ -int EXTBLAS_FUN( dusv_clear )( EXTBLAS_TYPE( sparse_vector ) x ); - -/**@}*/ // end doxygen grouping for SPARSEBLAS +int EXTBLAS_dusv_clear( extblas_sparse_vector x ); #ifdef __cplusplus } // end extern "C" diff --git a/include/transition/spblas.h b/include/transition/spblas.h new file mode 100644 index 000000000..4bcc41fc9 --- /dev/null +++ b/include/transition/spblas.h @@ -0,0 +1,211 @@ + +/* + * Copyright 2021 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * All functions defined have void return types. This has two
+ * important consequences:
+ * @param[in,out] c Pointer to the values of \f$ C \f$; read and overwritten.
+ * @param[in] b, jb, ib Similar for the nonzeroes in \a B.
+ * @param[out] c, jc, ic Similar for the nonzeroes in \a C. For these parameters
+ * @param[in] ia The row offset array of the nonzeroes in \f$ A \f$.
+ */ +void extspblas_dcsrmultsv( + const char * trans, const int * request, + const int * m, const int * n, + const double * a, const int * ja, const int * ia, + const extblas_sparse_vector x, + extblas_sparse_vector y +); + +/** + * An extension that frees any buffers the ALP/GraphBLAS-generated SparseBLAS + * library may have allocated. + */ +void extspblas_free(); + +#ifdef __cplusplus +} // end extern "C" +#endif + +#endif // end _H_ALP_SPBLAS + diff --git a/src/transition/CMakeLists.txt b/src/transition/CMakeLists.txt index 5c6abcc72..daf7252e0 100644 --- a/src/transition/CMakeLists.txt +++ b/src/transition/CMakeLists.txt @@ -18,79 +18,48 @@ # This file creates the basic target(s) needed by all backends # -assert_defined_variables( WITH_REFERENCE_BACKEND WITH_OMP_BACKEND WITH_NONBLOCKING_BACKEND ) +assert_defined_variables( WITH_REFERENCE_BACKEND WITH_OMP_BACKEND ) -function( add_transition_library target_name lib_type lib_name src1 ) - - set( multiValueArgs - "SOURCES" - "PUBLIC_LINK_LIBRARIES" - "PRIVATE_LINK_LIBRARIES" +if( WITH_REFERENCE_BACKEND ) + add_library( sparseblas_static STATIC + ${CMAKE_CURRENT_SOURCE_DIR}/sparseblas.cpp ) - cmake_parse_arguments( parsed "" "" "${multiValueArgs}" "SOURCES;${src1};${ARGN}" ) - add_library( ${target_name} ${lib_type} ${parsed_SOURCES} ) - set_target_properties( ${target_name} PROPERTIES - OUTPUT_NAME ${lib_name} + set_target_properties( sparseblas_static PROPERTIES + OUTPUT_NAME "sparseblas" ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/shmem" ) - target_link_libraries( ${target_name} PUBLIC transition_headers ${parsed_PUBLIC_LINK_LIBRARIES} ) - target_link_libraries( ${target_name} PRIVATE backend_flags ${parsed_PRIVATE_LINK_LIBRARIES} ) - add_dependencies( libs ${target_name} ) - install( TARGETS ${target_name} EXPORT GraphBLASTargets - ARCHIVE DESTINATION "${SHMEM_BACKEND_INSTALL_DIR}" - ) + target_link_libraries( sparseblas_static PUBLIC backend_reference transition ) -endfunction( 
add_transition_library ) + target_link_libraries( sparseblas_static PRIVATE backend_flags ) -if( WITH_REFERENCE_BACKEND ) - add_transition_library( sparseblas_sequential_static STATIC "sparseblas_sequential" ${CMAKE_CURRENT_SOURCE_DIR}/sparseblas.cpp - PUBLIC_LINK_LIBRARIES backend_reference - ) + add_dependencies( libs sparseblas_static ) - # this is the version for sequantial execution only - add_transition_library( ${_SPBLAS_PREFIX}sequential STATIC "${_SPBLAS_PREFIX}sequential" - ${CMAKE_CURRENT_SOURCE_DIR}/spblas.cpp PUBLIC_LINK_LIBRARIES backend_reference + install( TARGETS sparseblas_static + EXPORT GraphBLASTargets + ARCHIVE DESTINATION "${SHMEM_BACKEND_INSTALL_DIR}" ) - target_compile_definitions( ${_SPBLAS_PREFIX}sequential PUBLIC SPBLAS_PREFIX=${_SPBLAS_PREFIX} ) - - if( ENABLE_SOLVER_LIB ) - add_transition_library( spsolver_sequential STATIC "spsolver_sequential" ${CMAKE_CURRENT_SOURCE_DIR}/solver.cpp - PRIVATE_LINK_LIBRARIES backend_reference - ) - endif() -endif() +endif( WITH_REFERENCE_BACKEND ) if( WITH_OMP_BACKEND ) - add_transition_library( sparseblas_shmem_parallel_static STATIC "sparseblas_shmem_parallel" ${CMAKE_CURRENT_SOURCE_DIR}/sparseblas.cpp - PRIVATE_LINK_LIBRARIES backend_reference_omp + add_library( sparseblas_omp_static STATIC + ${CMAKE_CURRENT_SOURCE_DIR}/sparseblas.cpp ) - # this is the "default" version (parallel) - add_transition_library( ${_SPBLAS_PREFIX}shmem_parallel STATIC "${_SPBLAS_PREFIX}shmem_parallel" - ${CMAKE_CURRENT_SOURCE_DIR}/spblas.cpp PRIVATE_LINK_LIBRARIES backend_reference_omp + set_target_properties( sparseblas_omp_static PROPERTIES + OUTPUT_NAME "sparseblas_omp" + ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/shmem" ) - target_compile_definitions( ${_SPBLAS_PREFIX}shmem_parallel PUBLIC SPBLAS_PREFIX=${_SPBLAS_PREFIX} ) - if( ENABLE_EXTRA_SOLVER_LIBS ) - add_transition_library( spsolver_shmem_blocking STATIC "spsolver_shmem_blocking" ${CMAKE_CURRENT_SOURCE_DIR}/solver.cpp - PRIVATE_LINK_LIBRARIES 
backend_reference_omp - ) - endif() -endif() + target_link_libraries( sparseblas_omp_static PUBLIC backend_reference_omp transition ) -if( WITH_NONBLOCKING_BACKEND ) - if( ENABLE_SOLVER_LIB ) - add_transition_library( spsolver_shmem_parallel STATIC "spsolver_shmem_parallel" ${CMAKE_CURRENT_SOURCE_DIR}/solver.cpp - PRIVATE_LINK_LIBRARIES backend_nonblocking - ) + target_link_libraries( sparseblas_omp_static PRIVATE backend_flags ) - # same binary name of KML library - # https://www.hikunpeng.com/document/detail/en/kunpengaccel/math-lib/devg-kml/kunpengaccel_kml_16_0011.html - add_transition_library( ksolver STATIC "ksolver" ${CMAKE_CURRENT_SOURCE_DIR}/kml_iss.cpp - PRIVATE_LINK_LIBRARIES spsolver_nonblocking_static - ) - endif() -endif() + add_dependencies( libs sparseblas_omp_static ) + install( TARGETS sparseblas_omp_static + EXPORT GraphBLASTargets + ARCHIVE DESTINATION "${SHMEM_BACKEND_INSTALL_DIR}" + ) +endif() diff --git a/src/transition/sparseblas.cpp b/src/transition/sparseblas.cpp index 6d3471258..60fb1f055 100644 --- a/src/transition/sparseblas.cpp +++ b/src/transition/sparseblas.cpp @@ -15,17 +15,7 @@ * limitations under the License. */ -/** - * @file - * - * Implements the Sparse BLAS standard using ALP/GraphBLAS. - * - * @author A. N. Yzelman - * @date 2023 - */ - #include "blas_sparse.h" -#include "sparse_vector_impl.hpp" #include #include @@ -252,6 +242,76 @@ namespace sparseblas { }; + /** + * \internal A sparse vector that is either under construction, or finalized as + * an ALP/GraphBLAS vector. 
+ */ + template< typename T > + class SparseVector { + + public: + + int n; + bool finalized; + grb::Vector< T > * vector; + typename grb::Vector< T >::const_iterator start, end; + + private: + + std::vector< T > uc_vals; + std::vector< int > uc_inds; + + public: + + SparseVector( const int &_n ) : + n( _n ), finalized( false ), vector( nullptr ) + {} + + ~SparseVector() { + if( finalized ) { + assert( vector != nullptr ); + delete vector; + } else { + assert( vector == nullptr ); + } + } + + void add( const T &val, const int &index ) { + assert( !finalized ); + uc_vals.push_back( val ); + uc_inds.push_back( index ); + } + + void finalize() { + assert( uc_vals.size() == uc_inds.size() ); + const size_t nz = uc_vals.size(); + vector = new grb::Vector< T >( n, nz ); + if( vector == nullptr ) { + std::cerr << "Could not create ALP/GraphBLAS vector of size " << n + << " and capacity " << nz << "\n"; + throw std::runtime_error( "Could not create ALP/GraphBLAS vector" ); + } + if( grb::capacity( *vector ) < nz ) { + throw std::runtime_error( "ALP/GraphBLAS vector has insufficient " + "capacity" ); + } + const grb::RC rc = grb::buildVector( + *vector, + uc_inds.cbegin(), uc_inds.cend(), + uc_vals.cbegin(), uc_vals.cend(), + grb::SEQUENTIAL + ); + if( rc != grb::SUCCESS ) { + throw std::runtime_error( "Could not ingest nonzeroes into ALP/GraphBLAS " + "vector" ); + } + uc_vals.clear(); + uc_inds.clear(); + finalized = true; + } + + }; + /** * \internal SparseBLAS allows a matrix to be under construction or finalized. * This class matches that concept -- for non-finalized matrices, it @@ -331,20 +391,95 @@ namespace sparseblas { * \internal Utility function that converts a #extblas_sparse_vector to a * sparseblas::SparseVector. This is for vectors of doubles. 
*/ - static native::SparseVector< double > * getDoubleVector( - EXTBLAS_TYPE( sparse_vector ) x - ) { - return static_cast< native::SparseVector< double >* >( x ); + SparseVector< double > * getDoubleVector( extblas_sparse_vector x ) { + return static_cast< SparseVector< double >* >( x ); } /** * \internal Utility function that converts a #blas_sparse_matrix to a * sparseblas::SparseMatrix. This is for matrices of doubles. */ - static SparseMatrix< double > * getDoubleMatrix( blas_sparse_matrix A ) { + SparseMatrix< double > * getDoubleMatrix( blas_sparse_matrix A ) { return static_cast< SparseMatrix< double >* >( A ); } + /** + * \internal Internal buffer used for output matrix containers. + */ + char * buffer = nullptr; + + /** + * \internal The size of #buffer. + */ + size_t buffer_size = 0; + + /** + * @returns false if and only if buffer allocation failed. + * @returns true on success. + */ + template< typename T > + bool getBuffer( + char * &bitmask, char * &stack, T * &valbuf, + const size_t size + ) { + typedef typename grb::internal::Coordinates< grb::config::default_backend > + Coors; + constexpr const size_t b = grb::config::CACHE_LINE_SIZE::value(); + + // catch trivial case + if( size == 0 ) { + bitmask = stack = nullptr; + valbuf = nullptr; + return true; + } + + // compute required size + size_t reqSize = Coors::arraySize( size ) + Coors::stackSize( size ) + + (size * sizeof(T)) + 3 * b; + + // ensure buffer is at least the required size + if( buffer == nullptr ) { + assert( buffer_size == 0 ); + buffer_size = reqSize; + buffer = static_cast< char * >( malloc( buffer_size ) ); + if( buffer == nullptr ) { + buffer_size = 0; + return false; + } + } else if( buffer_size < reqSize ) { + free( buffer ); + buffer_size = std::max( reqSize, 2 * buffer_size ); + buffer = static_cast< char * >( malloc( buffer_size ) ); + if( buffer == nullptr ) { + buffer_size = 0; + return false; + } + } + + // set buffers and make sure they are aligned + char * walk = 
buffer; + uintptr_t cur_mod = reinterpret_cast< uintptr_t >(walk) % b; + if( cur_mod > 0 ) { + walk += (b - cur_mod); + } + bitmask = walk; + walk += Coors::arraySize( size ); + cur_mod = reinterpret_cast< uintptr_t >(walk) % b; + if( cur_mod > 0 ) { + walk += (b - cur_mod); + } + stack = walk; + walk += Coors::stackSize( size ); + cur_mod = reinterpret_cast< uintptr_t >(walk) % b; + if( cur_mod > 0 ) { + walk += (b - cur_mod); + } + valbuf = reinterpret_cast< T * >( walk); + + // done + return true; + } + } // end namespace sparseblas namespace std { @@ -374,536 +509,823 @@ namespace std { // implementation of the SparseBLAS API follows -EXTBLAS_TYPE( sparse_vector ) EXTBLAS_FUN( dusv_begin )( const int n ) { - return new native::SparseVector< double >( n ); -} - -int EXTBLAS_FUN( dusv_insert_entry )( - EXTBLAS_TYPE( sparse_vector ) x, - const double val, - const int index -) { - auto vector = sparseblas::getDoubleVector( x ); - assert( !(vector->finalized) ); - try { - vector->add( val, index ); - } catch( ... ) { - return 20; - } - return 0; -} - -int EXTBLAS_FUN( dusv_end )( EXTBLAS_TYPE( sparse_vector ) x ) { - auto vector = sparseblas::getDoubleVector( x ); - assert( !(vector->finalized) ); - try { - vector->finalize(); - } catch( ... 
) { - return 30; - } - return 0; -} - -int EXTBLAS_FUN( dusvds )( EXTBLAS_TYPE( sparse_vector ) x ) { - auto vector = sparseblas::getDoubleVector( x ); - delete vector; - return 0; -} - -int EXTBLAS_FUN( dusv_nz )( - const EXTBLAS_TYPE( sparse_vector ) x, - int * const nz -) { - auto vector = sparseblas::getDoubleVector( x ); - assert( vector->finalized ); - const size_t nnz = grb::nnz( *(vector->vector) ); - if( nnz > static_cast< size_t >( std::numeric_limits< int >::max() ) ) { - std::cerr << "Number of nonzeroes is larger than what can be represented by " - << "a SparseBLAS int!\n"; - return 10; - } - *nz = static_cast< int >(nnz); - return 0; -} - -int EXTBLAS_FUN( dusv_clear )( EXTBLAS_TYPE( sparse_vector ) x ) { - auto vector = sparseblas::getDoubleVector( x ); - assert( vector->finalized ); - const grb::RC rc = grb::clear( *(vector->vector) ); - if( rc != grb::SUCCESS ) { - return 10; - } - return 0; -} - -int EXTBLAS_FUN( dusv_open )( const EXTBLAS_TYPE( sparse_vector ) x ) { - auto vector = sparseblas::getDoubleVector( x ); - assert( vector->finalized ); - try { - vector->start = vector->vector->cbegin(); - vector->end = vector->vector->cend(); - } catch( ... ) { - return 10; - } - return 0; -} - -int EXTBLAS_FUN( dusv_get )( - const EXTBLAS_TYPE( sparse_vector ) x, - double * const val, int * const ind -) { - auto vector = sparseblas::getDoubleVector( x ); - assert( vector->finalized ); - assert( vector->start != vector->end ); - assert( val != nullptr ); - assert( ind != nullptr ); - *val = vector->start->second; - *ind = vector->start->first; - try { - (void) ++(vector->start); - } catch( ... 
) { - return 2; - } - if( vector->start == vector->end ) { +extern "C" { + + extblas_sparse_vector EXTBLAS_dusv_begin( const int n ) { + return new sparseblas::SparseVector< double >( n ); + } + + int EXTBLAS_dusv_insert_entry( + extblas_sparse_vector x, + const double val, + const int index + ) { + auto vector = sparseblas::getDoubleVector( x ); + assert( !(vector->finalized) ); + try { + vector->add( val, index ); + } catch( ... ) { + return 20; + } return 0; - } else { - return 1; - } -} - -int EXTBLAS_FUN( dusv_close )( const EXTBLAS_TYPE( sparse_vector ) x ) { - auto vector = sparseblas::getDoubleVector( x ); - assert( vector->finalized ); - vector->start = vector->end; - return 0; -} - -blas_sparse_matrix BLAS_duscr_begin( const int m, const int n ) { - return new sparseblas::SparseMatrix< double >( m, n ); -} - -int BLAS_duscr_insert_entry( - blas_sparse_matrix A, - const double val, const int row, const int col -) { - auto matrix = sparseblas::getDoubleMatrix( A ); - assert( matrix->finalized == false ); - assert( matrix->ingest != nullptr ); - try { - matrix->ingest->add( val, row, col ); - } catch( ... ) { - return 2; - } - return 0; -} - -int BLAS_duscr_insert_entries( - blas_sparse_matrix A, - const int nnz, - const double * vals, const int * rows, const int * cols -) { - auto matrix = sparseblas::getDoubleMatrix( A ); - assert( matrix->finalized == false ); - assert( matrix->ingest != nullptr ); - try { - for( int k = 0; k < nnz; ++k ) { - matrix->ingest->add( vals[ k ], rows[ k ], cols[ k ] ); - } - } catch( ... ) { - return 3; - } - return 0; -} - -int BLAS_duscr_insert_col( - blas_sparse_matrix A, - const int j, const int nnz, - const double * vals, const int * rows -) { - auto matrix = sparseblas::getDoubleMatrix( A ); - assert( matrix->finalized == false ); - assert( matrix->ingest != nullptr ); - try { - for( int k = 0; k < nnz; ++k ) { - matrix->ingest->add( vals[ k ], rows[ k ], j ); - } - } catch( ... 
) { - return 4; - } - return 0; -} - -int BLAS_duscr_insert_row( - blas_sparse_matrix A, - const int i, const int nnz, - const double * vals, const int * cols -) { - auto matrix = sparseblas::getDoubleMatrix( A ); - assert( matrix->finalized == false ); - assert( matrix->ingest != nullptr ); - try { - for( int k = 0; k < nnz; ++k ) { - matrix->ingest->add( vals[ k ], i, cols[ k ] ); - } - } catch( ... ) { - return 5; - } - return 0; -} - -int BLAS_duscr_end( blas_sparse_matrix A ) { - auto matrix = sparseblas::getDoubleMatrix( A ); - assert( matrix->finalized == false ); - assert( matrix->ingest != nullptr ); - try { - matrix->finalize(); - } catch( const std::runtime_error &e ) { - std::cerr << "Caught error: " << e.what() << "\n"; - return 1; - } - return 0; -} - -int EXTBLAS_dusm_clear( blas_sparse_matrix A ) { - auto matrix = sparseblas::getDoubleMatrix( A ); - assert( matrix->finalized ); - const grb::RC rc = grb::clear( *(matrix->A) ); - if( rc != grb::SUCCESS ) { - return 10; - } - return 0; -} - -int BLAS_usds( blas_sparse_matrix A ) { - delete sparseblas::getDoubleMatrix( A ); - return 0; -} - -int BLAS_dusmv( - const enum blas_trans_type transa, - const double alpha, const blas_sparse_matrix A, - const double * x, int incx, - double * const y, const int incy -) { - grb::Semiring< - grb::operators::add< double >, grb::operators::mul< double >, - grb::identities::zero, grb::identities::one - > ring; - auto matrix = sparseblas::getDoubleMatrix( A ); - if( alpha != 1.0 ) { - grb::Vector< double > output = grb::internal::template - wrapRawVector< double >( matrix->m, y ); - const grb::RC rc = grb::foldl< grb::descriptors::dense >( - output, 1.0 / alpha, ring.getMultiplicativeOperator() ); - if( rc != grb::SUCCESS ) { - std::cerr << "Error during pre-scaling during SpMV\n"; - return 50; + } + + int EXTBLAS_dusv_end( extblas_sparse_vector x ) { + auto vector = sparseblas::getDoubleVector( x ); + assert( !(vector->finalized) ); + try { + vector->finalize(); + } 
catch( ... ) { + return 30; } + return 0; } - if( incx != 1 || incy != 1 ) { - // TODO: requires ALP views - std::cerr << "Strided input and/or output vectors are not supported.\n"; - return 255; + + int EXTBLAS_dusvds( extblas_sparse_vector x ) { + auto vector = sparseblas::getDoubleVector( x ); + delete vector; + return 0; } - if( !(matrix->finalized) ) { - std::cerr << "Input matrix was not yet finalised; see BLAS_duscr_end.\n"; - return 100; + + int EXTBLAS_dusv_nz( const extblas_sparse_vector x, int * const nz ) { + auto vector = sparseblas::getDoubleVector( x ); + assert( vector->finalized ); + const size_t nnz = grb::nnz( *(vector->vector) ); + if( nnz > static_cast< size_t >( std::numeric_limits< int >::max() ) ) { + std::cerr << "Number of nonzeroes is larger than what can be represented by " + << "a SparseBLAS int!\n"; + return 10; + } + *nz = static_cast< int >(nnz); + return 0; } - assert( matrix->finalized ); - if( transa == blas_no_trans ) { - const grb::Vector< double > input = grb::internal::template - wrapRawVector< double >( matrix->n, x ); - grb::Vector< double > output = grb::internal::template - wrapRawVector< double >( matrix->m, y ); - const grb::RC rc = grb::mxv< grb::descriptors::dense >( - output, *(matrix->A), input, ring - ); + + int EXTBLAS_dusv_clear( extblas_sparse_vector x ) { + auto vector = sparseblas::getDoubleVector( x ); + assert( vector->finalized ); + const grb::RC rc = grb::clear( *(vector->vector) ); if( rc != grb::SUCCESS ) { - std::cerr << "ALP/GraphBLAS returns error during SpMV: " - << grb::toString( rc ) << ".\n"; - return 200; + return 10; } - } else { - const grb::Vector< double > input = grb::internal::template - wrapRawVector< double >( matrix->m, x ); - grb::Vector< double > output = grb::internal::template - wrapRawVector< double >( matrix->n, y ); - const grb::RC rc = grb::mxv< - grb::descriptors::dense | - grb::descriptors::transpose_matrix - >( - output, *(matrix->A), input, ring - ); + return 0; + } + + int 
EXTBLAS_dusv_open( const extblas_sparse_vector x ) { + auto vector = sparseblas::getDoubleVector( x ); + assert( vector->finalized ); + try { + vector->start = vector->vector->cbegin(); + vector->end = vector->vector->cend(); + } catch( ... ) { + return 10; + } + return 0; + } + + int EXTBLAS_dusv_get( + const extblas_sparse_vector x, + double * const val, int * const ind + ) { + auto vector = sparseblas::getDoubleVector( x ); + assert( vector->finalized ); + assert( vector->start != vector->end ); + assert( val != nullptr ); + assert( ind != nullptr ); + *val = vector->start->second; + *ind = vector->start->first; + try { + (void) ++(vector->start); + } catch( ... ) { + return 2; + } + if( vector->start == vector->end ) { + return 0; + } else { + return 1; + } + } + + int EXTBLAS_dusv_close( const extblas_sparse_vector x ) { + auto vector = sparseblas::getDoubleVector( x ); + assert( vector->finalized ); + vector->start = vector->end; + return 0; + } + + blas_sparse_matrix BLAS_duscr_begin( const int m, const int n ) { + return new sparseblas::SparseMatrix< double >( m, n ); + } + + int BLAS_duscr_insert_entry( + blas_sparse_matrix A, + const double val, const int row, const int col + ) { + auto matrix = sparseblas::getDoubleMatrix( A ); + assert( matrix->finalized == false ); + assert( matrix->ingest != nullptr ); + try { + matrix->ingest->add( val, row, col ); + } catch( ... ) { + return 2; + } + return 0; + } + + int BLAS_duscr_insert_entries( + blas_sparse_matrix A, + const int nnz, + const double * vals, const int * rows, const int * cols + ) { + auto matrix = sparseblas::getDoubleMatrix( A ); + assert( matrix->finalized == false ); + assert( matrix->ingest != nullptr ); + try { + for( int k = 0; k < nnz; ++k ) { + matrix->ingest->add( vals[ k ], rows[ k ], cols[ k ] ); + } + } catch( ... 
) { + return 3; + } + return 0; + } + + int BLAS_duscr_insert_col( + blas_sparse_matrix A, + const int j, const int nnz, + const double * vals, const int * rows + ) { + auto matrix = sparseblas::getDoubleMatrix( A ); + assert( matrix->finalized == false ); + assert( matrix->ingest != nullptr ); + try { + for( int k = 0; k < nnz; ++k ) { + matrix->ingest->add( vals[ k ], rows[ k ], j ); + } + } catch( ... ) { + return 4; + } + return 0; + } + + int BLAS_duscr_insert_row( + blas_sparse_matrix A, + const int i, const int nnz, + const double * vals, const int * cols + ) { + auto matrix = sparseblas::getDoubleMatrix( A ); + assert( matrix->finalized == false ); + assert( matrix->ingest != nullptr ); + try { + for( int k = 0; k < nnz; ++k ) { + matrix->ingest->add( vals[ k ], i, cols[ k ] ); + } + } catch( ... ) { + return 5; + } + return 0; + } + + int BLAS_duscr_end( blas_sparse_matrix A ) { + auto matrix = sparseblas::getDoubleMatrix( A ); + assert( matrix->finalized == false ); + assert( matrix->ingest != nullptr ); + try { + matrix->finalize(); + } catch( const std::runtime_error &e ) { + std::cerr << "Caught error: " << e.what() << "\n"; + return 1; + } + return 0; + } + + int EXTBLAS_dusm_clear( blas_sparse_matrix A ) { + auto matrix = sparseblas::getDoubleMatrix( A ); + assert( matrix->finalized ); + const grb::RC rc = grb::clear( *(matrix->A) ); if( rc != grb::SUCCESS ) { - std::cerr << "ALP/GraphBLAS returns error during transposed SpMV: " - << grb::toString( rc ) << ".\n"; - return 200; + return 10; } + return 0; } - if( alpha != 1.0 ) { - grb::Vector< double > output = grb::internal::template - wrapRawVector< double >( matrix->m, y ); - const grb::RC rc = grb::foldl< grb::descriptors::dense >( - output, alpha, ring.getMultiplicativeOperator() ); + + int BLAS_usds( blas_sparse_matrix A ) { + delete sparseblas::getDoubleMatrix( A ); + return 0; + } + + int BLAS_dusmv( + const enum blas_trans_type transa, + const double alpha, const blas_sparse_matrix A, + const 
double * x, int incx, + double * const y, const int incy + ) { + grb::Semiring< + grb::operators::add< double >, grb::operators::mul< double >, + grb::identities::zero, grb::identities::one + > ring; + auto matrix = sparseblas::getDoubleMatrix( A ); + if( alpha != 1.0 ) { + grb::Vector< double > output = grb::internal::template + wrapRawVector< double >( matrix->m, y ); + const grb::RC rc = grb::foldl< grb::descriptors::dense >( + output, 1.0 / alpha, ring.getMultiplicativeOperator() ); + if( rc != grb::SUCCESS ) { + std::cerr << "Error during pre-scaling during SpMV\n"; + return 50; + } + } + if( incx != 1 || incy != 1 ) { + // TODO: requires ALP views + std::cerr << "Strided input and/or output vectors are not supported.\n"; + return 255; + } + if( !(matrix->finalized) ) { + std::cerr << "Input matrix was not yet finalised; see BLAS_duscr_end.\n"; + return 100; + } + assert( matrix->finalized ); + if( transa == blas_no_trans ) { + const grb::Vector< double > input = grb::internal::template + wrapRawVector< double >( matrix->n, x ); + grb::Vector< double > output = grb::internal::template + wrapRawVector< double >( matrix->m, y ); + const grb::RC rc = grb::mxv< grb::descriptors::dense >( + output, *(matrix->A), input, ring + ); + if( rc != grb::SUCCESS ) { + std::cerr << "ALP/GraphBLAS returns error during SpMV: " + << grb::toString( rc ) << ".\n"; + return 200; + } + } else { + const grb::Vector< double > input = grb::internal::template + wrapRawVector< double >( matrix->m, x ); + grb::Vector< double > output = grb::internal::template + wrapRawVector< double >( matrix->n, y ); + const grb::RC rc = grb::mxv< + grb::descriptors::dense | + grb::descriptors::transpose_matrix + >( + output, *(matrix->A), input, ring + ); + if( rc != grb::SUCCESS ) { + std::cerr << "ALP/GraphBLAS returns error during transposed SpMV: " + << grb::toString( rc ) << ".\n"; + return 200; + } + } + if( alpha != 1.0 ) { + grb::Vector< double > output = grb::internal::template + 
wrapRawVector< double >( matrix->m, y ); + const grb::RC rc = grb::foldl< grb::descriptors::dense >( + output, alpha, ring.getMultiplicativeOperator() ); + if( rc != grb::SUCCESS ) { + std::cerr << "Error during post-scaling during SpMV\n"; + return 250; + } + } + return 0; + } + + void spblas_dcsrgemv( + const char * transa, + const int * m_p, + const double * a, const int * ia, const int * ja, + const double * x, + double * y + ) { + // declare algebraic structures + grb::Semiring< + grb::operators::add< double >, grb::operators::mul< double >, + grb::identities::zero, grb::identities::one + > ring; + grb::Monoid< + grb::operators::max< int >, grb::identities::negative_infinity + > maxMonoid; + + // declare minimum necessary descriptors + constexpr grb::Descriptor minDescr = grb::descriptors::dense | + grb::descriptors::force_row_major; + + // determine matrix size + const int m = *m_p; + const grb::Vector< int > columnIndices = + grb::internal::template wrapRawVector< int >( ia[ m ], ja ); + int n = 0; + grb::RC rc = foldl( n, columnIndices, maxMonoid ); if( rc != grb::SUCCESS ) { - std::cerr << "Error during post-scaling during SpMV\n"; - return 250; - } - } - return 0; -} - -int BLAS_dusmm( - const enum blas_order_type order, - const enum blas_trans_type transa, - const int nrhs, - const double alpha, const blas_sparse_matrix A, - const double * B, const int ldb, - const double * C, const int ldc -) { - (void) order; - (void) transa; - (void) nrhs; - (void) alpha; - (void) A; - (void) B; - (void) ldb; - (void) C; - (void) ldc; - // TODO requires dense ALP and mixed sparse/dense ALP operations - std::cerr << "BLAS_dusmm (sparse matrix times dense matrix) has not yet " - << "been implemented.\n"; - assert( false ); - return 255; -} - -int EXTBLAS_dusmsv( - const enum blas_trans_type transa, - const double alpha, const blas_sparse_matrix A, - const EXTBLAS_TYPE( sparse_vector ) x, - EXTBLAS_TYPE( sparse_vector ) y -) { - grb::Semiring< - grb::operators::add< 
double >, grb::operators::mul< double >, - grb::identities::zero, grb::identities::one - > ring; - auto matrix = sparseblas::getDoubleMatrix( A ); - auto input = sparseblas::getDoubleVector( x ); - auto output = sparseblas::getDoubleVector( y ); - if( !(matrix->finalized) ) { - std::cerr << "Uninitialised input matrix during SpMSpV\n"; - return 10; - } - if( !(input->finalized) ) { - std::cerr << "Uninitialised input vector during SpMSpV\n"; - return 20; - } - if( !(output->finalized) ) { - std::cerr << "Uninitialised output vector during SpMSpV\n"; - return 30; - } - grb::RC rc = grb::SUCCESS; - if( alpha != 1.0 ) { - rc = grb::foldl( *(output->vector), 1.0 / alpha, - ring.getMultiplicativeOperator() ); + std::cerr << "Could not determine matrix column size\n"; + assert( false ); + return; + } + + // retrieve buffers (only when A needs to be output also) + //char * const bitmask = sparseblas::getBitmask( n ); + //char * const stack = sparseblas::getStack( n ); + //double * const buffer = sparseblas::template getBuffer< double >( n ); + + // retrieve necessary ALP/GraphBLAS container wrappers + const grb::Matrix< double, grb::config::default_backend, int, int, int > A = + grb::internal::wrapCRSMatrix( a, ja, ia, m, n ); + const grb::Vector< double > input = grb::internal::template + wrapRawVector< double >( n, x ); + grb::Vector< double > output = grb::internal::template + wrapRawVector< double >( m, y ); + + // set output vector to zero + rc = grb::set( output, ring.template getZero< double >() ); if( rc != grb::SUCCESS ) { - std::cerr << "Error during pre-scaling of SpMSpV\n"; - return 40; + std::cerr << "Could not set output vector to zero\n"; + assert( false ); + return; } + + // do either y=Ax or y=A^Tx + if( transa[0] == 'N' ) { + rc = grb::mxv< minDescr >( + output, A, input, ring + ); + if( rc != grb::SUCCESS ) { + std::cerr << "ALP/GraphBLAS returns error during SpMV: " + << grb::toString( rc ) << ".\n"; + assert( false ); + return; + } + } else { + // 
Hermitian is not supported + assert( transa[0] == 'T' ); + rc = grb::mxv< + minDescr | + grb::descriptors::transpose_matrix + >( + output, A, input, ring + ); + if( rc != grb::SUCCESS ) { + std::cerr << "ALP/GraphBLAS returns error during transposed SpMV: " + << grb::toString( rc ) << ".\n"; + assert( false ); + return; + } + } + + // done } - if( transa == blas_no_trans ) { - rc = grb::mxv( *(output->vector), *(matrix->A), *(input->vector), ring ); - } else { - rc = grb::mxv< grb::descriptors::transpose_matrix >( - *(output->vector), *(matrix->A), *(input->vector), ring ); + + int BLAS_dusmm( + const enum blas_order_type order, + const enum blas_trans_type transa, + const int nrhs, + const double alpha, const blas_sparse_matrix A, + const double * B, const int ldb, + const double * C, const int ldc + ) { + (void) order; + (void) transa; + (void) nrhs; + (void) alpha; + (void) A; + (void) B; + (void) ldb; + (void) C; + (void) ldc; + // TODO requires dense ALP and mixed sparse/dense ALP operations + std::cerr << "BLAS_dusmm (sparse matrix times dense matrix) has not yet " + << "been implemented.\n"; + assert( false ); + return 255; } - if( rc != grb::SUCCESS ) { - std::cerr << "Error during call to grb::mxv (SpMSpV)\n"; - return 50; + + void spblas_dcsrmm( + const char * const transa, + const int * m, const int * n, const int * k, + const double * alpha, + const char * matdescra, const double * val, const int * indx, + const int * pntrb, const int * pntre, + const double * b, const int * ldb, + const double * beta, + double * c, const int * ldc + ) { + assert( transa[0] == 'N' || transa[0] == 'T' ); + assert( m != NULL ); + assert( n != NULL ); + assert( k != NULL ); + assert( alpha != NULL ); + // not sure yet what constraints if any on matdescra + if( *m > 0 && *k > 0 ) { + assert( pntrb != NULL ); + assert( pntre != NULL ); + } + // val and indx could potentially be NULL if there are no nonzeroes + assert( b != NULL ); + assert( ldb != NULL ); + assert( beta != 
NULL ); + assert( c != NULL ); + assert( ldc != NULL ); + (void) transa; + (void) m; (void) n; (void) k; + (void) alpha; + (void) matdescra; (void) val; (void) indx; (void) pntrb; (void) pntre; + (void) b; (void) ldb; + (void) beta; + (void) c; (void) ldc; + // requires dense ALP and mixed sparse/dense operations + assert( false ); } - if( alpha != 1.0 ) { - rc = grb::foldl( *(output->vector), alpha, - ring.getMultiplicativeOperator() ); + + int EXTBLAS_dusmsv( + const enum blas_trans_type transa, + const double alpha, const blas_sparse_matrix A, + const extblas_sparse_vector x, + extblas_sparse_vector y + ) { + grb::Semiring< + grb::operators::add< double >, grb::operators::mul< double >, + grb::identities::zero, grb::identities::one + > ring; + auto matrix = sparseblas::getDoubleMatrix( A ); + auto input = sparseblas::getDoubleVector( x ); + auto output = sparseblas::getDoubleVector( y ); + if( !(matrix->finalized) ) { + std::cerr << "Uninitialised input matrix during SpMSpV\n"; + return 10; + } + if( !(input->finalized) ) { + std::cerr << "Uninitialised input vector during SpMSpV\n"; + return 20; + } + if( !(output->finalized) ) { + std::cerr << "Uninitialised output vector during SpMSpV\n"; + return 30; + } + grb::RC rc = grb::SUCCESS; + if( alpha != 1.0 ) { + rc = grb::foldl( *(output->vector), 1.0 / alpha, + ring.getMultiplicativeOperator() ); + if( rc != grb::SUCCESS ) { + std::cerr << "Error during pre-scaling of SpMSpV\n"; + return 40; + } + } + if( transa == blas_no_trans ) { + rc = grb::mxv( *(output->vector), *(matrix->A), *(input->vector), ring ); + } else { + rc = grb::mxv< grb::descriptors::transpose_matrix >( + *(output->vector), *(matrix->A), *(input->vector), ring ); + } if( rc != grb::SUCCESS ) { - std::cerr << "Error during post-scaling of SpMSpV\n"; - return 60; + std::cerr << "Error during call to grb::mxv (SpMSpV)\n"; + return 50; } + if( alpha != 1.0 ) { + rc = grb::foldl( *(output->vector), alpha, + ring.getMultiplicativeOperator() ); + if( 
rc != grb::SUCCESS ) { + std::cerr << "Error during post-scaling of SpMSpV\n"; + return 60; + } + } + return 0; } - return 0; -} - -int EXTBLAS_dusmsm( - const enum blas_trans_type transa, - const double alpha, const blas_sparse_matrix A, - const enum blas_trans_type transb, const blas_sparse_matrix B, - blas_sparse_matrix C -) { - grb::Semiring< - grb::operators::add< double >, grb::operators::mul< double >, - grb::identities::zero, grb::identities::one - > ring; - auto matA = sparseblas::getDoubleMatrix( A ); - auto matB = sparseblas::getDoubleMatrix( B ); - auto matC = sparseblas::getDoubleMatrix( C ); - if( !(matA->finalized) ) { - std::cerr << "Uninitialised left-hand input matrix during SpMSpM\n"; - return 10; - } - if( !(matB->finalized) ) { - std::cerr << "Uninitialised right-hand input matrix during SpMSpM\n"; - return 20; - } - if( !(matC->finalized) ) { - std::cerr << "Uninitialised output matrix during SpMSpM\n"; - return 30; - } - - grb::RC rc = grb::SUCCESS; - if( alpha != 1.0 ) { - /*const grb::RC rc = grb::foldl( *(matC->A), 1.0 / alpha, - ring.getMultiplicativeOperator() ); + + void extspblas_dcsrmultsv( + const char * trans, const int * request, + const int * m, const int * n, + const double * a, const int * ja, const int * ia, + const extblas_sparse_vector x, + extblas_sparse_vector y + ) { + grb::Semiring< + grb::operators::add< double >, grb::operators::mul< double >, + grb::identities::zero, grb::identities::one + > ring; + const grb::Matrix< double, grb::config::default_backend, int, int, int > A = + grb::internal::wrapCRSMatrix( a, ja, ia, *m, *n ); + auto input = sparseblas::getDoubleVector( x ); + auto output = sparseblas::getDoubleVector( y ); + if( !(input->finalized) ) { + throw std::runtime_error( "Uninitialised input vector during SpMSpV\n" ); + } + if( !(output->finalized) ) { + throw std::runtime_error( "Uninitialised output vector during SpMSpV\n" ); + } + if( request[ 0 ] != 0 && request[ 1 ] != 1 ) { + throw std::runtime_error( 
"Illegal request during call to dcsrmultsv\n" ); + } + grb::Phase phase = grb::EXECUTE; + if( request[ 0 ] == 1 ) { + phase = grb::RESIZE; + } + grb::RC rc; + if( trans[0] == 'N' ) { + rc = grb::mxv< grb::descriptors::force_row_major >( *(output->vector), A, + *(input->vector), ring, phase ); + } else { + if( trans[1] != 'T' ) { + throw std::runtime_error( "Illegal trans argument to dcsrmultsv\n" ); + } + rc = grb::mxv< + grb::descriptors::force_row_major | + grb::descriptors::transpose_matrix + >( *(output->vector), A, *(input->vector), ring, phase ); + } if( rc != grb::SUCCESS ) { - std::cerr << "Error during pre-scaling for SpMSpM\n"; - return 40; + throw std::runtime_error( "ALP/GraphBLAS returns error during call to " + "SpMSpV: " + grb::toString( rc ) ); + } + } + + int EXTBLAS_dusmsm( + const enum blas_trans_type transa, + const double alpha, const blas_sparse_matrix A, + const enum blas_trans_type transb, const blas_sparse_matrix B, + blas_sparse_matrix C + ) { + grb::Semiring< + grb::operators::add< double >, grb::operators::mul< double >, + grb::identities::zero, grb::identities::one + > ring; + auto matA = sparseblas::getDoubleMatrix( A ); + auto matB = sparseblas::getDoubleMatrix( B ); + auto matC = sparseblas::getDoubleMatrix( C ); + if( !(matA->finalized) ) { + std::cerr << "Uninitialised left-hand input matrix during SpMSpM\n"; + return 10; + } + if( !(matB->finalized) ) { + std::cerr << "Uninitialised right-hand input matrix during SpMSpM\n"; + return 20; + } + if( !(matC->finalized) ) { + std::cerr << "Uninitialised output matrix during SpMSpM\n"; + return 30; + } + + grb::RC rc = grb::SUCCESS; + if( alpha != 1.0 ) { + /*const grb::RC rc = grb::foldl( *(matC->A), 1.0 / alpha, + ring.getMultiplicativeOperator() ); + if( rc != grb::SUCCESS ) { + std::cerr << "Error during pre-scaling for SpMSpM\n"; + return 40; + }*/ + // TODO requires level-3 fold in ALP/GraphBLAS + std::cerr << "Any other alpha from 1.0 is currently not supported for " + << "SpMSpM 
multiplication\n"; + return 255; + } + + // resize phase + if( transa == blas_no_trans && transb == blas_no_trans ) { + rc = grb::mxm( *(matC->A), *(matA->A), *(matB->A), ring, grb::RESIZE ); + } else if( transa != blas_no_trans && transb == blas_no_trans ) { + rc = grb::mxm< grb::descriptors::transpose_left >( + *(matC->A), *(matA->A), *(matB->A), ring, grb::RESIZE ); + } else if( transa == blas_no_trans && transb != blas_no_trans ) { + rc = grb::mxm< grb::descriptors::transpose_right >( + *(matC->A), *(matA->A), *(matB->A), ring, grb::RESIZE ); + } else { + assert( transa != blas_no_trans ); + assert( transb != blas_no_trans ); + rc = grb::mxm< + grb::descriptors::transpose_left | + grb::descriptors::transpose_right + >( *(matC->A), *(matA->A), *(matB->A), ring, grb::RESIZE ); + } + if( rc != grb::SUCCESS ) { + std::cerr << "Error during call to ALP/GraphBLAS mxm (RESIZE phase): " + << grb::toString( rc ) << "\n"; + return 50; + } + + // execute phase + if( transa == blas_no_trans && transb == blas_no_trans ) { + rc = grb::mxm( *(matC->A), *(matA->A), *(matB->A), ring ); + } else if( transa != blas_no_trans && transb == blas_no_trans ) { + rc = grb::mxm< grb::descriptors::transpose_left >( + *(matC->A), *(matA->A), *(matB->A), ring ); + } else if( transa == blas_no_trans && transb != blas_no_trans ) { + rc = grb::mxm< grb::descriptors::transpose_right >( + *(matC->A), *(matA->A), *(matB->A), ring ); + } else { + assert( transa != blas_no_trans ); + assert( transb != blas_no_trans ); + rc = grb::mxm< + grb::descriptors::transpose_left | + grb::descriptors::transpose_right + >( *(matC->A), *(matA->A), *(matB->A), ring ); + } + if( rc != grb::SUCCESS ) { + std::cerr << "Error during call to ALP/GraphBLAS mxm (EXECUTE phase): \n" + << grb::toString( rc ) << "\n"; + return 60; + } + + /*TODO see above + if( alpha != 1.0 ) { + rc = grb::foldl( *(matC->A), 1.0 / alpha, + ring.getMultiplicativeOperator() ); + if( rc != grb::SUCCESS ) { + std::cerr << "Error during 
post-scaling for SpMSpM\n"; + return 70; + } }*/ - // TODO requires level-3 fold in ALP/GraphBLAS - std::cerr << "Any other alpha from 1.0 is currently not supported for " - << "SpMSpM multiplication\n"; - return 255; + return 0; } - // resize phase - if( transa == blas_no_trans && transb == blas_no_trans ) { - rc = grb::mxm( *(matC->A), *(matA->A), *(matB->A), ring, grb::RESIZE ); - } else if( transa != blas_no_trans && transb == blas_no_trans ) { - rc = grb::mxm< grb::descriptors::transpose_left >( - *(matC->A), *(matA->A), *(matB->A), ring, grb::RESIZE ); - } else if( transa == blas_no_trans && transb != blas_no_trans ) { - rc = grb::mxm< grb::descriptors::transpose_right >( - *(matC->A), *(matA->A), *(matB->A), ring, grb::RESIZE ); - } else { - assert( transa != blas_no_trans ); - assert( transb != blas_no_trans ); - rc = grb::mxm< - grb::descriptors::transpose_left | - grb::descriptors::transpose_right - >( *(matC->A), *(matA->A), *(matB->A), ring, grb::RESIZE ); - } - if( rc != grb::SUCCESS ) { - std::cerr << "Error during call to ALP/GraphBLAS mxm (RESIZE phase): " - << grb::toString( rc ) << "\n"; - return 50; - } - - // execute phase - if( transa == blas_no_trans && transb == blas_no_trans ) { - rc = grb::mxm( *(matC->A), *(matA->A), *(matB->A), ring ); - } else if( transa != blas_no_trans && transb == blas_no_trans ) { - rc = grb::mxm< grb::descriptors::transpose_left >( - *(matC->A), *(matA->A), *(matB->A), ring ); - } else if( transa == blas_no_trans && transb != blas_no_trans ) { - rc = grb::mxm< grb::descriptors::transpose_right >( - *(matC->A), *(matA->A), *(matB->A), ring ); - } else { - assert( transa != blas_no_trans ); - assert( transb != blas_no_trans ); - rc = grb::mxm< - grb::descriptors::transpose_left | - grb::descriptors::transpose_right - >( *(matC->A), *(matA->A), *(matB->A), ring ); - } - if( rc != grb::SUCCESS ) { - std::cerr << "Error during call to ALP/GraphBLAS mxm (EXECUTE phase): \n" - << grb::toString( rc ) << "\n"; - return 60; - 
} - - /*TODO see above - if( alpha != 1.0 ) { - rc = grb::foldl( *(matC->A), 1.0 / alpha, - ring.getMultiplicativeOperator() ); + void spblas_dcsrmultcsr( + const char * trans, const int * request, const int * sort, + const int * m_p, const int * n_p, const int * k_p, + double * a, int * ja, int * ia, + double * b, int * jb, int * ib, + double * c, int * jc, int * ic, + const int * nzmax, int * info + ) { + assert( trans[0] == 'N' ); + assert( sort != NULL && sort[0] == 7 ); + assert( m_p != NULL ); + assert( n_p != NULL ); + assert( k_p != NULL ); + assert( a != NULL ); assert( ja != NULL ); assert( ia != NULL ); + assert( b != NULL ); assert( jb != NULL ); assert( ib != NULL ); + assert( c != NULL ); assert( jc != NULL ); assert( ic != NULL ); + assert( nzmax != NULL ); + assert( info != NULL ); + + // declare algebraic structures + grb::Semiring< + grb::operators::add< double >, grb::operators::mul< double >, + grb::identities::zero, grb::identities::one + > ring; + + // check support + if( trans[ 0 ] != 'N' ) { + std::cerr << "ALP/SparseBLAS, error: illegal trans argument to dcsrmultcsr\n"; + *info = 4; + } + if( sort[ 0 ] != 7 ) { + std::cerr << "ALP/SparseBLAS, error: illegal sort argument to dcsrmultcsr\n"; + *info = 5; + return; + } + + // declare minimum necessary descriptors + constexpr const grb::Descriptor minDescr = grb::descriptors::dense | + grb::descriptors::force_row_major; + + // determine matrix size + const int m = *m_p; + const int n = *n_p; + const int k = *k_p; + + // retrieve buffers (only when A needs to be output also) + char * bitmask = nullptr; + char * stack = nullptr; + double * valbuf = nullptr; + if( sparseblas::template getBuffer< double >( + bitmask, stack, valbuf, n + ) == false + ) { + std::cerr << "ALP/SparseBLAS, error: could not allocate buffer for " + << "computations on an output matrix\n"; + *info = 10; + return; + } + + // retrieve necessary ALP/GraphBLAS container wrappers + const grb::Matrix< double, 
grb::config::default_backend, int, int, int > A = + grb::internal::wrapCRSMatrix( a, ja, ia, m, k ); + const grb::Matrix< double, grb::config::default_backend, int, int, int > B = + grb::internal::wrapCRSMatrix( b, jb, ib, k, n ); + grb::Matrix< double, grb::config::default_backend, int, int, int > C = + grb::internal::wrapCRSMatrix( + c, jc, ic, + m, n, *nzmax, + bitmask, stack, valbuf + ); + + // set output vector to zero + grb::RC rc = grb::clear( C ); if( rc != grb::SUCCESS ) { - std::cerr << "Error during post-scaling for SpMSpM\n"; - return 70; - } - }*/ - return 0; -} - -int EXTBLAS_dusm_nz( const blas_sparse_matrix A, int * nz ) { - auto matA = sparseblas::getDoubleMatrix( A ); - if( !(matA->finalized) ) { - std::cerr << "Uninitialised left-hand input matrix during dusm_nz\n"; - return 10; - } - const size_t grb_nz = grb::nnz( *(matA->A) ); - if( grb_nz > static_cast< size_t >(std::numeric_limits< int >::max()) ) { - std::cerr << "Number of nonzeroes in given sparse matrix is larger than " - << "what can be represented by a SparseBLAS int\n"; - return 20; - } - *nz = static_cast< int >( grb_nz ); - return 0; -} - -int EXTBLAS_dusm_open( const blas_sparse_matrix A ) { - auto matA = sparseblas::getDoubleMatrix( A ); - if( !(matA->finalized) ) { - std::cerr << "Uninitialised left-hand input matrix during dusm_nz\n"; - return 10; - } - try{ - matA->start = matA->A->cbegin(); - matA->end = matA->A->cend(); - } catch( ... 
) { - std::cerr << "Could not retrieve matrix iterators\n"; - return 20; - } - return 0; -} - -int EXTBLAS_dusm_get( - const blas_sparse_matrix A, - double * value, int * row, int * col -) { - auto matA = sparseblas::getDoubleMatrix( A ); - if( !(matA->finalized) ) { - std::cerr << "Uninitialised left-hand input matrix during dusm_nz\n"; - return 10; - } - assert( matA->start != matA->end ); - const auto &triplet = *(matA->start); - *value = triplet.second; - *row = triplet.first.first; - *col = triplet.first.second; - try { - (void) ++(matA->start); - } catch( ... ) { - return 2; - } - if( matA->start == matA->end ) { + std::cerr << "ALP/SparseBLAS, error: Could not clear output matrix\n"; + assert( false ); + *info = 20; + return; + } + + // do either C=AB or C=A^TB + if( trans[0] == 'N' ) { + if( *request == 1 ) { + rc = grb::mxm< minDescr >( C, A, B, ring, grb::RESIZE ); + } else { + assert( *request == 0 || *request == 2 ); + rc = grb::mxm< minDescr >( C, A, B, ring ); + } + if( rc != grb::SUCCESS ) { + std::cerr << "ALP/SparseBLAS, error during call to SpMSpM: " + << grb::toString( rc ) << ".\n"; + assert( false ); + *info = 30; + return; + } + } else { + // this case is not supported + assert( false ); + } + + // done + if( *request == 1 ) { + *info = -1; + } else { + *info = 0; + } + } + + int EXTBLAS_dusm_nz( const blas_sparse_matrix A, int * nz ) { + auto matA = sparseblas::getDoubleMatrix( A ); + if( !(matA->finalized) ) { + std::cerr << "Uninitialised left-hand input matrix during dusm_nz\n"; + return 10; + } + const size_t grb_nz = grb::nnz( *(matA->A) ); + if( grb_nz > static_cast< size_t >(std::numeric_limits< int >::max()) ) { + std::cerr << "Number of nonzeroes in given sparse matrix is larger than " + << "what can be represented by a SparseBLAS int\n"; + return 20; + } + *nz = static_cast< int >( grb_nz ); + return 0; + } + + int EXTBLAS_dusm_open( const blas_sparse_matrix A ) { + auto matA = sparseblas::getDoubleMatrix( A ); + if( 
!(matA->finalized) ) { + std::cerr << "Uninitialised left-hand input matrix during dusm_nz\n"; + return 10; + } + try{ + matA->start = matA->A->cbegin(); + matA->end = matA->A->cend(); + } catch( ... ) { + std::cerr << "Could not retrieve matrix iterators\n"; + return 20; + } return 0; - } else { - return 1; } -} -int EXTBLAS_dusm_close( const blas_sparse_matrix A ) { - auto matA = sparseblas::getDoubleMatrix( A ); - if( !(matA->finalized) ) { - std::cerr << "Uninitialised left-hand input matrix during dusm_nz\n"; - return 10; + int EXTBLAS_dusm_get( + const blas_sparse_matrix A, + double * value, int * row, int * col + ) { + auto matA = sparseblas::getDoubleMatrix( A ); + if( !(matA->finalized) ) { + std::cerr << "Uninitialised left-hand input matrix during dusm_nz\n"; + return 10; + } + assert( matA->start != matA->end ); + const auto &triplet = *(matA->start); + *value = triplet.second; + *row = triplet.first.first; + *col = triplet.first.second; + try { + (void) ++(matA->start); + } catch( ... 
) { + return 2; + } + if( matA->start == matA->end ) { + return 0; + } else { + return 1; + } } - matA->start = matA->end; - return 0; -} -int EXTBLAS_free() { - const grb::RC rc = grb::finalize(); - if( rc != grb::SUCCESS ) { - std::cerr << "Error during call to EXTBLAS_free\n"; - return 10; + int EXTBLAS_dusm_close( const blas_sparse_matrix A ) { + auto matA = sparseblas::getDoubleMatrix( A ); + if( !(matA->finalized) ) { + std::cerr << "Uninitialised left-hand input matrix during dusm_nz\n"; + return 10; + } + matA->start = matA->end; + return 0; } - return 0; -} + + int EXTBLAS_free() { + if( sparseblas::buffer != nullptr || sparseblas::buffer_size > 0 ) { + assert( sparseblas::buffer != nullptr ); + assert( sparseblas::buffer_size > 0 ); + free( sparseblas::buffer ); + sparseblas::buffer_size = 0; + } + const grb::RC rc = grb::finalize(); + if( rc != grb::SUCCESS ) { + std::cerr << "Error during call to EXTBLAS_free\n"; + return 10; + } + return 0; + } + + void extspblas_free() { + (void) EXTBLAS_free(); + } + +} // end extern "C" diff --git a/tests/smoke/CMakeLists.txt b/tests/smoke/CMakeLists.txt index 1f99446ee..1bef2efe5 100644 --- a/tests/smoke/CMakeLists.txt +++ b/tests/smoke/CMakeLists.txt @@ -180,6 +180,11 @@ add_grb_executables( kcore_decomposition kcore_decomposition.cpp BACKENDS reference reference_omp hyperdags nonblocking bsp1d hybrid ) +# add_grb_executables( triangle_count triangle_count.cpp +# ADDITIONAL_LINK_LIBRARIES test_utils_headers +# BACKENDS reference reference_omp bsp1d hybrid hyperdags nonblocking +# ) + # targets to list and build the test for this category get_property( smoke_tests_list GLOBAL PROPERTY tests_category_smoke ) add_custom_target( "list_tests_category_smoke" diff --git a/tests/smoke/smoketests.sh b/tests/smoke/smoketests.sh index c069445d9..ed29d69c4 100755 --- a/tests/smoke/smoketests.sh +++ b/tests/smoke/smoketests.sh @@ -366,6 +366,34 @@ for BACKEND in ${BACKENDS[@]}; do fi echo " " + echo ">>> [x] [ ] Testing the 
Triangle counting algorithm on the dwt_59.mtx" + if [ -f ${INPUT_DIR}/dwt_59.mtx ]; then + $runner ${TEST_BIN_DIR}/triangle_count_${BACKEND} ${INPUT_DIR}/dwt_59.mtx direct 30 1 1 &> ${TEST_OUT_DIR}/triangle_count_dwt_59_${BACKEND}_${P}_${T}.log + head -1 ${TEST_OUT_DIR}/triangle_count_dwt_59_${BACKEND}_${P}_${T}.log + if ! grep -q 'Test OK' ${TEST_OUT_DIR}/triangle_count_dwt_59_${BACKEND}_${P}_${T}.log; then + echo "Test FAILED" + else + echo "Test OK" + fi + else + echo "Test DISABLED: dwt_59.mtx was not found. To enable, please provide ${INPUT_DIR}/dwt_59.mtx" + fi + echo " " + + echo ">>> [x] [ ] Testing the Triangle counting algorithm on the gyro_m.mtx" + if [ -f ${INPUT_DIR}/gyro_m.mtx ]; then + $runner ${TEST_BIN_DIR}/triangle_count_${BACKEND} ${INPUT_DIR}/gyro_m.mtx direct 598470 1 1 &> ${TEST_OUT_DIR}/triangle_count_gyro_m_${BACKEND}_${P}_${T}.log + head -1 ${TEST_OUT_DIR}/triangle_count_gyro_m_${BACKEND}_${P}_${T}.log + if ! grep -q 'Test OK' ${TEST_OUT_DIR}/triangle_count_gyro_m_${BACKEND}_${P}_${T}.log; then + echo "Test FAILED" + else + echo "Test OK" + fi + else + echo "Test DISABLED: gyro_m.mtx was not found. To enable, please provide ${INPUT_DIR}/gyro_m.mtx" + fi + echo " " + + if [ "$BACKEND" = "bsp1d" ] || [ "$BACKEND" = "hybrid" ]; then + echo "Additional standardised smoke tests not yet supported for the ${BACKEND} backend" + echo diff --git a/tests/smoke/triangle_count.cpp b/tests/smoke/triangle_count.cpp new file mode 100644 index 000000000..f7c8892db --- /dev/null +++ b/tests/smoke/triangle_count.cpp @@ -0,0 +1,293 @@ +/* + * Copyright 2021 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#ifdef _CG_COMPLEX +#include +#endif + +#include + +#include +#include +#include + +#include +#include + +/** Must be an integer type (int, long, unsigned, etc.) */ +using BaseScalarType = int; +#ifdef _CG_COMPLEX +using IntegerType = std::complex< BaseScalarType >; +#else +using IntegerType = BaseScalarType; +#endif + +constexpr BaseScalarType TOL = 0; +constexpr size_t MAX_ITERS = 10000; + +using namespace grb; +using namespace algorithms; + +struct input { + size_t inner_rep; + size_t outer_rep; + TriangleCountAlgorithm algorithm; + size_t expectedTriangleCount; + char filename[ 1024 ]; + bool direct; +}; + +struct output { + RC rc = RC::SUCCESS; + size_t inner_rep; + size_t outer_rep; + size_t iterations; + size_t triangleCount; + grb::utils::TimerResults times; +}; + +bool parse_arguments( int argc, char ** argv, input & in, int& err ); + +void grbProgram( const input & data_in, output & out ) { + // get user process ID + const size_t s = spmd<>::pid(); + assert( s < spmd<>::nprocs() ); + grb::utils::Timer timer; + + // Sanity checks on input + if( data_in.filename[ 0 ] == '\0' ) { + std::cerr << s << ": no file name given as input." 
<< std::endl; + out.rc = ILLEGAL; + return; + } + + + timer.reset(); + // Create a local parser + grb::utils::MatrixFileReader< + void, + std::conditional< + ( sizeof( grb::config::RowIndexType ) > sizeof( grb::config::ColIndexType ) ), + grb::config::RowIndexType, + grb::config::ColIndexType + >::type + > parser( data_in.filename, data_in.direct ); + assert( parser.m() == parser.n() ); + const size_t n = parser.n(); + // Load the matrix, first as a pattern, then copy it into a matrix with integer values + Matrix< IntegerType > A( n, n ); + { + Matrix< void > A_pattern( n, n ); + { + const RC rc = buildMatrixUnique( A_pattern, + parser.begin( SEQUENTIAL ), parser.end( SEQUENTIAL), + SEQUENTIAL + ); + /* Once internal issue #342 is resolved this can be re-enabled + const RC rc = buildMatrixUnique( A_pattern, + parser.begin( PARALLEL ), parser.end( PARALLEL), + PARALLEL + );*/ + if( rc != SUCCESS ) { + std::cerr << "Failure: call to buildMatrixUnique did not succeed " + << "(" << toString( rc ) << ")." << std::endl; + return; + } + } + // Check number of non-zero entries between the parser and the matrix A_pattern + try { + const size_t global_nnz = nnz( A_pattern ); + const size_t parser_nnz = parser.nz(); + if( global_nnz != parser_nnz ) { + std::cerr << "Failure: global nnz (" << global_nnz << ") does not equal " + << "parser nnz (" << parser_nnz << ")." << std::endl; + return; + } + } catch( const std::runtime_error & ) { + std::cout << "Info: nonzero check skipped as the number of nonzeroes " + << "cannot be derived from the matrix file header. 
The " + << "grb::Matrix reports " << nnz( A ) << " nonzeroes.\n"; + } + // Build A from A_pattern, filled with static_cast< IntegerType>( 1 ) + std::vector< size_t > rows, cols; + rows.reserve( nnz( A_pattern ) ); + cols.reserve( nnz( A_pattern ) ); + for( const std::pair< size_t, size_t > p : A_pattern ) { + // FIXME: this is a workaround while waiting for a masked version of mxm + if( p.first == p.second ) { + continue; + } + + rows.push_back( p.first ); + cols.push_back( p.second ); + } + std::vector< IntegerType > values( rows.size(), static_cast< IntegerType >( 1 ) ); + assert( SUCCESS == + buildMatrixUnique( A, rows.data(), cols.data(), values.data(), values.size(), IOMode::SEQUENTIAL ) + ); + } + out.times.io = timer.time(); + + // Check that the input matrix does not contain self-loops + for( const auto & p : A ) { + if( p.first.first == p.first.second ) { + std::cerr << "Failure: input matrix contains self-loops." << std::endl; + return; + } + } + + timer.reset(); + // Allocate the buffers + Matrix< IntegerType > buffer( n, n ); + Matrix< IntegerType > buffer2( n, n ); + Matrix< IntegerType > L( n, n ); + Matrix< IntegerType > U( n, n ); + // Split A into L and U + grb::tril( L, A, Phase::RESIZE ); + grb::triu( U, A, Phase::RESIZE ); + grb::tril( L, A, Phase::EXECUTE ); + grb::triu( U, A, Phase::EXECUTE ); + out.times.preamble = timer.time(); + + timer.reset(); + out.rc = triangle_count( data_in.algorithm, out.triangleCount, A, buffer, buffer2, L, U ); + out.times.useful = timer.time(); +} + +int main( int argc, char ** argv ) { + (void)argc; + (void)argv; + + std::cout << "Test executable: " << argv[ 0 ] << std::endl; + + // Input struct + struct input in; + int err; + if( !parse_arguments( argc, argv, in, err ) ) { + return err; + } + + std::cout << "Executable called with parameters " << in.filename << ", " + << "inner repetitions = " << in.inner_rep << ", and outer repetitions = " + << in.outer_rep << std::endl; + + // Run the test for all algorithms + 
RC all_algorithms_rc = RC::SUCCESS; + for( const std::pair< TriangleCountAlgorithm, std::string > & algo : TriangleCountAlgorithmNames ) { + in.algorithm = algo.first; + std::cout << " -- Running algorithm " << algo.second << std::endl; + + // Output struct + struct output out; + RC rc = RC::SUCCESS; + + // Launch the estimator (if requested) + if( in.inner_rep == 0 ) { + grb::Launcher< AUTOMATIC > launcher; + rc = launcher.exec( &grbProgram, in, out, true ); + if( rc == RC::SUCCESS ) { + in.inner_rep = out.inner_rep; + } + if( rc != RC::SUCCESS ) { + std::cerr << "launcher.exec returns with non-SUCCESS error code " + << (int)rc << std::endl; + return 6; + } + } + + // Launch the benchmarker + grb::Benchmarker< EXEC_MODE::AUTOMATIC > benchmarker; + rc = benchmarker.exec( &grbProgram, in, out, 1, in.outer_rep, true ); + if( rc != RC::SUCCESS ) { + std::cerr << "benchmarker.exec returns with non-SUCCESS error code " + << grb::toString( rc ) << std::endl; + return 8; + } + if( out.rc == RC::SUCCESS ) { + std::cout << "Benchmark completed successfully.\n"; + std::cout << "** Obtained " << out.triangleCount << " triangles.\n"; + std::cout << "** Expected " << in.expectedTriangleCount << " triangles.\n"; + if( out.triangleCount != in.expectedTriangleCount ) { + all_algorithms_rc = RC::FAILED; + } + } else { + std::cerr << "Benchmark failed with error code " + << grb::toString( out.rc ) << std::endl; + std::cerr << std::flush; + all_algorithms_rc = RC::FAILED; + } + std::cout << std::endl; + } + + if( all_algorithms_rc == RC::SUCCESS ) { + std::cout << "Test OK" << std::endl; + } else { + std::cout << "Test FAILED" << std::endl; + } + + return 0; +} + +bool parse_arguments( int argc, char ** argv, input & in, int& err ) { + // Check if we are testing on a file + if( argc < 4 || argc > 6 ) { + std::cerr << "Usage: \n\t" + << argv[ 0 ] << " <matrix file> <direct|indirect> <expected triangle count> (inner iterations) (outer iterations)" + << std::endl; + err = 1; + return false; + } + + // Get file name + (void)strncpy( 
in.filename, argv[ 1 ], 1023 ); + in.filename[ 1023 ] = '\0'; + + // Get direct or indirect addressing + in.direct = ( strncmp( argv[ 2 ], "direct", 6 ) == 0 ); + + // Get the expected number of triangles + in.expectedTriangleCount = std::stoul( argv[ 3 ] ); + + // Get the inner number of iterations + in.inner_rep = grb::config::BENCHMARKING::inner(); + char * end = nullptr; + if( argc > 4 ) { + in.inner_rep = strtoumax( argv[ 4 ], &end, 10 ); + if( argv[ 4 ] == end ) { + std::cerr << "Could not parse argument " << argv[ 4 ] << " " + << "for number of inner experiment repetitions." << std::endl; + err = 4; + return false; + } + } + + // Get the outer number of iterations + in.outer_rep = grb::config::BENCHMARKING::outer(); + if( argc > 5 ) { + in.outer_rep = strtoumax( argv[ 5 ], &end, 10 ); + if( argv[ 5 ] == end ) { + std::cerr << "Could not parse argument " << argv[ 5 ] << " " + << "for number of outer experiment repetitions." << std::endl; + err = 5; + return false; + } + } + return true; +} \ No newline at end of file diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt index ab0dac498..c24026aa3 100644 --- a/tests/unit/CMakeLists.txt +++ b/tests/unit/CMakeLists.txt @@ -251,10 +251,6 @@ add_grb_executables( eWiseApplyMatrixReference eWiseApplyMatrixReference.cpp ADDITIONAL_LINK_LIBRARIES test_utils_headers ) -add_grb_executables( eWiseLambda eWiseLambda.cpp - BACKENDS reference reference_omp hyperdags nonblocking -) - add_grb_executables( outer outer.cpp BACKENDS reference reference_omp hyperdags nonblocking ) @@ -300,10 +296,6 @@ add_grb_executables( adapterIterator adapterIterator.cpp BACKENDS reference reference_omp hyperdags nonblocking bsp1d hybrid ) -add_grb_executables( vectorFromListConstructor vectorFromListConstructor.cpp - BACKENDS reference reference_omp bsp1d hybrid hyperdags nonblocking -) - # the below targets test successfully when they compile -- they do not need to # be executed successfully as part of the unit test suite. 
diff --git a/tests/unit/unittests.sh b/tests/unit/unittests.sh index 5b7dfc16a..0089d2674 100755 --- a/tests/unit/unittests.sh +++ b/tests/unit/unittests.sh @@ -407,12 +407,6 @@ for MODE in ${MODES}; do grep 'Test OK' ${TEST_OUT_DIR}/buildVector_${MODE}_${BACKEND}_${P}_${T}.log || echo "Test FAILED" echo " " - echo ">>> [x] [ ] Testing grb::Vector( initializer_list ) constructor" - $runner ${TEST_BIN_DIR}/vectorFromListConstructor_${MODE}_${BACKEND} &> ${TEST_OUT_DIR}/vectorFromListConstructor_${MODE}_${BACKEND}_${P}_${T}.log - head -1 ${TEST_OUT_DIR}/vectorFromListConstructor_${MODE}_${BACKEND}_${P}_${T}.log - grep 'Test OK' ${TEST_OUT_DIR}/vectorFromListConstructor_${MODE}_${BACKEND}_${P}_${T}.log || echo "Test FAILED" - echo " " - echo ">>> [x] [ ] Testing grb::vectorToMatrixConverter" $runner ${TEST_BIN_DIR}/vectorToMatrix_${MODE}_${BACKEND} &> ${TEST_OUT_DIR}/vectorToMatrix_${MODE}_${BACKEND}_${P}_${T}.log head -1 ${TEST_OUT_DIR}/vectorToMatrix_${MODE}_${BACKEND}_${P}_${T}.log @@ -631,12 +625,6 @@ for MODE in ${MODES}; do grep 'Test OK' ${TEST_OUT_DIR}/mxm_${MODE}_${BACKEND}_${P}_${T}.log || echo "Test FAILED" echo " " - echo ">>> [x] [ ] Testing grb::eWiseLambda on a small matrix" - $runner ${TEST_BIN_DIR}/eWiseLambda_${MODE}_${BACKEND} &> ${TEST_OUT_DIR}/eWiseLambda_${MODE}_${BACKEND}_${P}_${T}.log - head -1 ${TEST_OUT_DIR}/eWiseLambda_${MODE}_${BACKEND}_${P}_${T}.log - grep 'Test OK' ${TEST_OUT_DIR}/eWiseLambda_${MODE}_${BACKEND}_${P}_${T}.log || echo "Test FAILED" - echo " " - echo ">>> [x] [ ] Testing grb::outer on a small matrix" $runner ${TEST_BIN_DIR}/outer_${MODE}_${BACKEND} &> ${TEST_OUT_DIR}/outer_${MODE}_${BACKEND}_${P}_${T}.log head -1 ${TEST_OUT_DIR}/outer_${MODE}_${BACKEND}_${P}_${T}.log