diff --git a/include/graphblas/algorithms/simulated_annealing_re.hpp b/include/graphblas/algorithms/simulated_annealing_re.hpp new file mode 100644 index 000000000..21f9d90fb --- /dev/null +++ b/include/graphblas/algorithms/simulated_annealing_re.hpp @@ -0,0 +1,810 @@ +/* + * Copyright 2025 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file + * + * Provides a Simulated Annealing-Replica Exchange QUBO optimizator. + * + * @author Giovanni Gaio + * @date TODO 2025 + */ + +#ifndef _H_GRB_ALGORITHMS_SA_RE +#define _H_GRB_ALGORITHMS_SA_RE + +#include +#include +#include +#include +#include +#include +#include + +#ifndef NDEBUG +#include +#endif + +#include + +#define ISCLOSE(a,b) (std::abs((b)-(a))/std::abs(a) < 1e-4) || (std::abs((b)-(a)) < 1e-4) + +namespace grb { + namespace algorithms { + + /* + * Do a Parallel Tempering pass. + * This means exchanging states at low temperature with states at higher temperature. + * + * TODO: Complete this documentation. + * + * @param[in,out] states On input: initial states. + * @param[in,out] energies The initial energy of each state. + * @param[in,out] betas Inverse temperature of each state. + * The betas may be permuted. + * + * @tparam StateType The state variable type. + * @tparam EnergyType The energy type. + * @tparam TempType The inverse temperature type. + * + * This implementation of parallel tempering does not use any spmd characteristics. + */ + template< + Backend backend, + typename StateType, + typename EnergyType, + typename TempType + > + typename std::enable_if< + (grb::_GRB_BACKEND != grb::BSP1D) || (backend == grb::BSP1D), + grb::RC >::type + pt( + std::vector< grb::Vector< StateType, backend > > &states, + grb::Vector< EnergyType, backend > &energies, + const grb::Vector< TempType, backend > &betas, + const int seed = 42 + ){ + + const size_t n_replicas = states.size(); + // const size_t s = spmd<>::pid(); + // const size_t nprocs = spmd<>::nprocs(); + grb::RC rc = grb::SUCCESS; + std::minstd_rand rng ( seed ); + std::exponential_distribution< EnergyType > rand ( 1.0 ); + + for( size_t i = n_replicas - 1 ; i > 0 ; --i ){ + const EnergyType de = ( energies[ i ] - energies[ i-1 ]) * (betas[ i ] - betas[ i-1 ]); + + if( -rand( rng ) < de ){ + std::swap( states[i], states[i-1] ); + std::swap( energies[i], energies[i-1] ); + } + } + + return rc; + } + + /* + * Implementation of parallel tempering using spmd. 
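+ *
+ * Both overloads decide a swap between neighbouring replicas i-1 and i by
+ * the same rule: with de = ( E_i - E_{i-1} ) * ( beta_i - beta_{i-1} ), a
+ * sample X ~ Exp(1) is drawn and the swap is accepted whenever -X < de,
+ * i.e. with probability min( 1, exp( de ) ), the usual replica-exchange
+ * Metropolis criterion. In this overload the pair of replicas that
+ * straddles a process boundary is additionally exchanged by broadcasting
+ * the border states, energies, and random draws between the two processes.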
+ */ + template< + Backend backend, + typename StateType, + typename EnergyType, + typename TempType + > + typename std::enable_if< + (grb::_GRB_BACKEND == grb::BSP1D) && (backend != grb::BSP1D), + grb::RC >::type + pt( + std::vector< grb::Vector< StateType, backend > > &states, + grb::Vector< EnergyType, backend > &energies, + const grb::Vector< TempType, backend > &betas, + const int seed = 42 + ){ + static_assert( backend != grb::BSP1D ); + // static_assert( grb::_GRB_BACKEND == grb::BSP1D ); + + const size_t n = grb::size( states[0] ); + const size_t n_replicas = states.size(); + const size_t s = spmd<>::pid(); + const size_t nprocs = spmd<>::nprocs(); + grb::RC rc = grb::SUCCESS; + +#ifndef NDEBUG + assert( grb::size(energies) == n_replicas ); + assert( grb::size(betas) == n_replicas ); +#endif + struct data { + EnergyType e; + TempType b; + EnergyType r; + }; + // TODO: should these two be static? Probably. + grb::Vector< StateType, backend > s0 ( n ); + grb::Vector< StateType, backend > s1 ( n ); + grb::set( s0, static_cast< StateType >( 0 ) ); + grb::set( s1, static_cast< StateType >( 0 ) ); + + struct data msg[ 2 ]; + rc = rc ? rc : grb::resize( s0, n ); + rc = rc ? rc : grb::resize( s1, n ); + if( rc != grb::SUCCESS ) return rc; + + std::minstd_rand rng; + std::exponential_distribution< EnergyType > rand ( 1.0 ); + + rng.seed( seed + s ); + const EnergyType myrand = -rand( rng ); + + for( size_t si = nprocs ; rc == grb::SUCCESS && si > 0; --si ){ + if( si-1 == s ){ + for( size_t i = n_replicas - 1 ; i > 0 ; --i ){ + const EnergyType de = ( energies[ i ] - energies[ i-1 ]) * (betas[ i ] - betas[ i-1 ]); + + if( -rand( rng ) < de ){ + std::swap( states[i], states[i-1] ); + std::swap( energies[i], energies[i-1] ); + } + } + grb::set( s1, states[0] ); + msg[ 1 ].e = energies[ 0 ]; + msg[ 1 ].b = betas[0]; + msg[ 1 ].r = myrand; + }else if( si-2 == s ){ + grb::set( s0, states[ n_replicas - 1 ] ); + msg[ 0 ].e = energies[ n_replicas - 1 ]; + msg[ 0 ].b = betas[ n_replicas - 1 ]; + msg[ 0 ].r = myrand; + } + if( si == 1 ) continue; + +#ifdef _GRB_WITH_LPF + rc = rc ? rc : grb::collectives<>::broadcast( msg[ 0 ].e, si-2 ); + rc = rc ? rc : grb::collectives<>::broadcast( msg[ 0 ].b, si-2 ); + rc = rc ? rc : grb::collectives<>::broadcast( msg[ 0 ].r, si-2 ); + rc = rc ? rc : grb::collectives<>::broadcast( msg[ 1 ].e, si-1 ); + rc = rc ? rc : grb::collectives<>::broadcast( msg[ 1 ].b, si-1 ); + rc = rc ? rc : grb::collectives<>::broadcast( msg[ 1 ].r, si-1 ); +#else + assert( false ); // this should never run +#endif + +#ifndef NDEBUG + + if( rc != grb::SUCCESS ){ + std::cerr << "\n\t Error in a collective broadcast " << rc << " : " << + grb::toString( rc ) << std::endl; + } + assert( rc == grb::SUCCESS ); +#endif + + const EnergyType de = ( msg[ 1 ].e - msg[ 0 ].e ) * ( msg[ 1 ].b - msg[ 0 ].b ); + + if( rc == grb::SUCCESS && ( msg[ 1 ].r < de ) ){ +#ifdef _GRB_WITH_LPF + rc = rc ? rc : grb::internal::broadcast( s1, si-1 ); + rc = rc ? rc : grb::internal::broadcast( s0, si-2 ); + assert( grb::nnz(s0) == n ); // state has to be dense! + assert( grb::nnz(s1) == n ); // state has to be dense! +#else + assert( false ); // this should never run +#endif + if( si-1 == s ){ + rc = rc ? rc : grb::set( states[ 0 ], s0 ); + rc = rc ? rc : grb::setElement( energies, msg[ 0 ].e, 0 ); + }else if( si-2 == s ){ + rc = rc ? rc : grb::set( states[ n_replicas - 1 ], s1 ); + rc = rc ? 
rc : grb::setElement( energies, msg[ 1 ].e, n_replicas - 1 ); + } + } + } + return rc; + } + + + /* + * Estimate a solution to a given optimization problem. The solution is found + * using Simulated Annealing-Replica Exchange (also known as Parallel Tempering). + * + * The state will be optimized to minimize the value of an energy function $U(x)$, + * where $x$ is the state vector. Energies will be changed when changing the + * states, so that each energy is the actual energy of the relative state. + * + * The parameter sweep is a user-defined function that changes a given state + * (possibly randomly) and returns the variation of energy made from its + * changes of the state. It should take three parameters: a state vector, the + * inverse temperature (a scalar) and sweep_data. + * + * @param[in] sweep The sweeping function. + * Should return the energy variation relative to the changes that it + * made on the state. + * @param[in] sweep_data Additional data to be passed to the sweep function. + * @param[in,out] states On input: initial states. + * On output: optimized states. + * @param[in,out] energies The initial energy of each state. + * @param[in,out] betas Inverse temperature of each state. + * @param[in,out] best_state The state with the minimum energy found by the algorithm. + * @param[in,out] best_energy The minimum value of an energy found. + * @param[in] n_sweeps Number of Simulated Annealing iterations. + * @param[in] use_pt Whether to use Parallel Tampering or not. + * + * @tparam backend The backend used for the single objects + * @tparam StateType The state variable type. + * @tparam EnergyType The energy type. + * @tparam TempType The inverse temperature type. + * @tparam SweepDataType Type of data to be passed on to the sweep function + * (e.g. a tuple of references to temporary vectors). + * @tparam SweepFuncType The type of the function. + * The default value suggests the signature that the function should have. 
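+ *
+ * For illustration only (MySweepData and the chosen scalar types are
+ * placeholders, not part of this header), a conforming sweep callable
+ * could look like:
+ *
+ * \code
+ * auto my_sweep = []( grb::Vector< bool > &state, const double &beta,
+ *                     MySweepData &data ) -> double {
+ *     // propose (possibly random) changes to state at inverse temperature
+ *     // beta, using the scratch space carried in data, and return the
+ *     // resulting change in energy
+ *     return 0.0;
+ * };
+ * \endcode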
+ * + */ + template< + Backend backend, + typename StateType, // type of state, possibly 0/1 + typename EnergyType, + typename TempType, + typename SweepDataType, // type of data to be passed through to the sweep function + typename SweepFuncType = std::function< + EnergyType( + grb::Vector< StateType, backend >&, + const TempType&, + SweepDataType& + ) + > + > + grb::RC simulated_annealing_RE( + const SweepFuncType &sweep, + SweepDataType& sweep_data, + std::vector< grb::Vector< StateType, backend > > &states, + grb::Vector< EnergyType, backend > &energies, + grb::Vector< TempType, backend > &betas, + grb::Vector< StateType, backend > &best_state, + EnergyType &best_energy, + const size_t &n_sweeps, + const EnergyType &goal = 0, + const bool &use_pt = false, + const size_t &seed = 42 + ){ + + const size_t s = spmd<>::pid(); + const size_t n_procs = spmd<>::nprocs(); + const size_t n_replicas = states.size(); + const size_t n = grb::size(states[0]); + (void) n; + (void) s; + + grb::RC rc = grb::SUCCESS; + +#ifndef NDEBUG + assert( n_replicas > 0 ); + assert( n_replicas == grb::size( betas ) ); + + for(size_t i = 0; i < n_replicas ; ++i ){ + assert( n == grb::size( states[ i ] ) ); + } + if( s == 0 ) { + std::cerr << "DEBUG: Called simulated_annealing_RE with parameters: " + << "\n\t n = " << n + << "\n\t n_replicas = " << n_replicas + << "\n\t n_sweeps = " << n_sweeps + << "\n\t goal = " << goal + << "\n\t use_pt = " << use_pt + << "\n\t seed = " << seed + << std::endl; + } + assert( grb::size(best_state) == n ); +#endif + + best_energy = std::numeric_limits< EnergyType >::max(); + + for( size_t i_sweep = 0 ; rc == grb::SUCCESS && i_sweep < n_sweeps ; ++i_sweep ){ + for( size_t j = 0 ; j < n_replicas ; ++j ){ + + energies[j] += sweep( states[j], betas[j], sweep_data ); + grb::wait(); + + // update_best state and energy + if( energies[j] < best_energy ){ + best_energy = energies[j]; + best_state = states[j]; + } + } // n_replicas + + // TODO: find a better way than this, to avoid a sync at each iteration + rc = rc ? rc : grb::collectives<>::allreduce( + best_energy, grb::operators::min< EnergyType >() ); + + if( rc == SUCCESS && use_pt ){ + // do a Parallel Tempering move + rc = pt( states, energies, betas, seed + i_sweep ); + } + +#ifndef NDEBUG + if( s == 0 ) { + std::cerr << "Energy at iteration " << i_sweep << " = " << best_energy << std::endl; + } +#endif + if( goal < -1 && best_energy <= goal ) i_sweep = n_sweeps; + } // n_sweeps + +#ifndef NDEBUG + if( rc != grb::SUCCESS ){ + std::cerr << "ERROR at line " << __LINE__ << " in " + << __FILE__ << ": " << grb::toString( rc ) << "\n"; + } +#endif + if( rc == SUCCESS ){ + rc = rc ? rc : grb::collectives<>::allreduce( + best_energy, grb::operators::min< EnergyType >() ); + // TODO: update best state to match best energy + } + + return rc; + } + + /* + * Create a set of independent masks. + * + * Uses a graph coloring algorithm. + * Adapted from Alg. 2 of `Graph Coloring on the GPU, M. Osama, M. Truong, C. Yang, A. Buluc, J.D. Owens`. + * + * @param[out] masks The vector of constructed masks. + * @param[in] A The square (symmetric) A matrix. It should not contain any explicit zeros! + * @param[in] frontier Temporary (dense) vector used in the function + * @param[in] w Temporary (dense) vector used in the function + * @param[in] seed seed for randomization + * + * @tparam descr grb::Descriptor for matrix operations. 
Should probably not be used + * @tparam backend The backend of GraphBLAS to be used + * @tparam MaskType The state variable type. + * @tparam AType The matrix values' type. + * + */ + template< + grb::Descriptor descr = grb::descriptors::no_operation, + Backend backend, + typename MaskType, + typename AType, + typename RSI, typename CSI, typename NZI + > + grb::RC matrix_partition( + std::vector< grb::Vector< MaskType, backend > > &masks, + const grb::Matrix< AType, backend, RSI, CSI, NZI > &A, + grb::Vector< AType, backend > &frontier, + grb::Vector< AType, backend > &w, + const int seed = 42 + ) { + masks.clear(); + grb::RC rc = grb::SUCCESS; + const size_t n = grb::nrows( A ); + const size_t s = spmd<>::pid(); + assert( n == grb::ncols( A ) ); // A needs to be square + // assert( grb::is_symmetric( A ) ); + (void) s; + + grb::resize( frontier, n ); + grb::resize( w, n ); + + std::minstd_rand rng ( seed ); + + // random shuffle w + for( size_t i = 0 ; i < n ; ++i ){ + rc = rc ? rc : grb::setElement( w, i+1, i ); + } + for( size_t i = 0 ; i < n ; ++i ){ + std::uniform_int_distribution< size_t > rand ( i, n-1 ); + const auto j = rand( rng ); + const auto a = w[i]; + const auto b = w[j]; + rc = rc ? rc : grb::setElement( w, b, i ); + rc = rc ? rc : grb::setElement( w, a, j ); + } + + const grb::Semiring< + grb::operators::max< AType >, grb::operators::right_assign< AType >, + grb::identities::negative_infinity, grb::identities::zero + > maxTimesRing; + const grb::Monoid< grb::operators::add< AType >, grb::identities::zero > addMonoid; + const grb::operators::greater_than< AType > gtOp; + const grb::Monoid< grb::operators::right_assign< AType >, grb::identities::zero > right_assign; + + for( size_t i = 0; rc == grb::SUCCESS && i < n ; ++i ) { + // find max of neighbors + rc = rc ? rc : grb::set< descr >( frontier, static_cast< AType >( 0 ) ); + rc = rc ? rc : grb::mxv< descr | grb::descriptors::dense >( frontier, A, w, maxTimesRing ); + rc = rc ? rc : grb::foldl< descr | grb::descriptors::dense >( frontier, w, gtOp ); + + // is there any new node? + AType succ = static_cast< AType >( 0 ); + rc = rc ? rc : grb::foldl< descr >( succ, frontier, addMonoid ); + if( succ <= 0 ){ + break; + } + + // add new mask + masks.emplace_back( grb::Vector< bool, backend >( n ) ); + auto &new_mask = masks.at(i); + rc = rc ? rc : grb::resize( new_mask, n ); + rc = rc ? rc : grb::set< descr >( new_mask, frontier, static_cast< MaskType >(true) ); + + // do not consider the weights of used nodes + rc = rc ? rc : grb::foldl< descr >( w, new_mask, + static_cast< AType >( 0 ), right_assign ); + } + assert( rc == grb::SUCCESS ); + +#ifndef NDEBUG + if( rc != grb::SUCCESS) { + std::cerr << "Error in matrix_partition: " << rc << " " << grb::toString(rc) << std::endl; + + } + size_t cnt = 0; + if( s == 0 ) { + std::cerr << "Final masks: \n"; + } + for(const auto&mask : masks ){ + for( const auto &x : mask ){ + if( x.second ){ + if( s == 0 ) { + std::cerr << x.first << ", "; + } + cnt++; + } + } + if( s == 0 ) { + std::cerr << std::endl; + } + } + if( s == 0 ){ + assert( cnt == n ); + } +#endif + return rc; + } + + /* + * Estimate a solution to a given Ising problem. The solution is found + * using the Simulated Annealing-Replica Exchange function above. + * + * The function minimized is $U(x) = x^T(\frac{1}{2}Jx + h)$, where $J$ is the supplied + * couplings matrix and $h$ is the local_fields vector. The solution is searched + * in the space of vectors $x$ with entries $0$ or $1$. 
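+ *
+ * Since the diagonal of $J$ is zero, flipping a single entry $x_i \to 1-x_i$
+ * changes the energy by $\Delta U_i = (1 - 2x_i)\left( (Jx)_i + h_i \right)$.
+ * The sweep used by this function proposes such flips on independent sets of
+ * variables (computed via matrix_partition below) and accepts each proposed
+ * flip with probability $\min(1, e^{-\beta \Delta U_i})$.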
+ * + * states should be a vector of already initialized and filled dense grb::Vector. + * + * Warning: This function allocates $O(n)$ memory for temporary vectors. + * + * @param[in,out] states On input: initial (dense) states. + * On output: optimized (dense) states. + * @param[in] couplings The square (symmetric) couplings matrix. + * The diagonal has to be zero! + * @param[in] local_fields The vector of local fields. + * @param[in,out] energies The initial energy of each state. + * @param[in,out] betas Inverse temperature of each state. + * @param[in] n_sweeps Number of Simulated Annealing iterations. + * @param[in] use_pt Whether to use Parallel Tampering or not. + * @param[in] seed Seed to use for internal randomization (must be the same for all processees); + * + * @tparam StateType The state variable type. + * @tparam QType The matrix values' type. + * @tparam EnergyType The energy type. + * @tparam TempType The inverse temperature type. + * @tparam SweepDataType Type of data to be passed on to the sweep function + * (e.g. a tuple of references to temporary vectors). + * + */ + template< + Backend backend, + grb::Descriptor descr = grb::descriptors::no_operation, + bool empty_local_fields = false, + typename StateType, // type of state, possibly 0/1 + typename QType, // type of coupling matrix values + typename EnergyType, + typename TempType, + typename RSI, typename CSI, typename NZI, + class Ring = Semiring< + grb::operators::add< QType >, grb::operators::mul< QType >, + grb::identities::zero, grb::identities::one + > + > + grb::RC simulated_annealing_RE_Ising( + const grb::Matrix< QType, backend, RSI, CSI, NZI > &couplings, + const grb::Vector< QType, backend> &local_fields, + std::vector< grb::Vector< StateType, backend > > &states, + grb::Vector< EnergyType, backend > &energies, + grb::Vector< TempType, backend > &betas, + grb::Vector< StateType, backend > &best_state, + EnergyType &best_energy, + const size_t &n_sweeps, + const EnergyType &goal = 0, + const bool &use_pt = false, + const int seed = 42, + const Ring &ring = Ring() + ){ + const size_t n = grb::size( states[0] ); + const size_t n_replicas = grb::size(betas); + const size_t s = spmd<>::pid(); + (void) s; + grb::RC rc = grb::SUCCESS; + + assert( grb::nnz(states[0]) == n ); // state is dense + assert( states.size() == n_replicas ); + // assert( grb::is_symmetric( couplings ) ); + + assert( empty_local_fields || ( grb::size( local_fields ) == n ) ); + assert( empty_local_fields || ( grb::nnz(local_fields) == n ) ); + EnergyType energy; + grb::Vector< EnergyType, backend > tmp_calc_energy ( n ); + + const auto get_energy = [&couplings, &local_fields, &tmp_calc_energy, &ring, &n]( + EnergyType &energy, const grb::Vector< StateType, backend > &state + ){ + assert( n == grb::size( state ) ); + assert( n == grb::ncols( couplings ) ); + assert( n == grb::nrows( couplings ) ); + grb::RC rc = grb::SUCCESS; + constexpr auto dense_descr = descr | grb::descriptors::dense; + + grb::set( tmp_calc_energy, static_cast( 0.0 ) ); + rc = rc ? rc : grb::mxv< dense_descr >( tmp_calc_energy, couplings, state, ring ); + rc = rc ? rc : grb::foldl< dense_descr >( tmp_calc_energy, static_cast< EnergyType >( 0.5 ), + ring.getMultiplicativeMonoid() ); + if( !empty_local_fields) { + rc = rc ? rc : grb::foldl< dense_descr >( tmp_calc_energy, local_fields, ring.getAdditiveMonoid() ); + } + rc = rc ? 
rc : grb::dot< dense_descr >( energy, tmp_calc_energy, state, ring ); + return rc; + }; + + // it is reasonable to allow the energies to be allocated and evaluated by this function + if( grb::nnz(energies) == 0 ){ + grb::resize( energies, n_replicas ); + + for(size_t i = 0 ; i < n_replicas ; ++i){ + energy = static_cast< EnergyType >( 0.0 ); + rc = rc ? rc : get_energy( energy, states[i] ); + grb::setElement( energies, energy, i ); + } + } + + grb::Vector< QType, backend > h ( n ); + grb::Vector< QType, backend > rand ( n ); + grb::Vector< QType, backend > delta ( n ); + grb::Vector< QType, backend > dn ( n ); + grb::Vector< bool, backend > accept ( n ); + std::minstd_rand rng ( seed ); // minstd_rand or std::mt19937 + + rc = rc ? rc : grb::resize( h, n ); + rc = rc ? rc : grb::resize( rand, n ); + rc = rc ? rc : grb::resize( delta, n ); + rc = rc ? rc : grb::resize( dn, n ); + rc = rc ? rc : grb::resize( accept, n ); + + std::vector< grb::Vector< bool, backend > > masks ; + rc = rc ? rc : matrix_partition< descr >( masks, couplings, h, rand, seed ); + rc = rc ? rc : grb::clear(h); + constexpr auto dense_descr = descr | grb::descriptors::dense; + + auto sweep_data = std::tie( + (const decltype(couplings)&) couplings, + (const decltype(local_fields)&) local_fields, + (const decltype(masks)&) masks, + h, + rand, + delta, + dn, + accept, + rng, + (const decltype(ring)&) ring + ); + +#ifdef NDEBUG + const auto ising_sweep = []( +#else + const auto ising_sweep = [&get_energy]( +#endif + grb::Vector< StateType, backend > &state, + const TempType &beta, + decltype(sweep_data) &data + ){ + const size_t s = spmd<>::pid(); + (void) s; + + const auto &couplings = std::get<0>(data); + const auto &local_fields = std::get<1>(data); + const auto &masks = std::get<2>(data); + auto &h = std::get<3>(data); + auto &rand = std::get<4>(data); + auto &delta = std::get<5>(data); + auto &dn = std::get<6>(data); + auto &accept = std::get<7>(data); + auto &rng = std::get<8>(data); + const auto &ring = std::get<9>(data); + + const size_t n = grb::size( state ); + EnergyType delta_energy = static_cast< EnergyType >(0.0); + grb::RC rc = grb::SUCCESS; + + assert( grb::nnz(state) == n ); // state has to be dense! + + if( !empty_local_fields) { + rc = rc ? rc : grb::set< descr >( h, local_fields ); + }else { + rc = rc ? rc : grb::set< descr >( h, static_cast< QType >( 0.0 ) ); + } + rc = rc ? rc : grb::mxv< dense_descr >( h, couplings, state , ring ); + + std::exponential_distribution< EnergyType > rand_gen ( beta ); + for( size_t i = 0 ; i < n; ++i ){ + const auto rnd = -rand_gen( rng ); + rc = rc ? rc : grb::setElement( rand, rnd, i ); + } + + const grb::operators::leq< EnergyType > leq_operator; + const grb::operators::right_assign< EnergyType > right_assign_op; + const grb::operators::not_equal< EnergyType > neq_operator; +#ifndef NDEBUG + const grb::Vector< StateType, backend > old_state = state; +#endif + rc = rc ? rc : grb::wait(); + for(const auto &mask : masks ){ + // dn = (2*state_slice - 1) * h_slice + rc = rc ? rc : grb::set< descr >( dn, mask, state ); + rc = rc ? rc : grb::foldl< descr | grb::descriptors::invert_mask >( dn, state, static_cast< QType >( -1 ), right_assign_op ); + rc = rc ? 
rc : grb::foldl< descr >( dn, h, ring.getMultiplicativeMonoid() ); + assert( grb::nnz( dn ) == grb::nnz( mask ) ); +#ifndef NDEBUG + for( const auto x : dn ){ + assert( mask[x.first] == 1 ); + assert( (2*int(state[x.first])-1)*h[x.first] == x.second ); + } + const auto dn0 = dn; +#endif + + // Choose which changes to accept + // ( dn >= 0 ) | ( rand/beta < dn ) + rc = rc ? rc : grb::foldl< descr >( dn, rand, leq_operator ); + rc = rc ? rc : grb::set< descr >( accept, dn, mask ); + assert( grb::nnz( accept ) <= grb::nnz( mask ) ); +#ifndef NDEBUG + size_t cnt = 0; + for( const auto x : dn0 ){ + const size_t i = x.first; + assert( mask[x.first] == 1 ); + assert( x.second ); + if( x.second >= rand[i] ){ + assert( dn[i] == 1 ); + assert( accept[i] == 1 ); + cnt++; + }else{ + assert( dn[i] == 0 ); + } + } + assert( grb::nnz( accept ) == cnt ); +#endif + + // new_state = np.where(accept, 1 - old, old) + rc = rc ? rc : grb::foldl< descr >( state, accept, static_cast< StateType >( 1 ), neq_operator ); + + // delta = new - old ==> delta[accept] = 2*new_state[accept]-1 + rc = rc ? rc : grb::set< descr >( delta, accept, state ); + rc = rc ? rc : grb::foldl< descr | grb::descriptors::invert_mask >( delta, delta, static_cast< QType >( -1 ), right_assign_op ); + + // Update delta_energy -= dot(dn, accept) + rc = rc ? rc : grb::dot< descr >( delta_energy, delta, h, ring ); + + // update h + rc = rc ? rc : grb::mxv< descr >( h, couplings, delta, ring ); + } + rc = rc ? rc : grb::wait(); + +#ifndef NDEBUG + if( rc != grb::SUCCESS ){ + std::cerr << "\n\t Error in some GraphBLAS function of ising_sweep " << rc << " : " << grb::toString( rc ) << std::endl; + abort(); + } + assert( rc == grb::SUCCESS ); + const auto new_state = state; + rc = rc ? rc : grb::wait(); + + EnergyType e1 = static_cast< EnergyType >( 0.0 ), + e2 = static_cast< EnergyType >( 0.0 ); + get_energy(e1, old_state); + get_energy(e2, new_state); + const auto real_delta = e2 - e1; + if( s == 0 ){ + std::cerr << "\n\t Delta_energy: " << delta_energy; + std::cerr << "\n\t Real delta: " << real_delta; + std::cerr << "\n\t Discrepancy: " << real_delta - delta_energy; + std::cerr << std::endl; + } + assert( ISCLOSE(real_delta, delta_energy ) ); +#endif + + return delta_energy; + }; + + return simulated_annealing_RE( + ising_sweep, sweep_data, states, energies, betas, best_state, best_energy, n_sweeps, goal, use_pt, seed + ); + } + + /* + * Estimate a solution to a given QUBO problem. The solution is found + * using the Simulated Annealing-Replica Exchange function above. + * + * The function optimized is $U(x) = \frac{1}{2}x^TQx$, with the constraint that $x$ is a + * 0/1 vector. + * + * TODO: expand and complete documentation + * + * Warning: This function allocates O(n*n_replicas) memory for temporary vectors. + * + * @param[in,out] states On input: initial (dense) states. + * On output: optimized (dense) states. + * @param[in] Q The square (symmetric) Q matrix. + * @param[in,out] energies The initial energy of each state. + * @param[in,out] betas Inverse temperature of each state. + * @param[in] n_replicas Number of replicas to run concurrently. + * @param[in] n_sweeps Number of Simulated Annealing iterations. + * @param[in] use_pt Whether to use Parallel Tampering or not. + * + * @tparam StateType The state variable type. + * @tparam QType The matrix values' type. + * @tparam EnergyType The energy type. + * @tparam TempType The inverse temperature type. 
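+ *
+ * A minimal call sketch (illustrative only; the types, the construction of
+ * the dense 0/1 state vectors, and all error checking are placeholders):
+ *
+ * \code
+ * grb::Matrix< double > Q( n, n );              // symmetric QUBO matrix
+ * std::vector< grb::Vector< int8_t > > states;  // n_replicas dense 0/1 states
+ * grb::Vector< double > energies( n_replicas ), betas( n_replicas );
+ * grb::Vector< int8_t > best_state( n );
+ * double best_energy;
+ * grb::RC rc = grb::algorithms::simulated_annealing_RE_QUBO(
+ *     Q, states, energies, betas, best_state, best_energy, n_sweeps );
+ * \endcode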
+ * + */ + template< + Backend backend, + grb::Descriptor descr = grb::descriptors::no_operation, + typename StateType, // type of state, possibly 0/1 + typename QType, // type of coupling matrix values + typename EnergyType, + typename TempType, + typename RSI, typename CSI, typename NZI, + class Ring = Semiring< + grb::operators::add< QType >, grb::operators::mul< QType >, + grb::identities::zero, grb::identities::one + > + > + grb::RC simulated_annealing_RE_QUBO( + const grb::Matrix< QType, backend, RSI, CSI, NZI > &Q, + std::vector< grb::Vector< StateType, backend > > &states, + grb::Vector< EnergyType, backend > &energies, + grb::Vector< TempType, backend > &betas, + grb::Vector< StateType, backend > &best_state, + EnergyType &best_energy, + const size_t &n_sweeps, + const EnergyType &goal = 0, + const bool &use_pt = false, + const int seed = 42, + const Ring &ring = Ring() + ){ + grb::Vector< QType > empty_local_fields ( 0 ); + + return simulated_annealing_RE_Ising< backend, descr, true >( + Q, empty_local_fields, states, energies, betas, best_state, best_energy, n_sweeps, goal, use_pt, seed, ring + ); + } + } // namespace algorithms +} // end namespace grb +#undef ISCLOSE + +#endif // end _H_GRB_ALGORITHMS_SA-RE + + diff --git a/tests/smoke/CMakeLists.txt b/tests/smoke/CMakeLists.txt index 7e7f2af4a..cfb1d8506 100644 --- a/tests/smoke/CMakeLists.txt +++ b/tests/smoke/CMakeLists.txt @@ -144,6 +144,21 @@ add_grb_executables( conjugate_gradient_complex conjugate_gradient.cpp COMPILE_DEFINITIONS _CG_COMPLEX ) +add_grb_executables( simulated_annealing_re_from_mpi simulated_annealing_re_from_mpi.cpp + BACKENDS bsp1d + ADDITIONAL_LINK_LIBRARIES test_utils_headers +) + +add_grb_executables( simulated_annealing_re_ising simulated_annealing_re_ising.cpp + BACKENDS reference reference_omp hyperdags nonblocking + ADDITIONAL_LINK_LIBRARIES test_utils_headers +) + +add_grb_executables( simulated_annealing_re_planted_sol simulated_annealing_re_planted_sol.cpp + BACKENDS reference reference_omp hyperdags nonblocking bsp1d + ADDITIONAL_LINK_LIBRARIES test_utils_headers +) + add_grb_executables( gmres gmres.cpp BACKENDS reference reference_omp bsp1d hybrid hyperdags nonblocking ADDITIONAL_LINK_LIBRARIES test_utils_headers diff --git a/tests/smoke/simulated_annealing_re_from_mpi.cpp b/tests/smoke/simulated_annealing_re_from_mpi.cpp new file mode 100644 index 000000000..1c84c5078 --- /dev/null +++ b/tests/smoke/simulated_annealing_re_from_mpi.cpp @@ -0,0 +1,756 @@ +/* + Minimal scaffold adapted from ising_machine_sb.cpp to drive a replica-exchange + simulated-annealing (RE-SA) solver. Algorithmic parts are intentionally left + unimplemented (stubs). This file mirrors the existing IO / launcher / + program structure and replaces numpy arrays with grb::Vector and lists of + numpy vectors with std::vector< grb::Vector<...> >. Sparse matrices are + represented as grb::Matrix< JType >. + + Purpose: allow running internal tests or an external-run mode while the RE-SA + algorithm is implemented separately. 
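+
+  Example invocation (illustrative; the exact binary name and MPI launcher
+  depend on the local build):
+
+    mpirun -np 2 ./simulated_annealing_re_from_mpi --use-default-data \
+        --n-replicas 3 --nsweeps 2 --use-pt 1 --seed 8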
+*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +const int LPF_MPI_AUTO_INITIALIZE = 0; + +using namespace grb; + +// #define DEBUG_SARE 1 +constexpr size_t MAX_FN_SIZE = 255; + +// Types +using IOType = int8_t; // scalar/vector element type +using JType = float; // coupling (matrix) value type +using EnergyType = double; // energy value type + +template< typename T1, typename T2 > +inline bool ISCLOSE( const T1 &a, const T2 &b, const double tol = 1e-4){ + return (std::abs((b)-(a))/std::abs(a) < tol) || (std::abs((b)-(a)) < tol); +} + +// Backend to be used inside each process +constexpr grb::Backend internal_backend = grb::reference; + +/** Parser type */ +typedef grb::utils::MatrixFileReader< + JType, + std::conditional< + (sizeof(grb::config::RowIndexType) > sizeof(grb::config::ColIndexType)), + grb::config::RowIndexType, + grb::config::ColIndexType + >::type +> Parser; + +/** Nonzero type */ +typedef internal::NonzeroStorage< + grb::config::RowIndexType, + grb::config::ColIndexType, + JType +> NonzeroT; + +/** In-memory storage type using tuple */ +typedef grb::utils::Singleton< + std::tuple< + size_t, // n (rows/columns) + size_t, // nz (nonzeros) + size_t, // nsweeps + size_t, // n_replicas + bool, // use_pt + unsigned, // seed + std::vector, // matrix data + std::vector // h vector + > +> Storage; + +namespace test_data { + constexpr size_t n = 16; + constexpr size_t nsweeps = 2; + constexpr size_t n_replicas = 3; + constexpr bool use_pt = true; + constexpr unsigned seed = 8; + + const std::vector< std::pair< std::pair< grb::config::RowIndexType, grb::config::ColIndexType >, JType > > j_matrix_data = { + {{0, 1}, -0.2752300610319546}, + {{1, 0}, -0.2752300610319546}, + {{1, 2}, -0.10636508505639508}, + {{2, 1}, -0.10636508505639508}, + {{2, 3}, 0.3961450048806352}, + {{3, 2}, 0.3961450048806352}, + {{3, 4}, -0.15453838800213293}, + {{3, 5}, 0.4847494372852713}, + {{4, 3}, -0.15453838800213293}, + {{4, 5}, -0.4712679510367046}, + {{5, 3}, 0.4847494372852713}, + {{5, 4}, -0.4712679510367046}, + {{5, 6}, -0.1483152637298799}, + {{6, 5}, -0.1483152637298799}, + {{7, 8}, -0.11904111079614699}, + {{8, 7}, -0.11904111079614699}, + {{9, 10}, -0.18031020353297234}, + {{10, 9}, -0.18031020353297234}, + {{10, 11}, -0.22985425840853468}, + {{11, 10}, -0.22985425840853468}, + {{11, 12}, 0.30105588632639446}, + {{11, 13}, 0.13823880612312134}, + {{12, 11}, 0.30105588632639446}, + {{13, 11}, 0.13823880612312134}, + {{13, 14}, 0.10364447636911123}, + {{14, 13}, 0.10364447636911123}, + {{14, 15}, 0.2955745584289766}, + {{15, 14}, 0.2955745584289766}, + }; + + + const size_t nnz = j_matrix_data.size(); + + const std::vector< JType > h_array_data = { + -0.08910436, 0.58034508, 0.97719304, 0.16792909, + -0.9221754 , -0.10715418, -0.62365497, 0.25411129, + -0.5693644 , -0.69805978, 0.07228861, -0.79922641, + 0.46231686 , 0.87930208 , 0.88663637, -0.25052299, + }; + +} +// --- New, minimal runner configuration and result types --- +struct input { + bool use_default_data = false; + size_t n_replicas = test_data::n_replicas; + size_t nsweeps = test_data::nsweeps; + bool use_pt = test_data::use_pt; + unsigned seed = test_data::seed; + EnergyType reference_energy = 0.0; + bool verify = false; + char filename_Jmatrix [ MAX_FN_SIZE + 1 ]; + char filename_h [ MAX_FN_SIZE + 1 ]; + char filename_ref_solution [ MAX_FN_SIZE + 1 ]; + 
bool direct; + size_t rep = 0; + size_t outer = 1; +}; + +struct output { + int error_code = 0; + // TODO: remove itrations if not applicable + size_t iterations = 10; // total number of iterations performed does not make sense since the code does not have convergence criteria + EnergyType best_energy = std::numeric_limits< EnergyType >::max(); + size_t rep; + grb::utils::TimerResults times; + std::unique_ptr< PinnedVector< JType, internal_backend > > pinnedSolutionVector; + std::unique_ptr< PinnedVector< JType, internal_backend > > pinnedRefSolutionVector; + // other things like eg: best replicas ... +}; + +template< typename Dtype > +void read_matrix_data(const std::string &filename, std::vector &data, bool direct) { + // Implementation for reading matrix data from file + try { + Parser parser( filename, direct ); + assert( parser.m() == parser.n() ); + std::get<0>(Storage::getData()) = parser.n(); + try { + std::get<1>(Storage::getData()) = parser.nz(); + } catch( ... ) { + std::get<1>(Storage::getData()) = parser.entries(); + } + /* Once internal issue #342 is resolved this can be re-enabled + for( + auto it = parser.begin( PARALLEL ); + it != parser.end( PARALLEL ); + ++it + ) { + data.push_back( *it ); + }*/ + for( + auto it = parser.begin( SEQUENTIAL ); + it != parser.end( SEQUENTIAL ); + ++it + ) { + data.push_back( Dtype( *it ) ); +#ifdef DEBUG_SARE + if( spmd<>::pid() == 0 ){ + // print last data element from std::vector data + std::cout << "readmatrix_data: " << data.back().first.first << ", " + << data.back().first.second << ", " << data.back().second << "\n"; + } +#endif + } + } catch( std::exception &e ) { + std::cerr << "I/O program failed: " << e.what() << "\n"; + return; + } +} + +template< typename NonzeroT, typename IType, typename VType > +void read_matrix_data_from_array( + const std::vector, VType > > &array, + std::vector &data +) { + // Implementation for reading matrix data from array + try { + for (const auto &entry : array) { + data.emplace_back( + NonzeroT( entry.first.first, entry.first.second, entry.second ) + ); +#ifdef DEBUG_SARE + if( spmd<>::pid() < 2 ){ + // print last data element from std::vector data + std::cout << "read_matrix_data_from_array: " << data.back().first.first << ", " + << data.back().first.second << ", " << data.back().second << "\n"; + } +#endif + } + std::get<0>(Storage::getData()) = test_data::n; + std::get<1>(Storage::getData()) = data.size(); + } catch (const std::exception &e) { + std::cerr << "Failed to read matrix data from array: " << e.what() << "\n"; + return; + } +} + +template< typename Dtype > +void read_vector_data(const std::string &filename, std::vector &data) { + // Implementation for reading vector data from file + try { + std::ifstream file( filename ); + if( !file.is_open() ) { + std::cerr << "Failed to open vector file: " << filename << "\n"; + return; + } + std::string line; + while( std::getline( file, line ) ) { + if( line.empty() ) continue; // skip empty lines + std::istringstream iss( line ); + Dtype v; + if( !(iss >> v) ) { + throw std::runtime_error( "Failed to parse line in vector file" ); + } + data.push_back( v ); + } + } catch( std::exception &e ) { + std::cerr << "I/O program failed: " << e.what() << "\n"; + return; + } +} + +template< typename Dtype > +void read_vector_data_from_array( + const std::vector &array, std::vector &data +) { + // Implementation for reading vector data from array + try { + for (size_t i = 0; i < array.size(); ++i) { + data.push_back(array[i]); + } + } catch (const 
std::exception &e) { + std::cerr << "Failed to read vector data from array: " << e.what() << "\n"; + return; + } +} + +template< + Backend backend, + grb::Descriptor descr = grb::descriptors::no_operation, + class Ring = Semiring< + grb::operators::add< JType >, grb::operators::mul< JType >, + grb::identities::zero, grb::identities::one + >, + typename Ttmp + > +EnergyType get_energy( + const grb::Matrix< JType, backend >& couplings, + const grb::Vector< JType, backend > &local_fields, + const grb::Vector< IOType,backend > &state, + grb::Vector< Ttmp, backend > &tmp, + const Ring &ring = Ring() + ){ + const size_t n = grb::size( local_fields ); + assert( n == grb::size( state ) ); + assert( n == grb::ncols( couplings ) ); + assert( n == grb::nrows( couplings ) ); + grb::RC rc = grb::SUCCESS; + rc = rc ? rc : grb::resize( tmp, n ); + EnergyType energy = 0.0; + constexpr auto dense_descr = descr | grb::descriptors::dense; + + rc = rc ? rc : grb::set< descr >( tmp, 0.0 ); + rc = rc ? rc : grb::mxv< dense_descr >( tmp, couplings, state, ring ); + rc = rc ? rc : grb::foldl< dense_descr >( tmp, static_cast< JType >( 0.5 ), ring.getMultiplicativeMonoid() ); + rc = rc ? rc : grb::foldl< dense_descr >( tmp, local_fields, ring.getAdditiveMonoid() ); + rc = rc ? rc : grb::dot< dense_descr >( energy, tmp, state, ring ); + assert( rc == grb::SUCCESS ); + + return energy; +} + +void ioProgram( const struct input &data_in, bool &success ) { + + using namespace test_data; + success = false; + + const size_t s = spmd<>::pid(); + (void) s; + assert( s < spmd<>::nprocs() ); + + try { + // Parse and store matrix in singleton class + // Map Storage tuple fields to meaningful names and wire up default data + auto &storage = Storage::getData(); + auto &n = std::get<0>(storage); // n (rows/cols) + auto &nnz = std::get<1>(storage); // nz (nonzeros) + auto &nsweeps_st = std::get<2>(storage); // nsweeps + auto &n_replicas_st = std::get<3>(storage); // n_replicas + auto &use_pt = std::get<4>(storage); // use_pt + auto &seed_st = std::get<5>(storage); // seed + auto &Jdata = std::get<6>(storage); // std::vector + auto &h = std::get<7>(storage); // std::vector + + // Initialize metadata from input (allow CLI to override defaults) + (void) n; + (void) nnz; + nsweeps_st = data_in.nsweeps; + n_replicas_st = data_in.n_replicas; + use_pt = data_in.use_pt; + seed_st = data_in.seed; + + + if ( data_in.use_default_data ) { + // if no file provided, use default data from file_content + read_matrix_data_from_array( test_data::j_matrix_data, Jdata ); + read_vector_data_from_array( test_data::h_array_data, h ); + // other data + } else { + // read from files if provided + read_matrix_data( data_in.filename_Jmatrix, Jdata, data_in.direct ); + if( std::strlen( data_in.filename_h ) > 0 ) { + read_vector_data( data_in.filename_h, h ); + }else{ + h.resize( n ); + std::fill( h.begin(), h.end(), static_cast< JType >( 0 ) ); + } + if( data_in.verify ) { + if( std::strlen(data_in.filename_ref_solution) == 0 ) { + std::cerr << "Reference solution file not provided for verification\n"; + return; + } + } + } + } catch( std::exception &e ) { + std::cerr << "I/O program failed: " << e.what() << "\n"; + return; + } + + success = true; +} + + +void grbProgram( + const struct input &data_in, + struct output &out +) { + std::cout<< "grbProgram: running simulated-annealing RE solver (stub)\n"; + + // get user process ID + const size_t s = spmd<>::pid(); + const size_t nprocs = spmd<>::nprocs(); + (void) nprocs; + + grb::utils::Timer timer; + 
timer.reset(); + + /* --- Problem setup --- */ + const size_t n = std::get<0>(Storage::getData()); + (void) n; + if( s == 0 ){ + std::cout << "problem size n = " << n << "\n"; + } + grb::Vector< JType, internal_backend > h( n ); + + // populate J with test (random) values + grb::RC rc = grb::SUCCESS; + + // load into GraphBLAS + grb::Matrix< JType, internal_backend > J( n, n ); + { + const auto &data = std::get<6>(Storage::getData()); + RC io_rc = buildMatrixUnique( + J, + utils::makeNonzeroIterator< + grb::config::RowIndexType, grb::config::ColIndexType, JType + >( data.cbegin() ), + utils::makeNonzeroIterator< + grb::config::RowIndexType, grb::config::ColIndexType, JType + >( data.cend() ), + SEQUENTIAL + ); + /* Once internal issue #342 is resolved this can be re-enabled + RC io_rc = buildMatrixUnique( + J, + utils::makeNonzeroIterator< + grb::config::RowIndexType, grb::config::ColIndexType, JType + >( data.cbegin() ), + utils::makeNonzeroIterator< + grb::config::RowIndexType, grb::config::ColIndexType, JType + >( data.cend() ), + PARALLEL + );*/ + io_rc = io_rc ? io_rc : wait(); + if( io_rc != SUCCESS ) { + std::cerr << "Failure: call to buildMatrixUnique did not succeed " + << "(" << toString( io_rc ) << ")." << std::endl; + out.error_code = 5; + return; + } + +#ifdef DEBUG_SARE + if( s == 0 && grb::ncols( J ) < 40 ) { + std::cout << "Matrix J:\n"; + print_matrix( J ); + } +#endif + } + + // build vector h with data from singleton + { + const auto &h_data = std::get<7>(Storage::getData()); + rc = rc ? rc : buildVector( + h, + h_data.cbegin(), + h_data.cend(), + SEQUENTIAL + ); + } + + // seed RNGs (C and C++ engines) using requested seed (hardcoded default 8 if not provided) + std::minstd_rand rng ( data_in.seed + s ); // rng or std::mt19937 + + // create states storage and initialize with random 1/0 values + const size_t n_replicas = data_in.n_replicas; + std::vector< grb::Vector< IOType, internal_backend > > states0; + std::vector< grb::Vector< IOType, internal_backend > > states; + std::vector< IOType > rand_data (n); + for ( size_t r = 0; r < n_replicas; ++r ) { + // initialize with random values + std::uniform_int_distribution< IOType > randint(0,1); + // we use buildvectorUnique with a random set of indices + for ( size_t i = 0; i < n; ++i ) { + rand_data[i] = randint( rng ); + } + + states.emplace_back( n ); + states0.emplace_back( n ); + rc = rc ? rc : grb::buildVector( + states0.back(), + rand_data.cbegin(), + rand_data.cend(), + SEQUENTIAL + ); + rc = rc ? rc : grb::set( states.back(), states0.back() ); + } + + // also make betas vector of size n_replicas and initialize with a geometric gradient + grb::Vector< JType, internal_backend > betas( n_replicas ); + grb::Vector< EnergyType, internal_backend > energies( n_replicas ); + grb::Vector< EnergyType, internal_backend > energies0( n_replicas ); + grb::Vector< EnergyType, internal_backend > tmp_energy( n ); + for ( size_t r = 0; rc == grb::SUCCESS && r < n_replicas; ++r ) { + rc = rc ? rc : grb::setElement( betas, static_cast< JType >( ( 1.0 ) * std::pow< JType >( 1.5, ( n_replicas * s + r ) ) ), r ); + rc = rc ? rc : grb::setElement( energies0, get_energy( J, h, states[r], tmp_energy ), r ); + } + rc = rc ? 
rc : grb::set( energies, energies0 ); + + #ifdef DEBUG_SARE + if( s == 0 ) { + for ( size_t r = 0; r < n_replicas; ++r ) { + std::cout << "Process " << s << ": "; + std::cout << "Initial state replica " << r << ":\n"; + print_vector( states[r], 30 ,"states values" ); + std::cout << "With energy " << energies0[r] << "\n"; + std::cout << std::endl; + } + } + #endif + grb::Vector< IOType, internal_backend > best_state ( n ); + rc = rc ? rc : wait(); + + out.rep = data_in.rep; + // time a single call + if( out.rep == 0 ) { + timer.reset(); + rc = grb::algorithms::simulated_annealing_RE_Ising( + J, h, states, energies, betas, best_state, out.best_energy, data_in.nsweeps, data_in.reference_energy, data_in.use_pt, data_in.seed + ); + + rc = rc ? rc : wait(); + double single_time = timer.time(); + if( !(rc == SUCCESS || rc == FAILED) ) { + std::cerr << "Failure: call to Simulated Annealing RE did not succeed (" + << toString( rc ) << ")." << std::endl; + out.error_code = 20; + } + if( rc == FAILED ) { + std::cout << "Warning: call to Simulated Annealing RE did not converge\n"; + } + if( rc == SUCCESS ) { + rc = collectives<>::reduce( single_time, 0, operators::max< double >() ); + + } + if( rc != SUCCESS ) { + out.error_code = 25; + } + out.times.useful = single_time; + out.rep = static_cast< size_t >( 1000.0 / single_time ) + 1; + if( rc == SUCCESS || rc == FAILED ) { + if( s == 0 ) { + if( rc == FAILED ) { + std::cout << "Info: cold Simulated Annealing RE did not converge within "; + } else { + std::cout << "Info: cold Simulated Annealing RE completed within "; + } + std::cout << out.iterations << " iterations. " + << "Time taken was " << single_time << " ms. " + << "Deduced inner repetitions parameter of " << out.rep << " " + << "to take 1 second or more per inner benchmark.\n"; + } + } + } else { + for( size_t i = 0; i < 2 ; ++i ){ + for ( size_t r = 0; r < n_replicas; ++r ) { + rc = rc ? rc : grb::set(states[r], states0[r]); + } + rc = rc ? rc : grb::set( energies, energies0 ); + + rc = grb::algorithms::simulated_annealing_RE_Ising( + J, h, states, energies, betas, best_state, out.best_energy, data_in.nsweeps, data_in.reference_energy, data_in.use_pt, data_in.seed + ); + } + // do benchmark + double min_time = 1e9; + double max_time = 0; + double total_time = 0; + for( size_t i = 0; i < out.rep && rc == SUCCESS; ++i ) { + for ( size_t r = 0; r < n_replicas; ++r ) { + rc = rc ? rc : grb::set(states[r], states0[r]); + } + out.best_energy = std::numeric_limits< EnergyType >::max(); + rc = rc ? rc : grb::set( energies, energies0 ); + timer.reset(); + if( rc == SUCCESS ) { + out.iterations = data_in.nsweeps; + + rc = grb::algorithms::simulated_annealing_RE_Ising( + J, h, states, energies, betas, best_state, out.best_energy, data_in.nsweeps, data_in.reference_energy, data_in.use_pt, data_in.seed + i + ); + grb::collectives<>::allreduce( out.best_energy, grb::operators::min< EnergyType >() ); + } + const double time_taken = timer.time(); + min_time = std::min(min_time, time_taken); + max_time = std::max(max_time, time_taken); + total_time += time_taken; + if(s == 0){ + std::cerr << n_replicas << "," << data_in.nsweeps << "," << time_taken << "," << out.best_energy << std::endl; + } + } + + out.times.useful = total_time / static_cast< double >( out.rep ); + // print timing at root process + if( s == 0 ) { + std::cout << "Average Time taken for " << out.rep << " " + << "Simulated Annealing RE calls (hot start): " << out.times.useful << ". 
" + << "Error code is " << grb::toString( rc ) << std::endl; + std::cout << "\tnumber of IM-SB iterations: " << out.rep << "\n"; std::cout << "\tmilliseconds per iteration: " + << ( out.times.useful / static_cast< double >( out.iterations ) ) << "\n";; + std::cout << "\tMin Time: " << min_time << "\n"; + std::cout << "\tMax Time: " << max_time << "\n"; + } + sleep( 1 ); + } + + // start postamble + timer.reset(); + + // set error code + if( rc == FAILED ) { + out.error_code = 30; + } else if( rc != SUCCESS ) { + std::cerr << "Benchmark run returned error: " << toString( rc ) << "\n"; + out.error_code = 35; + return; + } +} + + +// --- Simple help / CLI parser for the new runner (no backward compatibility) --- +void printhelp( char *progname ) { + std::cout << "Usage: " << progname << " [--use-default-data] [--j-matrix-fname STR] [--h-fname STR]\n" + << " [--n-replicas INT] [--nsweeps INT] [--seed INT]\n" + << " [--rep INT] [--goal INT] [--verify] [--ref-solution-fname STR] [--help]\n\n" + << "Options:\n" + << " --use-default-data Use embedded default test data\n" + << " --j-matrix-fname STR Path to J matrix file (matrix-market or supported)\n" + << " --h-fname STR Path to h (local fields) vector (whitespace separated), if not provided assume zero\n" + << " --n-replicas INT Number of replicas (default: 3)\n" + << " --nsweeps INT Number of sweeps (default: 2)\n" + << " --use-pt BOOL Use Parallel Tampering (default: 1)\n" + << " --seed INT RNG seed (default: 8)\n" + << " --rep INT Number of times to repeat the run of the algorithm (default: 1)\n" + << " --goal FLOAT The value of the energy to achieve before stopping (default: 0, no such check).\n" + << " --verify Verify output against reference solution\n" + << " --ref-solution-fname STR Reference solution file (required with --verify unless using default data)\n" + << " --help, -h Print this help message\n"; +} + +bool parse_arguments( input &in, int argc, char ** argv ) { + std::fill( in.filename_Jmatrix, in.filename_Jmatrix + MAX_FN_SIZE, '\0' ); + std::fill( in.filename_h, in.filename_h + MAX_FN_SIZE, '\0' ); + std::fill( in.filename_ref_solution, in.filename_ref_solution + MAX_FN_SIZE, '\0' ); + in.direct = true; + // map benchmarking configuration to the runner's fields + in.rep = grb::config::BENCHMARKING::inner(); + in.outer = grb::config::BENCHMARKING::outer(); + in.reference_energy = static_cast( 0.0 ); + // keep verify default (false) unless overridden via CLI + in.verify = false; + + for ( int i = 1; i < argc; ++i ) { + std::string a = argv[i]; + if ( a == "--use-default-data" ) { + in.use_default_data = true; + } else if ( a == "--j-matrix-fname" ) { + if ( i+1 >= argc ) { std::cerr << "--j-matrix-fname requires an argument\n"; return false; } + std::strncpy( in.filename_Jmatrix, argv[++i], MAX_FN_SIZE ); + } else if ( a == "--h-fname" ) { + if ( i+1 >= argc ) { std::cerr << "--h-fname requires an argument\n"; return false; } + std::strncpy( in.filename_h, argv[++i], MAX_FN_SIZE ); + } else if ( a == "--n-replicas" ) { + if ( i+1 >= argc ) { std::cerr << "--n-replicas requires an argument\n"; return false; } + in.n_replicas = static_cast( std::stoul(argv[++i]) ); + } else if ( a == "--nsweeps" ) { + if ( i+1 >= argc ) { std::cerr << "--nsweeps requires an argument\n"; return false; } + in.nsweeps = static_cast( std::stoul(argv[++i]) ); + } else if ( a == "--use-pt" ) { + if ( i+1 >= argc ) { std::cerr << "--use-pt requires an argument\n"; return false; } + in.use_pt = static_cast( std::stoul(argv[++i]) ); + } else if ( a == 
"--seed" ) { + if ( i+1 >= argc ) { std::cerr << "--seed requires an argument\n"; return false; } + in.seed = static_cast( std::stoul(argv[++i]) ); + } else if ( a == "--rep" ) { + if ( i+1 >= argc ) { std::cerr << "--rep requires an argument\n"; return false; } + in.rep = static_cast( std::stoul(argv[++i]) ); + } else if ( a == "--goal" ) { + if ( i+1 >= argc ) { std::cerr << "--goal requires an argument\n"; return false; } + in.reference_energy = std::stof(argv[++i]); + } else if ( a == "--verify" ) { + in.verify = true; + } else if ( a == "--ref-solution-fname" ) { + if ( i+1 >= argc ) { std::cerr << "--ref-solution-fname requires an argument\n"; return false; } + std::strncpy( in.filename_ref_solution, argv[++i], MAX_FN_SIZE ); + } else if ( a == "--help" || a == "-h" ) { + printhelp( argv[0] ); + return false; + } else { + std::cerr << "Unknown argument: " << a << "\n"; + return false; + } + } + + // basic validation + if ( !in.use_default_data ) { + if ( std::strlen( in.filename_Jmatrix ) == 0 ) { + std::cerr << "Either --use-default-data or --j-matrix-fname must be provided\n"; + return false; + } + } + if ( in.verify && !in.use_default_data + && std::strlen( in.filename_ref_solution ) == 0 ) { + std::cerr << "--ref-solution-fname required when --verify is used without --use-default-data\n"; + return false; + } + return true; +} + +// --- Minimal main that uses the existing ioProgram / grbProgram entrypoints --- +int main( int argc, char ** argv ) { + std::cout << "simulated_anealing_re runner\n"; + input in; + output out; + + // init MPI + if( MPI_Init( &argc, &argv ) != MPI_SUCCESS ) { + std::cerr << "MPI_Init returns with non-SUCCESS exit code." << std::endl; + return 10; + } + + if ( !parse_arguments( in, argc, argv ) ) { + printhelp( argv[0] ); + return 1; + } + + + std::cout << "seed=" << in.seed << " n_replicas=" << in.n_replicas << " nsweeps=" << in.nsweeps << " sweep=ising_sweep_spmd" << "\n"; + + // Run IO program (populates Storage or similar) + { + bool success = false; + grb::Launcher< FROM_MPI > launcher( MPI_COMM_WORLD ); + grb::RC rc = launcher.exec( &ioProgram, in, success, true ); + if ( rc != SUCCESS ) { + std::cerr << "I/O launcher failed: " << toString(rc) << "\n"; + return 2; + } + if ( !success ) { + std::cerr << "I/O program reported failure\n"; + return 3; + } + } + + // Run main GraphBLAS program that builds data and calls reSA stub + { + grb::Launcher< FROM_MPI > launcher( MPI_COMM_WORLD ); + grb::RC rc = launcher.exec( &grbProgram, in, out, true ); + if ( rc != SUCCESS ) { + std::cerr << "grbProgram launcher failed: " << toString(rc) << "\n"; + return 4; + } + } + + int s; + if( MPI_Comm_rank(MPI_COMM_WORLD, &s) != MPI_SUCCESS ) { + std::cerr << "MPI_Comm_rank returns with non-SUCCESS exit code." << std::endl; + return 51; + } + if( s == 0 ){ + std::cout << "Finished: error_code=" << out.error_code << " iterations=" << out.rep << " best_energy=" << out.best_energy << "\n"; + } + + // finalise MPI + if( MPI_Finalize() != MPI_SUCCESS ) { + std::cerr << "MPI_Finalize returns with non-SUCCESS exit code." << std::endl; + return 50; + } + + return out.error_code; +} diff --git a/tests/smoke/simulated_annealing_re_ising.cpp b/tests/smoke/simulated_annealing_re_ising.cpp new file mode 100644 index 000000000..e3f7794d6 --- /dev/null +++ b/tests/smoke/simulated_annealing_re_ising.cpp @@ -0,0 +1,739 @@ +/* + Minimal scaffold adapted from ising_machine_sb.cpp to drive a replica-exchange + simulated-annealing (RE-SA) solver. 
Algorithmic parts are intentionally left + unimplemented (stubs). This file mirrors the existing IO / launcher / + program structure and replaces numpy arrays with grb::Vector and lists of + numpy vectors with std::vector< grb::Vector<...> >. Sparse matrices are + represented as grb::Matrix< JType >. + + Purpose: allow running internal tests or an external-run mode while the RE-SA + algorithm is implemented separately. +*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace grb; + +// #define DEBUG_SARE 1 +constexpr size_t MAX_FN_SIZE = 255; + +// Types +using IOType = int8_t; // scalar/vector element type +using JType = float; // coupling (matrix) value type +using EnergyType = double; // energy value type + +template< typename T1, typename T2 > +inline bool ISCLOSE( const T1 &a, const T2 &b, const double tol = 1e-4){ + return (std::abs((b)-(a))/std::abs(a) < tol) || (std::abs((b)-(a)) < tol); +} + +/** Parser type */ +typedef grb::utils::MatrixFileReader< + JType, + std::conditional< + (sizeof(grb::config::RowIndexType) > sizeof(grb::config::ColIndexType)), + grb::config::RowIndexType, + grb::config::ColIndexType + >::type +> Parser; + +/** Nonzero type */ +typedef internal::NonzeroStorage< + grb::config::RowIndexType, + grb::config::ColIndexType, + JType +> NonzeroT; + +/** In-memory storage type using tuple */ +typedef grb::utils::Singleton< + std::tuple< + size_t, // n (rows/columns) + size_t, // nz (nonzeros) + size_t, // nsweeps + size_t, // n_replicas + bool, // use_pt + unsigned, // seed + std::vector, // matrix data + std::vector // h vector + > +> Storage; + +namespace test_data { + constexpr size_t n = 16; + constexpr size_t nsweeps = 2; + constexpr size_t n_replicas = 3; + constexpr bool use_pt = true; + constexpr unsigned seed = 8; + + const std::vector< std::pair< std::pair< grb::config::RowIndexType, grb::config::ColIndexType >, JType > > j_matrix_data = { + {{0, 1}, -0.2752300610319546}, + {{1, 0}, -0.2752300610319546}, + {{1, 2}, -0.10636508505639508}, + {{2, 1}, -0.10636508505639508}, + {{2, 3}, 0.3961450048806352}, + {{3, 2}, 0.3961450048806352}, + {{3, 4}, -0.15453838800213293}, + {{3, 5}, 0.4847494372852713}, + {{4, 3}, -0.15453838800213293}, + {{4, 5}, -0.4712679510367046}, + {{5, 3}, 0.4847494372852713}, + {{5, 4}, -0.4712679510367046}, + {{5, 6}, -0.1483152637298799}, + {{6, 5}, -0.1483152637298799}, + {{7, 8}, -0.11904111079614699}, + {{8, 7}, -0.11904111079614699}, + {{9, 10}, -0.18031020353297234}, + {{10, 9}, -0.18031020353297234}, + {{10, 11}, -0.22985425840853468}, + {{11, 10}, -0.22985425840853468}, + {{11, 12}, 0.30105588632639446}, + {{11, 13}, 0.13823880612312134}, + {{12, 11}, 0.30105588632639446}, + {{13, 11}, 0.13823880612312134}, + {{13, 14}, 0.10364447636911123}, + {{14, 13}, 0.10364447636911123}, + {{14, 15}, 0.2955745584289766}, + {{15, 14}, 0.2955745584289766}, + }; + + const size_t nnz = j_matrix_data.size(); + + const std::vector< JType > h_array_data = { + -0.08910436, 0.58034508, 0.97719304, 0.16792909, + -0.9221754 , -0.10715418, -0.62365497, 0.25411129, + -0.5693644 , -0.69805978, 0.07228861, -0.79922641, + 0.46231686 , 0.87930208 , 0.88663637, -0.25052299, + }; +} +// --- New, minimal runner configuration and result types --- +struct input { + size_t n = test_data::n; + size_t n_replicas = test_data::n_replicas; + size_t nsweeps = test_data::nsweeps; + bool 
use_pt = test_data::use_pt; + unsigned seed = test_data::seed; + bool use_default_data = false; + char filename_Jmatrix [ MAX_FN_SIZE + 1 ]; + char filename_h [ MAX_FN_SIZE + 1 ]; + EnergyType reference_energy = 0.0; + bool verify = false; + char filename_ref_solution [ MAX_FN_SIZE + 1 ]; + bool direct; + size_t rep = 0; + size_t outer = 1; +}; + +struct output { + int error_code = 0; + // TODO: remove itrations if not applicable + size_t iterations = 10; // total number of iterations performed does not make sense since the code does not have convergence criteria + EnergyType best_energy = std::numeric_limits< EnergyType >::max(); + size_t rep; + grb::utils::TimerResults times; + std::unique_ptr< PinnedVector< JType > > pinnedSolutionVector; + std::unique_ptr< PinnedVector< JType > > pinnedRefSolutionVector; + // other things like eg: best replicas ... +}; + +template< typename Dtype > +void read_matrix_data(const std::string &filename, std::vector &data, bool direct) { + // Implementation for reading matrix data from file + try { + Parser parser( filename, direct ); + assert( parser.m() == parser.n() ); + std::get<0>(Storage::getData()) = parser.n(); + try { + std::get<1>(Storage::getData()) = parser.nz(); + } catch( ... ) { + std::get<1>(Storage::getData()) = parser.entries(); + } + /* Once internal issue #342 is resolved this can be re-enabled + for( + auto it = parser.begin( PARALLEL ); + it != parser.end( PARALLEL ); + ++it + ) { + data.push_back( *it ); + }*/ + for( + auto it = parser.begin( SEQUENTIAL ); + it != parser.end( SEQUENTIAL ); + ++it + ) { + data.push_back( Dtype( *it ) ); +#ifdef DEBUG_SARE + if( spmd<>::pid() == 0 ){ + // print last data element from std::vector data + std::cout << "readmatrix_data: " << data.back().first.first << ", " + << data.back().first.second << ", " << data.back().second << "\n"; + } +#endif + } + } catch( std::exception &e ) { + std::cerr << "I/O program failed: " << e.what() << "\n"; + return; + } +} + +template< typename NonzeroT, typename IType, typename VType > +void read_matrix_data_from_array( + const std::vector, VType > > &array, + std::vector &data +) { + // Implementation for reading matrix data from array + try { + for (const auto &entry : array) { + data.emplace_back( + NonzeroT( entry.first.first, entry.first.second, entry.second ) + ); +#ifdef DEBUG_SARE + if( spmd<>::pid() < 1 ){ + // print last data element from std::vector data + std::cout << "read_matrix_data_from_array: " << data.back().first.first << ", " + << data.back().first.second << ", " << data.back().second << "\n"; + } +#endif + } + std::get<0>(Storage::getData()) = test_data::n; + std::get<1>(Storage::getData()) = data.size(); + } catch (const std::exception &e) { + std::cerr << "Failed to read matrix data from array: " << e.what() << "\n"; + return; + } +} + +template< typename Dtype > +void read_vector_data(const std::string &filename, std::vector &data) { + // Implementation for reading vector data from file + try { + std::ifstream file( filename ); + if( !file.is_open() ) { + std::cerr << "Failed to open vector file: " << filename << "\n"; + return; + } + std::string line; + while( std::getline( file, line ) ) { + if( line.empty() ) continue; // skip empty lines + std::istringstream iss( line ); + Dtype v; + if( !(iss >> v) ) { + throw std::runtime_error( "Failed to parse line in vector file" ); + } + data.push_back( v ); + } + } catch( std::exception &e ) { + std::cerr << "I/O program failed: " << e.what() << "\n"; + return; + } +} + +template< typename Dtype > 
+void read_vector_data_from_array( + const std::vector &array, std::vector &data +) { + // Implementation for reading vector data from array + try { + for (size_t i = 0; i < array.size(); ++i) { + data.push_back(array[i]); + } + } catch (const std::exception &e) { + std::cerr << "Failed to read vector data from array: " << e.what() << "\n"; + return; + } +} + +template< + Backend backend, + grb::Descriptor descr = grb::descriptors::no_operation, + class Ring = Semiring< + grb::operators::add< JType >, grb::operators::mul< JType >, + grb::identities::zero, grb::identities::one + >, + typename Ttmp + > +EnergyType get_energy( + const grb::Matrix< JType, backend >& couplings, + const grb::Vector< JType, backend > &local_fields, + const grb::Vector< IOType, backend > &state, + grb::Vector< Ttmp, backend > &tmp, + const Ring &ring = Ring() + ){ + const size_t n = grb::size( local_fields ); + assert( n == grb::size( state ) ); + assert( n == grb::ncols( couplings ) ); + assert( n == grb::nrows( couplings ) ); + grb::resize( tmp, n ); + grb::RC rc = grb::SUCCESS; + EnergyType energy = 0.0; + constexpr auto dense_descr = descr | grb::descriptors::dense; + + rc = rc ? rc : grb::set< descr >( tmp, 0.0 ); + rc = rc ? rc : grb::mxv< dense_descr >( tmp, couplings, state, ring ); + rc = rc ? rc : grb::foldl< dense_descr >( tmp, static_cast< JType >( 0.5 ), ring.getMultiplicativeMonoid() ); + rc = rc ? rc : grb::foldl< dense_descr >( tmp, local_fields, ring.getAdditiveMonoid() ); + rc = rc ? rc : grb::dot< dense_descr >( energy, tmp, state, ring ); + assert( rc == grb::SUCCESS ); + + return energy; +} + +void ioProgram( const struct input &data_in, bool &success ) { + success = false; + + // Parse and store matrix in singleton class + // Map Storage tuple fields to meaningful names and wire up default data + auto &storage = Storage::getData(); + auto &n = std::get<0>(storage); // n (rows/cols) + auto &nnz = std::get<1>(storage); // nz (nonzeros) + auto &nsweeps_st = std::get<2>(storage); // nsweeps + auto &n_replicas_st = std::get<3>(storage); // n_replicas + auto &use_pt = std::get<4>(storage); // use_pt + auto &seed_st = std::get<5>(storage); // seed + auto &Jdata = std::get<6>(storage); // std::vector + auto &h = std::get<7>(storage); // std::vector + + try { + // Initialize metadata from input (allow CLI to override defaults) + (void) n; // initialized by read_matrix_* + (void) nnz; // initialized by read_matrix_* + nsweeps_st = data_in.nsweeps; + n_replicas_st = data_in.n_replicas; + use_pt = data_in.use_pt; + seed_st = data_in.seed; + + if ( data_in.use_default_data ) { + // if no file provided, use default data from file_content + read_matrix_data_from_array( test_data::j_matrix_data, Jdata ); + read_vector_data_from_array( test_data::h_array_data, h ); + // other data + } else { + // read from files if provided + read_matrix_data( data_in.filename_Jmatrix, Jdata, data_in.direct ); + if( std::strlen( data_in.filename_h ) > 0 ) { + read_vector_data( data_in.filename_h, h ); + }else{ + h.resize( n ); + std::fill( h.begin(), h.end(), static_cast< JType >( 0 ) ); + } + if(data_in.verify) { + if( std::strlen(data_in.filename_ref_solution) == 0 ) { + std::cerr << "Reference solution file not provided for verification\n"; + return; + } + } + } + } catch( std::exception &e ) { + std::cerr << "I/O program failed: " << e.what() << "\n"; + return; + } + + success = true; +} + +void grbProgram( + const struct input &data_in, + struct output &out +) { + std::cout<< "grbProgram: running simulated-annealing RE 
solver (stub)\n"; + + // get user process ID + const size_t s = spmd<>::pid(); + assert( s < spmd<>::nprocs() ); + + grb::utils::Timer timer; + timer.reset(); + + /* --- Problem setup --- */ + const size_t n = std::get<0>(Storage::getData()); + const size_t n_replicas = std::get<3>(Storage::getData()); + if( s == 0 ){ + std::cout << "problem size n = " << n << "\n"; + } + grb::Vector< JType > h( n ); + + // populate J with test (random) values + grb::RC rc = grb::SUCCESS; + + // load into GraphBLAS + grb::Matrix< JType > J( n, n ); + { + const auto &data = std::get<6>(Storage::getData()); + RC io_rc = buildMatrixUnique( + J, + utils::makeNonzeroIterator< + grb::config::RowIndexType, grb::config::ColIndexType, JType + >( data.cbegin() ), + utils::makeNonzeroIterator< + grb::config::RowIndexType, grb::config::ColIndexType, JType + >( data.cend() ), + SEQUENTIAL + ); + /* Once internal issue #342 is resolved this can be re-enabled + RC io_rc = buildMatrixUnique( + J, + utils::makeNonzeroIterator< + grb::config::RowIndexType, grb::config::ColIndexType, JType + >( data.cbegin() ), + utils::makeNonzeroIterator< + grb::config::RowIndexType, grb::config::ColIndexType, JType + >( data.cend() ), + PARALLEL + );*/ + io_rc = io_rc ? io_rc : wait(); + if( io_rc != SUCCESS ) { + std::cerr << "Failure: call to buildMatrixUnique did not succeed " + << "(" << toString( io_rc ) << ")." << std::endl; + out.error_code = 5; + return; + } + // make J symmetric + // grb::Matrix< JType > Jt ( n, n ); + // Jt = J; + // const grb::Monoid< grb::operators::add< JType >, grb::identities::zero > addMonoid; + // const grb::Monoid< grb::operators::mul< JType >, grb::identities::one > mulMonoid; + // grb::foldl< grb::descriptors::transpose_right >( J, Jt, addMonoid); // issue #210 + // grb::foldl<>( J, static_cast< JType >( 0.5 ), mulMonoid); + +#ifdef DEBUG_SARE + if( s == 0 && grb::ncols( J ) < 40 ) { + std::cout << "Matrix J:\n"; + print_matrix( J ); + } +#endif + } + + // build vector h with data from singleton + { + const auto &h_data = std::get<7>(Storage::getData()); + rc = rc ? rc : buildVector( + h, + h_data.cbegin(), + h_data.cend(), + SEQUENTIAL + ); + } + + // seed RNGs (C and C++ engines) using requested seed (hardcoded default 8 if not provided) + std::minstd_rand rng ( data_in.seed + s ); // rng or std::mt19937 + + // create states storage and initialize with random 1/0 values + std::vector< grb::Vector > states0; + std::vector< grb::Vector > states; + for ( size_t r = 0; r < n_replicas; ++r ) { + states0.emplace_back( grb::Vector(n) ); + states.emplace_back( grb::Vector(n) ); + // initialize with random values + std::uniform_int_distribution< unsigned short > randint(0,1); + // we use buildvectorUnique with a random set of indices + std::vector< IOType > rand_data; + for ( size_t i = 0; i < n; ++i ) { + rand_data.emplace_back( static_cast( + randint( rng ) ) ); + } + rc = rc ? rc : grb::buildVector( + states0.back(), + rand_data.cbegin(), + rand_data.cend(), + SEQUENTIAL + ); + rc = rc ? 
rc : grb::set( states.back(), states0.back() ); + } + + grb::Vector< EnergyType > tmp_energy ( n ); + EnergyType initial_energy = get_energy( J, h, states[0], tmp_energy ); + + for ( size_t r = 0; r < n_replicas; ++r ) { + const auto en = get_energy( J, h, states[r], tmp_energy ); + initial_energy = std::min( en, initial_energy ); + #ifdef DEBUG_SARE + if( s == 0 ) { + std::cout << "Initial state replica " << r << ":\n"; + print_vector( states[r], 30 ,"states values" ); + std::cout << "With energy " << en << "\n"; + std::cout << std::endl; + } + #endif + } + + // also make betas vector os size n_replicas and initialize with 10.0 + grb::Vector< JType > betas( n_replicas ); + grb::Vector< EnergyType > energies( n_replicas ); + for ( size_t r = 0; rc == grb::SUCCESS && r < n_replicas; ++r ) { + rc = rc ? rc : grb::setElement( betas, static_cast< JType >( (10.0) * std::pow( 2, r ) ), r ); + // rc = rc ? rc : grb::setElement( energies, get_energy( J, h, states[r], tmp_energy ), r ); + } + assert( rc == grb::SUCCESS ); + + grb::Vector< IOType > best_state ( n ); + + out.rep = data_in.rep; + // time a single call + if( out.rep == 0 ) { + timer.reset(); + rc = grb::algorithms::simulated_annealing_RE_Ising( + J, h, states, energies, betas, best_state, out.best_energy, data_in.nsweeps, data_in.reference_energy, data_in.use_pt, data_in.seed + ); + + rc = rc ? rc : wait(); + double single_time = timer.time(); + if( !(rc == SUCCESS || rc == FAILED) ) { + std::cerr << "Failure: call to Simulated Annealing RE did not succeed (" + << toString( rc ) << ")." << std::endl; + out.error_code = 20; + } + if( rc == FAILED ) { + std::cout << "Warning: call to Simulated Annealing RE did not converge\n"; + } + if( rc == SUCCESS ) { + rc = collectives<>::reduce( single_time, 0, operators::max< double >() ); + } + if( rc != SUCCESS ) { + out.error_code = 25; + } + out.times.useful = single_time; + out.rep = static_cast< size_t >( 1000.0 / single_time ) + 1; + if( rc == SUCCESS || rc == FAILED ) { + if( s == 0 ) { + if( rc == FAILED ) { + std::cout << "Info: cold Simulated Annealing RE did not converge within "; + } else { + std::cout << "Info: cold Simulated Annealing RE completed within "; + } + std::cout << out.iterations << " iterations. " + << "Time taken was " << single_time << " ms. " + << "Deduced inner repetitions parameter of " << out.rep << " " + << "to take 1 second or more per inner benchmark.\n"; + } + } + } else { + for( size_t i = 0; i < 2 ; ++i ){ + for ( size_t r = 0; r < n_replicas; ++r ) { + rc = rc ? rc : grb::set(states[r], states0[r]); + } + out.best_energy = std::numeric_limits< EnergyType >::max(); + rc = rc ? rc : grb::clear( energies ); + + rc = grb::algorithms::simulated_annealing_RE_Ising( + J, h, states, energies, betas, best_state, out.best_energy, data_in.nsweeps, data_in.reference_energy, data_in.use_pt, data_in.seed + i + ); + + assert( ISCLOSE( get_energy( J, h, best_state, tmp_energy ), out.best_energy) ); + } + // do benchmark + double min_time = 1e9; + double max_time = 0; + double total_time = 0; + for( size_t i = 0; i < out.rep && rc == SUCCESS; ++i ) { + for ( size_t r = 0; r < n_replicas; ++r ) { + rc = rc ? rc : grb::set(states[r], states0[r]); + } + out.best_energy = std::numeric_limits< EnergyType >::max(); + rc = rc ? 
rc : grb::clear( energies ); + timer.reset(); + if( rc == SUCCESS ) { + out.iterations = data_in.nsweeps; + + rc = grb::algorithms::simulated_annealing_RE_Ising( + J, h, states, energies, betas, best_state, out.best_energy, data_in.nsweeps, data_in.reference_energy, data_in.use_pt, data_in.seed + i + ); + } + if( grb::Properties<>::isNonblockingExecution ) { + rc = rc ? rc : wait(); + } + const double time_taken = timer.time(); + + assert( ISCLOSE( get_energy( J, h, best_state, tmp_energy ), out.best_energy) ); + min_time = std::min(min_time, time_taken); + max_time = std::max(max_time, time_taken); + total_time += time_taken; + std::cerr << n_replicas << "," << data_in.nsweeps << "," << time_taken << "," << out.best_energy << std::endl; + } + + out.times.useful = total_time / static_cast< double >( out.rep ); + // print timing at root process + if( s == 0 ) { + std::cout << "Average time taken for " << out.rep << " " + << "Simulated Annealing RE calls (hot start): " << out.times.useful << " ms. " + << "Error code is " << grb::toString( rc ) << std::endl; + std::cout << "\tnumber of SA-RE calls: " << out.rep << "\n"; + std::cout << "\tmilliseconds per iteration: " + << ( out.times.useful / static_cast< double >( out.iterations ) ) << "\n"; + std::cout << "\tMin Time: " << min_time << "\n"; + std::cout << "\tMax Time: " << max_time << "\n"; + + if( data_in.verify ){ + if( out.best_energy < initial_energy ){ + std::cout << "Test OK" << std::endl; + }else{ + std::cout << "Test FAILED" << std::endl; + } + } + } + sleep( 1 ); + } + + // start postamble + timer.reset(); + + // set error code + if( rc == FAILED ) { + out.error_code = 30; + } else if( rc != SUCCESS ) { + std::cerr << "Benchmark run returned error: " << toString( rc ) << "\n"; + out.error_code = 35; + return; + } +} + + +// --- Simple help / CLI parser for the new runner (no backward compatibility) --- +void printhelp( char *progname ) { + std::cout << "Usage: " << progname << " [--use-default-data] [--j-matrix-fname STR] [--h-fname STR]\n" + << " [--n-replicas INT] [--nsweeps INT] [--use-pt BOOL] [--seed INT]\n" + << " [--rep INT] [--goal FLOAT] [--verify] [--ref-solution-fname STR] [--help]\n\n" + << "Options:\n" + << " --use-default-data Use embedded default test data\n" + << " --j-matrix-fname STR Path to J matrix file (matrix-market or supported)\n" + << " --h-fname STR Path to h (local fields) vector (whitespace separated), if not provided assume zero\n" + << " --n-replicas INT Number of replicas (default: 3)\n" + << " --nsweeps INT Number of sweeps (default: 2)\n" + << " --use-pt BOOL Use Parallel Tempering (default: 1)\n" + << " --seed INT RNG seed (default: 8)\n" + << " --rep INT Number of times to repeat the run of the algorithm (default: 1)\n" + << " --goal FLOAT The value of the energy to achieve before stopping (default: 0, no such check).\n" + << " --verify Verify output against reference solution\n" + << " --ref-solution-fname STR Reference solution file (required with --verify unless using default data)\n" + << " --help, -h Print this help message\n"; +} + +bool parse_arguments( input &in, int argc, char ** argv ) { + std::fill( in.filename_Jmatrix, in.filename_Jmatrix + MAX_FN_SIZE, '\0' ); + std::fill( in.filename_h, in.filename_h + MAX_FN_SIZE, '\0' ); + std::fill( in.filename_ref_solution, in.filename_ref_solution + MAX_FN_SIZE, '\0' ); + in.direct = true; + // map benchmarking configuration to the runner's fields + in.rep = grb::config::BENCHMARKING::inner(); + in.outer = grb::config::BENCHMARKING::outer(); +
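+ // Note (clarifying comment): grbProgram interprets rep == 0 as a request to auto-calibrate, + // i.e. it times one cold call of the solver and derives the inner repetition count from that + // timing (see the out.rep == 0 branch above), so --rep 0 lets the benchmark size itself.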
in.reference_energy = static_cast( 0.0 ); + // keep verify default (false) unless overridden via CLI + in.verify = false; + + for ( int i = 1; i < argc; ++i ) { + std::string a = argv[i]; + if ( a == "--use-default-data" ) { + in.use_default_data = true; + } else if ( a == "--j-matrix-fname" ) { + if ( i+1 >= argc ) { std::cerr << "--j-matrix-fname requires an argument\n"; return false; } + std::strncpy( in.filename_Jmatrix, argv[++i], MAX_FN_SIZE ); + } else if ( a == "--h-fname" ) { + if ( i+1 >= argc ) { std::cerr << "--h-fname requires an argument\n"; return false; } + std::strncpy( in.filename_h, argv[++i], MAX_FN_SIZE ); + } else if ( a == "--n-replicas" ) { + if ( i+1 >= argc ) { std::cerr << "--n-replicas requires an argument\n"; return false; } + in.n_replicas = static_cast( std::stoul(argv[++i]) ); + } else if ( a == "--nsweeps" ) { + if ( i+1 >= argc ) { std::cerr << "--nsweeps requires an argument\n"; return false; } + in.nsweeps = static_cast( std::stoul(argv[++i]) ); + } else if ( a == "--use-pt" ) { + if ( i+1 >= argc ) { std::cerr << "--use-pt requires an argument\n"; return false; } + in.use_pt = static_cast( std::stoul(argv[++i]) ); + } else if ( a == "--rep" ) { + if ( i+1 >= argc ) { std::cerr << "--rep requires an argument\n"; return false; } + in.rep = static_cast( std::stoul(argv[++i]) ); + } else if ( a == "--seed" ) { + if ( i+1 >= argc ) { std::cerr << "--seed requires an argument\n"; return false; } + in.seed = static_cast( std::stoul(argv[++i]) ); + } else if ( a == "--goal" ) { + if ( i+1 >= argc ) { std::cerr << "--goal requires an argument\n"; return false; } + in.reference_energy = std::stof(argv[++i]); + } else if ( a == "--verify" ) { + in.verify = true; + } else if ( a == "--ref-solution-fname" ) { + if ( i+1 >= argc ) { std::cerr << "--ref-solution-fname requires an argument\n"; return false; } + std::strncpy( in.filename_ref_solution, argv[++i], MAX_FN_SIZE ); + } else if ( a == "--help" || a == "-h" ) { + printhelp( argv[0] ); + return false; + } else { + std::cerr << "Unknown argument: " << a << "\n"; + return false; + } + } + + // basic validation + if ( !in.use_default_data ) { + if ( std::strlen( in.filename_Jmatrix ) == 0 ) { + std::cerr << "Either --use-default-data or --j-matrix-fname must be provided\n"; + return false; + } + } + if ( in.verify && !in.use_default_data + && std::strlen( in.filename_ref_solution ) == 0 ) { + std::cerr << "--ref-solution-fname required when --verify is used without --use-default-data\n"; + return false; + } + return true; +} + +// --- Minimal main that uses the existing ioProgram / grbProgram entrypoints --- +int main( int argc, char ** argv ) { + std::cout << "simulated_annealing_re runner\n"; + input in; + output out; + + if ( !parse_arguments( in, argc, argv ) ) { + printhelp( argv[0] ); + return 1; + } + + + std::cout << "seed=" << in.seed << " n_replicas=" << in.n_replicas << " nsweeps=" << in.nsweeps << " sweep=ising_sweep" << "\n"; + + // Run IO program (populates Storage or similar) + { + bool success = false; + grb::Launcher< AUTOMATIC > launcher; + grb::RC rc = launcher.exec( &ioProgram, in, success, true ); + if ( rc != SUCCESS ) { + std::cerr << "I/O launcher failed: " << toString(rc) << "\n"; + return 2; + } + if ( !success ) { + std::cerr << "I/O program reported failure\n"; + return 3; + } + } + + // Run main GraphBLAS program that builds data and calls reSA stub + { + grb::Launcher< AUTOMATIC > launcher; + grb::RC rc = launcher.exec( &grbProgram, in, out, true ); + if ( rc != SUCCESS ) { +
std::cerr << "grbProgram launcher failed: " << toString(rc) << "\n"; + return 4; + } + } + + std::cout << "Finished: error_code=" << out.error_code << " iterations=" << out.rep << " best_energy=" << out.best_energy << "\n"; + return out.error_code; +} diff --git a/tests/smoke/simulated_annealing_re_planted_sol.cpp b/tests/smoke/simulated_annealing_re_planted_sol.cpp new file mode 100644 index 000000000..f6210806f --- /dev/null +++ b/tests/smoke/simulated_annealing_re_planted_sol.cpp @@ -0,0 +1,249 @@ +#include +#include +#include +#include +#include +#include + +#include +#include + +using QType = float; +using StateType = int8_t; +using EnergyType = double; + +template< grb::Backend backend > +void generate_sparse_planted_qubo( + int n, + int degree, + std::pair< QType, QType > weight_range, + grb::Vector< QType, backend > &Q_diag, + grb::Matrix< QType, backend > &Q_off, + grb::Vector< StateType, backend > &x_star, + double &E_star, + unsigned int seed = 0 +) { + std::minstd_rand rng( seed ); + std::uniform_int_distribution< StateType > int_dist(0, 1); + std::uniform_real_distribution< QType > weight_dist(weight_range.first, weight_range.second); + + std::vector< StateType > x ( n ); + for( auto &y : x ){ + y = int_dist( rng ); + } + grb::resize( x_star, n ); + grb::buildVector( x_star, x.begin(), x.end(), grb::SEQUENTIAL ); + + grb::clear( Q_diag ); + grb::clear( Q_off ); + E_star = 0.0; + + std::map< std::pair, QType > Q; + std::vector< QType > Qdiag ( n, 0 ); + + for (size_t i = 0; i < n; ++i) { + std::vector< size_t > neighbors; + for (size_t j = 0; j < n; ++j) { + if (j != i) neighbors.push_back(j); + } + std::shuffle( neighbors.begin(), neighbors.end(), rng ); + neighbors.resize( degree ); + + for (const auto&j : neighbors) { + if (j < i) continue; + + const double w = weight_dist( rng ); + const int b = x_star[i] ^ x_star[j]; + + if (b == 0) { + Qdiag[i] += w; + Qdiag[j] += w; + Q[{i, j}] -= 2*w; + Q[{j, i}] -= 2*w; + } else { + Qdiag[i] -= w; + Qdiag[j] -= w; + Q[{i, j}] += 2*w; + Q[{j, i}] += 2*w; + E_star += w; + } + } + } + std::vector< size_t > i, j; + std::vector< QType > v; + for(const auto &x : Q ){ + i.push_back( x.first.first ); + j.push_back( x.first.second ); + v.push_back( x.second ); + } + + grb::buildVector( Q_diag, Qdiag.begin(), Qdiag.end(), grb::SEQUENTIAL ); + grb::buildMatrixUnique( Q_off, + i.begin(), i.end(), + j.begin(), j.end(), + v.begin(), v.end(), + grb::SEQUENTIAL ); +} + +template< + grb::Backend backend, + grb::Descriptor descr = grb::descriptors::no_operation, + class Ring = grb::Semiring< + grb::operators::add< QType >, grb::operators::mul< QType >, + grb::identities::zero, grb::identities::one + >, + typename Ttmp + > +EnergyType get_energy( + const grb::Matrix< QType, backend >& couplings, + const grb::Vector< QType, backend > &local_fields, + const grb::Vector< StateType,backend > &state, + grb::Vector< Ttmp, backend > &tmp, + const Ring &ring = Ring() + ){ + const size_t n = grb::size( local_fields ); + assert( n == grb::size( state ) ); + assert( n == grb::ncols( couplings ) ); + assert( n == grb::nrows( couplings ) ); + grb::RC rc = grb::SUCCESS; + rc = rc ? rc : grb::resize( tmp, n ); + EnergyType energy = 0.0; + constexpr auto dense_descr = descr | grb::descriptors::dense; + + rc = rc ? rc : grb::set< descr >( tmp, 0.0 ); + rc = rc ? rc : grb::mxv< dense_descr >( tmp, couplings, state, ring ); + rc = rc ? rc : grb::foldl< dense_descr >( tmp, static_cast< QType >( 0.5 ), ring.getMultiplicativeMonoid() ); + rc = rc ? 
rc : grb::foldl< dense_descr >( tmp, local_fields, ring.getAdditiveMonoid() ); + rc = rc ? rc : grb::dot< dense_descr >( energy, tmp, state, ring ); + assert( rc == grb::SUCCESS ); + + return energy; +} + +template< grb::Backend backend > +bool brute_force_check( + const grb::Vector< QType, backend > &Q_diag, + const grb::Matrix< QType, backend > &Q_off, + const grb::Vector< StateType, backend > &x_star, + double E_star +) { + const int n = grb::size( x_star ); + EnergyType min_energy = 1e7; + std::vector< grb::Vector< StateType > > argmins; + assert( n < 8 * sizeof( int64_t ) ); + + grb::Vector< double, backend > tmp ( n ); + grb::Vector< StateType, backend > x ( n ); + for (int64_t bits = 0; bits < (1 << n); ++bits) { + for (int i = 0; i < n; ++i) { + grb::setElement( x, (bits >> i) & 1, i); + } + double E = get_energy( Q_off, Q_diag, x, tmp ); + + if (E < min_energy - 1e-9) { + min_energy = E; + argmins = {x}; + } else if (abs(E - min_energy) < 1e-9) { + argmins.push_back(x); + } + } + + std::cout << "Planted energy : " << -E_star << std::endl; + std::cout << "Minimum found : " << min_energy << std::endl; + std::cout << "# ground states : " << argmins.size() << std::endl; + + bool planted_ok = false; + for (const auto &x : argmins) { + if ( std::equal(x.begin(), x.end(), x_star.begin()) ) { + planted_ok = true; + break; + } + } + + std::cout << std::boolalpha; + std::cout << "planted_is_optimal: " << planted_ok << std::endl; + std::cout << "energy_matches: " << (std::abs(min_energy + E_star) < 1e-9) << std::endl; + std::cout << "degeneracy " << argmins.size() << std::endl; + return planted_ok; +} + +void grbProgram( const size_t&n, grb::RC &rc ) { + rc = grb::SUCCESS; + const int degree = 4; + const std::pair< QType, QType > weight_range = {1.0, 1.0}; + const unsigned int seed = 1; + + grb::Vector< QType > Q_diag ( n ); + grb::Matrix< QType > Q_off ( n, n ); + grb::Vector< StateType > x_star ( n ); + double E_star = 0.0; + + generate_sparse_planted_qubo( n, degree, weight_range, Q_diag, Q_off, x_star, E_star, seed ); + + const bool optimal = brute_force_check(Q_diag, Q_off, x_star, E_star); + // assert( optimal ); + + std::cout << "------------------ Test with SA-RE ----------------------" << std::endl; + grb::Vector< StateType > best_state ( n ); + EnergyType best_energy = 42; + constexpr bool use_pt = true; + constexpr EnergyType reference_energy = 0; + constexpr size_t nsweeps = 100; + constexpr size_t n_replicas = 16; + const size_t s = grb::spmd<>::pid(); + + std::minstd_rand rng ( seed + s ); // rng or std::mt19937 + + // create states storage and initialize with random 1/0 values + std::vector< grb::Vector< StateType > > states; + for ( size_t r = 0; r < n_replicas; ++r ) { + std::uniform_int_distribution< StateType > randint(0,1); + std::vector< StateType > rand_data; + for ( size_t i = 0; i < n; ++i ) { + rand_data.emplace_back( static_cast< StateType >( + randint( rng ) ) ); + } + states.emplace_back( n ); + rc = rc ? rc : grb::buildVector( + states.back(), + rand_data.cbegin(), + rand_data.cend(), + grb::SEQUENTIAL + ); + } + + // also make betas vector of size n_replicas and initialize with 10.0 + grb::Vector< QType > betas( n_replicas ); + grb::Vector< EnergyType > energies( n_replicas ); + grb::Vector< EnergyType > tmp_energy( n ); + for ( size_t r = 0; rc == grb::SUCCESS && r < n_replicas; ++r ) { + rc = rc ? rc : grb::setElement( betas, static_cast< QType >( (10.0) * std::pow( 2, r ) ), r ); + rc = rc ? 
rc : grb::setElement( energies, get_energy( Q_off, Q_diag, states[r], tmp_energy ), r ); + } + assert( rc == grb::SUCCESS ); + + rc = grb::algorithms::simulated_annealing_RE_Ising( + Q_off, Q_diag, states, energies, betas, best_state, best_energy, nsweeps, reference_energy, use_pt, seed + ); + assert( get_energy( Q_off, Q_diag, best_state, tmp_energy ) == best_energy ); + std::cout << "Optimized SA-RE value: " << best_energy << std::endl; + + if( best_energy != -E_star ){ + rc = grb::FAILED; + } +} + +int main() { + const size_t in = 18; + grb::RC out; + + grb::Launcher< grb::AUTOMATIC > launcher; + grb::RC rc = launcher.exec( &grbProgram, in, out, true ); + if ( rc != grb::SUCCESS ) { + std::cerr << "grbProgram launcher failed: " << toString(rc) << "\n"; + return 4; + } + std::cout << "Test " << (( out == grb::SUCCESS )? "OK" : "FAILED") << std::endl; + return 0; + +} diff --git a/tests/smoke/smoketests.sh b/tests/smoke/smoketests.sh index ce6c3ce6b..3e08561da 100755 --- a/tests/smoke/smoketests.sh +++ b/tests/smoke/smoketests.sh @@ -436,6 +436,19 @@ for BACKEND in ${BACKENDS[@]}; do grep "Test OK" ${TEST_OUT_DIR}/fuselets.log || echo "Test FAILED" echo " " fi + + if [ "$BACKEND" = "reference_omp" ] || [ "$BACKEND" = "reference" ] || [ "$BACKEND" = "hyperdags" ] || [ "$BACKEND" = "nonblocking" ]; then + echo ">>> [x] [ ] Testing Simulated Annealing-Replica Exchange on a" + echo " small 16x16 matrix." + echo "Functional test executable: ${TEST_BIN_DIR}/simulated_annealing_re_ising_${BACKEND}" + $runner ${TEST_BIN_DIR}/simulated_annealing_re_ising_${BACKEND} --use-default-data --verify &> ${TEST_OUT_DIR}/simulated_annealing_re_ising_${BACKEND}_${P}_${T}.log + ( grep "Test OK" ${TEST_OUT_DIR}/simulated_annealing_re_ising_${BACKEND}_${P}_${T}.log ) || printf 'Test FAILED.\n' + echo ">>> [x] [ ] Testing Simulated Annealing-Replica Exchange on an" + echo " 18x18 matrix with constructed optimal solution." + echo "Functional test executable: ${TEST_BIN_DIR}/simulated_annealing_re_planted_sol_${BACKEND}" + $runner ${TEST_BIN_DIR}/simulated_annealing_re_planted_sol_${BACKEND} &> ${TEST_OUT_DIR}/simulated_annealing_re_planted_sol_${BACKEND}_${P}_${T}.log + ( grep "Test OK" ${TEST_OUT_DIR}/simulated_annealing_re_planted_sol_${BACKEND}_${P}_${T}.log ) || printf 'Test FAILED.\n' + fi done done diff --git a/tests/smoke/test_qubo_parallel.py b/tests/smoke/test_qubo_parallel.py new file mode 100644 index 000000000..b81ad862d --- /dev/null +++ b/tests/smoke/test_qubo_parallel.py @@ -0,0 +1,664 @@ +""" +Copyright © 2023, United States Government, as represented by the Administrator +of the National Aeronautics and Space Administration. All rights reserved. + +The PySA, a powerful tool for solving optimization problems is licensed under +the Apache License, Version 2.0 (the "License"); you may not use this file +except in compliance with the License. You may obtain a copy of the License at +http://www.apache.org/licenses/LICENSE-2.0. + +Unless required by applicable law or agreed to in writing, software distributed +under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License.
+""" + +from more_itertools import distribute +from itertools import repeat +from multiprocessing import Pool +from os import cpu_count + +import numpy as np +from typing import List, Tuple, Any, Callable, Optional, NoReturn + + +Vector = List[float] +# Matrix = List[List[float]] instead for List[List[float]] we will use sparse matrix from scipy +from scipy.sparse import csr_matrix +# Define general type +dtype = 'float' +# set random seed +random_seed = 8 + +Vector = List[float] +Matrix = csr_matrix +State = List[float] +RefProbFun = Callable[[Vector, Optional[int]], float] +EnergyFunction = Callable[[Matrix, Vector, State], float] +UpdateSpinFunRef = Callable[ + [Matrix, Vector, Vector, int, float, float, RefProbFun], float] +UpdateSpinFunction = Callable[[Matrix, Vector, Vector, int, float, float], + float] +SweepFunction = Callable[[UpdateSpinFunction, Matrix, Vector, Vector, float], + float] + +def partition_rows_by_independent_sets(couplings: csr_matrix, *, method: str = "welsh_powell"): + """ + Partition the rows of a sparse symmetric matrix into independent sets (color classes) + so that within each set no two rows share a non-zero coupling (i.e. they are + pairwise non-adjacent in the row-interaction graph). + + This implements a greedy graph-coloring (Welsh-Powell) style algorithm on the + graph implied by the sparsity pattern of `couplings`. The intent is to produce + row blocks that can be updated in parallel (or in any order) without violating + sequential Metropolis dependencies. + + Parameters + - couplings: csr_matrix (shape [n, n]) sparse symmetric coupling matrix (only sparsity matters) + - method: currently only "welsh_powell" supported + + Returns + - blocks: list of lists, each inner list is a color class containing row indices + - colors: numpy array of length n with integer color for each row + + Complexity: O(n log n + m) where m is number of nonzeros (greedy ordering cost) + """ + + if not isinstance(couplings, csr_matrix): + couplings = csr_matrix(couplings) + + n = couplings.shape[0] + + # Build adjacency lists from the sparsity pattern (ignore diagonal) + indptr = couplings.indptr + indices = couplings.indices + + degrees = np.diff(indptr) + + # Order vertices by decreasing degree (Welsh-Powell) + order = np.argsort(-degrees) + + colors = -1 * np.ones(n, dtype=int) + blocks = [] + + for v in order: + if colors[v] != -1: + continue + + # Try to place v into the first existing color where it's independent + placed = False + for c_idx, block in enumerate(blocks): + # Check independence: v must not be adjacent to any node in block + # We can test by scanning neighbors of v and see if any have colors == c_idx + neigh = indices[indptr[v]:indptr[v+1]] + if not np.intersect1d(neigh, np.array(block)).size: + block.append(int(v)) + colors[v] = c_idx + placed = True + break + + if not placed: + # Create new color + colors[v] = len(blocks) + blocks.append([int(v)]) + + return blocks, colors + + +def compute_row_batches(couplings: csr_matrix, *, order: str = "color", return_row_blocks: bool = False): + """ + Compute a sequential list of batches (sizes) that cover all rows exactly once. + + The simplest use is to partition rows by independent sets using + `partition_rows_by_independent_sets` and then produce a list of batch sizes + (and optionally the row indices per batch) that should be executed + sequentially. This is useful for the lazy-accumulator sweep which wants + to update entire independent blocks before flushing or proceeding. 
+ + Parameters + - couplings: csr_matrix + - order: currently only 'color' supported (return blocks in color order) + - return_row_blocks: if True return (batches, row_blocks) else return batches + + Returns + - batches: list[int] of batch sizes whose sum equals n + - row_blocks (optional): list[list[int]] of row indices per batch + """ + + blocks, colors = partition_rows_by_independent_sets(couplings) + + # Optionally reorder blocks; for now 'color' is the natural order + row_blocks = blocks + batches = [len(b) for b in row_blocks] + + if return_row_blocks: + return batches, row_blocks + return batches + + + +def masked_row_dot(couplings: Matrix, D: np.ndarray, pos: int, bs: int) -> float: + """ + Compute the contribution of the accumulated deltas `D` to the local field + at row `pos`, i.e. return (J[row=pos] dot D). This models a masked-mxv + primitive that computes only the scalar contribution required for the + Metropolis decision at `pos` without materializing the whole h += J.dot(D). + + In Python we use the sparse row matvec; in C++ this should map to a + fast masked-mxv primitive or a pipeline read for the single scalar. + """ + # For CSR matrices getrow returns a 1xN sparse matrix; dot(D) returns a 1-elem array + # we use todense to simulate a fast masked-mxv primitive + return (couplings.todense()[pos:pos+bs, :].dot(D)) + + +def get_energy(couplings: Matrix, local_fields: Vector, state: State) -> float: + """ + Compute energy given couplings and local fields. + """ + # Ensure state is 0/1 + assert np.all((state == 0) | (state == 1)), "State must contain only 0 or 1 values." + # Ensure shapes are compatible + assert couplings.shape[0] == couplings.shape[1], "Couplings must be a square matrix." + assert couplings.shape[0] == state.shape[0], "State and couplings must have compatible shapes." + assert local_fields.shape[0] == state.shape[0], "Local fields and state must have compatible shapes." + return state.dot(couplings.dot(state) / 2 + local_fields) + + +def sequential_sweep_x(couplings: Matrix, local_fields: Vector, + state: State, beta: float) -> float: + """ + Metropolis sweep that preserves sequential Metropolis semantics but avoids + expensive per-row dense conversions by precomputing the local field + h = J.dot(state) + local_fields and updating h incrementally when a spin + flip is accepted. Works with CSR sparse `couplings`. + """ + + n = len(state) + + # Precompute local fields h = J.dot(state) + local_fields. In a lazy + # evaluation framework you'd typically start a pipeline here that + # represents h but we materialize once for correctness in the Python + # prototype. + h = couplings.dot(state) + local_fields + + # Random numbers (log uniform) + log_r = np.log(np.random.random(size=n)) + + delta_energy = 0.0 + + # D is the lazy accumulator vector: it holds pending 0/1 deltas that we + # haven't yet materialized into h. The lazy evaluator in your framework + # would instead record these into a pipeline and only execute when a + # reduction-to-scalar (dot, min, sum) requires it. + D = np.zeros(n, dtype=h.dtype) + + # iterate sequentially (Metropolis semantics). We allow D to accumulate + # multiple nonzeros. For each pos we compute the scalar contribution from + # D to h[pos] via a masked row dot (row J[pos,:] dot D) so we don't have + # to apply the whole matvec until we need to. 
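+ # Illustrative example (assumed toy values, not from the prototype run): with n = 4 and + # pending deltas D = [1, 0, 0, -1], the correction for row 2 is + # masked_row_dot(couplings, D, 2, 1) = J[2,0] - J[2,3], so only the row about to be + # visited reads the effect of earlier flips while h itself is not rewritten yet.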
+ + # Use precomputed batches/row_blocks attached to the matrix when available + # (compute_row_batches attaches them to the matrix as _row_blocks/_batches) + if hasattr(couplings, '_row_blocks') and hasattr(couplings, '_batches'): + row_blocks = getattr(couplings, '_row_blocks') + batches = getattr(couplings, '_batches') + else: + # Compute batches (row blocks) from the matrix sparsity pattern. Each + # block is an independent set and can be updated without internal + # dependencies. compute_row_batches returns sizes; request row blocks too. + batches, row_blocks = compute_row_batches(couplings, return_row_blocks=True) + + # Iterate sequentially over blocks. Inside each block rows are independent + # so they can be processed in any order (or in parallel) while preserving + # Metropolis semantics between blocks. + for block_idx, rows in enumerate(row_blocks): + # test_states for debugging + test_states = state.copy() + + # If there are pending deltas compute their effect on these rows only + # using a masked row dot over the block. masked_row_dot expects a + # starting pos and block size; we call it for each row in the block. + #row_contrib = np.asarray([masked_row_dot(couplings, D, r, 1).item() for r in rows]) + # rewritten as a dense matrix vector product for the block to simulate efficent code + print("rows:",rows) + row_contrib = couplings[rows, :].dot(D) + + # compute delta energies for the block + state_slice = np.array(state)[rows] + h_slice = np.array(h)[rows] + dn = (2.0 * state_slice - 1.0) * (h_slice + row_contrib) + + # Vectorized Metropolis decision for rows in this block + accept = (dn >= 0) | (log_r[rows] < beta * dn) + old = np.array(state)[rows] + new = np.where(accept, 1 - old, old) + delta_energy += -np.sum(dn * accept) + # Update state and D in place + for idx, r in enumerate(rows): + state[r] = new[idx] + D[r] += (new[idx] - old[idx]) + + + # Flush any remaining accumulated deltas into h before returning. This + # materializes the lazy pipeline; the framework could do this lazily at + # a later synchronization point instead. + # if np.any(D): + # apply_accumulated(couplings, D, h) + + return float(delta_energy) + + +def sequential_sweep_immediate(couplings: Matrix, local_fields: Vector, + state: State, beta: float, printinfo: bool = False) -> float: + """ + Immediate-update Metropolis sweep: on each accepted flip we update the + local-field vector `h` immediately by iterating the nonzeros of the + flipped row (neighbor updates). This is the standard efficient approach + for CSR matrices and is provided here for performance comparison against + the lazy `D`-accumulator approach. + + For simplicity in this prototype, we convert the sparse matrix to dense internally. + """ + n = len(state) + # Convert couplings to dense numpy array + dense_couplings = couplings.toarray() + h = dense_couplings.dot(state) + local_fields + log_r = np.log(np.random.random(size=n)) + delta_energy = 0.0 + + # Use same batching mechanism as sequential_sweep_x to improve locality. 
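+ # Illustrative note (not from the original prototype): the dense update below applies + # h += J[:, rows].dot(delta); e.g. if rows = [2, 5] and both 0 -> 1 flips are accepted, + # delta = [1, 1] and every h[i] gains J[i,2] + J[i,5] in one shot, which keeps later + # blocks consistent with the flips made in this one.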
+ if hasattr(couplings, '_row_blocks') and hasattr(couplings, '_batches'): + row_blocks = getattr(couplings, '_row_blocks') + batches = getattr(couplings, '_batches') + else: + batches, row_blocks = compute_row_batches(couplings, return_row_blocks=True) + + for block_idx, rows in enumerate(row_blocks): + # process rows in this independent block (vectorized) + if printinfo: + print("rows = ", rows) + rows = np.array(rows) + hi = h[rows] + si = state[rows] + dn = (2.0 * si - 1.0) * hi + + # Vectorized Metropolis decision + accept = (dn >= 0) | (log_r[rows] < beta * dn) + old = si + new = np.where(accept, 1 - old, old) + delta = new - old + + # Update state and delta_energy + state[rows] = new + delta_energy += -np.sum(dn * accept) + + # Update h for all spins (dense update) + if np.any(delta): + h += dense_couplings[:, rows].dot(delta) + + return float(delta_energy) + + + +def pt(states: List[State], energies: List[float], beta_idx: List[int], + betas: List[float]) -> NoReturn: + """ + Parallel tempering move. + states: [n_replicas, ...] Array of replicas + energies: [n_replicas] Array of energies of each replica + beta_idx: [n_replicas] The replica index currently assigned to each beta, + i.e. inverse temperature K is currently used for simulating replica beta_idx[K] + betas: [n_replicas] Sequential array of inverse temperatures. + + This function only modifies the order of beta_idx. + """ + print("pt(in):",energies[beta_idx], " beta(in):", betas[beta_idx]) + + # Get number of replicas + n_replicas = len(states) + + # Apply PT for each pair of replicas + for k in range(n_replicas - 1): + + # Get first index + k1 = n_replicas - k - 1 + + # Get second index + k2 = n_replicas - k - 2 + + # Compute delta energy + de = (energies[beta_idx[k1]] - energies[beta_idx[k2]]) * (betas[k1] - betas[k2]) + + # Accept/reject following Metropolis + if de >= 0 or np.random.random() < np.exp(de): + beta_idx[k1], beta_idx[k2] = beta_idx[k2], beta_idx[k1] + + print("pt(out):",energies[beta_idx], " beta(out):", betas[beta_idx]) + + + +def simulation_parallel_x( + sweep: SweepFunction, + couplings: Matrix, + local_fields: Vector, + states: List[State], + energies: List[float], + beta_idx: List[int], + betas: List[float], + n_sweeps: int, + get_part_fun: bool = False, + use_pt: bool = True) -> Tuple[State, float, int, int]: + """ + Apply simulation. + """ + + # Get number of replicas + n_replicas = len(states) + + # Best configuration/energy + _best_energy = np.copy(energies) + _best_state = np.copy(states) + _best_sweeps = np.zeros(n_replicas, dtype=np.int32) + betas_sorted = np.empty_like(betas) + log_omegas = np.zeros(n_sweeps) + + # For each run ... + for s in range(n_sweeps): + for k in range(n_replicas): + betas_sorted[beta_idx[k]] = betas[k] + # ... apply sweep for each replica ... + # interate k in random order to avoid bias + #print("betas_sorted: ",betas_sorted) + perm = np.random.permutation(n_replicas) + print("perm: ",perm) + for k in perm: # numba.prange(n_replicas): + + # Apply sweep + tmp = sweep(couplings, local_fields,states[k], betas_sorted[k]) + energies[k] += tmp + print("Replica ",k," energy=",energies[k]) + + # Store best state + if energies[k] < _best_energy[k]: + _best_energy[k] = energies[k] + _best_state[k] = np.copy(states[k]) + _best_sweeps[k] = s + + # ... and pt move. 
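+ # Exchange criterion applied by pt(): for adjacent temperature slots k1 = k2 + 1 it computes + # de = (E[beta_idx[k1]] - E[beta_idx[k2]]) * (betas[k1] - betas[k2]) and accepts the swap when + # de >= 0, or otherwise with probability exp(de) (the standard Metropolis rule for replica exchange).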
+ #print("beta_idx after sweep: ",beta_idx) + if use_pt: + pt(states, energies, beta_idx, betas) + #print("beta_idx after pt: ",beta_idx) + # Calculate the weights for the partition function + + # Get lowest energy + best_pos = np.argmin(_best_energy) + best_state = _best_state[best_pos] + best_energy = _best_energy[best_pos] + best_sweeps = _best_sweeps[best_pos] + + # Return states and energies + return ((states, energies, beta_idx, log_omegas), (best_state, best_energy, + best_sweeps, s + 1)) + + + +def get_min_energy(couplings: Matrix, local_fields: Vector): + + # Get number of variables + n_vars = couplings.shape[0] + # assert square matrix + assert couplings.shape[0] == couplings.shape[1], "Couplings must be a square matrix." + # assert compatible shapes + assert local_fields.shape[0] == n_vars, "Local fields must have the same number of variables as couplings." + min_energy = np.inf + best_state = np.array([0]*n_vars, dtype=dtype) + + # Find minimum energy by bruteforce + for state in range(2**n_vars): + # Transform state + spin_state_unsigned = np.array([int(x) for x in bin(state)[2:].zfill(n_vars)], dtype=dtype) + # Get energy for the state + assert np.all(np.isin(spin_state_unsigned, [0, 1])), "State contains values other than 0 and 1" + energy = get_energy(couplings, local_fields, spin_state_unsigned) + # Store only the minimum energy + if energy < min_energy: + min_energy = energy + best_state = np.copy(spin_state_unsigned) + + return min_energy, best_state + + +def gen_random_problem(n_vars: int, + dtype: Any = 'float', nzratio = 0.1, test_dense: bool = False, printinfo: bool = False) -> Tuple[Matrix, Vector]: + + # Generate random problem + if (test_dense): + couplings = 2 * np.random.random((n_vars, n_vars)).astype(dtype) - 1 + couplings = (couplings + couplings.T) / 2 + vals = couplings.flatten() + row = np.array([i for i in range(n_vars) for j in range(n_vars)]) + col = np.array([j for i in range(n_vars) for j in range(n_vars)]) + else: + # couplings are random sparse matrix instead of dense with nz none zero elements + nz = int(nzratio * n_vars * n_vars) + row = np.sort(np.random.randint(0, n_vars, nz)) + col = np.sort(np.random.randint(0, n_vars, nz)) + # make sure there are no duplicate entries ins same i,j pairs in (row,col) + unique = np.unique(np.array([row, col]).T) + row = row[unique] + col = col[unique] + vals = 2 * np.random.random(len(unique)).astype(dtype) - 1 + + couplings = csr_matrix((vals, (row, col)), shape=(n_vars, n_vars)) + couplings = (couplings + couplings.T) / 2 + diag_couplings = couplings.diagonal() + #set diagonal to zero + couplings = couplings - csr_matrix((diag_couplings, (np.arange(n_vars), np.arange(n_vars))), shape=(n_vars, n_vars)) + + if printinfo: + # print sparse matrix structure + # ie + # 0 0 0 1 0 + # 0 0 1 0 0 + # 0 1 0 0 0 + # print actual value for the unit test + print("Couplings matrix (indices and nonzero values in COO):") + print("Row indices:", couplings.nonzero()[0]) + print("Column indices:", couplings.nonzero()[1]) + print("Nonzero values:", couplings.data) + + print("Couplings matrix structure (*=nonzero, .=zero):") + dense_couplings = couplings.toarray() + for i in range(n_vars): + row_str = "" + for j in range(n_vars): + if dense_couplings[i,j] != 0: + row_str += "* " + else: + row_str += ". 
" + print("[{}]".format(i),"\t",row_str) + + # Split in couplings and local_fields + #local_fields = np.copy(np.diagonal(couplings)) + #make local_fields random instead of from diagonal + local_fields = 2 * np.random.random(n_vars).astype(dtype) - 1 + #print local_fields + if printinfo: + print("Local fields (random):", local_fields) + + return couplings, local_fields + + +def test_sequential_sweep_simulation_qubo(n_vars: int): + + n_replicas = 3 + print("n_vars =",n_vars) + print("n_replicas =",n_replicas) + + + # Generate random problem + couplings, local_fields = gen_random_problem(n_vars, dtype=dtype,printinfo=True) + + # Find minimum energy by bruteforce + min_energy,best_state_bruteforce = get_min_energy(couplings, local_fields) + + # Fix temperature + betas = np.array([10]*n_replicas, dtype=dtype) + print("Betas =",betas) + beta_idx = np.arange(n_replicas) + print("Initial beta_idx =",beta_idx) + # Get initial state + states = np.random.randint(2, size=(n_replicas, n_vars)).astype(dtype) + print("Initial states =") + for s in states: + print(s) + + # Compute energies + for s in states: + assert np.all(np.isin(s, [0, 1])), "State contains values other than 0 and 1" + energies = np.array( + [get_energy(couplings, local_fields, state) for state in states]) + print("Initial energies=",energies) + + # Simulate + print("beta_idx =",beta_idx) + nsweeps = 2 + (state, energy, _, _), (best_state, best_energy, _, _) = simulation_parallel_x( + sequential_sweep_immediate, + couplings, + local_fields, + states, + energies, + beta_idx, + betas, + nsweeps) + + # Check that best energy is correct + #ref_best_energy = -7.9322789708332255 # dense + ref_best_energy = -5.079571790854985 # dense + ref_nsweeps = 2 + ref_n_vars = 16 + ref_random_seed = 8 + # make sure parameters match reference + assert n_vars == ref_n_vars, f"n_vars {n_vars} does not match reference {ref_n_vars}" + assert nsweeps == ref_nsweeps, f"nsweeps {nsweeps} does not match reference {ref_nsweeps}" + assert random_seed == ref_random_seed, f"random_seed {random_seed} does not match reference {ref_random_seed}" + # best_energy + assert np.isclose(best_energy, ref_best_energy), f"best_energy {best_energy} does not match reference {ref_best_energy}" + + assert np.all(np.isin(best_state, [0, 1])), "State contains values other than 0 and 1" + qubo_energy = get_energy(couplings, local_fields, best_state) + + print("beta_idx(out) =",beta_idx) + + print("energy =",energy) + for e in energy: + print(e) + print("best_energy by solver = ",best_energy) + print("best_energy by bruteforce = ",min_energy) + + print("state(out) =") + for s in state: + print(s) + print("best state by solver = ",best_state) + print("best state by bruteforce = ",best_state_bruteforce) + + + assert (np.isclose(best_energy, + get_energy(couplings, local_fields, best_state))) + + # Best energy should be always larger than the minimum energy + assert (np.round(best_energy, 6) >= np.round(min_energy, 6)) + #assert (np.isclose(qubo_energy, ref_energy)) + + + +# ## test semantics of row-wise vs immediate update sweeps +# def test_rowwise_vs_immediate_equivalence( +# nzratio=0.25, +# seed0 = 12345, +# seed1 = 20241010, +# n_vars = 815, +# beta = 10.0): +# """ +# Cross-check that a row-wise sequential sweep implemented via +# `sequential_sweep_rowwise` (with an `update_spin` that follows the +# immediate-neighbor-update semantics) produces the same delta_energy and +# final state as `sequential_sweep_immediate` when both use the same RNG +# seed. 
+# """ +# def sequential_sweep_rowwise(couplings: Matrix, local_fields: Vector, +# state: State, beta: float) -> float: +# """ +# Metropolis update. +# """ +# def update_spin(couplings: Matrix, local_fields: Vector, state: State, pos: int, beta: float, log_r: float) -> float: +# """ +# Update spin accordingly to Metropolis update. +# """ +# # Ensure state is 0/1 +# assert np.all((state == 0) | (state == 1)), "State must contain only 0 or 1 values." +# # Ensure pos is valid +# assert 0 <= pos < state.shape[0], "pos index out of bounds." +# # Ensure shapes are compatible +# assert couplings.shape[0] == couplings.shape[1], "Couplings must be a square matrix." +# assert couplings.shape[0] == state.shape[0], "State and couplings must have compatible shapes." +# assert local_fields.shape[0] == state.shape[0], "Local fields and state must have compatible shapes." + +# # Get the negate delta energy (qubo) +# # delta_n_energy = (2. * state[pos] - 1.) * (couplings[pos].dot(state) + local_fields[pos]) +# # # we need to rewrite this for sparse matrix couplings, ie couplings[pos] for dense matrix becomes couplings.getrow(pos).toarray()[0] +# delta_n_energy = (2. * state[pos] - 1.) * (couplings.getrow(pos).toarray()[0].dot(state) + local_fields[pos]) + +# # Metropolis update +# if delta_n_energy >= 0 or log_r < beta * delta_n_energy: +# # Update spin (qubo) +# state[pos] = 0 if state[pos] else 1 +# # Return delta energy +# return -delta_n_energy +# else: +# # Otherwise, return no change in energy +# return 0. + +# # Get random numbers +# log_r = np.log(np.random.random(size=len(state))) + +# # Try to update every spin +# delta_energy = 0. +# for pos in range(len(state)): +# delta_energy += update_spin(couplings, local_fields, state, pos, beta,log_r[pos]) + +# return delta_energy + +# np.random.seed(seed0) +# couplings, local_fields = gen_random_problem(n_vars, dtype=dtype, nzratio=nzratio) + + +# # initial state +# init_state = np.random.randint(2, size=n_vars).astype(dtype) + +# # copy for both runs +# state_immediate = init_state.copy() +# state_rowwise = init_state.copy() + +# np.random.seed(seed1) +# delta_immediate = sequential_sweep_immediate(couplings, local_fields, state_immediate, beta) + +# # run rowwise with the same RNG sequence +# np.random.seed(seed1) +# delta_rowwise = sequential_sweep_rowwise(couplings, local_fields, state_rowwise, beta) + +# # Compare energies and final states +# assert np.isclose(delta_immediate, delta_rowwise), f"delta_energy differs: immediate={delta_immediate}, rowwise={delta_rowwise}" +# assert np.array_equal(state_immediate, state_rowwise), f"states differ after sweep: immediate={state_immediate}, rowwise={state_rowwise}" +# print("Rowwise and Immediate sweeps are equivalent.") + +# for i in range(5): +# test_rowwise_vs_immediate_equivalence(seed1=i) + +# run several time to check parallelism +for _ in range(4): + np.random.seed(random_seed) + test_sequential_sweep_simulation_qubo(n_vars=16) diff --git a/tests/utils/print_vec_mat.hpp b/tests/utils/print_vec_mat.hpp index 7a95242ed..1f9770cbf 100644 --- a/tests/utils/print_vec_mat.hpp +++ b/tests/utils/print_vec_mat.hpp @@ -90,7 +90,7 @@ void print_vector( os << it->second; (void) ++it; } else if( x_size > 0 ) { - os << 0; + os << '-'; } size_t next_nnz, position; next_nnz = it == end ? 
limit : it->first; @@ -100,14 +100,18 @@ void print_vector( // print sequence of zeroes for( ; position < zero_streak; ++position ) { os << ", "; - os << 0; + os << '-'; } if( position < limit ) { os << ", "; os << it->second; (void) ++position; - (void) ++it; - next_nnz = it->first; + if( it != end ){ + (void) ++it; + next_nnz = it->first; + }else{ + next_nnz = limit; + } } } os << std::endl << "==============" << std::endl << std::endl;