Skip to content

Commit 105b973

Browse files
improve insertion performance in SortedDoubleLoop
1 parent ef9ad88 commit 105b973

File tree

2 files changed

+14
-3
lines changed

2 files changed

+14
-3
lines changed

include/macis/asci/iteration.hpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@
1313

1414
namespace macis {
1515

16-
template <size_t N, typename index_t = int32_t>
16+
template <size_t N, typename index_t>
1717
auto asci_iter(ASCISettings asci_settings, MCSCFSettings mcscf_settings,
1818
size_t ndets_max, double E0, std::vector<wfn_t<N>> wfn,
1919
std::vector<double> X, HamiltonianGenerator<wfn_t<N>>& ham_gen,

include/macis/hamiltonian_generator/sorted_double_loop.hpp

Lines changed: 13 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -84,8 +84,13 @@ class SortedDoubleLoopHamiltonianGenerator
8484
// size_t skip1 = 0;
8585
// size_t skip2 = 0;
8686

87-
std::mutex coo_mat_thread_mutex;
88-
87+
auto fast_insert = [](auto& old_vec, auto&& new_vec) {
88+
if(old_vec.size() == 0) old_vec = std::move(new_vec);
89+
else {
90+
old_vec.reserve(old_vec.size() + new_vec.size());
91+
old_vec.insert(old_vec.end(), new_vec.begin(), new_vec.end());
92+
}
93+
};
8994
// Loop over uniq alphas in bra/ket
9095
auto pop_st = std::chrono::high_resolution_clock::now();
9196
#pragma omp parallel
@@ -197,12 +202,18 @@ class SortedDoubleLoopHamiltonianGenerator
197202
// Atomically insert into larger matrix arrays
198203
#pragma omp critical
199204
{
205+
#if 0
200206
row_ind.insert(row_ind.end(), row_ind_loc.begin(), row_ind_loc.end());
201207
// row_ind_loc.clear(); row_ind_loc.shrink_to_fit();
202208
col_ind.insert(col_ind.end(), col_ind_loc.begin(), col_ind_loc.end());
203209
// col_ind_loc.clear(); col_ind_loc.shrink_to_fit();
204210
nz_val.insert(nz_val.end(), nz_val_loc.begin(), nz_val_loc.end());
205211
// nz_val_loc.clear(); nz_val_loc.shrink_to_fit();
212+
#else
213+
fast_insert(row_ind, row_ind_loc);
214+
fast_insert(col_ind, col_ind_loc);
215+
fast_insert(nz_val, nz_val_loc );
216+
#endif
206217
}
207218

208219
} // OpenMP

0 commit comments

Comments
 (0)