|
| 1 | +// VoronoiCFBatch.h — C++20 std::span-based batch CF evaluation interface |
| 2 | +// |
| 3 | +// Wraps the existing ic2cf / vcfunction / Vcontacts pipeline with zero-copy |
| 4 | +// std::span views for chromosome gene arrays and atom buffers. Designed for |
| 5 | +// the GA's inner evaluation loop where we need to score a large population of |
| 6 | +// chromosomes without redundant copies. |
| 7 | +// |
| 8 | +// Key features |
| 9 | +// • std::span<const gene> — read-only view of one chromosome's genes |
| 10 | +// • std::span<atom> — per-thread mutable atom view (no extra alloc) |
| 11 | +// • batch_eval() — parallel evaluation of chromosome population |
| 12 | +// • benchmark() — wall-clock comparison: serial vs OpenMP |
| 13 | +// |
| 14 | +// Apache-2.0 © 2026 Le Bonhomme Pharma |
| 15 | + |
| 16 | +#pragma once |
| 17 | + |
#include <algorithm>
#include <cassert>
#include <chrono>
#include <cmath>
#include <cstdio>
#include <cstring>
#include <functional>
#include <span>
#include <vector>
| 25 | + |
| 26 | +#include "flexaid.h" // cfstr, FA_Global, VC_Global, atom, resid, gene, genlim |
| 27 | +#include "gaboom.h" // chromosome, eval_chromosome, get_apparent_cf_evalue |
| 28 | +#include "Vcontacts.h" // atomsas, ca_struct, contactlist, ptindex, vertex, plane, edgevector |
| 29 | + |
| 30 | +#ifdef _OPENMP |
| 31 | +# include <omp.h> |
| 32 | +#endif |
| 33 | + |
| 34 | +namespace voronoi_cf { |
| 35 | + |
| 36 | +// ─── Span-based eval_chromosome overload ───────────────────────────────────── |
| 37 | +// |
| 38 | +// Identical semantics to the C-style eval_chromosome() in gaboom.cpp, but |
| 39 | +// accepts std::span for gene and genlim parameters, removing the reliance on |
| 40 | +// implicit size from GB->num_genes. |
| 41 | +// |
| 42 | +// The caller owns all pointed-to data; spans are non-owning views. |
| 43 | + |
| 44 | +inline cfstr eval_span( |
| 45 | + FA_Global* FA, |
| 46 | + GB_Global* GB, |
| 47 | + VC_Global* VC, |
| 48 | + std::span<const genlim> gene_lim, |
| 49 | + std::span<atom> atoms, |
| 50 | + std::span<resid> residue, |
| 51 | + gridpoint* cleftgrid, |
| 52 | + std::span<const gene> john, |
| 53 | + cfstr (*function)(FA_Global*, VC_Global*, atom*, resid*, gridpoint*, int, double*)) |
| 54 | +{ |
| 55 | + assert(john.size() == gene_lim.size()); |
| 56 | + assert(john.size() <= MAX_NUM_GENES); |
| 57 | + |
| 58 | + double icv[MAX_NUM_GENES] = {}; |
| 59 | + for (std::size_t i = 0; i < john.size(); ++i) { |
| 60 | + double val = john[i].to_ic; |
| 61 | + if (val > gene_lim[i].max) val = gene_lim[i].max; |
| 62 | + if (val < gene_lim[i].min) val = gene_lim[i].min; |
| 63 | + icv[i] = val; |
| 64 | + } |
| 65 | + return (*function)(FA, VC, |
| 66 | + atoms.data(), residue.data(), |
| 67 | + cleftgrid, |
| 68 | + static_cast<int>(john.size()), |
| 69 | + icv); |
| 70 | +} |
| 71 | + |
| 72 | +// ─── Per-thread workspace ───────────────────────────────────────────────────── |
| 73 | +// |
| 74 | +// Holds private copies of every mutable buffer that ic2cf/vcfunction/Vcontacts |
| 75 | +// write to, allowing lock-free parallel evaluation. |
| 76 | + |
// Per-thread private copies of every mutable buffer that the
// ic2cf / vcfunction / Vcontacts pipeline writes to, so threads can evaluate
// chromosomes without locks. `fa`/`vc` are *shallow* copies of the global
// state: only the pointer fields listed in the constructor are redirected to
// this workspace's vectors — any other pointer members of FA_Global/VC_Global
// still alias the shared originals (assumed read-only during evaluation;
// TODO confirm against ic2cf).
struct ThreadWorkspace {
    FA_Global fa; // shallow copy; contacts/contributions/optres redirected below
    VC_Global vc; // shallow copy; Calc..vedge redirected below

    // Owning storage backing the redirected pointers. Lifetime: must outlive
    // any use of `fa`/`vc` (guaranteed here by member ordering — vectors are
    // declared after fa/vc but destroyed before them only in reverse
    // declaration order, and nothing dereferences fa/vc during destruction).
    std::vector<atom> atoms;
    std::vector<resid> residue;
    std::vector<int> contacts;
    std::vector<float> contributions;
    std::vector<OptRes> optres;
    std::vector<atomsas> calc;
    std::vector<int> calclist;
    std::vector<int> ca_index;
    std::vector<ca_struct> ca_rec;
    std::vector<int> seed;
    std::vector<contactlist> contlist;
    std::vector<ptindex> ptorder;
    std::vector<vertex> centerpt;
    std::vector<vertex> poly;
    std::vector<plane> cont;
    std::vector<edgevector> vedge;

    // Construct and wire all buffers for the given FA/VC reference state.
    // ref_atoms/ref_residue provide the pristine per-chromosome starting
    // coordinates that reset() later restores.
    ThreadWorkspace(const FA_Global* FA, const VC_Global* VC,
                    const atom* ref_atoms, const resid* ref_residue)
    {
        const int natm  = FA->atm_cnt;       // total atoms (incl. virtual)
        const int natmr = FA->atm_cnt_real;  // real atoms only
        const int nres  = FA->res_cnt;
        const int nopt  = FA->num_optres;
        const int nctb  = FA->ntypes * FA->ntypes; // pairwise type-contribution table

        atoms        .assign(ref_atoms, ref_atoms + natm);
        residue      .assign(ref_residue, ref_residue + nres);
        // NOTE(review): 100000 mirrors what the global FA->contacts buffer is
        // presumably sized to at init — confirm against flexaid allocation code.
        contacts     .assign(100000, 0);
        contributions.assign(static_cast<std::size_t>(nctb), 0.0f);
        optres       .assign(FA->optres, FA->optres + nopt);
        calc         .resize(static_cast<std::size_t>(natmr));
        calclist     .resize(static_cast<std::size_t>(natmr));
        ca_index     .assign(static_cast<std::size_t>(natmr), -1);
        ca_rec       .resize(static_cast<std::size_t>(VC->ca_recsize));
        seed         .resize(static_cast<std::size_t>(3 * natmr));
        contlist     .resize(10000);
        ptorder      .resize(MAX_PT);
        centerpt     .resize(MAX_PT);
        poly         .resize(MAX_POLY);
        cont         .resize(MAX_PT);
        vedge        .resize(MAX_POLY);

        // Shallow-copy the globals, then point their mutable buffers at our
        // private storage. Assignment order matters: redirections must follow
        // the struct copies or they would be overwritten.
        fa = *FA;
        fa.contacts      = contacts     .data();
        fa.contributions = contributions.data();
        fa.optres        = optres       .data();

        vc = *VC;
        vc.Calc     = calc    .data();
        vc.Calclist = calclist.data();
        vc.ca_index = ca_index.data();
        vc.ca_rec   = ca_rec  .data();
        vc.seed     = seed    .data();
        vc.contlist = contlist.data();
        vc.ptorder  = ptorder .data();
        vc.centerpt = centerpt.data();
        vc.poly     = poly    .data();
        vc.cont     = cont    .data();
        vc.vedge    = vedge   .data();
    }

    // Reset per-chromosome mutable state without re-allocating: restore
    // pristine atom/residue coordinates, zero the per-residue CF accumulators,
    // and rewind the contact-record cursor.
    // NOTE(review): contacts/contributions are not cleared here — presumably
    // the pipeline fully rewrites them each evaluation; verify.
    void reset(const FA_Global* FA, const atom* ref_atoms, const resid* ref_residue)
    {
        const int nopt = FA->num_optres;
        std::copy(ref_atoms,   ref_atoms   + FA->atm_cnt, atoms  .begin());
        std::copy(ref_residue, ref_residue + FA->res_cnt, residue.begin());
        for (int o = 0; o < nopt; ++o) {
            optres[o].cf.com    = 0.0;
            optres[o].cf.wal    = 0.0;
            optres[o].cf.sas    = 0.0;
            optres[o].cf.totsas = 0.0;
            optres[o].cf.con    = 0.0;
            optres[o].cf.rclash = 0;
        }
        vc.numcarec = 0; // rewind the ca_rec write cursor
    }
};
| 161 | + |
| 162 | +// ─── Batch result ───────────────────────────────────────────────────────────── |
| 163 | + |
// Result of one batch_eval() call. Entries for chromosomes that were skipped
// (wrong status flag) keep their value-initialized cfstr and 0.0 evalue.
struct BatchResult {
    std::vector<cfstr> cf;          // one cfstr per chromosome, index-aligned with input
    std::vector<double> app_evalue; // apparent CF evalue for quick ranking (via get_apparent_cf_evalue)
    double wall_ms;                 // wall-clock evaluation time (ms), excludes workspace setup
};
| 169 | + |
| 170 | +// ─── batch_eval ────────────────────────────────────────────────────────────── |
| 171 | +// |
| 172 | +// Evaluate a population of chromosomes using OpenMP + std::span zero-copy |
| 173 | +// views. Results are written into BatchResult (does NOT modify chrom[]). |
| 174 | +// |
// chroms: read-only view of the chromosome array; only entries whose
//             status is 'n' (new) are evaluated, all others are skipped
| 176 | +// gene_lim: gene boundary array (std::span, bounds-safe) |
| 177 | +// function: the CF scoring function (ic2cf or cffunction) |
| 178 | + |
| 179 | +inline BatchResult batch_eval( |
| 180 | + std::span<const chromosome> chroms, |
| 181 | + FA_Global* FA, |
| 182 | + GB_Global* GB, |
| 183 | + VC_Global* VC, |
| 184 | + std::span<const genlim> gene_lim, |
| 185 | + const atom* ref_atoms, |
| 186 | + const resid* ref_residue, |
| 187 | + gridpoint* cleftgrid, |
| 188 | + cfstr (*function)(FA_Global*, VC_Global*, atom*, resid*, gridpoint*, int, double*)) |
| 189 | +{ |
| 190 | + const int N = static_cast<int>(chroms.size()); |
| 191 | + BatchResult result; |
| 192 | + result.cf .resize(static_cast<std::size_t>(N)); |
| 193 | + result.app_evalue.resize(static_cast<std::size_t>(N)); |
| 194 | + |
| 195 | + const int n_thr = |
| 196 | +#ifdef _OPENMP |
| 197 | + omp_get_max_threads(); |
| 198 | +#else |
| 199 | + 1; |
| 200 | +#endif |
| 201 | + |
| 202 | + // Build per-thread workspaces (heap-allocated; one per thread). |
| 203 | + std::vector<ThreadWorkspace> ws; |
| 204 | + ws.reserve(static_cast<std::size_t>(n_thr)); |
| 205 | + for (int t = 0; t < n_thr; ++t) |
| 206 | + ws.emplace_back(FA, VC, ref_atoms, ref_residue); |
| 207 | + |
| 208 | + auto t0 = std::chrono::steady_clock::now(); |
| 209 | + |
| 210 | +#ifdef _OPENMP |
| 211 | +#pragma omp parallel for schedule(dynamic, 4) default(none) \ |
| 212 | + shared(chroms, result, ws, gene_lim, cleftgrid, function, FA, GB, \ |
| 213 | + ref_atoms, ref_residue, N) |
| 214 | +#endif |
| 215 | + for (int i = 0; i < N; ++i) { |
| 216 | + if (chroms[i].status == 'n') continue; |
| 217 | +#ifdef _OPENMP |
| 218 | + const int tid = omp_get_thread_num(); |
| 219 | +#else |
| 220 | + const int tid = 0; |
| 221 | +#endif |
| 222 | + ws[tid].reset(FA, ref_atoms, ref_residue); |
| 223 | + |
| 224 | + std::span<const gene> gene_span(chroms[i].genes, |
| 225 | + static_cast<std::size_t>(GB->num_genes)); |
| 226 | + std::span<atom> atom_span(ws[tid].atoms); |
| 227 | + std::span<resid> res_span (ws[tid].residue); |
| 228 | + |
| 229 | + result.cf[static_cast<std::size_t>(i)] = eval_span( |
| 230 | + &ws[tid].fa, GB, &ws[tid].vc, |
| 231 | + gene_lim, atom_span, res_span, |
| 232 | + cleftgrid, gene_span, function); |
| 233 | + |
| 234 | + result.app_evalue[static_cast<std::size_t>(i)] = |
| 235 | + get_apparent_cf_evalue(&result.cf[static_cast<std::size_t>(i)]); |
| 236 | + } |
| 237 | + |
| 238 | + auto t1 = std::chrono::steady_clock::now(); |
| 239 | + result.wall_ms = |
| 240 | + std::chrono::duration<double, std::milli>(t1 - t0).count(); |
| 241 | + return result; |
| 242 | +} |
| 243 | + |
| 244 | +// ─── benchmark ──────────────────────────────────────────────────────────────── |
| 245 | +// |
| 246 | +// Times serial vs OpenMP batch_eval over `n_reps` repetitions and prints a |
| 247 | +// summary table to stdout. Returns the parallel result for correctness checks. |
| 248 | + |
// Summary produced by benchmark(): averaged serial and parallel timings plus
// the last parallel BatchResult so callers can validate correctness.
struct BenchmarkReport {
    double serial_ms;             // mean wall-clock ms per rep, 1 thread
    double parallel_ms;           // mean wall-clock ms per rep, n_threads
    double speedup;               // serial_ms / parallel_ms
    int n_threads;                // omp_get_max_threads() at entry (1 without OpenMP)
    int n_chroms;                 // population size evaluated
    BatchResult parallel_result;  // kept for correctness validation
};
| 257 | + |
| 258 | +inline BenchmarkReport benchmark( |
| 259 | + std::span<const chromosome> chroms, |
| 260 | + FA_Global* FA, |
| 261 | + GB_Global* GB, |
| 262 | + VC_Global* VC, |
| 263 | + std::span<const genlim> gene_lim, |
| 264 | + const atom* ref_atoms, |
| 265 | + const resid* ref_residue, |
| 266 | + gridpoint* cleftgrid, |
| 267 | + cfstr (*function)(FA_Global*, VC_Global*, atom*, resid*, gridpoint*, int, double*), |
| 268 | + int n_reps = 3) |
| 269 | +{ |
| 270 | + BenchmarkReport report{}; |
| 271 | + report.n_chroms = static_cast<int>(chroms.size()); |
| 272 | + |
| 273 | +#ifdef _OPENMP |
| 274 | + report.n_threads = omp_get_max_threads(); |
| 275 | +#else |
| 276 | + report.n_threads = 1; |
| 277 | +#endif |
| 278 | + |
| 279 | + // Serial baseline (force 1 thread) |
| 280 | + double serial_total = 0.0; |
| 281 | + for (int r = 0; r < n_reps; ++r) { |
| 282 | +#ifdef _OPENMP |
| 283 | + omp_set_num_threads(1); |
| 284 | +#endif |
| 285 | + auto res = batch_eval(chroms, FA, GB, VC, gene_lim, |
| 286 | + ref_atoms, ref_residue, cleftgrid, function); |
| 287 | + serial_total += res.wall_ms; |
| 288 | + } |
| 289 | + report.serial_ms = serial_total / n_reps; |
| 290 | + |
| 291 | + // Parallel (all cores) |
| 292 | + double par_total = 0.0; |
| 293 | + for (int r = 0; r < n_reps; ++r) { |
| 294 | +#ifdef _OPENMP |
| 295 | + omp_set_num_threads(report.n_threads); |
| 296 | +#endif |
| 297 | + report.parallel_result = batch_eval(chroms, FA, GB, VC, gene_lim, |
| 298 | + ref_atoms, ref_residue, |
| 299 | + cleftgrid, function); |
| 300 | + par_total += report.parallel_result.wall_ms; |
| 301 | + } |
| 302 | + report.parallel_ms = par_total / n_reps; |
| 303 | + report.speedup = report.serial_ms / report.parallel_ms; |
| 304 | + |
| 305 | + // Print benchmark table to stdout |
| 306 | + std::printf( |
| 307 | + "\n╔══ VoronoiCFBatch benchmark ══════════════════════════════════════╗\n" |
| 308 | + "║ Chromosomes : %6d ║\n" |
| 309 | + "║ Threads : %6d (OpenMP) ║\n" |
| 310 | + "║ Repetitions : %6d ║\n" |
| 311 | + "║──────────────────────────────────────────────────────────────────║\n" |
| 312 | + "║ Serial : %8.2f ms ║\n" |
| 313 | + "║ Parallel : %8.2f ms ║\n" |
| 314 | + "║ Speedup : %8.2f × ║\n" |
| 315 | + "╚══════════════════════════════════════════════════════════════════╝\n", |
| 316 | + report.n_chroms, report.n_threads, n_reps, |
| 317 | + report.serial_ms, report.parallel_ms, report.speedup); |
| 318 | + |
| 319 | + return report; |
| 320 | +} |
| 321 | + |
| 322 | +} // namespace voronoi_cf |
0 commit comments