@@ -106,6 +106,7 @@ struct AlignerGlobalMyersBanded::InternalData
106106 InternalData (const DefaultDeviceAllocator& allocator, cudaStream_t stream)
107107 : seq_d(0 , allocator, stream)
108108 , seq_starts_d(0 , allocator, stream)
109+ , max_bandwidths_d(0 , allocator, stream)
109110 , scheduling_index_d(0 , allocator, stream)
110111 , scheduling_atomic_d(0 , allocator, stream)
111112 , results_d(0 , allocator, stream)
@@ -117,13 +118,15 @@ struct AlignerGlobalMyersBanded::InternalData
117118
118119 pinned_host_vector<char > seq_h;
119120 pinned_host_vector<int64_t > seq_starts_h;
121+ pinned_host_vector<int32_t > max_bandwidths_h;
120122 pinned_host_vector<int32_t > scheduling_index_h;
121123 pinned_host_vector<int8_t > results_h;
122124 pinned_host_vector<int32_t > result_counts_h;
123125 pinned_host_vector<int32_t > result_lengths_h;
124126 pinned_host_vector<int64_t > result_starts_h;
125127 device_buffer<char > seq_d;
126128 device_buffer<int64_t > seq_starts_d;
129+ device_buffer<int32_t > max_bandwidths_d;
127130 device_buffer<int32_t > scheduling_index_d;
128131 device_buffer<int32_t > scheduling_atomic_d;
129132 device_buffer<int8_t > results_d;
@@ -143,6 +146,7 @@ void AlignerGlobalMyersBanded::reallocate_internal_data(InternalData* const data
143146
144147 data->seq_h .clear ();
145148 data->seq_starts_h .clear ();
149+ data->max_bandwidths_h .clear ();
146150 data->scheduling_index_h .clear ();
147151 data->results_h .clear ();
148152 data->result_counts_h .clear ();
@@ -154,12 +158,14 @@ void AlignerGlobalMyersBanded::reallocate_internal_data(InternalData* const data
154158 data->seq_h .resize (2 * mem.sequence_memory / sizeof (char ));
155159
156160 data->seq_starts_h .reserve (2 * n_alignments_initial + 1 );
161+ data->max_bandwidths_h .reserve (n_alignments_initial);
157162 data->scheduling_index_h .reserve (n_alignments_initial);
158163 data->result_lengths_h .reserve (n_alignments_initial);
159164 data->result_starts_h .reserve (n_alignments_initial + 1 );
160165
161166 data->seq_d .free ();
162167 data->seq_starts_d .free ();
168+ data->max_bandwidths_d .free ();
163169 data->scheduling_index_d .free ();
164170 data->scheduling_atomic_d .free ();
165171 data->results_d .free ();
@@ -186,6 +192,7 @@ void AlignerGlobalMyersBanded::reallocate_internal_data(InternalData* const data
186192
187193 data->seq_d .clear_and_resize (2 * mem.sequence_memory / sizeof (char ));
188194 data->seq_starts_d .clear_and_resize (2 * n_alignments_initial + 1 );
195+ data->max_bandwidths_d .clear_and_resize (n_alignments_initial);
189196 data->scheduling_index_d .clear_and_resize (n_alignments_initial);
190197 data->scheduling_atomic_d .clear_and_resize (1 );
191198 data->results_d .clear_and_resize (mem.results_memory / sizeof (char ));
@@ -215,16 +222,23 @@ AlignerGlobalMyersBanded::AlignerGlobalMyersBanded(int64_t max_device_memory, in
215222AlignerGlobalMyersBanded::~AlignerGlobalMyersBanded () = default ;
216223
217224StatusType AlignerGlobalMyersBanded::add_alignment (const char * query, int32_t query_length, const char * target, int32_t target_length, bool reverse_complement_query, bool reverse_complement_target)
225+ {
226+ return AlignerGlobalMyersBanded::add_alignment (max_bandwidth_, query, query_length, target, target_length, reverse_complement_query, reverse_complement_target);
227+ }
228+
229+ StatusType AlignerGlobalMyersBanded::add_alignment (int32_t max_bandwidth, const char * query, int32_t query_length, const char * target, int32_t target_length, bool reverse_complement_query, bool reverse_complement_target)
218230{
219231 GW_NVTX_RANGE (profiler, " AlignerGlobalMyersBanded::add_alignment" );
232+ throw_on_negative (max_bandwidth, " query_length should not be negative" );
220233 throw_on_negative (query_length, " query_length should not be negative" );
221234 throw_on_negative (target_length, " target_length should not be negative" );
222235 if (query == nullptr || target == nullptr )
223236 return StatusType::generic_error;
224237
225- auto & seq_h = data_->seq_h ;
226- auto & seq_starts_h = data_->seq_starts_h ;
227- auto & result_starts_h = data_->result_starts_h ;
238+ auto & seq_h = data_->seq_h ;
239+ auto & seq_starts_h = data_->seq_starts_h ;
240+ auto & max_bandwidths_h = data_->max_bandwidths_h ;
241+ auto & result_starts_h = data_->result_starts_h ;
228242
229243 batched_device_matrices<WordType>& pvs = data_->pvs ;
230244 batched_device_matrices<WordType>& mvs = data_->mvs ;
@@ -243,7 +257,13 @@ StatusType AlignerGlobalMyersBanded::add_alignment(const char* query, int32_t qu
243257 assert (mvs.number_of_matrices () == scores.number_of_matrices ());
244258 assert (query_patterns.number_of_matrices () == scores.number_of_matrices ());
245259
246- const int64_t matrix_size = compute_matrix_size_for_alignment (query_length, target_length, max_bandwidth_);
260+ if (max_bandwidth > query_length)
261+ {
262+ // we need to quarantee max_bandwidth % word_size != 1.
263+ max_bandwidth = (query_length % word_size == 1 ? query_length + 1 : query_length);
264+ }
265+
266+ const int64_t matrix_size = compute_matrix_size_for_alignment (query_length, target_length, max_bandwidth);
247267 const int32_t n_words_query = ceiling_divide (query_length, word_size);
248268 const int32_t query_pattern_size = n_words_query * 4 ;
249269
@@ -266,6 +286,7 @@ StatusType AlignerGlobalMyersBanded::add_alignment(const char* query, int32_t qu
266286 genomeutils::copy_sequence (target, target_length, seq_h.data () + seq_start + query_length, reverse_complement_target);
267287 seq_starts_h.push_back (seq_start + query_length);
268288 seq_starts_h.push_back (seq_start + query_length + target_length);
289+ max_bandwidths_h.push_back (max_bandwidth);
269290 result_starts_h.push_back (result_starts_h.back () + query_length + target_length);
270291
271292 bool success = pvs.append_matrix (matrix_size);
@@ -299,13 +320,15 @@ StatusType AlignerGlobalMyersBanded::align_all()
299320 data_->scores .construct_device_matrices_async (stream_);
300321 data_->query_patterns .construct_device_matrices_async (stream_);
301322
302- const auto & seq_h = data_->seq_h ;
303- const auto & seq_starts_h = data_->seq_starts_h ;
304- auto & scheduling_index_h = data_->scheduling_index_h ;
305- const auto & result_starts_h = data_->result_starts_h ;
323+ const auto & seq_h = data_->seq_h ;
324+ const auto & seq_starts_h = data_->seq_starts_h ;
325+ const auto & max_bandwidths_h = data_->max_bandwidths_h ;
326+ auto & scheduling_index_h = data_->scheduling_index_h ;
327+ const auto & result_starts_h = data_->result_starts_h ;
306328
307329 auto & seq_d = data_->seq_d ;
308330 auto & seq_starts_d = data_->seq_starts_d ;
331+ auto & max_bandwidths_d = data_->max_bandwidths_d ;
309332 auto & scheduling_index_d = data_->scheduling_index_d ;
310333 auto & results_d = data_->results_d ;
311334 auto & result_counts_d = data_->result_counts_d ;
@@ -322,6 +345,10 @@ StatusType AlignerGlobalMyersBanded::align_all()
322345 {
323346 result_starts_d.clear_and_resize (n_alignments + 1 );
324347 }
348+ if (get_size (max_bandwidths_d) < n_alignments)
349+ {
350+ max_bandwidths_d.clear_and_resize (n_alignments);
351+ }
325352 if (get_size (result_lengths_d) < n_alignments)
326353 {
327354 result_lengths_d.clear_and_resize (n_alignments);
@@ -331,20 +358,22 @@ StatusType AlignerGlobalMyersBanded::align_all()
331358 scheduling_index_d.clear_and_resize (n_alignments);
332359 }
333360
361+ device_copy_n_async (seq_h.data (), seq_starts_h.back (), seq_d.data (), stream_);
362+ device_copy_n_async (seq_starts_h.data (), 2 * n_alignments + 1 , seq_starts_d.data (), stream_);
363+ device_copy_n_async (max_bandwidths_h.data (), n_alignments, max_bandwidths_d.data (), stream_);
364+ device_copy_n_async (result_starts_h.data (), n_alignments + 1 , result_starts_d.data (), stream_);
365+
334366 // Create an index of alignment tasks, which determines the processing
335367 // order. Sort this index by the sum of query and target lengths such
336368 // that large alignments get processed first.
337369 scheduling_index_h.resize (n_alignments);
338370 std::iota (begin (scheduling_index_h), end (scheduling_index_h), 0 );
339371 std::sort (begin (scheduling_index_h), end (scheduling_index_h), [&seq_starts_h](int32_t i, int32_t j) { return seq_starts_h[2 * i + 2 ] - seq_starts_h[2 * i] > seq_starts_h[2 * j + 2 ] - seq_starts_h[2 * j]; });
340372
341- device_copy_n_async (seq_h.data (), seq_starts_h.back (), seq_d.data (), stream_);
342- device_copy_n_async (seq_starts_h.data (), 2 * n_alignments + 1 , seq_starts_d.data (), stream_);
343373 device_copy_n_async (scheduling_index_h.data (), n_alignments, scheduling_index_d.data (), stream_);
344- device_copy_n_async (result_starts_h.data (), n_alignments + 1 , result_starts_d.data (), stream_);
345374
346375 myers_banded_gpu (results_d.data (), result_counts_d.data (), result_lengths_d.data (), result_starts_d.data (),
347- seq_d.data (), seq_starts_d.data (), scheduling_index_d.data (), data_->scheduling_atomic_d .data (), n_alignments, max_bandwidth_ ,
376+ seq_d.data (), seq_starts_d.data (), max_bandwidths_d. data (), scheduling_index_d.data (), data_->scheduling_atomic_d .data (), n_alignments,
348377 data_->pvs , data_->mvs , data_->scores , data_->query_patterns ,
349378 stream_);
350379 return StatusType::success;
@@ -434,6 +463,7 @@ void AlignerGlobalMyersBanded::reset_max_bandwidth(const int32_t max_bandwidth)
434463void AlignerGlobalMyersBanded::reset_data ()
435464{
436465 data_->seq_starts_h .clear ();
466+ data_->max_bandwidths_h .clear ();
437467 data_->result_lengths_h .clear ();
438468 data_->result_starts_h .clear ();
439469
0 commit comments