3838#include < bio/io/seq/reader.hpp>
3939#include < bio/ranges/views/complement.hpp>
4040#include < bio/ranges/views/translate_join.hpp>
41+ #if __cpp_lib_ranges <= 202106L
42+ # include < bio/ranges/views/persist.hpp>
43+ #endif
4144
4245#include < fmindex-collection/DenseCSA.h>
4346#include < fmindex-collection/locate.h>
@@ -260,6 +263,42 @@ void loadDbIndexFromDisk(
260263// Function loadQuery()
261264// --------------------------------------------------------------------------
262265
266+ template <DbIndexType c_indexType,
267+ AlphabetEnum c_origSbjAlph,
268+ AlphabetEnum c_transAlph,
269+ AlphabetEnum c_redAlph,
270+ AlphabetEnum c_origQryAlph>
271+ void loadQuery (GlobalDataHolder<c_indexType, c_origSbjAlph, c_transAlph, c_redAlph, c_origQryAlph> & globalHolder,
272+ LambdaOptions const & options)
273+ {
274+ using TGH = GlobalDataHolder<c_indexType, c_origSbjAlph, c_transAlph, c_redAlph, c_origQryAlph>;
275+
276+ double start = sysTime ();
277+
278+ std::string strIdent = " Loading Query Sequences..." ;
279+ myPrint (options, 1 , strIdent);
280+
281+ bio::io::seq::record r{.id = std::string{}, .seq = std::vector<typename TGH::TOrigQryAlph>{}, .qual = std::ignore};
282+ bio::io::seq::reader reader{options.queryFile , bio::io::seq::reader_options{.record = r}};
283+ for (auto & rec : reader)
284+ {
285+ globalHolder.qryIds .push_back (std::move (rec.id ));
286+ globalHolder.qrySeqs .push_back (std::move (rec.seq ));
287+ }
288+
289+ // parse the file completely and get count in one line:
290+ globalHolder.queryTotal = globalHolder.qrySeqs .size ();
291+
292+ // batch-size as set in options (unless too few sequences)
293+ globalHolder.records_per_batch = std::max<size_t >(
294+ std::min<size_t >(globalHolder.queryTotal / (options.threads * 10 ), options.maximumQueryBlockSize ),
295+ 1 );
296+ double finish = sysTime () - start;
297+ myPrint (options, 1 , " done.\n " );
298+
299+ myPrint (options, 2 , " Runtime: " , finish, " s \n\n " );
300+ }
301+
263302template <DbIndexType c_indexType,
264303 AlphabetEnum c_origSbjAlph,
265304 AlphabetEnum c_transAlph,
@@ -270,7 +309,7 @@ void countQuery(GlobalDataHolder<c_indexType, c_origSbjAlph, c_transAlph, c_redA
270309{
271310 double start = sysTime ();
272311
273- std::string strIdent = " Counting Query Sequences ..." ;
312+ std::string strIdent = " Counting Query Sequences..." ;
274313 myPrint (options, 1 , strIdent);
275314
276315 // TODO potentially optimise this for fasta/fastq with simple 'grep -c'
@@ -295,6 +334,26 @@ void countQuery(GlobalDataHolder<c_indexType, c_origSbjAlph, c_transAlph, c_redA
295334 myPrint (options, 2 , " Runtime: " , finish, " s \n\n " );
296335}
297336
337+ template <DbIndexType c_indexType,
338+ AlphabetEnum c_origSbjAlph,
339+ AlphabetEnum c_transAlph,
340+ AlphabetEnum c_redAlph,
341+ AlphabetEnum c_origQryAlph>
342+ auto createQryView (GlobalDataHolder<c_indexType, c_origSbjAlph, c_transAlph, c_redAlph, c_origQryAlph> & globalHolder,
343+ LambdaOptions const & options)
344+ {
345+ using TGH = GlobalDataHolder<c_indexType, c_origSbjAlph, c_transAlph, c_redAlph, c_origQryAlph>;
346+ bio::io::seq::record r{.id = std::string{}, .seq = std::vector<typename TGH::TOrigQryAlph>{}, .qual = std::ignore};
347+ bio::io::seq::reader reader{options.queryFile , bio::io::seq::reader_options{.record = r}};
348+
349+ #if __cpp_lib_ranges <= 202106L
350+ return std::move (reader) | bio::views::persist |
351+ views::async_input_buffer (globalHolder.records_per_batch * options.threads );
352+ #else
353+ return std::move (reader) | views::async_input_buffer (globalHolder.records_per_batch * options.threads );
354+ #endif
355+ }
356+
298357// / THREAD LOCAL STUFF
299358
300359// --------------------------------------------------------------------------
@@ -1284,8 +1343,8 @@ void iterativeSearchPost(auto & lH)
12841343 assert (subr2.size () == successfulCount);
12851344 }
12861345
1287- lH.qryIds . resize (lH.qryIds .size () - successfulCount);
1288- lH.qrySeqs . resize (lH.qrySeqs .size () - successfulCount);
1346+ lH.qryIds = lH. qryIds | std::views::take (lH.qryIds .size () - successfulCount);
1347+ lH.qrySeqs = lH. qrySeqs | std::views::take (lH.qrySeqs .size () - successfulCount);
12891348
12901349 /* only switch to PHASE2 if there are any left */
12911350 if (!lH.qryIds .empty ())
0 commit comments