@@ -213,7 +213,11 @@ thread_local! {
213213 static CACHE : std:: cell:: RefCell <( Cache , Vec <S >, Vec <S >) > = std:: cell:: RefCell :: new( Default :: default ( ) ) ;
214214}
215215
216- pub struct Builder < ' h , const CANONICAL : bool , H : KmerHasher , SkPos , const SYNCMER : bool > {
216+ /// `CANONICAL`: true for canonical minimizers.
217+ /// `H`: the kmer hasher to use.
218+ /// `SkPos`: type of super-k-mer position storage. Use `()` to disable super-k-mers.
219+ /// `SYNCMER`: 0 for minimizers, 1 for closed syncmers, 2 for open syncmers.
220+ pub struct Builder < ' h , const CANONICAL : bool , H : KmerHasher , SkPos , const SYNCMER : u8 > {
217221 k : usize ,
218222 w : usize ,
219223 hasher : Option < & ' h H > ,
@@ -228,7 +232,7 @@ pub struct Output<'o, const CANONICAL: bool, S> {
228232}
229233
230234#[ must_use]
231- pub const fn minimizers ( k : usize , w : usize ) -> Builder < ' static , false , NtHasher < false > , ( ) , false > {
235+ pub const fn minimizers ( k : usize , w : usize ) -> Builder < ' static , false , NtHasher < false > , ( ) , 0 > {
232236 Builder {
233237 k,
234238 w,
@@ -241,7 +245,7 @@ pub const fn minimizers(k: usize, w: usize) -> Builder<'static, false, NtHasher<
241245pub const fn canonical_minimizers (
242246 k : usize ,
243247 w : usize ,
244- ) -> Builder < ' static , true , NtHasher < true > , ( ) , false > {
248+ ) -> Builder < ' static , true , NtHasher < true > , ( ) , 0 > {
245249 Builder {
246250 k,
247251 w,
@@ -250,14 +254,17 @@ pub const fn canonical_minimizers(
250254 }
251255}
252256
253- /// Return positions/values of syncmers of length `k+w-1`.
257+ /// Return positions/values of *closed* syncmers of length `k+w-1`.
254258///
255259/// These are windows with the minimizer at the start or end of the window.
256260///
257261/// `k` here corresponds to `s` in original syncmer notation: the minimizer length.
258262/// `k+w-1` corresponds to `k` in original syncmer notation: the length of the extracted string.
259263#[ must_use]
260- pub const fn syncmers ( k : usize , w : usize ) -> Builder < ' static , false , NtHasher < false > , ( ) , true > {
264+ pub const fn closed_syncmers (
265+ k : usize ,
266+ w : usize ,
267+ ) -> Builder < ' static , false , NtHasher < false > , ( ) , 1 > {
261268 Builder {
262269 k,
263270 w,
@@ -267,10 +274,10 @@ pub const fn syncmers(k: usize, w: usize) -> Builder<'static, false, NtHasher<fa
267274}
268275
269276#[ must_use]
270- pub const fn canonical_syncmers (
277+ pub const fn canonical_closed_syncmers (
271278 k : usize ,
272279 w : usize ,
273- ) -> Builder < ' static , true , NtHasher < true > , ( ) , true > {
280+ ) -> Builder < ' static , true , NtHasher < true > , ( ) , 1 > {
274281 Builder {
275282 k,
276283 w,
@@ -279,7 +286,36 @@ pub const fn canonical_syncmers(
279286 }
280287}
281288
282- impl < const CANONICAL : bool , const SYNCMERS : bool >
289+ /// Return positions/values of *open* syncmers of length `k+w-1`.
290+ ///
291+ /// These are windows with the minimizer in the middle of the window. This requires `w` to be odd.
292+ ///
293+ /// `k` here corresponds to `s` in original syncmer notation: the minimizer length.
294+ /// `k+w-1` corresponds to `k` in original syncmer notation: the length of the extracted string.
295+ #[ must_use]
296+ pub const fn open_syncmers ( k : usize , w : usize ) -> Builder < ' static , false , NtHasher < false > , ( ) , 2 > {
297+ Builder {
298+ k,
299+ w,
300+ hasher : None ,
301+ sk_pos : ( ) ,
302+ }
303+ }
304+
305+ #[ must_use]
306+ pub const fn canonical_open_syncmers (
307+ k : usize ,
308+ w : usize ,
309+ ) -> Builder < ' static , true , NtHasher < true > , ( ) , 2 > {
310+ Builder {
311+ k,
312+ w,
313+ hasher : None ,
314+ sk_pos : ( ) ,
315+ }
316+ }
317+
318+ impl < const CANONICAL : bool , const SYNCMERS : u8 >
283319 Builder < ' static , CANONICAL , NtHasher < CANONICAL > , ( ) , SYNCMERS >
284320{
285321 #[ must_use]
@@ -295,14 +331,12 @@ impl<const CANONICAL: bool, const SYNCMERS: bool>
295331 }
296332 }
297333}
298- impl < ' h , const CANONICAL : bool , H : KmerHasher , const SYNCMERS : bool >
299- Builder < ' h , CANONICAL , H , ( ) , SYNCMERS >
300- {
334+ impl < ' h , const CANONICAL : bool , H : KmerHasher > Builder < ' h , CANONICAL , H , ( ) , 0 > {
301335 #[ must_use]
302336 pub const fn super_kmers < ' o2 > (
303337 & self ,
304338 sk_pos : & ' o2 mut Vec < u32 > ,
305- ) -> Builder < ' h , CANONICAL , H , & ' o2 mut Vec < u32 > , SYNCMERS > {
339+ ) -> Builder < ' h , CANONICAL , H , & ' o2 mut Vec < u32 > , 0 > {
306340 Builder {
307341 k : self . k ,
308342 w : self . w ,
@@ -313,7 +347,7 @@ impl<'h, const CANONICAL: bool, H: KmerHasher, const SYNCMERS: bool>
313347}
314348
315349/// Without-superkmer version
316- impl < ' h , const CANONICAL : bool , H : KmerHasher , const SYNCMERS : bool >
350+ impl < ' h , const CANONICAL : bool , H : KmerHasher , const SYNCMERS : u8 >
317351 Builder < ' h , CANONICAL , H , ( ) , SYNCMERS >
318352{
319353 pub fn run_scalar_once < ' s , SEQ : Seq < ' s > > ( & self , seq : SEQ ) -> Vec < u32 > {
@@ -355,35 +389,50 @@ impl<'h, const CANONICAL: bool, H: KmerHasher, const SYNCMERS: bool>
355389 . unwrap_or_else ( || default_hasher. as_ref ( ) . unwrap ( ) ) ;
356390
357391 CACHE . with_borrow_mut ( |cache| match ( SIMD , CANONICAL , SYNCMERS ) {
358- ( false , false , false ) => collect_and_dedup_into_scalar (
392+ ( false , false , 0 ) => collect_and_dedup_into_scalar (
359393 minimizers_seq_scalar ( seq, hasher, self . w , & mut cache. 0 ) ,
360394 min_pos,
361395 ) ,
362- ( false , false , true ) => collect_syncmers_scalar (
396+ ( false , false , 1 ) => collect_syncmers_scalar :: < false > (
363397 self . w ,
364398 minimizers_seq_scalar ( seq, hasher, self . w , & mut cache. 0 ) ,
365399 min_pos,
366400 ) ,
367- ( false , true , false ) => collect_and_dedup_into_scalar (
401+ ( false , false , 2 ) => collect_syncmers_scalar :: < true > (
402+ self . w ,
403+ minimizers_seq_scalar ( seq, hasher, self . w , & mut cache. 0 ) ,
404+ min_pos,
405+ ) ,
406+ ( false , true , 0 ) => collect_and_dedup_into_scalar (
368407 canonical_minimizers_seq_scalar ( seq, hasher, self . w , & mut cache. 0 ) ,
369408 min_pos,
370409 ) ,
371- ( false , true , true ) => collect_syncmers_scalar (
410+ ( false , true , 1 ) => collect_syncmers_scalar :: < false > (
372411 self . w ,
373412 canonical_minimizers_seq_scalar ( seq, hasher, self . w , & mut cache. 0 ) ,
374413 min_pos,
375414 ) ,
376- ( true , false , false ) => minimizers_seq_simd ( seq, hasher, self . w , & mut cache. 0 )
415+ ( false , true , 2 ) => collect_syncmers_scalar :: < true > (
416+ self . w ,
417+ canonical_minimizers_seq_scalar ( seq, hasher, self . w , & mut cache. 0 ) ,
418+ min_pos,
419+ ) ,
420+ ( true , false , 0 ) => minimizers_seq_simd ( seq, hasher, self . w , & mut cache. 0 )
377421 . collect_and_dedup_into :: < false > ( min_pos) ,
378- ( true , false , true ) => minimizers_seq_simd ( seq, hasher, self . w , & mut cache. 0 )
379- . collect_syncmers_into ( self . w , min_pos) ,
380- ( true , true , false ) => canonical_minimizers_seq_simd ( seq, hasher, self . w , & mut cache. 0 )
422+ ( true , false , 1 ) => minimizers_seq_simd ( seq, hasher, self . w , & mut cache. 0 )
423+ . collect_syncmers_into :: < false > ( self . w , min_pos) ,
424+ ( true , false , 2 ) => minimizers_seq_simd ( seq, hasher, self . w , & mut cache. 0 )
425+ . collect_syncmers_into :: < true > ( self . w , min_pos) ,
426+ ( true , true , 0 ) => canonical_minimizers_seq_simd ( seq, hasher, self . w , & mut cache. 0 )
381427 . collect_and_dedup_into :: < false > ( min_pos) ,
382- ( true , true , true ) => canonical_minimizers_seq_simd ( seq, hasher, self . w , & mut cache. 0 )
383- . collect_syncmers_into ( self . w , min_pos) ,
428+ ( true , true , 1 ) => canonical_minimizers_seq_simd ( seq, hasher, self . w , & mut cache. 0 )
429+ . collect_syncmers_into :: < false > ( self . w , min_pos) ,
430+ ( true , true , 2 ) => canonical_minimizers_seq_simd ( seq, hasher, self . w , & mut cache. 0 )
431+ . collect_syncmers_into :: < true > ( self . w , min_pos) ,
432+ _ => unreachable ! ( "SYNCMERS generic must be 0 (no syncmers), 1 (closed syncmers), or 2 (open syncmers)." ) ,
384433 } ) ;
385434 Output {
386- len : if SYNCMERS {
435+ len : if SYNCMERS != 0 {
387436 self . k + self . w - 1
388437 } else {
389438 self . k
@@ -394,7 +443,7 @@ impl<'h, const CANONICAL: bool, H: KmerHasher, const SYNCMERS: bool>
394443 }
395444}
396445
397- impl < ' h , H : KmerHasher , const SYNCMERS : bool > Builder < ' h , true , H , ( ) , SYNCMERS > {
446+ impl < ' h , H : KmerHasher , const SYNCMERS : u8 > Builder < ' h , true , H , ( ) , SYNCMERS > {
398447 pub fn run_skip_ambiguous_windows_once < ' s > ( & self , nseq : PackedNSeq < ' s > ) -> Vec < u32 > {
399448 let mut min_pos = vec ! [ ] ;
400449 self . run_skip_ambiguous_windows ( nseq, & mut min_pos) ;
@@ -419,13 +468,18 @@ impl<'h, H: KmerHasher, const SYNCMERS: bool> Builder<'h, true, H, (), SYNCMERS>
419468 . hasher
420469 . unwrap_or_else ( || default_hasher. as_ref ( ) . unwrap ( ) ) ;
421470 match SYNCMERS {
422- false => canonical_minimizers_skip_ambiguous_windows ( nseq, hasher, self . w , cache)
471+ 0 => canonical_minimizers_skip_ambiguous_windows ( nseq, hasher, self . w , cache)
423472 . collect_and_dedup_into :: < true > ( min_pos) ,
424- true => canonical_minimizers_skip_ambiguous_windows ( nseq, hasher, self . w , cache)
425- . collect_syncmers_into ( self . w , min_pos) ,
473+ 1 => canonical_minimizers_skip_ambiguous_windows ( nseq, hasher, self . w , cache)
474+ . collect_syncmers_into :: < false > ( self . w , min_pos) ,
475+ 2 => canonical_minimizers_skip_ambiguous_windows ( nseq, hasher, self . w , cache)
476+ . collect_syncmers_into :: < true > ( self . w , min_pos) ,
477+ _ => panic ! (
478+ "SYNCMERS generic must be 0 (no syncmers), 1 (closed syncmers), or 2 (open syncmers)."
479+ ) ,
426480 }
427481 Output {
428- len : if SYNCMERS {
482+ len : if SYNCMERS != 0 {
429483 self . k + self . w - 1
430484 } else {
431485 self . k
@@ -440,7 +494,7 @@ impl<'h, H: KmerHasher, const SYNCMERS: bool> Builder<'h, true, H, (), SYNCMERS>
440494///
441495/// (does not work in combination with syncmers)
442496impl < ' h , ' o2 , const CANONICAL : bool , H : KmerHasher >
443- Builder < ' h , CANONICAL , H , & ' o2 mut Vec < u32 > , false >
497+ Builder < ' h , CANONICAL , H , & ' o2 mut Vec < u32 > , 0 >
444498{
445499 pub fn run_scalar_once < ' s , SEQ : Seq < ' s > > ( self , seq : SEQ ) -> Vec < u32 > {
446500 let mut min_pos = vec ! [ ] ;
0 commit comments