@@ -92,11 +92,7 @@ def build_kmer_table(self, seq_buff):
9292 assert self .db .mode in {"w" , "a" }, "Index must be opened in write mode."
9393 assert "/Index" not in self .db .db , "Index has already been computed"
9494
95- # build suffix array with option to translate the sequence buffer first
96- sa = self ._build_suffixarray (
97- self .alphabet .translate (seq_buff ), len (self .db ._db_Protein )
98- )
99- self ._build_kmer_table (seq_buff , sa )
95+ self ._build_kmer_table (seq_buff )
10096
10197 @staticmethod
10298 def _build_suffixarray (seqs , n ):
@@ -110,7 +106,7 @@ def _build_suffixarray(seqs, n):
110106 sa [:n ].sort () # Sort delimiters by position
111107 return sa
112108
113- def _build_kmer_table (self , seq_buff , sa ):
109+ def _build_kmer_table (self , seq_buff ):
114110 @numba .njit (parallel = True , nogil = True )
115111 def _compute_mask_and_filter (
116112 sa , sa_mask , sa_filter , k , n , prot2spoff , prot2hogoff , sp_filter
@@ -277,11 +273,21 @@ def cumulate_counts_nfams(
277273
278274 return hog_occ / idx [- 1 ]
279275
276+ # build suffix array with option to translate the sequence buffer first
277+ sa = self ._build_suffixarray (
278+ self .alphabet .translate (seq_buff ), len (self .db ._db_Protein )
279+ )
280+
280281 LOG .debug (" - filter suffix array and compute its HOG mask" )
281282 n = len (self .db ._db_Protein )
282283 sa_mask = np .zeros (sa .shape , dtype = np .uint32 )
283284 sa_filter = np .zeros (sa .shape , dtype = np .bool_ )
284285
286+ import psutil
287+
288+ process = psutil .Process ()
289+ print ("1) Current RAM:" , process .memory_info ().rss )
290+
285291 _compute_mask_and_filter (
286292 sa ,
287293 sa_mask ,
@@ -293,9 +299,19 @@ def cumulate_counts_nfams(
293299 self .sp_filter ,
294300 )
295301
302+ print ("2) Current RAM:" , process .memory_info ().rss )
303+
304+
296305 # before filtering the sa, reorder and reverse the suffix filter
297306 sa = sa [~ sa_filter [sa ]]
298307
308+ #mask = ~sa_filter[sa]
309+ #keep = np.nonzero(mask)[0]
310+ #sa[:len(keep)] = sa[keep]
311+ #sa = sa[:len(keep)]
312+
313+ print ("3) Current RAM:" , process .memory_info ().rss )
314+
299315 # filter and reorder the mask according to this filtered sa
300316 sa_mask = sa_mask [sa ]
301317
@@ -325,9 +341,12 @@ def cumulate_counts_nfams(
325341 self .alphabet .DIGITS_AA_LOOKUP ,
326342 )
327343
344+ print ("4) Current RAM:" , process .memory_info ().rss )
345+
328346 # remove extra space
329347 table_buff = table_buff [:ii_table_buff ]
330348
349+
331350 LOG .debug (" - write k-mer table" )
332351 idx = self .db .db .create_group ("/" , "Index" , "hog indexes" )
333352 idx ._f_setattr ("k" , self .k )
0 commit comments