Skip to content

Commit 1f61916

Browse files
committed
[WIP] debug output to test RAM consumption during table_idx creation
1 parent af72820 commit 1f61916

File tree

2 files changed: +26 additions, -7 deletions

omamer/alphabets.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -122,6 +122,6 @@ def DIGITS_AA(self):
122122
def get_transform(k, DIGITS_AA):
123123
# k-mer transformation
124124
t = np.zeros(k, dtype=np.uint64)
125-
for i in numba.prange(k):
125+
for i in range(k):
126126
t[i] = len(DIGITS_AA) ** (k - (i + 1))
127127
return t

omamer/index.py

Lines changed: 25 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -92,11 +92,7 @@ def build_kmer_table(self, seq_buff):
9292
assert self.db.mode in {"w", "a"}, "Index must be opened in write mode."
9393
assert "/Index" not in self.db.db, "Index has already been computed"
9494

95-
# build suffix array with option to translate the sequence buffer first
96-
sa = self._build_suffixarray(
97-
self.alphabet.translate(seq_buff), len(self.db._db_Protein)
98-
)
99-
self._build_kmer_table(seq_buff, sa)
95+
self._build_kmer_table(seq_buff)
10096

10197
@staticmethod
10298
def _build_suffixarray(seqs, n):
@@ -110,7 +106,7 @@ def _build_suffixarray(seqs, n):
110106
sa[:n].sort() # Sort delimiters by position
111107
return sa
112108

113-
def _build_kmer_table(self, seq_buff, sa):
109+
def _build_kmer_table(self, seq_buff):
114110
@numba.njit(parallel=True, nogil=True)
115111
def _compute_mask_and_filter(
116112
sa, sa_mask, sa_filter, k, n, prot2spoff, prot2hogoff, sp_filter
@@ -277,11 +273,21 @@ def cumulate_counts_nfams(
277273

278274
return hog_occ / idx[-1]
279275

276+
# build suffix array with option to translate the sequence buffer first
277+
sa = self._build_suffixarray(
278+
self.alphabet.translate(seq_buff), len(self.db._db_Protein)
279+
)
280+
280281
LOG.debug(" - filter suffix array and compute its HOG mask")
281282
n = len(self.db._db_Protein)
282283
sa_mask = np.zeros(sa.shape, dtype=np.uint32)
283284
sa_filter = np.zeros(sa.shape, dtype=np.bool_)
284285

286+
import psutil
287+
288+
process = psutil.Process()
289+
print("1) Current RAM:", process.memory_info().rss)
290+
285291
_compute_mask_and_filter(
286292
sa,
287293
sa_mask,
@@ -293,9 +299,19 @@ def cumulate_counts_nfams(
293299
self.sp_filter,
294300
)
295301

302+
print("2) Current RAM:", process.memory_info().rss)
303+
304+
296305
# before filtering the sa, reorder and reverse the suffix filter
297306
sa = sa[~sa_filter[sa]]
298307

308+
#mask = ~sa_filter[sa]
309+
#keep = np.nonzero(mask)[0]
310+
#sa[:len(keep)] = sa[keep]
311+
#sa = sa[:len(keep)]
312+
313+
print("3) Current RAM:", process.memory_info().rss)
314+
299315
# filter and reorder the mask according to this filtered sa
300316
sa_mask = sa_mask[sa]
301317

@@ -325,9 +341,12 @@ def cumulate_counts_nfams(
325341
self.alphabet.DIGITS_AA_LOOKUP,
326342
)
327343

344+
print("4) Current RAM:", process.memory_info().rss)
345+
328346
# remove extra space
329347
table_buff = table_buff[:ii_table_buff]
330348

349+
331350
LOG.debug(" - write k-mer table")
332351
idx = self.db.db.create_group("/", "Index", "hog indexes")
333352
idx._f_setattr("k", self.k)

0 commit comments

Comments
 (0)