Skip to content

Commit 0dba01c

Browse files
committed
Update perChromSqlite.py
1 parent f2a4917 commit 0dba01c

File tree

1 file changed

+24
-2
lines changed

1 file changed

+24
-2
lines changed

src/perChromSqlite.py

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,11 @@
1212
import perChrom
1313
import sqlite3
1414
import perChrom
15-
15+
# tqdm is an optional dependency used only for progress reporting.  When it
# is not installed, bind the name to a falsy sentinel so later code can test
# `if tqdm:` before touching it.  Only ImportError is caught: a bare except
# would also hide KeyboardInterrupt/SystemExit and unrelated failures.
try:
    import tqdm
except ImportError:
    print('Cannot import tqdm. Will not use tqdm.')
    tqdm = False
1620
# # Global variables
1721
# con = None
1822
# df_mutations = None
@@ -211,6 +215,13 @@ def save_mutation_and_proteins(df_transcript3, outprefix):
211215
if pd.notnull(r['new_AA']) and r['new_AA'] != r['AA_seq']:
212216
fout.write('>{}\tchanged\n{}\n'.format(r['protein_id_fasta'], r['new_AA']))
213217
fout.close()
218+
# Define the wrapper function needed for imap
219+
def translate_wrapper(args):
    """Call ``perChrom.translateCDSplusWithMut2`` with a packed argument pair.

    ``Pool.imap`` hands each task to its callable as a single object, so the
    two positional arguments are bundled by the caller and unpacked here.

    Args:
        args: A two-item sequence ``(row_series, mutations_df)``.

    Returns:
        Whatever ``perChrom.translateCDSplusWithMut2`` returns for that pair.
    """
    transcript_row, mutations = args
    translated = perChrom.translateCDSplusWithMut2(transcript_row, mutations)
    return translated
224+
214225

215226
class PerChrom_sqlite(object):
216227
"""
@@ -292,6 +303,8 @@ def __init__(
292303
self.df_mutations['chr'] = chromosome
293304
elif chromosome.startswith('chr') and chromosome[3:] in chromosome_with_genomicLocs:
294305
self.df_mutations['chr'] = chromosome[3:]
306+
elif 'chr' + chromosome in chromosome_with_genomicLocs:
307+
self.df_mutations['chr'] = 'chr' + chromosome
295308
else:
296309
print(f'warning! chromosome {chromosome} not in file_sqlite')
297310

@@ -351,7 +364,16 @@ def run_perChrom(self, save_results = True):
351364

352365
if cpu_counts > 1:
    pool = Pool(cpu_counts)
    # One [row, mutations] pair per transcript; translate_wrapper unpacks it.
    starmap_args = [[r, df_mutations] for _, r in df_transcript3.iterrows()]

    # The task count has to be known before the chunk size is derived from it
    # (the previous order read total_tasks before it was assigned).
    total_tasks = df_transcript3.shape[0]
    # Aim for roughly four chunks per worker, capped at 200 tasks per chunk.
    # Clamp to at least 1: the integer division yields 0 whenever there are
    # fewer than 4 * cpu_counts tasks, and Pool.imap rejects a chunksize < 1.
    chunk_size = max(1, min(200, total_tasks // (cpu_counts * 4)))

    # imap yields results lazily and in input order, which lets tqdm show
    # progress as tasks complete.
    imap_results = pool.imap(translate_wrapper, starmap_args, chunksize=chunk_size)
    if tqdm:
        results = list(tqdm.tqdm(imap_results, total=total_tasks, desc=f"Translating {chromosome}"))
    else:
        results = list(imap_results)
    pool.close()
    pool.join()
357379
else:

0 commit comments

Comments
 (0)