Skip to content

Commit 5a35c52

Browse files
authored
Update chemosanitizer.py
1 parent 9c274d8 commit 5a35c52

File tree

1 file changed

+3
-7
lines changed

1 file changed

+3
-7
lines changed

src/2_curating/2_editing/structure/3_processingAndEnriching/chemosanitizer.py

Lines changed: 3 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -47,14 +47,10 @@
47 47
print('your dataframe is not empty :)')
48 48

49 49
df = df[df[smiles_column_header].notnull()]
50-
df_chunks = np.array_split(df, cpus)
50+
chunk_size = len(df) // cpus + 1
51+
df_chunks = [df.iloc[i:i+chunk_size] for i in range(0, len(df), chunk_size)]
51 52
f = CleaningFunc(smiles_column_header).f
52-
53-
# old multiprocessing version
54-
# with multiprocessing.Pool(cpus) as pool:
55-
# processed_df = pd.concat(pool.map(f, df_chunks), ignore_index=True)
56-
57-
with concurrent.futures.ThreadPoolExecutor(max_workers=cpus) as executor:
53+
with concurrent.futures.ProcessPoolExecutor(max_workers=cpus) as executor:
58 54
processed_list = list(tqdm(executor.map(f, df_chunks), total=len(df_chunks)))
59 55

60 56
processed_df = pd.concat(processed_list, ignore_index=True)

0 commit comments

Comments
 (0)