File tree Expand file tree Collapse file tree 1 file changed +3
-7
lines changed
src/2_curating/2_editing/structure/3_processingAndEnriching Expand file tree Collapse file tree 1 file changed +3
-7
lines changed Original file line number Diff line number Diff line change 4747 print ('your dataframe is not empty :)' )
4848
4949 df = df [df [smiles_column_header ].notnull ()]
50- df_chunks = np .array_split (df , cpus )
50+ chunk_size = len (df ) // cpus + 1
51+ df_chunks = [df .iloc [i :i + chunk_size ] for i in range (0 , len (df ), chunk_size )]
5152 f = CleaningFunc (smiles_column_header ).f
52-
53- # old multiprocessing version
54- # with multiprocessing.Pool(cpus) as pool:
55- # processed_df = pd.concat(pool.map(f, df_chunks), ignore_index=True)
56-
57- with concurrent .futures .ThreadPoolExecutor (max_workers = cpus ) as executor :
53+ with concurrent .futures .ProcessPoolExecutor (max_workers = cpus ) as executor :
5854 processed_list = list (tqdm (executor .map (f , df_chunks ), total = len (df_chunks )))
5955
6056 processed_df = pd .concat (processed_list , ignore_index = True )
You can’t perform that action at this time.
0 commit comments