11import copy
2- import csv
2+ import re
3+ from datetime import datetime
4+ from pathlib import Path
35
46import numba
57import numpy as np
6- from numpy .core .multiarray import ndarray
8+ import polars as pl
9+ from tqdm import tqdm
710
811from .PCA import PCA_subcluster
912
@@ -17,7 +20,8 @@ def CCA_subcluster(
1720 N_subcl_perc : float ,
1821 ext_case : int ,
1922 tolerance : float = 1e-7 ,
20- ) -> tuple [bool , bool ]:
23+ folder : str = "results" ,
24+ ) -> tuple [pl .DataFrame | None , bool , bool ]:
2125 CCA_OK = True
2226
2327 if N < 50 :
@@ -35,11 +39,9 @@ def CCA_subcluster(
3539 N , N_subcluster , R , DF , kf , tolerance
3640 )
3741
38- print ("==============================" )
39- print (" PCA DONE! " )
40- print ("==============================" )
4142 if not PCA_OK :
42- return CCA_OK , PCA_OK
43+ print ("PCA failed" )
44+ return None , CCA_OK , PCA_OK
4345
4446 I_total = int (n_clusters )
4547
@@ -50,17 +52,20 @@ def CCA_subcluster(
5052
5153 iteration = 1
5254 fill_xnew = 0
55+ progress_bar = tqdm (
56+ total = np .ceil (np .log2 (n_clusters )).astype (int ),
57+ desc = "I_total" ,
58+ )
59+ # TODO: Precalculate the sequence of I_total values beforehand
60+ # and iterate over them in a for loop
5361 while I_total > 1 :
5462 i_orden = sort_rows (i_orden )
5563
5664 ID_agglom , CCA_OK = generate_CCA_pairs (I_total , i_orden , X , Y , Z , R , DF , kf )
5765
5866 ID_mon = CCA_identify_monomers (i_orden )
5967
60- if int (np .mod (I_total , 2 )) == 0 :
61- number_pairs = int (I_total / 2 )
62- else :
63- number_pairs = np .floor (I_total / 2 ) + 1
68+ number_pairs = (I_total + 1 ) // 2
6469
6570 considered = np .zeros ((I_total ))
6671 X_next = np .zeros ((N ))
@@ -73,6 +78,7 @@ def CCA_subcluster(
7378
7479 other = 0
7580 i_orden = np .zeros ((int (number_pairs ), 3 ))
81+ # TODO: why not use a for loop here? It would be safer.
7682 while k <= I_total :
7783 for i in range (ID_agglom [k - 1 , :].size ):
7884 if ID_agglom [k - 1 , i ] == 1 :
@@ -154,37 +160,33 @@ def CCA_subcluster(
154160 Z_next [i ] = Zn [i - fill_xnew ]
155161 R_next [i ] = Rn [i - fill_xnew ]
156162
157- if int (np .mod (I_total , 2 )) == 0 :
158- I_total = int (I_total / 2 )
159- print ("======================" )
160- print (f"{ I_total = } " )
161- print ("======================" )
162- else :
163- I_total = int (np .floor (I_total / 2 ) + 1 )
164- print ("======================" )
165- print (f"{ I_total = } " )
166- print ("======================" )
163+ I_total = (I_total + 1 ) // 2
167164
168165 X = X_next
169166 Y = Y_next
170167 Z = Z_next
171168 R = R_next
172169
173170 iteration += 1
171+ progress_bar .update (1 )
172+
173+ result = pl .DataFrame (
174+ {
175+ "x" : pl .Series (X ),
176+ "y" : pl .Series (Y ),
177+ "z" : pl .Series (Z ),
178+ "r" : pl .Series (R ),
179+ }
180+ )
174181
175- for k in range (N - 1 ):
176- if np .isnan (X [k ]) or np .isnan (Y [k ]):
177- CCA_OK = False
178- elif np .isnan (Z [k ]) or np .isnan (R [k ]):
179- CCA_OK = False
182+ CCA_OK = np .logical_not (np .isnan (result .to_numpy ())).any ().astype (bool )
180183
184+ filename = filename_generate (N , DF , kf )
181185 # save results
182- if not CCA_OK or not PCA_OK :
183- pass
184- else :
185- save_results (X , Y , Z , R , iter )
186+ if CCA_OK and PCA_OK :
187+ save_results (result , iter , filename = filename , folder = folder )
186188
187- return CCA_OK , PCA_OK
189+ return result , CCA_OK , PCA_OK
188190
189191
190192def generate_CCA_pairs (
@@ -332,7 +334,7 @@ def CCA_identify_monomers(i_orden: np.ndarray):
332334 return ID_mon
333335
334336
335- @numba .njit ()
337+ @numba .jit ()
336338def CCA_random_select_list (
337339 X1 : np .ndarray ,
338340 Y1 : np .ndarray ,
@@ -1158,7 +1160,7 @@ def CCA_2_sphere_intersec(sphere1: np.ndarray, sphere2: np.ndarray):
11581160 return x , y , z , vec0 , i_vec , j_vec
11591161
11601162
1161- @numba .jit (nopython = True )
1163+ @numba .jit ()
11621164def CCA_overlap_check (
11631165 n1 : int ,
11641166 n2 : int ,
@@ -1187,7 +1189,7 @@ def CCA_overlap_check(
11871189 return cov_max
11881190
11891191
1190- @numba .jit (nopython = True )
1192+ @numba .jit ()
11911193def CCA_sticking_process_v2 (
11921194 CM2 : np .ndarray ,
11931195 vec0 : np .ndarray ,
@@ -1255,18 +1257,24 @@ def CCA_sticking_process_v2(
12551257 return X2_new , Y2_new , Z2_new
12561258
12571259
1260+ def filename_generate (n : int , df : float , kf : float ) -> str :
1261+ now = datetime .now ().strftime ("%Y%m%d_%H%M%S" )
1262+ return re .sub (r"[\.\,\s]" , "_" , f"N{ n } -D{ df } -K{ kf } -{ now } " )
1263+
1264+
12581265def save_results (
1259- X : np .ndarray ,
1260- Y : np .ndarray ,
1261- Z : np .ndarray ,
1262- R : np .ndarray ,
1266+ data : pl .DataFrame ,
12631267 iteration : int ,
1264- res_name : str = "test" ,
1268+ folder : str = "" ,
1269+ filename : str = "test" ,
12651270):
1266- with open (res_name + str (iteration ) + ".csv" , "w" ) as f :
1267- writer = csv .writer (f )
1268- for i in range (X .size ):
1269- writer .writerow ([X [i ], Y [i ], Z [i ], R [i ]])
1271+ path = Path (folder )
1272+ print (path )
1273+ if not path .exists ():
1274+ print (f"`{ path } ` does not exist. Making the directory for you!" )
1275+ path .mkdir (parents = True , exist_ok = True )
1276+ path /= f"{ filename } _{ iteration } .csv"
1277+ data .write_csv (path )
12701278
12711279
12721280def sort_rows (i_orden : np .ndarray ):
@@ -1281,7 +1289,7 @@ def sort_rows(i_orden: np.ndarray):
12811289 return i_orden
12821290
12831291
1284- @numba .jit (nopython = True )
1292+ @numba .jit ()
12851293def my_norm (a : np .ndarray ) -> float :
12861294 n = np .sqrt (np .power (a [0 ], 2 ) + np .power (a [1 ], 2 ) + np .power (a [2 ], 2 ))
12871295 return n
0 commit comments