11import copy
2- import csv
2+ import re
3+ from datetime import datetime
4+ from pathlib import Path
35
46import numba
57import numpy as np
68import polars as pl
7- from numpy . core . multiarray import ndarray
9+ from tqdm import tqdm
810
911from .PCA import PCA_subcluster
1012
@@ -18,7 +20,8 @@ def CCA_subcluster(
1820 N_subcl_perc : float ,
1921 ext_case : int ,
2022 tolerance : float = 1e-7 ,
21- ) -> tuple [pl .DataFrame , bool , bool ]:
23+ folder : str = "" ,
24+ ) -> tuple [pl .DataFrame | None , bool , bool ]:
2225 CCA_OK = True
2326
2427 if N < 50 :
@@ -36,11 +39,9 @@ def CCA_subcluster(
3639 N , N_subcluster , R , DF , kf , tolerance
3740 )
3841
39- print ("==============================" )
40- print (" PCA DONE! " )
41- print ("==============================" )
4242 if not PCA_OK :
43- return CCA_OK , PCA_OK
43+ print ("PCA failed" )
44+ return None , CCA_OK , PCA_OK
4445
4546 I_total = int (n_clusters )
4647
@@ -51,17 +52,20 @@ def CCA_subcluster(
5152
5253 iteration = 1
5354 fill_xnew = 0
55+ progress_bar = tqdm (
56+ total = np .ceil (np .log2 (n_clusters )).astype (int ),
57+ desc = "I_total" ,
58+ )
59+ # TODO: Precalculate the I_total beforehand
60+ # and iterate over them in a for loop
5461 while I_total > 1 :
5562 i_orden = sort_rows (i_orden )
5663
5764 ID_agglom , CCA_OK = generate_CCA_pairs (I_total , i_orden , X , Y , Z , R , DF , kf )
5865
5966 ID_mon = CCA_identify_monomers (i_orden )
6067
61- if int (np .mod (I_total , 2 )) == 0 :
62- number_pairs = int (I_total / 2 )
63- else :
64- number_pairs = np .floor (I_total / 2 ) + 1
68+ number_pairs = (I_total + 1 ) // 2
6569
6670 considered = np .zeros ((I_total ))
6771 X_next = np .zeros ((N ))
@@ -74,6 +78,7 @@ def CCA_subcluster(
7478
7579 other = 0
7680 i_orden = np .zeros ((int (number_pairs ), 3 ))
81+ # TODO: why not for loop? More secure!
7782 while k <= I_total :
7883 for i in range (ID_agglom [k - 1 , :].size ):
7984 if ID_agglom [k - 1 , i ] == 1 :
@@ -155,35 +160,15 @@ def CCA_subcluster(
155160 Z_next [i ] = Zn [i - fill_xnew ]
156161 R_next [i ] = Rn [i - fill_xnew ]
157162
158- if int (np .mod (I_total , 2 )) == 0 :
159- I_total = int (I_total / 2 )
160- print ("======================" )
161- print (f"{ I_total = } " )
162- print ("======================" )
163- else :
164- I_total = int (np .floor (I_total / 2 ) + 1 )
165- print ("======================" )
166- print (f"{ I_total = } " )
167- print ("======================" )
163+ I_total = (I_total + 1 ) // 2
168164
169165 X = X_next
170166 Y = Y_next
171167 Z = Z_next
172168 R = R_next
173169
174170 iteration += 1
175-
176- for k in range (N - 1 ):
177- if np .isnan (X [k ]) or np .isnan (Y [k ]):
178- CCA_OK = False
179- elif np .isnan (Z [k ]) or np .isnan (R [k ]):
180- CCA_OK = False
181-
182- # save results
183- if not CCA_OK or not PCA_OK :
184- pass
185- else :
186- save_results (X , Y , Z , R , iter )
171+ progress_bar .update (1 )
187172
188173 result = pl .DataFrame (
189174 {
@@ -194,6 +179,13 @@ def CCA_subcluster(
194179 }
195180 )
196181
182+ CCA_OK = np .logical_not (np .isnan (result .to_numpy ())).any ().astype (bool )
183+
184+ filename = filename_generate (N , DF , kf )
185+ # save results
186+ if CCA_OK and PCA_OK :
187+ save_results (result , iter , filename = filename , folder = folder )
188+
197189 return result , CCA_OK , PCA_OK
198190
199191
@@ -1265,18 +1257,32 @@ def CCA_sticking_process_v2(
12651257 return X2_new , Y2_new , Z2_new
12661258
12671259
def filename_generate(n: int, df: float, kf: float) -> str:
    """Build a base file name for one run from its parameters and the time.

    The name is assembled as ``N{n}-D{df}-K{kf}-{timestamp}``, where the
    timestamp is the current local time formatted as ``%Y%m%d_%H%M%S``.
    Every dot, comma and whitespace character of the assembled string is
    then replaced by an underscore (so ``1.8`` becomes ``1_8``).

    Args:
        n: Value placed after the ``N`` prefix.
        df: Value placed after the ``D`` prefix.
        kf: Value placed after the ``K`` prefix.

    Returns:
        The sanitized name, without any file extension.
    """
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    raw_name = f"N{n}-D{df}-K{kf}-{timestamp}"
    # Dots (from float formatting), commas and whitespace are all mapped
    # to "_" in a single substitution pass.
    return re.sub(r"[\.\,\s]", "_", raw_name)
1263+
1264+
12681265def save_results (
1269- X : np .ndarray ,
1270- Y : np .ndarray ,
1271- Z : np .ndarray ,
1272- R : np .ndarray ,
1266+ # X: np.ndarray,
1267+ # Y: np.ndarray,
1268+ # Z: np.ndarray,
1269+ # R: np.ndarray,
1270+ data : pl .DataFrame ,
12731271 iteration : int ,
1274- res_name : str = "test" ,
1272+ folder : str = "" ,
1273+ filename : str = "test" ,
12751274):
1276- with open (res_name + str (iteration ) + ".csv" , "w" ) as f :
1277- writer = csv .writer (f )
1278- for i in range (X .size ):
1279- writer .writerow ([X [i ], Y [i ], Z [i ], R [i ]])
1275+ path = Path (folder )
1276+ print (path )
1277+ if not path .exists ():
1278+ print (f"`{ path } ` does not exist. Making the directory for you!" )
1279+ path .mkdir (parents = True , exist_ok = True )
1280+ path /= f"{ filename } _{ iteration } .csv"
1281+ data .write_csv (path )
1282+ # with open(res_name + str(iteration) + ".csv", "w") as f:
1283+ # writer = csv.writer(f)
1284+ # for i in range(X.size):
1285+ # writer.writerow([X[i], Y[i], Z[i], R[i]])
12801286
12811287
12821288def sort_rows (i_orden : np .ndarray ):
0 commit comments