Refactor some loops and add progress bars

arunoruto · arunoruto · commit d81ab3463748 · 2024-12-23T02:13:44.000+01:00
diff --git a/pyfracval/CCA.py b/pyfracval/CCA.py
@@ -1,10 +1,12 @@
 import copy
-import csv
+import re
+from datetime import datetime
+from pathlib import Path
 
 import numba
 import numpy as np
 import polars as pl
-from numpy.core.multiarray import ndarray
+from tqdm import tqdm
 
 from .PCA import PCA_subcluster
 
@@ -18,7 +20,8 @@ def CCA_subcluster(
     N_subcl_perc: float,
     ext_case: int,
     tolerance: float = 1e-7,
-) -> tuple[pl.DataFrame, bool, bool]:
+    folder: str = "",
+) -> tuple[pl.DataFrame | None, bool, bool]:
     CCA_OK = True
 
     if N < 50:
@@ -36,11 +39,9 @@ def CCA_subcluster(
         N, N_subcluster, R, DF, kf, tolerance
     )
 
-    print("==============================")
-    print("         PCA DONE!            ")
-    print("==============================")
     if not PCA_OK:
-        return CCA_OK, PCA_OK
+        print("PCA failed")
+        return None, CCA_OK, PCA_OK
 
     I_total = int(n_clusters)
 
@@ -51,17 +52,20 @@ def CCA_subcluster(
 
     iteration = 1
     fill_xnew = 0
+    progress_bar = tqdm(
+        total=np.ceil(np.log2(n_clusters)).astype(int),
+        desc="I_total",
+    )
+    # TODO: Precalculate the I_total beforehand
+    # and iterate over them in a foor loop
     while I_total > 1:
         i_orden = sort_rows(i_orden)
 
         ID_agglom, CCA_OK = generate_CCA_pairs(I_total, i_orden, X, Y, Z, R, DF, kf)
 
         ID_mon = CCA_identify_monomers(i_orden)
 
-        if int(np.mod(I_total, 2)) == 0:
-            number_pairs = int(I_total / 2)
-        else:
-            number_pairs = np.floor(I_total / 2) + 1
+        number_pairs = (I_total + 1) // 2
 
         considered = np.zeros((I_total))
         X_next = np.zeros((N))
@@ -74,6 +78,7 @@ def CCA_subcluster(
 
         other = 0
         i_orden = np.zeros((int(number_pairs), 3))
+        # TODO: why not for loop? More secure!
         while k <= I_total:
             for i in range(ID_agglom[k - 1, :].size):
                 if ID_agglom[k - 1, i] == 1:
@@ -155,35 +160,15 @@ def CCA_subcluster(
                 Z_next[i] = Zn[i - fill_xnew]
                 R_next[i] = Rn[i - fill_xnew]
 
-        if int(np.mod(I_total, 2)) == 0:
-            I_total = int(I_total / 2)
-            print("======================")
-            print(f"{I_total = }")
-            print("======================")
-        else:
-            I_total = int(np.floor(I_total / 2) + 1)
-            print("======================")
-            print(f"{I_total = }")
-            print("======================")
+        I_total = (I_total + 1) // 2
 
         X = X_next
         Y = Y_next
         Z = Z_next
         R = R_next
 
         iteration += 1
-
-    for k in range(N - 1):
-        if np.isnan(X[k]) or np.isnan(Y[k]):
-            CCA_OK = False
-        elif np.isnan(Z[k]) or np.isnan(R[k]):
-            CCA_OK = False
-
-    # save results
-    if not CCA_OK or not PCA_OK:
-        pass
-    else:
-        save_results(X, Y, Z, R, iter)
+        progress_bar.update(1)
 
     result = pl.DataFrame(
         {
@@ -194,6 +179,13 @@ def CCA_subcluster(
         }
     )
 
+    CCA_OK = np.logical_not(np.isnan(result.to_numpy())).any().astype(bool)
+
+    filename = filename_generate(N, DF, kf)
+    # save results
+    if CCA_OK and PCA_OK:
+        save_results(result, iter, filename=filename, folder=folder)
+
     return result, CCA_OK, PCA_OK
 
 
@@ -1265,18 +1257,32 @@ def CCA_sticking_process_v2(
     return X2_new, Y2_new, Z2_new
 
 
+def filename_generate(n: int, df: float, kf: float) -> str:
+    now = datetime.now().strftime("%Y%m%d_%H%M%S")
+    return re.sub(r"[\.\,\s]", "_", f"N{n}-D{df}-K{kf}-{now}")
+
+
 def save_results(
-    X: np.ndarray,
-    Y: np.ndarray,
-    Z: np.ndarray,
-    R: np.ndarray,
+    # X: np.ndarray,
+    # Y: np.ndarray,
+    # Z: np.ndarray,
+    # R: np.ndarray,
+    data: pl.DataFrame,
     iteration: int,
-    res_name: str = "test",
+    folder: str = "",
+    filename: str = "test",
 ):
-    with open(res_name + str(iteration) + ".csv", "w") as f:
-        writer = csv.writer(f)
-        for i in range(X.size):
-            writer.writerow([X[i], Y[i], Z[i], R[i]])
+    path = Path(folder)
+    print(path)
+    if not path.exists():
+        print(f"`{path}` does not exist. Making the directory for you!")
+        path.mkdir(parents=True, exist_ok=True)
+    path /= f"{filename}_{iteration}.csv"
+    data.write_csv(path)
+    # with open(res_name + str(iteration) + ".csv", "w") as f:
+    #     writer = csv.writer(f)
+    #     for i in range(X.size):
+    #         writer.writerow([X[i], Y[i], Z[i], R[i]])
 
 
 def sort_rows(i_orden: np.ndarray):
diff --git a/pyfracval/PCA.py b/pyfracval/PCA.py
@@ -4,7 +4,7 @@
 import numba
 import numpy as np
 import pyvista as pv
-from tqdm import tqdm
+from tqdm import trange
 
 
 def PCA(
@@ -203,7 +203,7 @@ def PCA_subcluster(
 
     i_orden = np.zeros((N_clusters, 3))
     data = np.zeros((N, 4))
-    for i in tqdm(range(1, N_clusters + 1)):
+    for i in trange(1, N_clusters + 1, desc="PCA Loop"):
         number = int(N_subcluster_m[i - 1])
         radius = R[Na - 1 : Na + number - 1]
         mass = np.zeros((number))