Improved main body.

fnunnari · fnunnari · commit 602d408cec0e · 2023-05-12T16:21:13.000+02:00
diff --git a/PostProcessing/PostProcessVideos.py b/PostProcessing/PostProcessVideos.py
@@ -6,6 +6,7 @@
 
 import pandas as pd
 import cv2
+import re
 
 from dataframes import compute_time_range, trim_into_interval, repair_dropped_frames
 
@@ -66,28 +67,38 @@ def extract(input_dir, output_dir):
             cap.release()
 
 
-
+#
+#
+#
 def main(input_dir: Path, output_dir: Path):
 
     # input_dir = Path("/Users/tbc/Desktop/videos/")
     # output_dir = Path("/Users/tbc/Desktop/output_videos/")
 
-    #
-    # Find all CSV files in the directory and read it into a data frame (DONE)
+    print(f"Scanning dir {str(input_dir)}...")
 
     #
+    # Find all CSV files in the directory and read each one into a data frame
+    clientIDpattern = "[\\da-f]" * 16
+    patt = re.compile("^" + clientIDpattern + "$")
+
     clientIDs: List[str] = []
     for p in input_dir.iterdir():
-        print("Found client -->", p.stem)
-        # TODO -- we could also check if the ClientID complies to the numerical format (using regex).
-        clientIDs.append(p.stem)
+        res = patt.match(p.stem)
+        if res:
+            print("Found client -->", p.stem)
+            # p.stem matched the 16-character client ID pattern, so it is accepted as a client.
+            clientIDs.append(p.stem)
+        else:
+            print("Discarding ", p.stem)
 
-    # Will be filled with key=clientID:str, data=Tuple[csv:DataFrame, videofile:str]
-    clients_data: Dict[str, Tuple[pd.DataFrame, str]] = dict()
+    n_clients = len(clientIDs)
 
-    df_list = []
+    # Will accumulate the list of dataframes and mp4 files in the same order as the client IDs.
+    df_list: List[pd.DataFrame] = []
+    mp4_list: List[str] = []
 
-    for cID in clientIDs[1:]:
+    for cID in clientIDs:
         client_dir = input_dir / cID
         CSVs = list(client_dir.glob("*.csv"))
         MP4s = list(client_dir.glob("*.mp4"))
@@ -104,40 +115,54 @@ def main(input_dir: Path, output_dir: Path):
 
         df: pd.DataFrame = pd.read_csv(csv_file, header=None)
 
-        clients_data[cID] = (df, str(mp4_file))
         df_list.append(df)
+        mp4_list.append(str(mp4_file))
 
-    # Define the path to the directory containing the CSV files
-    # csv_path = "/Users/tbc/Desktop/test_data/"
+    #
+    # Print collected info
+    for i in range(n_clients):
+        cID = clientIDs[i]
+        df = df_list[i]
+        mp4 = mp4_list[i]
+        print(f"For client ID {cID}: {len(df)} frames for file {mp4}")
 
     #
     # Repair CSVs (TODO - Mina)
-    # repaired_client_data = dict()
-    # for cID, (df, mp4) in clients_data:
-    #     repaired_df = repair_dropped_frames(df)
-    #     repaired_client_data[cID] = repaired_df, mp4
+    repaired_df_list: List[pd.DataFrame] = []
+    for cID, df in zip(clientIDs, df_list):
+        repaired_df = repair_dropped_frames(df)
+        repaired_df_list.append(repaired_df)
 
+    assert len(clientIDs) == len(df_list) == len(mp4_list) == len(repaired_df_list)
 
     #
     # Find time ranges (Saurabh, To test better)
     # Compute the time range
-    #dfs = [df for k, (df, _) in clients_data] 
-    min_common, max_common = compute_time_range(df_list)
+    min_common, max_common = compute_time_range(repaired_df_list)
 
     #
     # Trim CSVs (TODO)
     # Trim the data frames to the time range and save to new CSV files
-    csv_path = output_dir / "test"
     # TODO -- actually, we don't need to save them. We could just return them as DataFrame instances
-    trimmed_dataframes = trim_into_interval(df_list, min_common, max_common, THRESHOLD_NS)
-    
+    trimmed_dataframes = trim_into_interval(repaired_df_list, min_common, max_common, THRESHOLD_NS)
+
+    assert len(clientIDs) == len(trimmed_dataframes), f"Expected {len(clientIDs)} trimmed dataframes. Found f{len(trimmed_dataframes)}"
+
+    client0ID = clientIDs[0]
+    client0size = len(trimmed_dataframes[0])
+    print(f"For client {client0ID}: {client0size} frames")
+    for cID, df in zip(clientIDs[1:], trimmed_dataframes[1:]):
+        dfsize = len(df)
+        if client0size != dfsize:
+            raise Exception(f"For client {cID}: expecting {client0size}, found {dfsize}")
+
+    print("Good. All trimmed dataframes have the same number of entries.")
 
     #
     # Extract the frames from the original videos
     # and rename the file names to the timestamps (DONE)
     # extract(input_dir, output_dir)
 
-
     #
     # Reconstruct videos (TODO)
 
@@ -152,11 +177,11 @@ def main(input_dir: Path, output_dir: Path):
                     "with missing/dropped frames inserted as (black) artificial data."
     )
     parser.add_argument(
-        "--infolder", type=str, help="The folder containing the collected videos and CSV files with the timestamps.",
+        "--infolder", "-i", type=str, help="The folder containing the collected videos and CSV files with the timestamps.",
         required=True
     )
     parser.add_argument(
-        "--outfolder", type=str, help="The folder where the repaired and aligned frames will be stored.",
+        "--outfolder", "-o", type=str, help="The folder where the repaired and aligned frames will be stored.",
         required=True
     )
 
@@ -168,7 +193,7 @@ def main(input_dir: Path, output_dir: Path):
     if not infolder.exists():
         raise Exception(f"Input folder '{infolder}' doesn't exist.")
 
-    if not infolder.exists():
+    if not outfolder.exists():
         raise Exception(f"Output folder '{outfolder}' doesn't exist.")
 
     main(infolder, outfolder)
diff --git a/PostProcessing/dataframes.py b/PostProcessing/dataframes.py
@@ -2,8 +2,9 @@
 
 from typing import Tuple
 
+
 def repair_dropped_frames(df: pd.DataFrame) -> pd.DataFrame:
-    pass
+    return df
 
 
 # Function to find the largest value in the first entry of all dataframes