Merge branch 'post_processing_saurabh' of https://github.com/DFKI-SignLanguage/RecSync-android into post_processing_saurabh

Saurabh Pandey · Saurabh Pandey · commit be7a143decdc · 2023-05-16T12:30:37.000+02:00
diff --git a/PostProcessing/PostProcessVideos.py b/PostProcessing/PostProcessVideos.py
@@ -6,6 +6,7 @@
 
 import pandas as pd
 import cv2
+import re
 
 from dataframes import compute_time_range, trim_into_interval, repair_dropped_frames
 
@@ -65,29 +66,32 @@ def extract(input_dir, output_dir):
             # Release the video file
             cap.release()
 
-
-
-def main(input_dir: Path, output_dir: Path):
-
-    # input_dir = Path("/Users/tbc/Desktop/videos/")
-    # output_dir = Path("/Users/tbc/Desktop/output_videos/")
-
+#
+#
+def scan_session_dir(input_dir: Path) -> Tuple[List[str], List[pd.DataFrame], List[str]]:
     #
-    # Find all CSV files in the directory and read it into a data frame (DONE)
+    # Find all CSV files in the directory and read it into a data frame
+    # Use the following regular expression to check if the client ID is a 16-digit hexadecimal string.
+    clientIDpattern = "[\\da-f]" * 16
+    patt = re.compile("^" + clientIDpattern + "$")
 
-    #
+    # Fill this list with the client IDs found in the directory
     clientIDs: List[str] = []
     for p in input_dir.iterdir():
-        print("Found client -->", p.stem)
-        # TODO -- we could also check if the ClientID complies to the numerical format (using regex).
-        clientIDs.append(p.stem)
-
-    # Will be filled with key=clientID:str, data=Tuple[csv:DataFrame, videofile:str]
-    clients_data: Dict[str, Tuple[pd.DataFrame, str]] = dict()
+        # Check whether the client ID complies with the 16-digit hexadecimal format (using regex).
+        res = patt.match(p.stem)
+        if res:
+            print("Found client -->", p.stem)
+            clientIDs.append(p.stem)
+        else:
+            print("Discarding ", p.stem)
 
-    df_list = []
+    #
+    # Accumulate the dataframes and mp4 files in the same order as the client IDs.
+    df_list: List[pd.DataFrame] = []
+    mp4_list: List[str] = []
 
-    for cID in clientIDs[1:]:
+    for cID in clientIDs:
         client_dir = input_dir / cID
         CSVs = list(client_dir.glob("*.csv"))
         MP4s = list(client_dir.glob("*.mp4"))
@@ -104,40 +108,66 @@ def main(input_dir: Path, output_dir: Path):
 
         df: pd.DataFrame = pd.read_csv(csv_file, header=None)
 
-        clients_data[cID] = (df, str(mp4_file))
         df_list.append(df)
+        mp4_list.append(str(mp4_file))
 
-    # Define the path to the directory containing the CSV files
-    # csv_path = "/Users/tbc/Desktop/test_data/"
+    return clientIDs, df_list, mp4_list
+
+#
+#
+#
+def main(input_dir: Path, output_dir: Path):
+
+    print(f"Scanning dir {str(input_dir)}...")
+    clientIDs, df_list, mp4_list = scan_session_dir(input_dir)
+
+    n_clients = len(clientIDs)
+
+
+    #
+    # Print collected info
+    for i in range(n_clients):
+        cID = clientIDs[i]
+        df = df_list[i]
+        mp4 = mp4_list[i]
+        print(f"For client ID {cID}: {len(df)} frames for file {mp4}")
 
     #
     # Repair CSVs (TODO - Mina)
-    # repaired_client_data = dict()
-    # for cID, (df, mp4) in clients_data:
-    #     repaired_df = repair_dropped_frames(df)
-    #     repaired_client_data[cID] = repaired_df, mp4
+    repaired_df_list: List[pd.DataFrame] = []
+    for cID, df in zip(clientIDs, df_list):
+        repaired_df = repair_dropped_frames(df)
+        repaired_df_list.append(repaired_df)
 
+    assert len(clientIDs) == len(df_list) == len(mp4_list) == len(repaired_df_list)
 
     #
     # Find time ranges (Saurabh, To test better)
     # Compute the time range
-    #dfs = [df for k, (df, _) in clients_data] 
-    min_common, max_common = compute_time_range(df_list)
+    min_common, max_common = compute_time_range(repaired_df_list)
 
     #
     # Trim CSVs (TODO)
-    # Trim the data frames to the time range and save to new CSV files
-    csv_path = output_dir / "test"
-    # TODO -- actually, we don't need to save them. We could just return them as DataFrame instances
-    trimmed_dataframes = trim_into_interval(df_list, min_common, max_common, THRESHOLD_NS)
-    
+    # Trim the data frames to the time range
+    trimmed_dataframes = trim_into_interval(repaired_df_list, min_common, max_common, THRESHOLD_NS)
+
+    assert len(clientIDs) == len(trimmed_dataframes), f"Expected {len(clientIDs)} trimmed dataframes. Found f{len(trimmed_dataframes)}"
+
+    client0ID = clientIDs[0]
+    client0size = len(trimmed_dataframes[0])
+    print(f"For client {client0ID}: {client0size} frames")
+    for cID, df in zip(clientIDs[1:], trimmed_dataframes[1:]):
+        dfsize = len(df)
+        if client0size != dfsize:
+            raise Exception(f"For client {cID}: expecting {client0size}, found {dfsize}")
+
+    print("Good. All trimmed dataframes have the same number of entries.")
 
     #
     # Extract the frames from the original videos
     # and rename the file names to the timestamps (DONE)
     # extract(input_dir, output_dir)
 
-
     #
     # Reconstruct videos (TODO)
 
@@ -152,11 +182,11 @@ def main(input_dir: Path, output_dir: Path):
                     "with missing/dropped frames inserted as (black) artificial data."
     )
     parser.add_argument(
-        "--infolder", type=str, help="The folder containing the collected videos and CSV files with the timestamps.",
+        "--infolder", "-i", type=str, help="The folder containing the collected videos and CSV files with the timestamps.",
         required=True
     )
     parser.add_argument(
-        "--outfolder", type=str, help="The folder where the repaired and aligned frames will be stored.",
+        "--outfolder", "-o", type=str, help="The folder where the repaired and aligned frames will be stored.",
         required=True
     )
 
@@ -168,7 +198,7 @@ def main(input_dir: Path, output_dir: Path):
     if not infolder.exists():
         raise Exception(f"Input folder '{infolder}' doesn't exist.")
 
-    if not infolder.exists():
+    if not outfolder.exists():
         raise Exception(f"Output folder '{outfolder}' doesn't exist.")
 
     main(infolder, outfolder)
diff --git a/PostProcessing/dataframes.py b/PostProcessing/dataframes.py
@@ -2,8 +2,9 @@
 
 from typing import Tuple
 
+
 def repair_dropped_frames(df: pd.DataFrame) -> pd.DataFrame:
-    pass
+    return df
 
 def save_dataframes(dataframes, prefix='df'):
     # Generate filenames based on a pattern or numbering scheme
diff --git a/PostProcessing/video.py b/PostProcessing/video.py
@@ -1,3 +1,8 @@
 import cv2
 # or ffmpeg
 
+import pandas as pd
+
+
+def extract_frames(video_file: str, timestamps: pd.DataFrame, output_dir: str):
+    pass
diff --git a/remote_control/requirements.txt b/remote_control/requirements.txt
@@ -13,7 +13,7 @@ itsdangerous==2.1.2
 Jinja2==3.1.2
 MarkupSafe==2.1.1
 multipart==0.2.4
-numpy==1.21.6
+numpy==1.24.3
 opencv-python==4.7.0.72
 packaging==23.1
 pandas==1.3.5