WIP: rework the compute_time_range and trim_into_interval functions

Saurabh Pandey · Saurabh Pandey · commit cfd255652bd8 · 2023-05-12T15:15:44.000+02:00
diff --git a/PostProcessing/PostProcessVideos.py b/PostProcessing/PostProcessVideos.py
@@ -85,11 +85,12 @@ def main(input_dir: Path, output_dir: Path):
     # Will be filled with key=clientID:str, data=Tuple[csv:DataFrame, videofile:str]
     clients_data: Dict[str, Tuple[pd.DataFrame, str]] = dict()
 
-    for cID in clientIDs:
+    df_list = []
+
+    for cID in clientIDs[1:]:
         client_dir = input_dir / cID
         CSVs = list(client_dir.glob("*.csv"))
         MP4s = list(client_dir.glob("*.mp4"))
-
         #
         # Consistency check. Each clientID folder must have exactly 1 CSV and 1 mp4.
         if len(CSVs) != 1:
@@ -99,36 +100,37 @@ def main(input_dir: Path, output_dir: Path):
             raise Exception(f"Expecting 1 MP4 file for client {cID}. Found {len(MP4s)}.")
 
         csv_file = CSVs[0]
-        mp4_file = MP4s[1]
+        mp4_file = MP4s[0]
 
         df: pd.DataFrame = pd.read_csv(csv_file, header=None)
 
         clients_data[cID] = (df, str(mp4_file))
-
+        df_list.append(df)
 
     # Define the path to the directory containing the CSV files
     # csv_path = "/Users/tbc/Desktop/test_data/"
 
     #
     # Repair CSVs (TODO - Mina)
-    repaired_client_data = dict()
-    for cID, (df, mp4) in clients_data:
-        repaired_df = repair_dropped_frames(df)
-        repaired_client_data[cID] = repaired_df, mp4
+    # repaired_client_data = dict()
+    # for cID, (df, mp4) in clients_data:
+    #     repaired_df = repair_dropped_frames(df)
+    #     repaired_client_data[cID] = repaired_df, mp4
+
 
     #
     # Find time ranges (Saurabh, To test better)
     # Compute the time range
-    dfs = [df for k, (df, _) in clients_data]
-    min_common, max_common = compute_time_range(dfs)
+    #dfs = [df for k, (df, _) in clients_data] 
+    min_common, max_common = compute_time_range(df_list)
 
     #
     # Trim CSVs (TODO)
     # Trim the data frames to the time range and save to new CSV files
     csv_path = output_dir / "test"
     # TODO -- actually, we don't need to save them. We could just return them as DataFrame instances
-    trim_into_interval(csv_path, dfs, min_common, max_common)
-
+    trimmed_dataframes = trim_into_interval(df_list, min_common, max_common, THRESHOLD_NS)
+    
 
     #
     # Extract the frames from the original videos
diff --git a/PostProcessing/dataframes.py b/PostProcessing/dataframes.py
@@ -4,24 +4,40 @@
 def repair_dropped_frames(df: pd.DataFrame) -> pd.DataFrame:
     pass
 
-
+# Return the largest first-row value of column 0 across all dataframes (lower bound of the common range)
+def find_largest_first_entry(dfs):
+    largest_value = float('-inf')
+    for df in dfs:
+        first_entry = df.iloc[0, 0]
+        if first_entry > largest_value:
+            largest_value = first_entry
+    return largest_value
+
+# Return the smallest last-row value of column 0 across all dataframes (upper bound of the common range)
+def find_smallest_last_entry(dfs):
+    smallest_value = float('inf')
+    for df in dfs:
+        last_entry = df.iloc[-1, 0]
+        if last_entry < smallest_value:
+            smallest_value = last_entry
+    return smallest_value
+
+# Compute the common range: (largest first entry, smallest last entry) of column 0 across the dataframes
 def compute_time_range(dfs):
     # Find the lowest and highest numbers in all the data frames
-    min_common = max(df.iloc[:,0].min() for df in dfs)
-    max_common = min(df.iloc[:,0].max() for df in dfs)
-
-    # Print the results
-    print(f"The lowest common number is {min_common}")
-    print(f"The highest common number is {max_common}")
-
-    return (min_common, max_common)
-
-
-def trim_into_interval(csv_path, dfs, min_common, max_common):
-    # Trim each data frame to the min_common and max_common interval and save to a new file
-    for i, df in enumerate(dfs):
-        df_trimmed = df[(df.iloc[:,0] >= min_common) & (df.iloc[:,0] <= max_common)]
-        df_trimmed.to_csv(f"{csv_path}trimmed_df_{i+1}.csv", header=False, index=False)
-
-    # Print the results
-    print(f"{len(dfs)} data frames trimmed and saved to {csv_path}")
+    lower_value = find_largest_first_entry(dfs)
+    higher_value = find_smallest_last_entry(dfs)
+
+    # Return the (lower, upper) bounds as a tuple
+    return (lower_value, higher_value)
+
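+# Trim each dataframe to the rows between the first column-0 values found within `threshold` of min_common and of max_common (IndexError if either window is empty)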
+def trim_into_interval(dfs, min_common, max_common, threshold):
+    trimmed_dataframes = []
+    # import pdb;pdb.set_trace()
+    for df in dfs:
+        start = df[(df.iloc[:, 0] >= min_common - threshold) & (df.iloc[:, 0] <= min_common + threshold)]
+        end = df[(df.iloc[:, 0] >= max_common - threshold) & (df.iloc[:, 0] <= max_common + threshold)]
+        trimmed_df = df[(df.iloc[:, 0] >= start.iloc[0, 0]) & (df.iloc[:, 0] <= end.iloc[0, 0])].reset_index(drop=True)
+        trimmed_dataframes.append(trimmed_df)
+    return trimmed_dataframes