|
1 |
| -import pandas as pd |
2 |
| -import glob |
3 |
| -import csv |
4 | 1 | import os
|
5 |
| -import cv2 |
6 |
| - |
| 2 | +import csv |
7 | 3 | import argparse
|
8 |
| - |
9 | 4 | from pathlib import Path
|
| 5 | +from typing import List, Dict, Tuple |
| 6 | + |
| 7 | +import pandas as pd |
| 8 | +import cv2 |
| 9 | + |
| 10 | +from dataframes import compute_time_range, trim_into_interval, repair_dropped_frames |
10 | 11 |
|
11 |
| -from dataframes import compute_time_range, trim_into_interval |
12 | 12 |
|
13 | 13 | THRESHOLD_NS = 10 * 1000 * 1000
|
14 | 14 |
|
@@ -67,36 +67,79 @@ def extract(input_dir, output_dir):
|
67 | 67 |
|
68 | 68 |
|
69 | 69 |
|
70 |
| -def main(): |
def main(input_dir: Path, output_dir: Path):
    """Load, repair and trim the per-client recordings found in *input_dir*.

    Every sub-directory of ``input_dir`` is treated as one client; its name is
    the client ID. Each client directory must contain exactly one ``*.csv``
    file and exactly one ``*.mp4`` file. The CSVs are read without a header
    row, passed through ``repair_dropped_frames``, and the resulting frames
    are given to ``compute_time_range`` and ``trim_into_interval``.

    Args:
        input_dir: Directory containing one sub-directory per client.
        output_dir: Directory under which the trimmed output is placed
            (currently the fixed ``output_dir / "test"`` path is handed to
            ``trim_into_interval``).

    Raises:
        Exception: If a client directory does not contain exactly one CSV
            file or exactly one MP4 file.
    """
    #
    # Find all CSV files in the directory and read it into a data frame (DONE)
    #
    # Only directories are clients: stray files such as ".DS_Store" must not
    # be mistaken for a client ID. Sorting makes the processing order (and the
    # order of the frames handed to the helpers below) reproducible, since
    # iterdir() gives no ordering guarantee.
    client_dirs: List[Path] = sorted(p for p in input_dir.iterdir() if p.is_dir())
    for client_dir in client_dirs:
        # Use the full directory name; ``stem`` would cut "a.b" down to "a".
        print("Found client -->", client_dir.name)
        # TODO -- we could also check if the ClientID complies to the numerical format (using regex).

    # Will be filled with key=clientID:str, data=Tuple[csv:DataFrame, videofile:str]
    clients_data: Dict[str, Tuple[pd.DataFrame, str]] = dict()

    for client_dir in client_dirs:
        cID = client_dir.name
        CSVs = list(client_dir.glob("*.csv"))
        MP4s = list(client_dir.glob("*.mp4"))

        #
        # Consistency check. Each clientID folder must have exactly 1 CSV and 1 mp4.
        if len(CSVs) != 1:
            raise Exception(f"Expecting 1 CSV file for client {cID}. Found {len(CSVs)}.")

        if len(MP4s) != 1:
            raise Exception(f"Expecting 1 MP4 file for client {cID}. Found {len(MP4s)}.")

        csv_file = CSVs[0]
        # Exactly one element is guaranteed by the check above, so the only
        # valid index is 0.
        mp4_file = MP4s[0]

        df: pd.DataFrame = pd.read_csv(csv_file, header=None)

        clients_data[cID] = (df, str(mp4_file))

    #
    # Repair CSVs (TODO - Mina)
    # Iterate over items(): iterating the dict itself yields only the keys.
    repaired_client_data: Dict[str, Tuple[pd.DataFrame, str]] = {
        cID: (repair_dropped_frames(df), mp4)
        for cID, (df, mp4) in clients_data.items()
    }

    #
    # Find time ranges (Saurabh, To test better)
    # Compute the time range on the repaired frames, so that the repair step
    # above actually feeds the rest of the pipeline.
    dfs = [df for df, _ in repaired_client_data.values()]
    min_common, max_common = compute_time_range(dfs)

    #
    # Trim CSVs (TODO)
    # Trim the data frames to the time range and save to new CSV files
    csv_path = output_dir / "test"
    # TODO -- actually, we don't need to save them. We could just return them as DataFrame instances
    trim_into_interval(csv_path, dfs, min_common, max_common)

    #
    # Extract the frames from the original videos
    # and rename the file names to the timestamps (DONE)
    # extract(input_dir, output_dir)

    #
    # Reconstruct videos (TODO)
| 141 | + |
| 142 | + |
100 | 143 | #
|
101 | 144 | # MAIN
|
102 | 145 | if __name__ == "__main__":
|
@@ -126,21 +169,4 @@ def main():
|
126 | 169 | if not infolder.exists():
|
127 | 170 | raise Exception(f"Output folder '{outfolder}' doesn't exist.")
|
128 | 171 |
|
129 |
| - # |
130 |
| - # Find all CSV files in the directory and read it into a data frame (DONE) |
131 |
| - |
132 |
| - # |
133 |
| - # Find time ranges (Saurabh, To test better) |
134 |
| - |
135 |
| - # |
136 |
| - # Trim CSVs (TODO) |
137 |
| - |
138 |
| - # |
139 |
| - # Repair CSVs (TODO - Mina) |
140 |
| - |
141 |
| - # |
142 |
| - # Extract the frames from the original videos |
143 |
| - # and rename the file names to the timestamps (DONE) |
144 |
| - |
145 |
| - # |
146 |
| - # Reconstruct videos (TODO) |
| 172 | + main(infolder, outfolder) |
0 commit comments