6
6
7
7
import pandas as pd
8
8
import cv2
9
+ import re
9
10
10
11
from dataframes import compute_time_range , trim_into_interval , repair_dropped_frames
11
12
@@ -65,29 +66,32 @@ def extract(input_dir, output_dir):
65
66
# Release the video file
66
67
cap .release ()
67
68
68
-
69
-
70
- def main (input_dir : Path , output_dir : Path ):
71
-
72
- # input_dir = Path("/Users/tbc/Desktop/videos/")
73
- # output_dir = Path("/Users/tbc/Desktop/output_videos/")
74
-
69
+ #
70
+ #
71
+ def scan_session_dir (input_dir : Path ) -> Tuple [List [str ], List [pd .DataFrame ], List [str ]]:
75
72
#
76
- # Find all CSV files in the directory and read it into a data frame (DONE)
73
+ # Find all CSV files in the directory and read each of them into a data frame
74
+ # Use the following regular expression to check if the client ID is a 16-digit hexadecimal string.
75
+ clientIDpattern = "[\\ da-f]" * 16
76
+ patt = re .compile ("^" + clientIDpattern + "$" )
77
77
78
- #
78
+ # Fill this list with the client IDs found in the directory
79
79
clientIDs : List [str ] = []
80
80
for p in input_dir .iterdir ():
81
- print ("Found client -->" , p .stem )
82
- # TODO -- we could also check if the ClientID complies to the numerical format (using regex).
83
- clientIDs .append (p .stem )
84
-
85
- # Will be filled with key=clientID:str, data=Tuple[csv:DataFrame, videofile:str]
86
- clients_data : Dict [str , Tuple [pd .DataFrame , str ]] = dict ()
81
+ # Check if the client ID complies with the expected hexadecimal format (using regex).
82
+ res = patt .match (p .stem )
83
+ if res :
84
+ print ("Found client -->" , p .stem )
85
+ clientIDs .append (p .stem )
86
+ else :
87
+ print ("Discarding " , p .stem )
87
88
88
- df_list = []
89
+ #
90
+ # Accumulates the lists of dataframes and mp4 files in the same order as the client IDs.
91
+ df_list : List [pd .DataFrame ] = []
92
+ mp4_list : List [str ] = []
89
93
90
- for cID in clientIDs [ 1 :] :
94
+ for cID in clientIDs :
91
95
client_dir = input_dir / cID
92
96
CSVs = list (client_dir .glob ("*.csv" ))
93
97
MP4s = list (client_dir .glob ("*.mp4" ))
@@ -104,40 +108,66 @@ def main(input_dir: Path, output_dir: Path):
104
108
105
109
df : pd .DataFrame = pd .read_csv (csv_file , header = None )
106
110
107
- clients_data [cID ] = (df , str (mp4_file ))
108
111
df_list .append (df )
112
+ mp4_list .append (str (mp4_file ))
109
113
110
- # Define the path to the directory containing the CSV files
111
- # csv_path = "/Users/tbc/Desktop/test_data/"
114
+ return clientIDs , df_list , mp4_list
115
+
116
+ #
117
+ #
118
+ #
119
+ def main (input_dir : Path , output_dir : Path ):
120
+
121
+ print (f"Scanning dir { str (input_dir )} ..." )
122
+ clientIDs , df_list , mp4_list = scan_session_dir (input_dir )
123
+
124
+ n_clients = len (clientIDs )
125
+
126
+
127
+ #
128
+ # Print collected info
129
+ for i in range (n_clients ):
130
+ cID = clientIDs [i ]
131
+ df = df_list [i ]
132
+ mp4 = mp4_list [i ]
133
+ print (f"For client ID { cID } : { len (df )} frames for file { mp4 } " )
112
134
113
135
#
114
136
# Repair CSVs (TODO - Mina)
115
- # repaired_client_data = dict()
116
- # for cID, (df, mp4) in clients_data :
117
- # repaired_df = repair_dropped_frames(df)
118
- # repaired_client_data[cID] = repaired_df, mp4
137
+ repaired_df_list : List [ pd . DataFrame ] = []
138
+ for cID , df in zip ( clientIDs , df_list ) :
139
+ repaired_df = repair_dropped_frames (df )
140
+ repaired_df_list . append ( repaired_df )
119
141
142
+ assert len (clientIDs ) == len (df_list ) == len (mp4_list ) == len (repaired_df_list )
120
143
121
144
#
122
145
# Find time ranges (Saurabh, To test better)
123
146
# Compute the time range
124
- #dfs = [df for k, (df, _) in clients_data]
125
- min_common , max_common = compute_time_range (df_list )
147
+ min_common , max_common = compute_time_range (repaired_df_list )
126
148
127
149
#
128
150
# Trim CSVs (TODO)
129
- # Trim the data frames to the time range and save to new CSV files
130
- csv_path = output_dir / "test"
131
- # TODO -- actually, we don't need to save them. We could just return them as DataFrame instances
132
- trimmed_dataframes = trim_into_interval (df_list , min_common , max_common , THRESHOLD_NS )
133
-
151
+ # Trim the data frames to the time range
152
+ trimmed_dataframes = trim_into_interval (repaired_df_list , min_common , max_common , THRESHOLD_NS )
153
+
154
+ assert len (clientIDs ) == len (trimmed_dataframes ), f"Expected { len (clientIDs )} trimmed dataframes. Found f{ len (trimmed_dataframes )} "
155
+
156
+ client0ID = clientIDs [0 ]
157
+ client0size = len (trimmed_dataframes [0 ])
158
+ print (f"For client { client0ID } : { client0size } frames" )
159
+ for cID , df in zip (clientIDs [1 :], trimmed_dataframes [1 :]):
160
+ dfsize = len (df )
161
+ if client0size != dfsize :
162
+ raise Exception (f"For client { cID } : expecting { client0size } , found { dfsize } " )
163
+
164
+ print ("Good. All trimmed dataframes have the same number of entries." )
134
165
135
166
#
136
167
# Extract the frames from the original videos
137
168
# and rename the file names to the timestamps (DONE)
138
169
# extract(input_dir, output_dir)
139
170
140
-
141
171
#
142
172
# Reconstruct videos (TODO)
143
173
@@ -152,11 +182,11 @@ def main(input_dir: Path, output_dir: Path):
152
182
"with missing/dropped frames inserted as (black) artificial data."
153
183
)
154
184
parser .add_argument (
155
- "--infolder" , type = str , help = "The folder containing the collected videos and CSV files with the timestamps." ,
185
+ "--infolder" , "-i" , type = str , help = "The folder containing the collected videos and CSV files with the timestamps." ,
156
186
required = True
157
187
)
158
188
parser .add_argument (
159
- "--outfolder" , type = str , help = "The folder where the repaired and aligned frames will be stored." ,
189
+ "--outfolder" , "-o" , type = str , help = "The folder where the repaired and aligned frames will be stored." ,
160
190
required = True
161
191
)
162
192
@@ -168,7 +198,7 @@ def main(input_dir: Path, output_dir: Path):
168
198
if not infolder .exists ():
169
199
raise Exception (f"Input folder '{ infolder } ' doesn't exist." )
170
200
171
- if not infolder .exists ():
201
+ if not outfolder .exists ():
172
202
raise Exception (f"Output folder '{ outfolder } ' doesn't exist." )
173
203
174
204
main (infolder , outfolder )
0 commit comments