6
6
7
7
import pandas as pd
8
8
import cv2
9
+ import re
9
10
10
11
from dataframes import compute_time_range , trim_into_interval , repair_dropped_frames
11
12
@@ -66,28 +67,38 @@ def extract(input_dir, output_dir):
66
67
cap .release ()
67
68
68
69
69
-
70
+ #
71
+ #
72
+ #
70
73
def main (input_dir : Path , output_dir : Path ):
71
74
72
75
# input_dir = Path("/Users/tbc/Desktop/videos/")
73
76
# output_dir = Path("/Users/tbc/Desktop/output_videos/")
74
77
75
- #
76
- # Find all CSV files in the directory and read it into a data frame (DONE)
78
+ print (f"Scanning dir { str (input_dir )} ..." )
77
79
78
80
#
81
+ # Find all client directories and read each client's CSV file into a data frame
82
+ clientIDpattern = "[\\ da-f]" * 16
83
+ patt = re .compile ("^" + clientIDpattern + "$" )
84
+
79
85
clientIDs : List [str ] = []
80
86
for p in input_dir .iterdir ():
81
- print ("Found client -->" , p .stem )
82
- # TODO -- we could also check if the ClientID complies to the numerical format (using regex).
83
- clientIDs .append (p .stem )
87
+ res = patt .match (p .stem )
88
+ if res :
89
+ print ("Found client -->" , p .stem )
90
+ # The directory name matched the client ID regex, so it is accepted as a client ID.
91
+ clientIDs .append (p .stem )
92
+ else :
93
+ print ("Discarding " , p .stem )
84
94
85
- # Will be filled with key=clientID:str, data=Tuple[csv:DataFrame, videofile:str]
86
- clients_data : Dict [str , Tuple [pd .DataFrame , str ]] = dict ()
95
+ n_clients = len (clientIDs )
87
96
88
- df_list = []
97
+ # Will accumulate the dataframes and mp4 file paths in the same order as the client IDs.
98
+ df_list : List [pd .DataFrame ] = []
99
+ mp4_list : List [str ] = []
89
100
90
- for cID in clientIDs [ 1 :] :
101
+ for cID in clientIDs :
91
102
client_dir = input_dir / cID
92
103
CSVs = list (client_dir .glob ("*.csv" ))
93
104
MP4s = list (client_dir .glob ("*.mp4" ))
@@ -104,40 +115,54 @@ def main(input_dir: Path, output_dir: Path):
104
115
105
116
df : pd .DataFrame = pd .read_csv (csv_file , header = None )
106
117
107
- clients_data [cID ] = (df , str (mp4_file ))
108
118
df_list .append (df )
119
+ mp4_list .append (str (mp4_file ))
109
120
110
- # Define the path to the directory containing the CSV files
111
- # csv_path = "/Users/tbc/Desktop/test_data/"
121
+ #
122
+ # Print collected info
123
+ for i in range (n_clients ):
124
+ cID = clientIDs [i ]
125
+ df = df_list [i ]
126
+ mp4 = mp4_list [i ]
127
+ print (f"For client ID { cID } : { len (df )} frames for file { mp4 } " )
112
128
113
129
#
114
130
# Repair CSVs (TODO - Mina)
115
- # repaired_client_data = dict()
116
- # for cID, (df, mp4) in clients_data :
117
- # repaired_df = repair_dropped_frames(df)
118
- # repaired_client_data[cID] = repaired_df, mp4
131
+ repaired_df_list : List [ pd . DataFrame ] = []
132
+ for cID , df in zip ( clientIDs , df_list ) :
133
+ repaired_df = repair_dropped_frames (df )
134
+ repaired_df_list . append ( repaired_df )
119
135
136
+ assert len (clientIDs ) == len (df_list ) == len (mp4_list ) == len (repaired_df_list )
120
137
121
138
#
122
139
# Find time ranges (Saurabh, To test better)
123
140
# Compute the time range
124
- #dfs = [df for k, (df, _) in clients_data]
125
- min_common , max_common = compute_time_range (df_list )
141
+ min_common , max_common = compute_time_range (repaired_df_list )
126
142
127
143
#
128
144
# Trim CSVs (TODO)
129
145
# Trim the data frames to the time range and save to new CSV files
130
- csv_path = output_dir / "test"
131
146
# TODO -- actually, we don't need to save them. We could just return them as DataFrame instances
132
- trimmed_dataframes = trim_into_interval (df_list , min_common , max_common , THRESHOLD_NS )
133
-
147
+ trimmed_dataframes = trim_into_interval (repaired_df_list , min_common , max_common , THRESHOLD_NS )
148
+
149
+ assert len (clientIDs ) == len (trimmed_dataframes ), f"Expected { len (clientIDs )} trimmed dataframes. Found f{ len (trimmed_dataframes )} "
150
+
151
+ client0ID = clientIDs [0 ]
152
+ client0size = len (trimmed_dataframes [0 ])
153
+ print (f"For client { client0ID } : { client0size } frames" )
154
+ for cID , df in zip (clientIDs [1 :], trimmed_dataframes [1 :]):
155
+ dfsize = len (df )
156
+ if client0size != dfsize :
157
+ raise Exception (f"For client { cID } : expecting { client0size } , found { dfsize } " )
158
+
159
+ print ("Good. All trimmed dataframes have the same number of entries." )
134
160
135
161
#
136
162
# Extract the frames from the original videos
137
163
# and rename the file names to the timestamps (DONE)
138
164
# extract(input_dir, output_dir)
139
165
140
-
141
166
#
142
167
# Reconstruct videos (TODO)
143
168
@@ -152,11 +177,11 @@ def main(input_dir: Path, output_dir: Path):
152
177
"with missing/dropped frames inserted as (black) artificial data."
153
178
)
154
179
parser .add_argument (
155
- "--infolder" , type = str , help = "The folder containing the collected videos and CSV files with the timestamps." ,
180
+ "--infolder" , "-i" , type = str , help = "The folder containing the collected videos and CSV files with the timestamps." ,
156
181
required = True
157
182
)
158
183
parser .add_argument (
159
- "--outfolder" , type = str , help = "The folder where the repaired and aligned frames will be stored." ,
184
+ "--outfolder" , "-o" , type = str , help = "The folder where the repaired and aligned frames will be stored." ,
160
185
required = True
161
186
)
162
187
@@ -168,7 +193,7 @@ def main(input_dir: Path, output_dir: Path):
168
193
if not infolder .exists ():
169
194
raise Exception (f"Input folder '{ infolder } ' doesn't exist." )
170
195
171
- if not infolder .exists ():
196
+ if not outfolder .exists ():
172
197
raise Exception (f"Output folder '{ outfolder } ' doesn't exist." )
173
198
174
199
main (infolder , outfolder )
0 commit comments