@@ -79,22 +79,25 @@ def main(input_dir: Path, output_dir: Path):

    #
    # Find all CSV files in the directory and read them into data frames
+   # Use the following regular expression to check if the client ID is a 16-digit hexadecimal.
    clientIDpattern = "[\\da-f]" * 16
    patt = re.compile("^" + clientIDpattern + "$")

+   # Fill this list with the client IDs found in the directory
    clientIDs: List[str] = []
    for p in input_dir.iterdir():
+       # Check if the ClientID complies with the numerical format (using regex).
        res = patt.match(p.stem)
        if res:
            print("Found client -->", p.stem)
-           # TODO -- we could also check if the ClientID complies to the numerical format (using regex).
            clientIDs.append(p.stem)
        else:
            print("Discarding ", p.stem)

    n_clients = len(clientIDs)

-   # Will accumulate the list of dataframes and mp4 files in the same order of the client IDs.
+   #
+   # Accumulates the lists of dataframes and mp4 files in the same order as the client IDs.
    df_list: List[pd.DataFrame] = []
    mp4_list: List[str] = []

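For reference, here is a minimal, self-contained sketch of the client-ID scan this hunk documents: it builds the same 16-character hexadecimal pattern and collects the matching file stems from a directory. The `find_client_ids` helper and the path in the `__main__` guard are illustrative only, not part of the repository.

```python
import re
from pathlib import Path
from typing import List

# A client ID is expected to be a 16-digit lowercase hexadecimal string.
clientIDpattern = "[\\da-f]" * 16
patt = re.compile("^" + clientIDpattern + "$")

def find_client_ids(input_dir: Path) -> List[str]:
    """Collect the file stems in input_dir that look like valid client IDs."""
    clientIDs: List[str] = []
    for p in input_dir.iterdir():
        if patt.match(p.stem):
            clientIDs.append(p.stem)  # e.g. "0123456789abcdef"
        else:
            print("Discarding", p.stem)
    return clientIDs

if __name__ == "__main__":
    # Illustrative path; point this at the actual recordings directory.
    print(find_client_ids(Path("./recordings")))
```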
@@ -142,8 +145,7 @@ def main(input_dir: Path, output_dir: Path):

    #
    # Trim CSVs (TODO)
-   # Trim the data frames to the time range and save to new CSV files
-   # TODO -- actually, we don't need to save them. We could just return them as DataFrame instances
+   # Trim the data frames to the time range
    trimmed_dataframes = trim_into_interval(repaired_df_list, min_common, max_common, THRESHOLD_NS)

    assert len(clientIDs) == len(trimmed_dataframes), f"Expected {len(clientIDs)} trimmed dataframes. Found {len(trimmed_dataframes)}"
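`trim_into_interval` is defined elsewhere in the repository and its body is not part of this diff. As a rough sketch of the idea only (not the project's actual implementation), trimming each dataframe to the common `[min_common, max_common]` window could look like the following; the `timestamp` column name and the way `threshold_ns` widens the window are assumptions.

```python
from typing import List
import pandas as pd

def trim_into_interval_sketch(dfs: List[pd.DataFrame],
                              min_common: int,
                              max_common: int,
                              threshold_ns: int) -> List[pd.DataFrame]:
    """Keep only the rows whose timestamp falls inside the common interval.

    Assumes each dataframe has an integer 'timestamp' column in nanoseconds;
    threshold_ns widens the interval slightly to tolerate clock jitter.
    """
    trimmed: List[pd.DataFrame] = []
    for df in dfs:
        mask = (df["timestamp"] >= min_common - threshold_ns) & \
               (df["timestamp"] <= max_common + threshold_ns)
        trimmed.append(df.loc[mask].reset_index(drop=True))
    return trimmed
```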