@@ -18,41 +18,50 @@ def compute_time_step(video_timestamps: pd.DataFrame) -> float:
18
18
"""
19
19
20
20
first_col_name = video_timestamps .columns [0 ]
21
+ # Retrieves the most frequent time difference between consecutive lines.
21
22
time_step = (video_timestamps [first_col_name ].diff ()).dropna ().value_counts ().index [0 ]
22
23
23
24
return time_step
24
25
25
26
26
def repair_dropped_frames(df: pd.DataFrame, time_step: float) -> pd.DataFrame:
    """Insert synthetic timestamps wherever consecutive frames are too far apart.

    The first column of ``df`` (whatever its name) is read as the frame
    timestamps and converted to int64 via ``pd.to_datetime``. Every pair of
    consecutive timestamps whose difference exceeds ``time_step`` is treated
    as a gap, and evenly spaced timestamps are generated to fill it.

    Args:
        df: Frame whose first column holds the timestamps. It is not modified.
        time_step: Expected difference between two consecutive timestamps,
            in the same unit as the converted int64 timestamps.

    Returns:
        A new DataFrame with two columns: ``timestamp`` (int64) and
        ``generated``, which is ``'Original'`` for rows taken from ``df`` and
        ``'Generated'`` for rows inserted into a gap. An empty input yields
        an empty frame with the same two columns.
    """
    # The first column can have any name because the original df has no header.
    first_col_name = df.columns[0]

    # Convert into a local Series instead of assigning back into ``df`` so the
    # caller's frame keeps its original dtype and values.
    timestamps = pd.to_datetime(df[first_col_name]).astype(np.int64)
    values = timestamps.to_numpy()

    # Accumulates [timestamp, label] pairs in chronological order.
    repaired_rows = []

    # Walk consecutive pairs and generate any timestamps missing between them.
    for timestamp, next_timestamp in zip(values[:-1], values[1:]):
        # The current timestamp is by definition original.
        repaired_rows.append([timestamp, 'Original'])

        gap = next_timestamp - timestamp
        if gap > time_step:
            # Number of whole time steps that fit in the gap, minus the frame
            # that is actually present. The ratio is truncated, so a gap
            # shorter than two time steps inserts nothing.
            missing_timestamps_count = int(gap / time_step) - 1
            # Spread the generated frames evenly across the gap (the spacing
            # will be close to ``time_step``).
            interval = gap / (missing_timestamps_count + 1)
            for j in range(1, missing_timestamps_count + 1):
                new_timestamp = np.int64(timestamp + j * interval)
                repaired_rows.append([new_timestamp, 'Generated'])

    # The loop never emits the final row; add it unless the input was empty.
    if len(values):
        repaired_rows.append([values[-1], 'Original'])

    # Create a new DataFrame with the repaired rows.
    columns = ['timestamp', 'generated']
    output_df = pd.DataFrame(repaired_rows, columns=columns)
    # Force int64 so the column dtype is stable even when no rows were built.
    output_df['timestamp'] = output_df['timestamp'].astype(np.int64)

    return output_df
0 commit comments