Commit bff9afd

Merge branch 'remotecontrolling' of https://github.com/DFKI-SignLanguage/RecSync-android into remotecontrolling

2 parents: 3721aae + 68986dd
File tree

7 files changed: +623 −6 lines changed

.gitignore

Lines changed: 4 additions & 0 deletions

@@ -42,3 +42,7 @@ opencamera-extended-firebase-adminsdk-yv5yz-e33a8ce5c1.json
 # OS
 .DS_Store
+
+#
+# Python stuff
+*.pyc
Lines changed: 164 additions & 0 deletions

@@ -0,0 +1,164 @@
import argparse
from pathlib import Path
from typing import List, Tuple
import tempfile

import pandas as pd
import re

from dataframes import compute_time_range, trim_repaired_into_interval
from dataframes import repair_dropped_frames, compute_time_step

from video import extract_frames
from video import rebuild_video


THRESHOLD_NS = 10 * 1000 * 1000  # 10 ms, in nanoseconds (millis * micros * nanos)


def scan_session_dir(input_dir: Path) -> Tuple[List[str], List[pd.DataFrame], List[str]]:
    #
    # Find all CSV files in the directory and read each into a data frame.
    # Use the following regular expression to check whether the client ID is a 16-digit hexadecimal.
    clientIDpattern = "[\\da-f]" * 16
    patt = re.compile("^" + clientIDpattern + "$")

    # Fill this list with the client IDs found in the directory
    clientIDs: List[str] = []
    for p in input_dir.iterdir():
        # Check if the ClientID complies with the numerical format (using regex).
        res = patt.match(p.stem)
        if res:
            print("Found client -->", p.stem)
            clientIDs.append(p.stem)
        else:
            print("Discarding ", p.stem)

    #
    # Accumulate the lists of dataframes and mp4 files in the same order as the client IDs.
    df_list: List[pd.DataFrame] = []
    mp4_list: List[str] = []

    for cID in clientIDs:
        client_dir = input_dir / cID
        CSVs = list(client_dir.glob("*.csv"))
        MP4s = list(client_dir.glob("*.mp4"))
        #
        # Consistency check. Each clientID folder must have exactly 1 CSV and 1 mp4.
        if len(CSVs) != 1:
            raise Exception(f"Expecting 1 CSV file for client {cID}. Found {len(CSVs)}.")

        if len(MP4s) != 1:
            raise Exception(f"Expecting 1 MP4 file for client {cID}. Found {len(MP4s)}.")

        csv_file = CSVs[0]
        mp4_file = MP4s[0]

        df: pd.DataFrame = pd.read_csv(csv_file, header=None)

        df_list.append(df)
        mp4_list.append(str(mp4_file))

    return clientIDs, df_list, mp4_list


#
#
#
def main(input_dir: Path, output_dir: Path):

    print(f"Scanning dir {str(input_dir)}...")
    clientIDs, df_list, mp4_list = scan_session_dir(input_dir)

    n_clients = len(clientIDs)

    #
    # Print collected info
    for i in range(n_clients):
        cID = clientIDs[i]
        df = df_list[i]
        mp4 = mp4_list[i]
        print(f"For client ID {cID}: {len(df)} frames for file {mp4}")

    #
    # Repair CSVs
    repaired_df_list: List[pd.DataFrame] = []
    for cID, df in zip(clientIDs, df_list):
        time_step = compute_time_step(df)
        repaired_df = repair_dropped_frames(df=df, time_step=time_step)
        repaired_df_list.append(repaired_df)

    assert len(clientIDs) == len(df_list) == len(mp4_list) == len(repaired_df_list)

    #
    # Trim CSVs
    # Find time ranges
    min_common, max_common = compute_time_range(repaired_df_list)
    # Trim the data frames to the time range
    trimmed_dataframes = trim_repaired_into_interval(repaired_df_list, min_common, max_common, THRESHOLD_NS)

    assert len(clientIDs) == len(trimmed_dataframes), f"Expected {len(clientIDs)} trimmed dataframes. Found {len(trimmed_dataframes)}."

    # Check that all the resulting dataframes have the same number of rows
    client0ID = clientIDs[0]
    client0size = len(trimmed_dataframes[0])
    print(f"For client {client0ID}: {client0size} frames")
    for cID, df in zip(clientIDs[1:], trimmed_dataframes[1:]):
        dfsize = len(df)
        if client0size != dfsize:
            raise Exception(f"For client {cID}: expecting {client0size} frames, found {dfsize}")

    print("Good. All trimmed dataframes have the same number of entries.")

    #
    # Unpack the original videos, and repack them according to the repaired and trimmed dataframes.
    for i, cID in enumerate(clientIDs):
        orig_df = df_list[i]
        trimmed_df = trimmed_dataframes[i]
        video_file = mp4_list[i]
        # Create a temporary directory for frame unpacking
        with tempfile.TemporaryDirectory(prefix="RecSyncNG", suffix=cID) as tmp_dir:
            # Extract the frames from the original video
            # and rename the files to their timestamps
            print(f"Extracting {len(orig_df)} frames from '{video_file}'...")
            extract_frames(video_file=video_file, timestamps_df=orig_df, output_dir=tmp_dir)

            # Reconstruct the video
            video_out_filepath = output_dir / (cID + ".mp4")
            rebuild_video(dir=Path(tmp_dir), frames=trimmed_df, outfile=video_out_filepath)
            # And also save the CSV
            csv_out_filepath = video_out_filepath.with_suffix(".csv")
            trimmed_df.to_csv(path_or_buf=csv_out_filepath, header=True, index=False)


#
# MAIN
if __name__ == "__main__":

    parser = argparse.ArgumentParser(
        description="Fixes the videos produced by the RecSync recording sessions. "
                    "Output videos will have the same number of frames, "
                    "with missing/dropped frames inserted as (black) artificial data."
    )
    parser.add_argument(
        "--infolder", "-i", type=str, help="The folder containing the collected videos and CSV files with the timestamps.",
        required=True
    )
    parser.add_argument(
        "--outfolder", "-o", type=str, help="The folder where the repaired and aligned frames will be stored.",
        required=True
    )

    args = parser.parse_args()

    infolder = Path(args.infolder)
    outfolder = Path(args.outfolder)

    if not infolder.exists():
        raise Exception(f"Input folder '{infolder}' doesn't exist.")

    if not outfolder.exists():
        raise Exception(f"Output folder '{outfolder}' doesn't exist.")

    main(infolder, outfolder)
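
For orientation, here is a minimal sketch of how the script can be driven programmatically rather than via the CLI; the folder names are made up, and `main` is the function defined above:

```python
from pathlib import Path

# Hypothetical paths: the input folder holds one 16-hex-digit subdir per client.
in_dir = Path("recordings/session01")
out_dir = Path("recordings/session01-repaired")
out_dir.mkdir(parents=True, exist_ok=True)  # the script requires it to exist

main(in_dir, out_dir)  # writes one repaired <clientID>.mp4 + .csv per client
```

This is equivalent to running the script with `--infolder recordings/session01 --outfolder recordings/session01-repaired`.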

PostProcessing/README.md

Lines changed: 23 additions & 0 deletions

@@ -0,0 +1,23 @@
# RecSync PostProcessing

This utility "repairs" the videos generated by the RecSync NG app:

* Videos are unpacked
* Missing frames are injected as black frames
* Videos are re-packed, so that they all have the same starting frame and the same number of frames
* Optionally, a frame counter is added


## Installing

Create an environment and install the packages listed in `requirements.txt`.


## Testing

Export an environment variable pointing to the root of your test material (the directory containing the client subdirs), then run the `pytest` command.

```bash
export RECSYNCH_SESSION_DIR=path/to/my/stuff
pytest
```
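
For illustration, a test along these lines might look like the following sketch; the test name and body are assumptions, not the repo's actual test suite (only `dataframes` and the env var above come from this commit):

```python
import os
from pathlib import Path

import pandas as pd
import pytest

from dataframes import compute_time_step  # module added in this commit

# Hypothetical smoke test: each client subdir holds one timestamp CSV,
# and a positive time step can be derived from it.
@pytest.mark.skipif("RECSYNCH_SESSION_DIR" not in os.environ,
                    reason="RECSYNCH_SESSION_DIR not set")
def test_time_step_per_client():
    session_dir = Path(os.environ["RECSYNCH_SESSION_DIR"])
    for client_dir in session_dir.iterdir():
        if not client_dir.is_dir():
            continue
        csvs = list(client_dir.glob("*.csv"))
        assert len(csvs) == 1
        df = pd.read_csv(csvs[0], header=None)
        assert compute_time_step(df) > 0
```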

PostProcessing/dataframes.py

Lines changed: 123 additions & 0 deletions

@@ -0,0 +1,123 @@
import pandas as pd
import numpy as np

from typing import Tuple, List


def compute_time_step(video_timestamps: pd.DataFrame) -> float:
    """
    Compute the time step of a video based on its timestamps.

    Parameters:
    video_timestamps (pd.DataFrame): A pandas DataFrame containing the timestamps of a video.

    Returns:
    float: The time step of the video timestamps.
    """

    first_col_name = video_timestamps.columns[0]
    # Retrieve the most frequent time difference between consecutive lines.
    time_step = (video_timestamps[first_col_name].diff()).dropna().value_counts().index[0]

    return time_step


def repair_dropped_frames(df: pd.DataFrame, time_step: float) -> pd.DataFrame:
    # The name of the first column (can be anything, as the original df doesn't have a header)
    first_col_name = df.columns[0]

    # Force the type of the timestamps to int64
    df[first_col_name] = pd.to_datetime(df[first_col_name]).astype(np.int64)
    # Retrieve the timestamps into a Series
    timestamps = df[first_col_name]
    # Will accumulate the repaired rows
    repaired_rows = []

    # Check for missing timestamps and generate them
    for i in range(len(timestamps) - 1):
        timestamp = timestamps.iloc[i]
        next_timestamp = timestamps.iloc[i + 1]

        # The current timestamp is by definition original
        repaired_rows.append([timestamp, 'Original'])

        # If the next timestamp exceeds the expected time step
        if next_timestamp - timestamp > time_step:
            # Estimate the number of missing frames
            missing_timestamps_count = round((next_timestamp - timestamp) / time_step) - 1
            # Estimate a time interval between them (will be very similar to the input time_step)
            interval = (next_timestamp - timestamp) / (missing_timestamps_count + 1)
            # Generate the missing lines
            for j in range(1, missing_timestamps_count + 1):
                new_timestamp = (timestamp + j * interval).astype(np.int64)
                repaired_rows.append([new_timestamp, 'Generated'])

    # Add the last row
    repaired_rows.append([timestamps.iloc[-1], 'Original'])
    # print(len(repaired_rows))

    # Create a new DataFrame with the repaired rows
    columns = ['timestamp', 'generated']
    output_df = pd.DataFrame(repaired_rows, columns=columns)
    # Force the output timestamp type to int64
    output_df['timestamp'] = pd.to_datetime(output_df['timestamp']).astype(np.int64)

    return output_df


def save_dataframes(dataframes, prefix='df') -> None:
    # Generate filenames based on a pattern or numbering scheme
    filenames = [f"{prefix}{i}.csv" for i in range(1, len(dataframes) + 1)]

    # Save each DataFrame to a separate file
    for i, df in enumerate(dataframes):
        filename = filenames[i]
        df.to_csv(filename, index=False, header=False)
    print("DataFrames saved successfully.")


# Function to find the largest value among the first entries of all dataframes
def find_largest_first_entry(dfs):
    largest_value = float('-inf')
    for df in dfs:
        first_entry = df.iloc[0, 0]
        if first_entry > largest_value:
            largest_value = first_entry
    return largest_value


# Function to find the smallest value among the last entries of the dataframes
def find_smallest_last_entry(dfs):
    smallest_value = float('inf')
    for df in dfs:
        last_entry = df.iloc[-1, 0]
        if last_entry < smallest_value:
            smallest_value = last_entry
    return smallest_value


# Function to find the common time range covered by all dataframes
def compute_time_range(dfs) -> Tuple[int, int]:
    # Find the lowest and highest timestamps common to all the data frames
    lower_value = find_largest_first_entry(dfs)
    higher_value = find_smallest_last_entry(dfs)

    # return the results
    return lower_value, higher_value


# Function to trim dataframes to the specified interval (plus/minus a threshold)
def trim_repaired_into_interval(dfs, min_common, max_common, threshold) -> List[pd.DataFrame]:

    trimmed_dataframes: List[pd.DataFrame] = []

    lo_threshold = min_common - threshold
    hi_threshold = max_common + threshold

    for df in dfs:

        selection_mask = df["timestamp"].between(lo_threshold, hi_threshold, inclusive='both')
        trimmed_df = df[selection_mask]
        trimmed_dataframes.append(trimmed_df)

    return trimmed_dataframes
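
A small synthetic check of how these functions compose, with made-up nanosecond timestamps (not from the repo's tests):

```python
import pandas as pd

from dataframes import (compute_time_step, repair_dropped_frames,
                        compute_time_range, trim_repaired_into_interval)

STEP = 33_000_000  # ~33 ms in ns, i.e. ~30 fps

# Client A: frames at steps 0,1,2,4 (frame 3 dropped); client B: frames 1..5.
df_a = pd.DataFrame([0, STEP, 2 * STEP, 4 * STEP])
df_b = pd.DataFrame([STEP, 2 * STEP, 3 * STEP, 4 * STEP, 5 * STEP])

rep_a = repair_dropped_frames(df_a, compute_time_step(df_a))  # adds one 'Generated' row
rep_b = repair_dropped_frames(df_b, compute_time_step(df_b))  # no gaps, unchanged

lo, hi = compute_time_range([rep_a, rep_b])  # common interval: [STEP, 4*STEP]
trimmed = trim_repaired_into_interval([rep_a, rep_b], lo, hi, threshold=0)

# Both clients end up with the same 4 frames (steps 1..4).
assert all(len(df) == 4 for df in trimmed)
```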
