Skip to content

Commit be7a143

Browse files
Saurabh Pandey
authored and committed
Merge branch 'post_processing_saurabh' of https://github.com/DFKI-SignLanguage/RecSync-android into post_processing_saurabh
2 parents 97ea371 + 7a1a2d9 commit be7a143

File tree

4 files changed

+73
-37
lines changed

4 files changed

+73
-37
lines changed

PostProcessing/PostProcessVideos.py

Lines changed: 65 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
import pandas as pd
88
import cv2
9+
import re
910

1011
from dataframes import compute_time_range, trim_into_interval, repair_dropped_frames
1112

@@ -65,29 +66,32 @@ def extract(input_dir, output_dir):
6566
# Release the video file
6667
cap.release()
6768

68-
69-
70-
def main(input_dir: Path, output_dir: Path):
71-
72-
# input_dir = Path("/Users/tbc/Desktop/videos/")
73-
# output_dir = Path("/Users/tbc/Desktop/output_videos/")
74-
69+
#
70+
#
71+
def scan_session_dir(input_dir: Path) -> Tuple[List[str], List[pd.DataFrame], List[str]]:
7572
#
76-
# Find all CSV files in the directory and read it into a data frame (DONE)
73+
# Find all CSV files in the directory and read it into a data frame
74+
# Use the following regular expression to check if the client ID is a 16-digit hexadecimal.
75+
clientIDpattern = "[\\da-f]" * 16
76+
patt = re.compile("^" + clientIDpattern + "$")
7777

78-
#
78+
# Fill this list with the client IDs found in the directory
7979
clientIDs: List[str] = []
8080
for p in input_dir.iterdir():
81-
print("Found client -->", p.stem)
82-
# TODO -- we could also check if the ClientID complies to the numerical format (using regex).
83-
clientIDs.append(p.stem)
84-
85-
# Will be filled with key=clientID:str, data=Tuple[csv:DataFrame, videofile:str]
86-
clients_data: Dict[str, Tuple[pd.DataFrame, str]] = dict()
81+
# Check if the ClientID complies to the numerical format (using regex).
82+
res = patt.match(p.stem)
83+
if res:
84+
print("Found client -->", p.stem)
85+
clientIDs.append(p.stem)
86+
else:
87+
print("Discarding ", p.stem)
8788

88-
df_list = []
89+
#
90+
# Accumulates the list of dataframes and mp4 files in the same order of the client IDs.
91+
df_list: List[pd.DataFrame] = []
92+
mp4_list: List[str] = []
8993

90-
for cID in clientIDs[1:]:
94+
for cID in clientIDs:
9195
client_dir = input_dir / cID
9296
CSVs = list(client_dir.glob("*.csv"))
9397
MP4s = list(client_dir.glob("*.mp4"))
@@ -104,40 +108,66 @@ def main(input_dir: Path, output_dir: Path):
104108

105109
df: pd.DataFrame = pd.read_csv(csv_file, header=None)
106110

107-
clients_data[cID] = (df, str(mp4_file))
108111
df_list.append(df)
112+
mp4_list.append(str(mp4_file))
109113

110-
# Define the path to the directory containing the CSV files
111-
# csv_path = "/Users/tbc/Desktop/test_data/"
114+
return clientIDs, df_list, mp4_list
115+
116+
#
117+
#
118+
#
119+
def main(input_dir: Path, output_dir: Path):
120+
121+
print(f"Scanning dir {str(input_dir)}...")
122+
clientIDs, df_list, mp4_list = scan_session_dir(input_dir)
123+
124+
n_clients = len(clientIDs)
125+
126+
127+
#
128+
# Print collected info
129+
for i in range(n_clients):
130+
cID = clientIDs[i]
131+
df = df_list[i]
132+
mp4 = mp4_list[i]
133+
print(f"For client ID {cID}: {len(df)} frames for file {mp4}")
112134

113135
#
114136
# Repair CSVs (TODO - Mina)
115-
# repaired_client_data = dict()
116-
# for cID, (df, mp4) in clients_data:
117-
# repaired_df = repair_dropped_frames(df)
118-
# repaired_client_data[cID] = repaired_df, mp4
137+
repaired_df_list: List[pd.DataFrame] = []
138+
for cID, df in zip(clientIDs, df_list):
139+
repaired_df = repair_dropped_frames(df)
140+
repaired_df_list.append(repaired_df)
119141

142+
assert len(clientIDs) == len(df_list) == len(mp4_list) == len(repaired_df_list)
120143

121144
#
122145
# Find time ranges (Saurabh, To test better)
123146
# Compute the time range
124-
#dfs = [df for k, (df, _) in clients_data]
125-
min_common, max_common = compute_time_range(df_list)
147+
min_common, max_common = compute_time_range(repaired_df_list)
126148

127149
#
128150
# Trim CSVs (TODO)
129-
# Trim the data frames to the time range and save to new CSV files
130-
csv_path = output_dir / "test"
131-
# TODO -- actually, we don't need to save them. We could just return them as DataFrame instances
132-
trimmed_dataframes = trim_into_interval(df_list, min_common, max_common, THRESHOLD_NS)
133-
151+
# Trim the data frames to the time range
152+
trimmed_dataframes = trim_into_interval(repaired_df_list, min_common, max_common, THRESHOLD_NS)
153+
154+
assert len(clientIDs) == len(trimmed_dataframes), f"Expected {len(clientIDs)} trimmed dataframes. Found f{len(trimmed_dataframes)}"
155+
156+
client0ID = clientIDs[0]
157+
client0size = len(trimmed_dataframes[0])
158+
print(f"For client {client0ID}: {client0size} frames")
159+
for cID, df in zip(clientIDs[1:], trimmed_dataframes[1:]):
160+
dfsize = len(df)
161+
if client0size != dfsize:
162+
raise Exception(f"For client {cID}: expecting {client0size}, found {dfsize}")
163+
164+
print("Good. All trimmed dataframes have the same number of entries.")
134165

135166
#
136167
# Extract the frames from the original videos
137168
# and rename the file names to the timestamps (DONE)
138169
# extract(input_dir, output_dir)
139170

140-
141171
#
142172
# Reconstruct videos (TODO)
143173

@@ -152,11 +182,11 @@ def main(input_dir: Path, output_dir: Path):
152182
"with missing/dropped frames inserted as (black) artificial data."
153183
)
154184
parser.add_argument(
155-
"--infolder", type=str, help="The folder containing the collected videos and CSV files with the timestamps.",
185+
"--infolder", "-i", type=str, help="The folder containing the collected videos and CSV files with the timestamps.",
156186
required=True
157187
)
158188
parser.add_argument(
159-
"--outfolder", type=str, help="The folder where the repaired and aligned frames will be stored.",
189+
"--outfolder", "-o", type=str, help="The folder where the repaired and aligned frames will be stored.",
160190
required=True
161191
)
162192

@@ -168,7 +198,7 @@ def main(input_dir: Path, output_dir: Path):
168198
if not infolder.exists():
169199
raise Exception(f"Input folder '{infolder}' doesn't exist.")
170200

171-
if not infolder.exists():
201+
if not outfolder.exists():
172202
raise Exception(f"Output folder '{outfolder}' doesn't exist.")
173203

174204
main(infolder, outfolder)

PostProcessing/dataframes.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,9 @@
22

33
from typing import Tuple
44

5+
56
def repair_dropped_frames(df: pd.DataFrame) -> pd.DataFrame:
6-
pass
7+
return df
78

89
def save_dataframes(dataframes, prefix='df'):
910
# Generate filenames based on a pattern or numbering scheme

PostProcessing/video.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
11
import cv2
22
# or ffmpeg
33

4+
import pandas as pd
5+
6+
7+
def extract_frames(video_file: str, timestamps: pd.DataFrame, output_dir: str):
8+
pass

remote_control/requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ itsdangerous==2.1.2
1313
Jinja2==3.1.2
1414
MarkupSafe==2.1.1
1515
multipart==0.2.4
16-
numpy==1.21.6
16+
numpy==1.24.3
1717
opencv-python==4.7.0.72
1818
packaging==23.1
1919
pandas==1.3.5

0 commit comments

Comments
 (0)