Skip to content

Commit c6be28a

Browse files
authored
Merge pull request #18 from aperture-data/add_video_features
Add video features
2 parents f19cc43 + 9e3b429 commit c6be28a

File tree

4 files changed

+304
-2
lines changed

4 files changed

+304
-2
lines changed

aperturedb/ImageDownloader.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ class ImageDownloaderCSV(CSVParser.CSVParser):
2626
def __init__(self, filename, check_image=True):
2727

2828
self.has_filename = False
29-
self.check_image = check_image
29+
self.check_img = check_image
3030

3131
super().__init__(filename)
3232

aperturedb/NotebookHelpers.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,10 @@
33
import numpy as np
44

55
from PIL import Image
6+
from IPython.display import Video
67
from IPython.display import display as ds
78

8-
DESTINATION_FOLDER = "result_images"
9+
DESTINATION_FOLDER = "results"
910

1011
def check_folder(folder):
1112
if not os.path.exists(folder):
@@ -53,3 +54,14 @@ def draw_bboxes(image, boxes=[], tags=[], save=False):
5354
check_folder(DESTINATION_FOLDER)
5455
img_file = DESTINATION_FOLDER + '/res_bboxes.jpg'
5556
cv2.imwrite(img_file, cv_image)
57+
58+
def display_video_mp4(blob):
    '''
    Show an encoded video inline in the notebook output.

    The bytes are written to "video_tmp.mp4" inside DESTINATION_FOLDER and
    that file is then embedded in the notebook. Every call writes to the
    same path, so the previous temporary video is overwritten.

    blob: encoded video content (bytes); written to disk unchanged.
    '''

    # check_folder runs first (presumably creating the folder when missing).
    check_folder(DESTINATION_FOLDER)

    name = DESTINATION_FOLDER + "/" + "video_tmp.mp4"

    # The context manager closes the handle even if the write fails.
    with open(name, 'wb') as fd:
        fd.write(blob)

    ds(Video(name, embed=True))

aperturedb/VideoDownloader.py

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
import time
2+
import requests
3+
import os
4+
from os import path
5+
6+
import cv2
7+
import numpy as np
8+
9+
from aperturedb import ParallelLoader
10+
from aperturedb import CSVParser
11+
from aperturedb import ProgressBar
12+
13+
HEADER_PATH = "filename"
14+
HEADER_URL = "url"
15+
16+
class VideoDownloaderCSV(CSVParser.CSVParser):

    '''
        CSV-backed source of (url, filename) pairs for downloading videos.

        The csv file must contain a "url" column. A "filename" column is
        optional; when it is absent, the destination path is derived from
        the last path component of the url.
    '''

    def __init__(self, filename, check_video=True):

        # Both attributes are assigned before the parent constructor runs
        # (presumably because it triggers validate(), which may flip
        # has_filename -- confirm against CSVParser).
        self.check_video = check_video
        self.has_filename = False

        super().__init__(filename)

    def __getitem__(self, idx):
        '''
            Return the (url, destination filename) pair for row "idx".
        '''

        url = self.df.loc[idx, HEADER_URL]

        filename = (self.df.loc[idx, HEADER_PATH]
                    if self.has_filename
                    else self.url_to_filename(url))

        return url, filename

    def url_to_filename(self, url):
        '''
            Map a url to a path under /tmp/videos/ named after the text
            following the last "/" of the url.
        '''

        basename = url.rsplit("/", 1)[-1]

        return "/tmp/videos/" + basename

    def validate(self):
        '''
            Record the csv header, require the "url" column and note
            whether a "filename" column is available.
        '''

        columns = list(self.df.columns.values)
        self.header = columns

        if HEADER_URL not in columns:
            raise Exception("Error with CSV file field: url. Must be a field")

        if HEADER_PATH in columns:
            self.has_filename = True
59+
60+
class VideoDownloader(ParallelLoader.ParallelLoader):

    '''
        ApertureDB Video Downloader.

        Consumes a generator that yields (url, filename) pairs and stores
        the content found at each url in the corresponding filename.
        Failures are counted in self.error_counter and per-video download
        times are collected in self.times_arr.
    '''

    def __init__(self, db, dry_run=False):

        super().__init__(db, dry_run=dry_run)

        self.type = "video"

        # When True, download_video skips urls whose destination file
        # already exists and can be opened. Off by default.
        self.check_video = False

    def _can_open(self, filename):
        '''
            Return True if OpenCV can open "filename" as a video.
            The capture handle is always released before returning.
        '''

        capture = cv2.VideoCapture(filename)
        try:
            return bool(capture.isOpened())
        finally:
            capture.release()

    def check_if_video_is_ok(self, filename, url):
        '''
            Return True when "filename" exists and can be opened as a video.
            "url" is only used in the diagnostic messages.
        '''

        if not os.path.exists(filename):
            return False

        try:
            readable = self._can_open(filename)
        except Exception:
            print("Video present but error decoding:", url)
            return False

        if not readable:
            print("Video present but error reading it:", url)
            return False

        return True

    def _fetch_and_validate(self, url, filename):
        '''
            Download "url" into "filename" and verify the result can be
            opened as a video. Returns True on success. On failure a
            message is printed, any unusable file is removed and False
            is returned.
        '''

        try:
            videodata = requests.get(url)
        except requests.RequestException as e:
            # A network failure is reported like any other failed url
            # instead of terminating the calling worker thread.
            print("Error downloading video:", url, e)
            return False

        if not videodata.ok:
            print("URL not found:", url)
            return False

        with open(filename, "wb") as fd:
            fd.write(videodata.content)

        try:
            readable = self._can_open(filename)
        except Exception:
            print("Downloaded Video cannot be decoded:", url)
            readable = False
        else:
            if not readable:
                print("Downloaded Video size error:", url)

        if not readable:
            # The capture is already released here, so the file is no
            # longer held open when it gets deleted.
            os.remove(filename)

        return readable

    def download_video(self, url, filename):
        '''
            Download a single video and record how long it took.

            If self.check_video is set and a readable copy already exists
            at "filename", nothing is downloaded and no time is recorded.
        '''

        start = time.time()

        if self.check_video and self.check_if_video_is_ok(filename, url):
            return

        # A bare file name has an empty dirname; os.makedirs("") would raise.
        folder = os.path.dirname(filename)
        if folder:
            os.makedirs(folder, exist_ok=True)

        if not self._fetch_and_validate(url, filename):
            self.error_counter += 1

        self.times_arr.append(time.time() - start)

    def worker(self, thid, generator, start, end):
        '''
            Download the generator elements with index in [start, end).
            Only thread 0 reports progress, and only when self.stats is set.
        '''

        show_progress = thid == 0 and self.stats

        if show_progress:
            pb = ProgressBar.ProgressBar("download_progress.txt")

        for i in range(start, end):

            url, filename = generator[i]

            self.download_video(url, filename)

            if show_progress:
                pb.update((i - start) / (end - start))

        if show_progress:
            pb.update(1)

    def print_stats(self):
        '''
            Print a summary of download times, throughput and error count.
        '''

        print("====== ApertureDB VideoDownloader Stats ======")

        times = np.array(self.times_arr)

        if times.size == 0:
            # Avoids nan values and numpy warnings on an empty array.
            print("No video downloads were timed.")
        else:
            print("Avg Video download time(s):", np.mean(times))
            print("Video download time std:", np.std(times))
            print("Avg download throughput (videos/s):",
                  1 / np.mean(times) * self.numthreads)

        print("Total time(s):", self.ingestion_time)

        # Skip the ratio when no time was recorded (division by zero).
        if self.ingestion_time:
            print("Overall throughput (videos/s):",
                  self.total_elements / self.ingestion_time)

        print("Total errors encountered:", self.error_counter)
        print("=============================================")

aperturedb/VideoLoader.py

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
import math
2+
import time
3+
from threading import Thread
4+
5+
import numpy as np
6+
import cv2
7+
8+
from aperturedb import Status
9+
from aperturedb import ParallelLoader
10+
from aperturedb import CSVParser
11+
12+
HEADER_PATH = "filename"
13+
PROPERTIES = "properties"
14+
CONSTRAINTS = "constraints"
15+
16+
class VideoGeneratorCSV(CSVParser.CSVParser):

    '''
        ApertureDB Video Data loader.
        Expects a csv file with the following columns:

            filename,PROP_NAME_1, ... PROP_NAME_N,constraint_PROP1

        Example csv file:
        filename,id,label,constraint_id
        /home/user/file1.mp4,321423532,dog,321423532
        /home/user/file2.mp4,42342522,cat,42342522
        ...

        Indexing the generator returns a dict with "video_blob" and,
        when present, "properties" and "constraints". The dict is empty
        if the video file could not be read.
    '''

    def __init__(self, filename, check_video=True):

        super().__init__(filename)

        self.check_video = check_video

        # Every column after "filename" is either a constraint (name starts
        # with CSVParser.CONTRAINTS_PREFIX) or a plain property.
        columns = self.header[1:]
        self.props_keys = [x for x in columns
                           if not x.startswith(CSVParser.CONTRAINTS_PREFIX)]
        self.constraints_keys = [x for x in columns
                                 if x.startswith(CSVParser.CONTRAINTS_PREFIX)]

    def __getitem__(self, idx):
        '''
            Build the data dict for row "idx" of the csv file.
        '''

        filename = self.df.loc[idx, HEADER_PATH]
        data = {}

        video_ok, video = self.load_video(filename)
        if not video_ok:
            # Passing filename as a separate argument prints the same text
            # for strings and does not fail on non-string cell values.
            print("Error loading video:", filename)
            return data

        data["video_blob"] = video

        # parse_properties / parse_constraints are not defined in this
        # class (presumably inherited from CSVParser -- confirm).
        properties = self.parse_properties(self.df, idx)
        constraints = self.parse_constraints(self.df, idx)

        if properties:
            data[PROPERTIES] = properties

        if constraints:
            data[CONSTRAINTS] = constraints

        return data

    def load_video(self, filename):
        '''
            Read the encoded video stored at "filename".

            Returns (True, bytes) on success and (False, None) if the file
            cannot be read.
        '''

        if self.check_video:
            # NOTE: this check is advisory. A file OpenCV cannot open is
            # reported, but its bytes are still read and returned below.
            try:
                capture = cv2.VideoCapture(filename)
                try:
                    if not capture.isOpened():
                        print("Video reading Error:", filename)
                finally:
                    capture.release()
            except Exception:
                print("Video Error:", filename)

        try:
            with open(filename, "rb") as fd:
                return True, fd.read()
        except Exception:
            print("Video Error:", filename)

        return False, None

    def validate(self):
        '''
            Record the csv header and require "filename" as first column.
        '''

        self.header = list(self.df.columns.values)

        # An empty header is rejected with the same error instead of an
        # IndexError.
        if not self.header or self.header[0] != HEADER_PATH:
            raise Exception("Error with CSV file field: filename. Must be first field")
89+
90+
class VideoLoader(ParallelLoader.ParallelLoader):

    '''
        ApertureDB Video Loader.

        This class is to be used in combination with a "generator".
        The generator must be an iterable object that generates "video_data"
        elements:
            video_data = {
                "properties":  properties,
                "constraints": constraints,
                "operations":  operations,
                "format":      format,
                "video_blob":  (bytes),
            }
        Only "video_blob" is required; elements without it are skipped.
    '''

    def __init__(self, db, dry_run=False):

        super().__init__(db, dry_run=dry_run)

        self.type = "video"

    def generate_batch(self, video_data):
        '''
            Turn a list of video_data elements into an AddVideo query.

            Returns (q, blobs): the list of AddVideo commands and the list
            of encoded videos, in matching order. Elements whose
            "video_blob" is missing, None or empty are skipped with a
            warning and contribute to neither list.
        '''

        q = []
        blobs = []

        for data in video_data:

            blob = data["video_blob"] if "video_blob" in data else None

            # Checked before building the command; None is treated like an
            # empty blob instead of failing on len(None).
            if blob is None or len(blob) == 0:
                print("WARNING: Skipping empty video.")
                continue

            params = {}

            if "properties" in data:
                params["properties"] = data["properties"]
            if "constraints" in data:
                # Constraints are sent as the "if_not_found" condition.
                params["if_not_found"] = data["constraints"]
            if "operations" in data:
                params["operations"] = data["operations"]
            if "format" in data:
                params["format"] = data["format"]

            blobs.append(blob)
            q.append({"AddVideo": params})

        return q, blobs

0 commit comments

Comments
 (0)