-
Notifications
You must be signed in to change notification settings - Fork 74
C++ implementation of crop transform #967
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
b171309
3248f2f
74fe47f
c77fbd7
0511233
bba2696
4f121cb
07e7f60
67478dd
88bc94a
a15d458
8f4507b
fca8b83
8dfbee9
c1836b8
abb80eb
f819ac1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -12,9 +12,15 @@ | |
| import torch | ||
| from PIL import Image | ||
|
|
||
| from .utils import sanitize_filtergraph_expression | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Note that pulling in code from
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we expect to pull in more things from |
||
|
|
||
| # Run this script to update the resources used in unit tests. The resources are all derived | ||
| # from source media already checked into the repo. | ||
|
|
||
| SCRIPT_DIR = Path(__file__).resolve().parent | ||
| TORCHCODEC_PATH = SCRIPT_DIR.parent | ||
| RESOURCES_DIR = TORCHCODEC_PATH / "test" / "resources" | ||
|
|
||
|
|
||
| def convert_image_to_tensor(image_path): | ||
| image_path = Path(image_path) | ||
|
|
@@ -31,7 +37,18 @@ def convert_image_to_tensor(image_path): | |
| image_path.unlink() | ||
|
|
||
|
|
||
| def get_frame_by_index(video_path, frame, output_path, stream): | ||
| def get_frame_by_index(video_path, frame, output_path, stream, filters=None): | ||
| # Note that we have an explicit format conversion to rgb24 in our filtergraph specification, | ||
| # which always happens BEFORE any of the filters that we receive as input. We do this to | ||
| # ensure that the color conversion happens BEFORE the filters, matching the behavior of the | ||
| # torchcodec filtergraph implementation. | ||
| # | ||
| # Not doing this would result in the color conversion happening AFTER the filters, which | ||
| # would result in different color values for the same frame. | ||
| filtergraph = f"select='eq(n\\,{frame})',format=rgb24" | ||
| if filters is not None: | ||
| filtergraph = filtergraph + f",{filters}" | ||
|
|
||
| cmd = [ | ||
| "ffmpeg", | ||
| "-y", | ||
|
|
@@ -40,11 +57,11 @@ def get_frame_by_index(video_path, frame, output_path, stream): | |
| "-map", | ||
| f"0:{stream}", | ||
| "-vf", | ||
| f"select=eq(n\\,{frame})", | ||
| "-vsync", | ||
| "vfr", | ||
| "-q:v", | ||
| "2", | ||
| filtergraph, | ||
| "-fps_mode", | ||
| "passthrough", | ||
| "-update", | ||
| "1", | ||
| output_path, | ||
| ] | ||
| subprocess.run(cmd, check=True) | ||
|
|
@@ -65,14 +82,9 @@ def get_frame_by_timestamp(video_path, timestamp, output_path): | |
| subprocess.run(cmd, check=True) | ||
|
|
||
|
|
||
| def main(): | ||
| SCRIPT_DIR = Path(__file__).resolve().parent | ||
| TORCHCODEC_PATH = SCRIPT_DIR.parent | ||
| RESOURCES_DIR = TORCHCODEC_PATH / "test" / "resources" | ||
| def generate_nasa_13013_references(): | ||
| VIDEO_PATH = RESOURCES_DIR / "nasa_13013.mp4" | ||
|
|
||
| # Last generated with ffmpeg version 4.3 | ||
| # | ||
| # Note: The naming scheme used here must match the naming scheme used to load | ||
| # tensors in ./utils.py. | ||
| STREAMS = [0, 3] | ||
|
|
@@ -95,6 +107,16 @@ def main(): | |
| get_frame_by_timestamp(VIDEO_PATH, timestamp, output_bmp) | ||
| convert_image_to_tensor(output_bmp) | ||
|
|
||
| # Extract frames with specific filters. We have tests that assume these exact filters. | ||
| FRAMES = [0, 15, 200, 389] | ||
| crop_filter = "crop=300:200:50:35:exact=1" | ||
| for frame in FRAMES: | ||
| output_bmp = f"{VIDEO_PATH}.{sanitize_filtergraph_expression(crop_filter)}.stream3.frame{frame:06d}.bmp" | ||
| get_frame_by_index(VIDEO_PATH, frame, output_bmp, stream=3, filters=crop_filter) | ||
| convert_image_to_tensor(output_bmp) | ||
|
|
||
|
|
||
| def generate_h265_video_references(): | ||
| # This video was generated by running the following: | ||
| # conda install -c conda-forge x265 | ||
| # ./configure --enable-nonfree --enable-gpl --prefix=$(readlink -f ../bin) --enable-libx265 --enable-rpath --extra-ldflags=-Wl,-rpath=$CONDA_PREFIX/lib --enable-filter=drawtext --enable-libfontconfig --enable-libfreetype --enable-libharfbuzz | ||
|
|
@@ -107,6 +129,8 @@ def main(): | |
| get_frame_by_index(VIDEO_PATH, frame, output_bmp, stream=0) | ||
| convert_image_to_tensor(output_bmp) | ||
|
|
||
|
|
||
| def generate_av1_video_references(): | ||
| # This video was generated by running the following: | ||
| # ffmpeg -f lavfi -i testsrc=duration=5:size=640x360:rate=25,format=yuv420p -c:v libaom-av1 -crf 30 -colorspace bt709 -color_primaries bt709 -color_trc bt709 av1_video.mkv | ||
| # Note that this video only has 1 stream, at index 0. | ||
|
|
@@ -119,5 +143,11 @@ def main(): | |
| convert_image_to_tensor(output_bmp) | ||
|
|
||
|
|
||
| def main(): | ||
| generate_nasa_13013_references() | ||
| generate_h265_video_references() | ||
| generate_av1_video_references() | ||
|
|
||
|
|
||
| if __name__ == "__main__": | ||
| main() | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Drive-by: when passing in a height and width, we should only be able to instantiate a
`FrameDims` object with positive values. If we want a `FrameDims` object that has 0 for both values, that's just the default constructor. We should never have a `FrameDims` object with negative values.