diff --git a/av/sidedata/encparams.pxd b/av/sidedata/encparams.pxd
new file mode 100644
index 000000000..031b59a42
--- /dev/null
+++ b/av/sidedata/encparams.pxd
@@ -0,0 +1,15 @@
+cimport libav as lib
+
+from av.sidedata.sidedata cimport SideData
+
+
+cdef class VideoEncParams(SideData):
+    pass
+
+
+cdef class VideoBlockParams:
+    # Keeps the parent side data (and through it the frame) alive for as
+    # long as `ptr` may be dereferenced.
+    cdef VideoEncParams owner
+    # Borrowed pointer into the parent's side data buffer.
+    cdef lib.AVVideoBlockParams *ptr
diff --git a/av/sidedata/encparams.pyi b/av/sidedata/encparams.pyi
new file mode 100644
index 000000000..de962ba51
--- /dev/null
+++ b/av/sidedata/encparams.pyi
@@ -0,0 +1,27 @@
+from enum import IntEnum
+from typing import Any, cast
+
+import numpy as np
+
+class VideoEncParamsType(IntEnum):
+    NONE = cast(int, ...)
+    VP9 = cast(int, ...)
+    H264 = cast(int, ...)
+    MPEG2 = cast(int, ...)
+
+class VideoEncParams:
+    nb_blocks: int
+    blocks_offset: int
+    block_size: int
+    codec_type: VideoEncParamsType
+    qp: int
+    delta_qp: list[list[int]]
+    def block_params(self, idx: int) -> VideoBlockParams: ...
+    def qp_map(self) -> np.ndarray[Any, Any]: ...
+
+class VideoBlockParams:
+    src_x: int
+    src_y: int
+    w: int
+    h: int
+    delta_qp: int
diff --git a/av/sidedata/encparams.pyx b/av/sidedata/encparams.pyx
new file mode 100644
index 000000000..380dc53d4
--- /dev/null
+++ b/av/sidedata/encparams.pyx
@@ -0,0 +1,200 @@
+cimport libav as lib
+from libc.stdint cimport int32_t, uint8_t
+
+from enum import IntEnum
+
+VideoEncParamsType = IntEnum(
+    "AVVideoEncParamsType",
+    {
+        "NONE": lib.AV_VIDEO_ENC_PARAMS_NONE,
+        "VP9": lib.AV_VIDEO_ENC_PARAMS_VP9,
+        "H264": lib.AV_VIDEO_ENC_PARAMS_H264,
+        "MPEG2": lib.AV_VIDEO_ENC_PARAMS_MPEG2,
+    },
+)
+
+cdef class VideoEncParams(SideData):
+    """
+    Frame side data holding the video encoding parameters (AVVideoEncParams).
+    """
+
+    def __repr__(self):
+        return (
+            f"<av.sidedata.VideoEncParams, nb_blocks={self.nb_blocks}, "
+            f"codec_type={self.codec_type}, qp={self.qp}>"
+        )
+
+    @property
+    def nb_blocks(self):
+        """
+        Number of blocks in the array
+        May be 0, in which case no per-block information is present. In this case
+        the values of blocks_offset / block_size are unspecified and should not
+        be accessed.
+        """
+        return (<lib.AVVideoEncParams*> self.ptr.data).nb_blocks
+
+    @property
+    def blocks_offset(self):
+        """
+        Offset in bytes from the beginning of this structure at which the array of blocks starts.
+        """
+        return (<lib.AVVideoEncParams*> self.ptr.data).blocks_offset
+
+    @property
+    def block_size(self):
+        """
+        Size of each block in bytes. May not match sizeof(AVVideoBlockParams).
+        """
+        return (<lib.AVVideoEncParams*> self.ptr.data).block_size
+
+    @property
+    def codec_type(self):
+        """
+        Type of the parameters (the codec they are used with).
+        """
+        cdef lib.AVVideoEncParamsType t = (<lib.AVVideoEncParams*> self.ptr.data).type
+        return VideoEncParamsType(t)
+
+    @property
+    def qp(self):
+        """
+        Base quantisation parameter for the frame. The final quantiser for a
+        given block in a given plane is obtained from this value, possibly
+        combined with `delta_qp` and the per-block delta in a manner
+        documented for each type.
+        """
+        return (<lib.AVVideoEncParams*> self.ptr.data).qp
+
+    @property
+    def delta_qp(self):
+        """
+        Quantisation parameter offset from the base (per-frame) qp for a given
+        plane (first index) and AC/DC coefficients (second index).
+        Returned as a 4x2 nested list of ints.
+        """
+        cdef lib.AVVideoEncParams *p = <lib.AVVideoEncParams*> self.ptr.data
+        return [[p.delta_qp[i][j] for j in range(2)] for i in range(4)]
+
+    def block_params(self, idx):
+        """
+        Get the encoding parameters for a given block
+
+        Raises ValueError if idx is not in [0, nb_blocks).
+        """
+        # Validate given index
+        if idx < 0 or idx >= self.nb_blocks:
+            raise ValueError("Expected idx in range [0, nb_blocks)")
+
+        return VideoBlockParams(self, idx)
+
+    def qp_map(self):
+        """
+        Convenience method that creates a 2-D map with the quantization parameters per macroblock.
+        Only for MPEG2 and H264 encoded videos.
+
+        Raises RuntimeError if the number of blocks does not match the frame
+        size, ValueError for any other codec type and IndexError if a block
+        lies outside of the map.
+        """
+        import numpy as np
+
+        cdef int mb_h = (self.frame.ptr.height + 15) // 16
+        cdef int mb_w = (self.frame.ptr.width + 15) // 16
+        cdef int nb_mb = mb_h * mb_w
+        cdef int block_idx
+        cdef int y
+        cdef int x
+        cdef VideoBlockParams block
+
+        # Validate number of blocks
+        if self.nb_blocks != nb_mb:
+            raise RuntimeError("Expected frame size to match number of blocks in side data")
+
+        # Validate type (named params_type so the `type` builtin is not shadowed)
+        cdef lib.AVVideoEncParamsType params_type = (<lib.AVVideoEncParams*> self.ptr.data).type
+        if params_type != lib.AVVideoEncParamsType.AV_VIDEO_ENC_PARAMS_MPEG2 and params_type != lib.AVVideoEncParamsType.AV_VIDEO_ENC_PARAMS_H264:
+            raise ValueError("Expected MPEG2 or H264")
+
+        # The base qp is the same for every block, so read it only once
+        cdef int32_t frame_qp = (<lib.AVVideoEncParams*> self.ptr.data).qp
+
+        # Create a 2-D map with the number of macroblocks
+        cdef int32_t[:, ::1] qp_values = np.empty((mb_h, mb_w), dtype=np.int32)
+
+        # Fill map with quantization parameter per macroblock
+        for block_idx in range(nb_mb):
+            block = VideoBlockParams(self, block_idx)
+            y = block.ptr.src_y // 16
+            x = block.ptr.src_x // 16
+            # src_x / src_y may be negative; a negative index would silently
+            # wrap around and overwrite the wrong macroblock.
+            if y < 0 or y >= mb_h or x < 0 or x >= mb_w:
+                raise IndexError("Block position is outside of the macroblock map")
+            qp_values[y, x] = frame_qp + block.ptr.delta_qp
+
+        return np.asarray(qp_values)
+
+
+cdef class VideoBlockParams:
+    """
+    Encoding parameters of one block of a frame (AVVideoBlockParams).
+    """
+
+    def __init__(self, VideoEncParams video_enc_params, int idx) -> None:
+        # This constructor is reachable from Python, so the index is checked
+        # here too; an out-of-range idx would read past the side data.
+        if idx < 0 or idx >= video_enc_params.nb_blocks:
+            raise ValueError("Expected idx in range [0, nb_blocks)")
+
+        cdef uint8_t* base = <uint8_t*> video_enc_params.ptr.data
+        cdef Py_ssize_t offset = video_enc_params.blocks_offset + idx * video_enc_params.block_size
+        # Keep the parent alive for as long as `ptr` points into its buffer
+        self.owner = video_enc_params
+        self.ptr = <lib.AVVideoBlockParams*> (base + offset)
+
+    def __repr__(self):
+        return (
+            f"<av.sidedata.VideoBlockParams, src=({self.src_x}, {self.src_y}), "
+            f"size={self.w}x{self.h}, delta_qp={self.delta_qp}>"
+        )
+
+    @property
+    def src_x(self):
+        """
+        Horizontal distance in luma pixels from the top-left corner of the visible frame
+        to the top-left corner of the block.
+        Can be negative if top/right padding is present on the coded frame.
+        """
+        return self.ptr.src_x
+
+    @property
+    def src_y(self):
+        """
+        Vertical distance in luma pixels from the top-left corner of the visible frame
+        to the top-left corner of the block.
+        Can be negative if top/right padding is present on the coded frame.
+        """
+        return self.ptr.src_y
+
+    @property
+    def w(self):
+        """
+        Width of the block in luma pixels
+        """
+        return self.ptr.w
+
+    @property
+    def h(self):
+        """
+        Height of the block in luma pixels
+        """
+        return self.ptr.h
+
+    @property
+    def delta_qp(self):
+        """
+        Difference between this block's final quantization parameter and the
+        corresponding per-frame value.
+        """
+        return self.ptr.delta_qp
diff --git a/av/sidedata/sidedata.pyx b/av/sidedata/sidedata.pyx
index 65b1387f0..96c3d72f5 100644
--- a/av/sidedata/sidedata.pyx
+++ b/av/sidedata/sidedata.pyx
@@ -3,6 +3,7 @@ from libc.stdint cimport int32_t
 from collections.abc import Mapping
 from enum import Enum
 
+from av.sidedata.encparams import VideoEncParams
 from av.sidedata.motionvectors import MotionVectors
 
 
@@ -49,6 +50,8 @@ class Type(Enum):
 cdef SideData wrap_side_data(Frame frame, int index):
     if frame.ptr.side_data[index].type == lib.AV_FRAME_DATA_MOTION_VECTORS:
         return MotionVectors(_cinit_bypass_sentinel, frame, index)
+    elif frame.ptr.side_data[index].type == lib.AV_FRAME_DATA_VIDEO_ENC_PARAMS:
+        return VideoEncParams(_cinit_bypass_sentinel, frame, index)
     else:
         return SideData(_cinit_bypass_sentinel, frame, index)
 
diff --git a/include/libav.pxd b/include/libav.pxd
index 568913208..5e074a1b8 100644
--- a/include/libav.pxd
+++ b/include/libav.pxd
@@ -7,6 +7,7 @@ include "libavutil/frame.pxd"
 include "libavutil/hwcontext.pxd"
 include "libavutil/samplefmt.pxd"
 include "libavutil/motion_vector.pxd"
+include "libavutil/video_enc_params.pxd"
 
 include "libavcodec/avcodec.pxd"
 include "libavcodec/bsf.pxd"
diff --git a/include/libavutil/video_enc_params.pxd b/include/libavutil/video_enc_params.pxd
new file mode 100644
index 000000000..aec452666
--- /dev/null
+++ b/include/libavutil/video_enc_params.pxd
@@ -0,0 +1,25 @@
+from libc.stdint cimport uint32_t, int32_t
+from libc.stddef cimport size_t
+
+
+cdef extern from "libavutil/video_enc_params.h" nogil:
+    cdef enum AVVideoEncParamsType:
+        AV_VIDEO_ENC_PARAMS_NONE
+        AV_VIDEO_ENC_PARAMS_VP9
+        AV_VIDEO_ENC_PARAMS_H264
+        AV_VIDEO_ENC_PARAMS_MPEG2
+
+    cdef struct AVVideoEncParams:
+        uint32_t nb_blocks
+        size_t blocks_offset
+        size_t block_size
+        AVVideoEncParamsType type
+        int32_t qp
+        int32_t delta_qp[4][2]
+
+    cdef struct AVVideoBlockParams:
+        int32_t src_x
+        int32_t src_y
+        int32_t w
+        int32_t h
+        int32_t delta_qp
diff --git a/tests/test_decode.py b/tests/test_decode.py
index 7ad722bfd..685b743f5 100644
--- a/tests/test_decode.py
+++ b/tests/test_decode.py
@@ -2,6 +2,7 @@
 import os
 import pathlib
 from fractions import Fraction
+from typing import cast
 
 import numpy as np
 import pytest
@@ -138,6 +139,32 @@ def test_decoded_motion_vectors_no_flag(self) -> None:
                 assert vectors is None
                 return
 
+    def test_decoded_video_enc_params(self) -> None:
+        container = av.open(fate_suite("h264/interlaced_crop.mp4"))
+        stream = container.streams.video[0]
+        stream.codec_context.options = {"export_side_data": "venc_params"}
+
+        for frame in container.decode(stream):
+            video_enc_params = cast(
+                av.sidedata.encparams.VideoEncParams,
+                frame.side_data.get("VIDEO_ENC_PARAMS"),
+            )
+            assert video_enc_params is not None
+            assert video_enc_params.nb_blocks == 40 * 24
+
+            first_block = video_enc_params.block_params(0)
+            assert video_enc_params.qp + first_block.delta_qp == 29
+            return
+
+    def test_decoded_video_enc_params_no_flag(self) -> None:
+        container = av.open(fate_suite("h264/interlaced_crop.mp4"))
+        stream = container.streams.video[0]
+        # When no additional flag is given, there should be no side data with the video encoding params
+
+        for frame in container.decode(stream):
+            video_enc_params = frame.side_data.get("VIDEO_ENC_PARAMS")
+            assert video_enc_params is None
+
     def test_decode_video_corrupt(self) -> None:
         # write an empty file
         path = self.sandboxed("empty.h264")