-
Notifications
You must be signed in to change notification settings - Fork 83
Add color handling to VideoEncoder GPU #1125
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
cd5f8aa
939240b
b538d13
0480f1f
351a55d
ffcf872
a671f31
a9ad8e0
261549d
f4d777c
da3a6d7
1dc7690
ce86d61
daf2fda
8c2bcee
193d7c9
e83c130
e63f118
5e8745f
d1cbaeb
ef03ad5
40f4c66
e28081f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -383,16 +383,120 @@ std::string CudaDeviceInterface::getDetails() { | |
| // Below are methods exclusive to video encoding: | ||
| // -------------------------------------------------------------------------- | ||
| namespace { | ||
| // RGB to NV12 color conversion matrix for BT.601 limited range. | ||
| // NPP ColorTwist function used below expects the limited range | ||
| // color conversion matrix, and this matches FFmpeg's default behavior. | ||
| const Npp32f defaultLimitedRangeRgbToNv12[3][4] = { | ||
| // Y = 16 + 0.859 * (0.299*R + 0.587*G + 0.114*B) | ||
| {0.257f, 0.504f, 0.098f, 16.0f}, | ||
| // U = -0.148*R - 0.291*G + 0.439*B + 128 (BT.601 coefficients) | ||
| {-0.148f, -0.291f, 0.439f, 128.0f}, | ||
| // V = 0.439*R - 0.368*G - 0.071*B + 128 (BT.601 coefficients) | ||
| {0.439f, -0.368f, -0.071f, 128.0f}}; | ||
| // Note: [RGB -> YUV Color Conversion, limited color range] | ||
| // | ||
| // For context on this subject, first read the note: | ||
| // [YUV -> RGB Color Conversion, color space and color range] | ||
| // https://github.com/meta-pytorch/torchcodec/blob/main/src/torchcodec/_core/CUDACommon.cpp#L63-L65 | ||
| // | ||
| // Lets encode RGB -> YUV in the limited color range for BT.601 color space. | ||
| // In limited range, the [0, 255] range is mapped into [16-235] for Y, and into | ||
| // [16-240] for U,V. | ||
| // To implement, we get the full range conversion matrix as before, then scale: | ||
| // - Y channel: scale by (235-16)/255 = 219/255 | ||
| // - U,V channels: scale by (240-16)/255 = 224/255 | ||
| // https://en.wikipedia.org/wiki/YCbCr#Y%E2%80%B2PbPr_to_Y%E2%80%B2CbCr | ||
| // | ||
| // ```py | ||
| // import torch | ||
| // kr, kg, kb = 0.299, 0.587, 0.114 # BT.601 luma coefficients | ||
| // u_scale = 2 * (1 - kb) | ||
| // v_scale = 2 * (1 - kr) | ||
| // | ||
| // rgb_to_yuv_full = torch.tensor([ | ||
| // [kr, kg, kb], | ||
| // [-kr/u_scale, -kg/u_scale, (1-kb)/u_scale], | ||
| // [(1-kr)/v_scale, -kg/v_scale, -kb/v_scale] | ||
| // ]) | ||
| // | ||
| // full_to_limited_y_scale = 219.0 / 255.0 | ||
| // full_to_limited_uv_scale = 224.0 / 255.0 | ||
| // | ||
| // rgb_to_yuv_limited = rgb_to_yuv_full * torch.tensor([ | ||
| // [full_to_limited_y_scale], | ||
| // [full_to_limited_uv_scale], | ||
| // [full_to_limited_uv_scale] | ||
| // ]) | ||
| // | ||
| // print("RGB->YUV matrix (Limited Range BT.601):") | ||
| // print(rgb_to_yuv_limited) | ||
| // ``` | ||
| // | ||
| // This yields: | ||
| // tensor([[ 0.2568, 0.5041, 0.0979], | ||
| // [-0.1482, -0.2910, 0.4392], | ||
| // [ 0.4392, -0.3678, -0.0714]]) | ||
| // | ||
| // Which matches https://fourcc.org/fccyvrgb.php | ||
| // | ||
| // To perform color conversion in NPP, we are required to provide these color | ||
| // conversion matrices to ColorTwist functions, for example, | ||
| // `nppiRGBToNV12_8u_ColorTwist32f_C3P2R_Ctx`. | ||
| // https://docs.nvidia.com/cuda/npp/image_color_conversion.html | ||
| // | ||
| // These offsets are added in the 4th column of each conversion matrix below. | ||
| // - In limited range, Y is offset by 16 to add the lower margin. | ||
| // - In both color ranges, U,V are offset by 128 to be centered around 0. | ||
| // | ||
| // RGB to YUV conversion matrices to use in NPP color conversion functions | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks for the note, it will be super useful if we ever need to go back to this in the future. |
||
| struct ColorConversionMatrices { | ||
| static constexpr Npp32f BT601_LIMITED[3][4] = { | ||
| {0.2568f, 0.5041f, 0.0979f, 16.0f}, | ||
| {-0.1482f, -0.2910f, 0.4392f, 128.0f}, | ||
| {0.4392f, -0.3678f, -0.0714f, 128.0f}}; | ||
|
|
||
| static constexpr Npp32f BT601_FULL[3][4] = { | ||
| {0.2990f, 0.5870f, 0.1140f, 0.0f}, | ||
| {-0.1687f, -0.3313f, 0.5000f, 128.0f}, | ||
| {0.5000f, -0.4187f, -0.0813f, 128.0f}}; | ||
|
|
||
| static constexpr Npp32f BT709_LIMITED[3][4] = { | ||
| {0.1826f, 0.6142f, 0.0620f, 16.0f}, | ||
| {-0.1006f, -0.3386f, 0.4392f, 128.0f}, | ||
| {0.4392f, -0.3989f, -0.0403f, 128.0f}}; | ||
|
|
||
| static constexpr Npp32f BT709_FULL[3][4] = { | ||
| {0.2126f, 0.7152f, 0.0722f, 0.0f}, | ||
| {-0.1146f, -0.3854f, 0.5000f, 128.0f}, | ||
| {0.5000f, -0.4542f, -0.0458f, 128.0f}}; | ||
|
|
||
| static constexpr Npp32f BT2020_LIMITED[3][4] = { | ||
| {0.2256f, 0.5823f, 0.0509f, 16.0f}, | ||
| {-0.1227f, -0.3166f, 0.4392f, 128.0f}, | ||
| {0.4392f, -0.4039f, -0.0353f, 128.0f}}; | ||
|
|
||
| static constexpr Npp32f BT2020_FULL[3][4] = { | ||
| {0.2627f, 0.6780f, 0.0593f, 0.0f}, | ||
| {-0.139630f, -0.360370f, 0.5000f, 128.0f}, | ||
| {0.5000f, -0.459786f, -0.040214f, 128.0f}}; | ||
| }; | ||
NicolasHug marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| // Returns conversion matrix based on codec context color space and range | ||
| const Npp32f (*getConversionMatrix(AVCodecContext* codecContext))[4] { | ||
| if (codecContext->color_range == AVCOL_RANGE_MPEG || // limited range | ||
| codecContext->color_range == AVCOL_RANGE_UNSPECIFIED) { | ||
| if (codecContext->colorspace == AVCOL_SPC_BT470BG) { | ||
| return ColorConversionMatrices::BT601_LIMITED; | ||
| } else if (codecContext->colorspace == AVCOL_SPC_BT709) { | ||
| return ColorConversionMatrices::BT709_LIMITED; | ||
| } else if (codecContext->colorspace == AVCOL_SPC_BT2020_NCL) { | ||
| return ColorConversionMatrices::BT2020_LIMITED; | ||
| } else { // default to BT.601 | ||
| return ColorConversionMatrices::BT601_LIMITED; | ||
| } | ||
| } else if (codecContext->color_range == AVCOL_RANGE_JPEG) { // full range | ||
| if (codecContext->colorspace == AVCOL_SPC_BT470BG) { | ||
| return ColorConversionMatrices::BT601_FULL; | ||
| } else if (codecContext->colorspace == AVCOL_SPC_BT709) { | ||
| return ColorConversionMatrices::BT709_FULL; | ||
| } else if (codecContext->colorspace == AVCOL_SPC_BT2020_NCL) { | ||
| return ColorConversionMatrices::BT2020_FULL; | ||
| } else { // default to BT.601 | ||
| return ColorConversionMatrices::BT601_FULL; | ||
| } | ||
| } | ||
| return ColorConversionMatrices::BT601_LIMITED; | ||
| } | ||
| } // namespace | ||
|
|
||
| UniqueAVFrame CudaDeviceInterface::convertCUDATensorToAVFrameForEncoding( | ||
|
|
@@ -437,26 +541,26 @@ UniqueAVFrame CudaDeviceInterface::convertCUDATensorToAVFrameForEncoding( | |
| torch::Tensor hwcFrame = tensor.permute({1, 2, 0}).contiguous(); | ||
|
|
||
| NppiSize oSizeROI = {width, height}; | ||
| NppStatus status = nppiRGBToNV12_8u_ColorTwist32f_C3P2R_Ctx( | ||
| NppStatus status; | ||
| // Convert to NV12, as CUDA_ENCODING_PIXEL_FORMAT is always NV12 currently | ||
| status = nppiRGBToNV12_8u_ColorTwist32f_C3P2R_Ctx( | ||
| static_cast<const Npp8u*>(hwcFrame.data_ptr()), | ||
| validateInt64ToInt( | ||
| hwcFrame.stride(0) * hwcFrame.element_size(), "nSrcStep"), | ||
| hwcFrame.stride(0) * hwcFrame.element_size(), | ||
| avFrame->data, | ||
| avFrame->linesize, | ||
| oSizeROI, | ||
| defaultLimitedRangeRgbToNv12, | ||
| getConversionMatrix(codecContext), | ||
| *nppCtx_); | ||
|
|
||
| TORCH_CHECK( | ||
| status == NPP_SUCCESS, | ||
| "Failed to convert RGB to NV12: NPP error code ", | ||
| "Failed to convert RGB to ", | ||
| av_get_pix_fmt_name(DeviceInterface::CUDA_ENCODING_PIXEL_FORMAT), | ||
| ": NPP error code ", | ||
| status); | ||
|
|
||
| // TODO-VideoEncoder: Enable configuration of color properties, similar to | ||
| // FFmpeg. Below are the default color properties used by FFmpeg. | ||
| avFrame->colorspace = AVCOL_SPC_SMPTE170M; // BT.601 | ||
| avFrame->color_range = AVCOL_RANGE_MPEG; // Limited range | ||
|
|
||
| avFrame->colorspace = codecContext->colorspace; | ||
| avFrame->color_range = codecContext->color_range; | ||
| return avFrame; | ||
| } | ||
|
|
||
|
|
@@ -474,9 +578,7 @@ void CudaDeviceInterface::setupHardwareFrameContextForEncoding( | |
| hwFramesCtxRef != nullptr, | ||
| "Failed to allocate hardware frames context for codec"); | ||
|
|
||
| // TODO-VideoEncoder: Enable user set pixel formats to be set | ||
| // (outPixelFormat_) and handled with the appropriate NPP function | ||
| codecContext->sw_pix_fmt = AV_PIX_FMT_NV12; | ||
| codecContext->sw_pix_fmt = DeviceInterface::CUDA_ENCODING_PIXEL_FORMAT; | ||
| // Always set pixel format to support CUDA encoding. | ||
| codecContext->pix_fmt = AV_PIX_FMT_CUDA; | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -782,23 +782,30 @@ void VideoEncoder::initializeEncoder( | |
| outHeight_ = inHeight_; | ||
|
|
||
| if (videoStreamOptions.pixelFormat.has_value()) { | ||
| // TODO-VideoEncoder: Enable pixel formats to be set by user | ||
| // and handled with the appropriate NPP function on GPU. | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I moved this TODO from The behavior is unchanged: If
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. are we raising an error for
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Essentially yes. Because we do not understand the codec's behavior yet, we do not want the user to set or expect a pixel format. |
||
| if (frames_.device().is_cuda()) { | ||
| TORCH_CHECK( | ||
| false, | ||
| "GPU Video encoding currently only supports the NV12 pixel format. " | ||
| "Do not set pixel_format to use NV12."); | ||
| "Video encoding on GPU currently only supports the nv12 pixel format. " | ||
| "Do not set pixel_format to use nv12 by default."); | ||
| } | ||
| outPixelFormat_ = | ||
| validatePixelFormat(*avCodec, videoStreamOptions.pixelFormat.value()); | ||
| } else { | ||
| const AVPixelFormat* formats = getSupportedPixelFormats(*avCodec); | ||
| // Use first listed pixel format as default (often yuv420p). | ||
| // This is similar to FFmpeg's logic: | ||
| // https://www.ffmpeg.org/doxygen/4.0/decode_8c_source.html#l01087 | ||
| // If pixel formats are undefined for some reason, try yuv420p | ||
| outPixelFormat_ = (formats && formats[0] != AV_PIX_FMT_NONE) | ||
| ? formats[0] | ||
| : AV_PIX_FMT_YUV420P; | ||
| if (frames_.device().is_cuda()) { | ||
| // Default to nv12 pixel format when encoding on GPU. | ||
| outPixelFormat_ = DeviceInterface::CUDA_ENCODING_PIXEL_FORMAT; | ||
| } else { | ||
| const AVPixelFormat* formats = getSupportedPixelFormats(*avCodec); | ||
| // Use first listed pixel format as default (often yuv420p). | ||
| // This is similar to FFmpeg's logic: | ||
| // https://www.ffmpeg.org/doxygen/4.0/decode_8c_source.html#l01087 | ||
| // If pixel formats are undefined for some reason, try yuv420p | ||
| outPixelFormat_ = (formats && formats[0] != AV_PIX_FMT_NONE) | ||
| ? formats[0] | ||
| : AV_PIX_FMT_YUV420P; | ||
| } | ||
| } | ||
|
|
||
| // Configure codec parameters | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.