@@ -362,4 +362,125 @@ std::string CudaDeviceInterface::getDetails() {
362362 (usingCPUFallback_ ? " CPU fallback." : " NVDEC." );
363363}
364364
365+ // Below are methods for video encoding:
366+ namespace {
367+ // RGB to NV12 color conversion matrix for BT.601 limited range.
368+ // NPP ColorTwist function used below expects the limited range
369+ // color conversion matrix, and this matches FFmpeg's default behavior.
370+ const Npp32f defaultLimitedRangeRgbToNv12[3 ][4 ] = {
371+ // Y = 16 + 0.859 * (0.299*R + 0.587*G + 0.114*B)
372+ {0 .257f , 0 .504f , 0 .098f , 16 .0f },
373+ // U = -0.148*R - 0.291*G + 0.439*B + 128 (BT.601 coefficients)
374+ {-0 .148f , -0 .291f , 0 .439f , 128 .0f },
375+ // V = 0.439*R - 0.368*G - 0.071*B + 128 (BT.601 coefficients)
376+ {0 .439f , -0 .368f , -0 .071f , 128 .0f }};
377+ } // namespace
378+
379+ std::optional<UniqueAVFrame> CudaDeviceInterface::convertTensorToAVFrame (
380+ const torch::Tensor& tensor,
381+ [[maybe_unused]] AVPixelFormat targetFormat,
382+ int frameIndex,
383+ AVCodecContext* codecContext) {
384+ TORCH_CHECK (
385+ tensor.dim () == 3 && tensor.size (0 ) == 3 ,
386+ " Expected 3D RGB tensor (CHW format), got shape: " ,
387+ tensor.sizes ());
388+
389+ UniqueAVFrame avFrame (av_frame_alloc ());
390+ TORCH_CHECK (avFrame != nullptr , " Failed to allocate AVFrame" );
391+ int height = static_cast <int >(tensor.size (1 ));
392+ int width = static_cast <int >(tensor.size (2 ));
393+
394+ // TODO-VideoEncoder: Unify AVFrame creation with CPU version of this method
395+ avFrame->format = AV_PIX_FMT_CUDA;
396+ avFrame->height = height;
397+ avFrame->width = width;
398+ avFrame->pts = frameIndex;
399+
400+ // FFmpeg's av_hwframe_get_buffer is used to allocate memory on CUDA device.
401+ // TODO-VideoEncoder: Consider using pytorch to allocate CUDA memory for
402+ // efficiency
403+ int ret =
404+ av_hwframe_get_buffer (codecContext->hw_frames_ctx , avFrame.get (), 0 );
405+ TORCH_CHECK (
406+ ret >= 0 ,
407+ " Failed to allocate hardware frame: " ,
408+ getFFMPEGErrorStringFromErrorCode (ret));
409+
410+ TORCH_CHECK (
411+ avFrame != nullptr && avFrame->data [0 ] != nullptr ,
412+ " avFrame must be pre-allocated with CUDA memory" );
413+
414+ torch::Tensor hwcFrame = tensor.permute ({1 , 2 , 0 }).contiguous ();
415+
416+ at::cuda::CUDAStream currentStream =
417+ at::cuda::getCurrentCUDAStream (device_.index ());
418+
419+ nppCtx_->hStream = currentStream.stream ();
420+ cudaError_t cudaErr =
421+ cudaStreamGetFlags (nppCtx_->hStream , &nppCtx_->nStreamFlags );
422+ TORCH_CHECK (
423+ cudaErr == cudaSuccess,
424+ " cudaStreamGetFlags failed: " ,
425+ cudaGetErrorString (cudaErr));
426+
427+ NppiSize oSizeROI = {width, height};
428+ NppStatus status = nppiRGBToNV12_8u_ColorTwist32f_C3P2R_Ctx (
429+ static_cast <const Npp8u*>(hwcFrame.data_ptr ()),
430+ hwcFrame.stride (0 ) * hwcFrame.element_size (),
431+ avFrame->data ,
432+ avFrame->linesize ,
433+ oSizeROI,
434+ defaultLimitedRangeRgbToNv12,
435+ *nppCtx_);
436+
437+ TORCH_CHECK (
438+ status == NPP_SUCCESS,
439+ " Failed to convert RGB to NV12: NPP error code " ,
440+ status);
441+
442+ // TODO-VideoEncoder: Enable configuration of color properties, similar to
443+ // FFmpeg. Below are the default color properties used by FFmpeg.
444+ avFrame->colorspace = AVCOL_SPC_SMPTE170M; // BT.601
445+ avFrame->color_range = AVCOL_RANGE_MPEG; // Limited range
446+
447+ return avFrame;
448+ }
449+
450+ void CudaDeviceInterface::setupHardwareFrameContext (
451+ AVCodecContext* codecContext) {
452+ TORCH_CHECK (codecContext != nullptr , " codecContext is null" );
453+ TORCH_CHECK (
454+ hardwareDeviceCtx_, " Hardware device context has not been initialized" );
455+
456+ AVBufferRef* hwFramesCtxRef = av_hwframe_ctx_alloc (hardwareDeviceCtx_.get ());
457+ TORCH_CHECK (
458+ hwFramesCtxRef != nullptr ,
459+ " Failed to allocate hardware frames context for codec" );
460+
461+ // Always set pixel formats to options that support CUDA encoding.
462+ // TODO-VideoEncoder: Enable user set pixel formats to be set and properly
463+ // handled with NPP functions below
464+ codecContext->sw_pix_fmt = AV_PIX_FMT_NV12;
465+ codecContext->pix_fmt = AV_PIX_FMT_CUDA;
466+
467+ AVHWFramesContext* hwFramesCtx =
468+ reinterpret_cast <AVHWFramesContext*>(hwFramesCtxRef->data );
469+ hwFramesCtx->format = codecContext->pix_fmt ;
470+ hwFramesCtx->sw_format = codecContext->sw_pix_fmt ;
471+ hwFramesCtx->width = codecContext->width ;
472+ hwFramesCtx->height = codecContext->height ;
473+
474+ int ret = av_hwframe_ctx_init (hwFramesCtxRef);
475+ if (ret < 0 ) {
476+ av_buffer_unref (&hwFramesCtxRef);
477+ TORCH_CHECK (
478+ false ,
479+ " Failed to initialize CUDA frames context for codec: " ,
480+ getFFMPEGErrorStringFromErrorCode (ret));
481+ }
482+
483+ codecContext->hw_frames_ctx = hwFramesCtxRef;
484+ }
485+
365486} // namespace facebook::torchcodec
0 commit comments