Skip to content

Commit 9c7bae7

Browse files
committed
reduce files affected, add GpuEncoder.cpp
1 parent d5f2637 commit 9c7bae7

File tree

10 files changed

+117
-248
lines changed

10 files changed

+117
-248
lines changed

src/torchcodec/_core/BetaCudaDeviceInterface.cpp

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -833,16 +833,6 @@ void BetaCudaDeviceInterface::convertAVFrameToFrameOutput(
833833
gpuFrame, device_, nppCtx_, nvdecStream, preAllocatedOutputTensor);
834834
}
835835

836-
UniqueAVFrame BetaCudaDeviceInterface::convertTensorToAVFrame(
837-
[[maybe_unused]] const torch::Tensor& tensor,
838-
[[maybe_unused]] AVPixelFormat targetFormat,
839-
[[maybe_unused]] int frameIndex,
840-
[[maybe_unused]] AVCodecContext* codecContext) {
841-
TORCH_CHECK(
842-
false,
843-
"Beta CUDA device interface does not support video encoding currently.");
844-
}
845-
846836
std::string BetaCudaDeviceInterface::getDetails() {
847837
std::string details = "Beta CUDA Device Interface.";
848838
if (cpuFallback_) {

src/torchcodec/_core/BetaCudaDeviceInterface.h

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -48,12 +48,6 @@ class BetaCudaDeviceInterface : public DeviceInterface {
4848
FrameOutput& frameOutput,
4949
std::optional<torch::Tensor> preAllocatedOutputTensor) override;
5050

51-
UniqueAVFrame convertTensorToAVFrame(
52-
const torch::Tensor& tensor,
53-
AVPixelFormat targetFormat,
54-
int frameIndex,
55-
AVCodecContext* codecContext) override;
56-
5751
int sendPacket(ReferenceAVPacket& packet) override;
5852
int sendEOFPacket() override;
5953
int receiveFrame(UniqueAVFrame& avFrame) override;

src/torchcodec/_core/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ function(make_torchcodec_libraries
100100
)
101101

102102
if(ENABLE_CUDA)
103-
list(APPEND core_sources CudaDeviceInterface.cpp BetaCudaDeviceInterface.cpp NVDECCache.cpp CUDACommon.cpp NVCUVIDRuntimeLoader.cpp)
103+
list(APPEND core_sources CudaDeviceInterface.cpp BetaCudaDeviceInterface.cpp NVDECCache.cpp CUDACommon.cpp NVCUVIDRuntimeLoader.cpp GpuEncoder.cpp)
104104
endif()
105105

106106
set(core_library_dependencies

src/torchcodec/_core/CpuDeviceInterface.cpp

Lines changed: 0 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -429,84 +429,6 @@ std::optional<torch::Tensor> CpuDeviceInterface::maybeFlushAudioBuffers() {
429429
/*dim=*/1, /*start=*/0, /*length=*/actualNumRemainingSamples);
430430
}
431431

432-
UniqueAVFrame CpuDeviceInterface::convertTensorToAVFrame(
433-
const torch::Tensor& frame,
434-
AVPixelFormat outPixelFormat,
435-
int frameIndex,
436-
[[maybe_unused]] AVCodecContext* codecContext) {
437-
int inHeight = static_cast<int>(frame.sizes()[1]);
438-
int inWidth = static_cast<int>(frame.sizes()[2]);
439-
440-
// For now, reuse input dimensions as output dimensions
441-
int outWidth = inWidth;
442-
int outHeight = inHeight;
443-
444-
// Input format is RGB planar (AV_PIX_FMT_GBRP after channel reordering)
445-
AVPixelFormat inPixelFormat = AV_PIX_FMT_GBRP;
446-
447-
// Initialize and cache scaling context if it does not exist
448-
if (!swsContext_) {
449-
swsContext_.reset(sws_getContext(
450-
inWidth,
451-
inHeight,
452-
inPixelFormat,
453-
outWidth,
454-
outHeight,
455-
outPixelFormat,
456-
SWS_BICUBIC, // Used by FFmpeg CLI
457-
nullptr,
458-
nullptr,
459-
nullptr));
460-
TORCH_CHECK(swsContext_ != nullptr, "Failed to create scaling context");
461-
}
462-
463-
UniqueAVFrame avFrame(av_frame_alloc());
464-
TORCH_CHECK(avFrame != nullptr, "Failed to allocate AVFrame");
465-
466-
// Set output frame properties
467-
avFrame->format = outPixelFormat;
468-
avFrame->width = outWidth;
469-
avFrame->height = outHeight;
470-
avFrame->pts = frameIndex;
471-
472-
int status = av_frame_get_buffer(avFrame.get(), 0);
473-
TORCH_CHECK(status >= 0, "Failed to allocate frame buffer");
474-
475-
// Need to convert/scale the frame
476-
// Create temporary frame with input format
477-
UniqueAVFrame inputFrame(av_frame_alloc());
478-
TORCH_CHECK(inputFrame != nullptr, "Failed to allocate input AVFrame");
479-
480-
inputFrame->format = inPixelFormat;
481-
inputFrame->width = inWidth;
482-
inputFrame->height = inHeight;
483-
484-
uint8_t* tensorData = static_cast<uint8_t*>(frame.data_ptr());
485-
486-
// TODO-VideoEncoder: Reorder tensor if in NHWC format
487-
int channelSize = inHeight * inWidth;
488-
// Reorder RGB -> GBR for AV_PIX_FMT_GBRP format
489-
// TODO-VideoEncoder: Determine if FFmpeg supports planar RGB input format
490-
inputFrame->data[0] = tensorData + channelSize;
491-
inputFrame->data[1] = tensorData + (2 * channelSize);
492-
inputFrame->data[2] = tensorData;
493-
494-
inputFrame->linesize[0] = inWidth;
495-
inputFrame->linesize[1] = inWidth;
496-
inputFrame->linesize[2] = inWidth;
497-
498-
status = sws_scale(
499-
swsContext_.get(),
500-
inputFrame->data,
501-
inputFrame->linesize,
502-
0,
503-
inputFrame->height,
504-
avFrame->data,
505-
avFrame->linesize);
506-
TORCH_CHECK(status == outHeight, "sws_scale failed");
507-
return avFrame;
508-
}
509-
510432
std::string CpuDeviceInterface::getDetails() {
511433
return std::string("CPU Device Interface.");
512434
}

src/torchcodec/_core/CpuDeviceInterface.h

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -38,12 +38,6 @@ class CpuDeviceInterface : public DeviceInterface {
3838
FrameOutput& frameOutput,
3939
std::optional<torch::Tensor> preAllocatedOutputTensor) override;
4040

41-
UniqueAVFrame convertTensorToAVFrame(
42-
const torch::Tensor& tensor,
43-
AVPixelFormat targetFormat,
44-
int frameIndex,
45-
AVCodecContext* codecContext) override;
46-
4741
std::string getDetails() override;
4842

4943
private:

src/torchcodec/_core/CudaDeviceInterface.cpp

Lines changed: 6 additions & 103 deletions
Original file line numberDiff line numberDiff line change
@@ -146,40 +146,6 @@ void CudaDeviceInterface::registerHardwareDeviceWithCodec(
146146
codecContext->hw_device_ctx = av_buffer_ref(hardwareDeviceCtx_.get());
147147
}
148148

149-
void CudaDeviceInterface::setupEncodingContext(AVCodecContext* codecContext) {
150-
TORCH_CHECK(
151-
hardwareDeviceCtx_, "Hardware device context has not been initialized");
152-
TORCH_CHECK(codecContext != nullptr, "codecContext is null");
153-
// is there any way to preserve actual desired format?
154-
// codecContext->sw_pix_fmt = codecContext->pix_fmt;
155-
// Should we always produce AV_PIX_FMT_NV12?
156-
codecContext->sw_pix_fmt = AV_PIX_FMT_NV12;
157-
codecContext->pix_fmt = AV_PIX_FMT_CUDA;
158-
159-
AVBufferRef* hwFramesCtxRef = av_hwframe_ctx_alloc(hardwareDeviceCtx_.get());
160-
TORCH_CHECK(
161-
hwFramesCtxRef != nullptr,
162-
"Failed to allocate hardware frames context for codec");
163-
164-
AVHWFramesContext* hwFramesCtx =
165-
reinterpret_cast<AVHWFramesContext*>(hwFramesCtxRef->data);
166-
hwFramesCtx->format = codecContext->pix_fmt;
167-
hwFramesCtx->sw_format = codecContext->sw_pix_fmt;
168-
hwFramesCtx->width = codecContext->width;
169-
hwFramesCtx->height = codecContext->height;
170-
171-
int ret = av_hwframe_ctx_init(hwFramesCtxRef);
172-
if (ret < 0) {
173-
av_buffer_unref(&hwFramesCtxRef);
174-
TORCH_CHECK(
175-
false,
176-
"Failed to initialize CUDA frames context for codec: ",
177-
getFFMPEGErrorStringFromErrorCode(ret));
178-
}
179-
180-
codecContext->hw_frames_ctx = hwFramesCtxRef;
181-
}
182-
183149
UniqueAVFrame CudaDeviceInterface::maybeConvertAVFrameToNV12OrRGB24(
184150
UniqueAVFrame& avFrame) {
185151
// We need FFmpeg filters to handle those conversion cases which are not
@@ -365,39 +331,10 @@ void CudaDeviceInterface::convertAVFrameToFrameOutput(
365331
avFrame, device_, nppCtx_, nvdecStream, preAllocatedOutputTensor);
366332
}
367333

368-
namespace {
369-
// Helper function to check if a codec supports CUDA hardware acceleration
370-
bool codecSupportsCudaHardware(const AVCodec* codec) {
371-
const AVCodecHWConfig* config = nullptr;
372-
for (int j = 0; (config = avcodec_get_hw_config(codec, j)) != nullptr; ++j) {
373-
if (config->device_type == AV_HWDEVICE_TYPE_CUDA) {
374-
return true;
375-
}
376-
}
377-
return false;
378-
}
379-
} // namespace
380-
381334
// inspired by https://github.com/FFmpeg/FFmpeg/commit/ad67ea9
382335
// we have to do this because of an FFmpeg bug where hardware decoding is not
383336
// appropriately set, so we just go off and find the matching codec for the CUDA
384337
// device
385-
386-
std::optional<const AVCodec*> CudaDeviceInterface::findEncoder(
387-
const AVCodecID& codecId) {
388-
void* i = nullptr;
389-
const AVCodec* codec = nullptr;
390-
while ((codec = av_codec_iterate(&i)) != nullptr) {
391-
if (codec->id != codecId || !av_codec_is_encoder(codec)) {
392-
continue;
393-
}
394-
if (codecSupportsCudaHardware(codec)) {
395-
return codec;
396-
}
397-
}
398-
return std::nullopt;
399-
}
400-
401338
std::optional<const AVCodec*> CudaDeviceInterface::findDecoder(
402339
const AVCodecID& codecId) {
403340
void* i = nullptr;
@@ -407,52 +344,18 @@ std::optional<const AVCodec*> CudaDeviceInterface::findDecoder(
407344
continue;
408345
}
409346

410-
if (codecSupportsCudaHardware(codec)) {
411-
return codec;
347+
const AVCodecHWConfig* config = nullptr;
348+
for (int j = 0; (config = avcodec_get_hw_config(codec, j)) != nullptr;
349+
++j) {
350+
if (config->device_type == AV_HWDEVICE_TYPE_CUDA) {
351+
return codec;
352+
}
412353
}
413354
}
414355

415356
return std::nullopt;
416357
}
417358

418-
UniqueAVFrame CudaDeviceInterface::convertTensorToAVFrame(
419-
const torch::Tensor& frame,
420-
[[maybe_unused]] AVPixelFormat targetFormat,
421-
int frameIndex,
422-
AVCodecContext* codecContext) {
423-
TORCH_CHECK(frame.is_cuda(), "CUDA device interface requires CUDA tensors");
424-
TORCH_CHECK(
425-
frame.dim() == 3 && frame.size(0) == 3,
426-
"Expected 3D RGB tensor (CHW format), got shape: ",
427-
frame.sizes());
428-
429-
UniqueAVFrame avFrame(av_frame_alloc());
430-
TORCH_CHECK(avFrame != nullptr, "Failed to allocate AVFrame");
431-
432-
avFrame->format = AV_PIX_FMT_CUDA;
433-
avFrame->width = static_cast<int>(frame.size(2));
434-
avFrame->height = static_cast<int>(frame.size(1));
435-
avFrame->pts = frameIndex;
436-
437-
int ret = av_hwframe_get_buffer(
438-
codecContext ? codecContext->hw_frames_ctx : nullptr, avFrame.get(), 0);
439-
TORCH_CHECK(
440-
ret >= 0,
441-
"Failed to allocate hardware frame: ",
442-
getFFMPEGErrorStringFromErrorCode(ret));
443-
444-
at::cuda::CUDAStream currentStream =
445-
at::cuda::getCurrentCUDAStream(device_.index());
446-
447-
convertRGBTensorToNV12Frame(frame, avFrame, device_, nppCtx_, currentStream);
448-
449-
// Set color properties to FFmpeg defaults
450-
avFrame->colorspace = AVCOL_SPC_SMPTE170M; // BT.601
451-
avFrame->color_range = AVCOL_RANGE_MPEG; // Limited range
452-
453-
return avFrame;
454-
}
455-
456359
std::string CudaDeviceInterface::getDetails() {
457360
// Note: for this interface specifically the fallback is only known after a
458361
// frame has been decoded, not before: that's when FFmpeg decides to fallback,

src/torchcodec/_core/CudaDeviceInterface.h

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ class CudaDeviceInterface : public DeviceInterface {
1818

1919
virtual ~CudaDeviceInterface();
2020

21-
std::optional<const AVCodec*> findEncoder(const AVCodecID& codecId) override;
2221
std::optional<const AVCodec*> findDecoder(const AVCodecID& codecId) override;
2322

2423
void initialize(
@@ -35,19 +34,11 @@ class CudaDeviceInterface : public DeviceInterface {
3534

3635
void registerHardwareDeviceWithCodec(AVCodecContext* codecContext) override;
3736

38-
void setupEncodingContext(AVCodecContext* codecContext) override;
39-
4037
void convertAVFrameToFrameOutput(
4138
UniqueAVFrame& avFrame,
4239
FrameOutput& frameOutput,
4340
std::optional<torch::Tensor> preAllocatedOutputTensor) override;
4441

45-
UniqueAVFrame convertTensorToAVFrame(
46-
const torch::Tensor& tensor,
47-
AVPixelFormat targetFormat,
48-
int frameIndex,
49-
AVCodecContext* codecContext) override;
50-
5142
std::string getDetails() override;
5243

5344
private:

src/torchcodec/_core/DeviceInterface.h

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -46,11 +46,6 @@ class DeviceInterface {
4646
return device_;
4747
};
4848

49-
virtual std::optional<const AVCodec*> findEncoder(
50-
[[maybe_unused]] const AVCodecID& codecId) {
51-
return std::nullopt;
52-
};
53-
5449
virtual std::optional<const AVCodec*> findDecoder(
5550
[[maybe_unused]] const AVCodecID& codecId) {
5651
return std::nullopt;
@@ -92,25 +87,11 @@ class DeviceInterface {
9287
virtual void registerHardwareDeviceWithCodec(
9388
[[maybe_unused]] AVCodecContext* codecContext) {}
9489

95-
// Setup device-specific encoding context (e.g., hardware frame contexts).
96-
// Called after registerHardwareDeviceWithCodec for encoders.
97-
// Default implementation does nothing (suitable for CPU and basic cases).
98-
virtual void setupEncodingContext(
99-
[[maybe_unused]] AVCodecContext* codecContext) {}
100-
10190
virtual void convertAVFrameToFrameOutput(
10291
UniqueAVFrame& avFrame,
10392
FrameOutput& frameOutput,
10493
std::optional<torch::Tensor> preAllocatedOutputTensor = std::nullopt) = 0;
10594

106-
// Convert tensor to AVFrame, implemented per device interface.
107-
// This is similar to convertAVFrameToFrameOutput for encoding
108-
virtual UniqueAVFrame convertTensorToAVFrame(
109-
const torch::Tensor& tensor,
110-
AVPixelFormat targetFormat,
111-
int frameIndex,
112-
AVCodecContext* codecContext) = 0;
113-
11495
// ------------------------------------------
11596
// Extension points for custom decoding paths
11697
// ------------------------------------------

0 commit comments

Comments
 (0)