Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion src/torchcodec/decoders/_core/CPUOnlyDevice.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ namespace facebook::torchcodec {
void convertAVFrameToDecodedOutputOnCuda(
const torch::Device& device,
const VideoDecoder::VideoStreamDecoderOptions& options,
const VideoDecoder::StreamMetadata& metadata,
VideoDecoder::RawDecodedOutput& rawOutput,
VideoDecoder::DecodedOutput& output,
std::optional<torch::Tensor> preAllocatedOutputTensor) {
Expand Down
12 changes: 4 additions & 8 deletions src/torchcodec/decoders/_core/CudaDevice.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,6 @@ void initializeContextOnCuda(
void convertAVFrameToDecodedOutputOnCuda(
const torch::Device& device,
const VideoDecoder::VideoStreamDecoderOptions& options,
const VideoDecoder::StreamMetadata& metadata,
VideoDecoder::RawDecodedOutput& rawOutput,
VideoDecoder::DecodedOutput& output,
std::optional<torch::Tensor> preAllocatedOutputTensor) {
Expand All @@ -197,11 +196,9 @@ void convertAVFrameToDecodedOutputOnCuda(
src->format == AV_PIX_FMT_CUDA,
"Expected format to be AV_PIX_FMT_CUDA, got " +
std::string(av_get_pix_fmt_name((AVPixelFormat)src->format)));
auto frameDims = getHeightAndWidthFromOptionsOrMetadata(options, metadata);
auto frameDims = getHeightAndWidthFromOptionsOrAVFrame(options, *src);
int height = frameDims.height;
int width = frameDims.width;
NppiSize oSizeROI = {width, height};
Npp8u* input[2] = {src->data[0], src->data[1]};
torch::Tensor& dst = output.frame;
if (preAllocatedOutputTensor.has_value()) {
dst = preAllocatedOutputTensor.value();
Expand All @@ -222,11 +219,10 @@ void convertAVFrameToDecodedOutputOnCuda(
// Use the user-requested GPU for running the NPP kernel.
c10::cuda::CUDAGuard deviceGuard(device);

auto start = std::chrono::high_resolution_clock::now();
NppiSize oSizeROI = {width, height};
Npp8u* input[2] = {src->data[0], src->data[1]};
Comment on lines +222 to +223
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just a drive-by, I moved this down closer to where they're used


// TODO height and width info of output tensor comes from the metadata, which
// may not be accurate. How do we make sure we won't corrupt memory if the
// allocated tensor is too short/large?
auto start = std::chrono::high_resolution_clock::now();
NppStatus status = nppiNV12ToRGB_8u_P2C3R(
input,
src->linesize[0],
Expand Down
1 change: 0 additions & 1 deletion src/torchcodec/decoders/_core/DeviceInterface.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ void initializeContextOnCuda(
void convertAVFrameToDecodedOutputOnCuda(
const torch::Device& device,
const VideoDecoder::VideoStreamDecoderOptions& options,
const VideoDecoder::StreamMetadata& metadata,
VideoDecoder::RawDecodedOutput& rawOutput,
VideoDecoder::DecodedOutput& output,
std::optional<torch::Tensor> preAllocatedOutputTensor = std::nullopt);
Expand Down
1 change: 0 additions & 1 deletion src/torchcodec/decoders/_core/VideoDecoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -863,7 +863,6 @@ VideoDecoder::DecodedOutput VideoDecoder::convertAVFrameToDecodedOutput(
convertAVFrameToDecodedOutputOnCuda(
streamInfo.options.device,
streamInfo.options,
containerMetadata_.streams[streamIndex],
rawOutput,
output,
preAllocatedOutputTensor);
Expand Down
12 changes: 6 additions & 6 deletions src/torchcodec/decoders/_core/VideoDecoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -440,16 +440,16 @@ class VideoDecoder {
// AVFrame *before* it is resized. In theory, i.e. if there are no bugs within
// our code or within FFmpeg code, this should be exactly the same as
// getHeightAndWidthFromResizedAVFrame(). This is used by single-frame
// decoding APIs, on CPU, with swscale.
// decoding APIs, on CPU with swscale, and on GPU.
// - getHeightAndWidthFromOptionsOrMetadata(). This is the height and width from
// the user-specified options if they exist, or the height and width from the
// stream metadata, which itself got its value from the CodecContext, when the
// stream was added. This is used by batch decoding APIs, or by GPU-APIs (both
// batch and single-frames).
// stream was added. This is used by batch decoding APIs, for both GPU and
// CPU.
//
// The source of truth for height and width really is the (resized) AVFrame:
// it's the decoded output from FFmpeg. The info from the metadata (i.e. from
// the CodecContext) may not be as accurate. However, the AVFrame is only
// The source of truth for height and width really is the (resized) AVFrame: it
// comes from the decoded output of FFmpeg. The info from the metadata (i.e.
// from the CodecContext) may not be as accurate. However, the AVFrame is only
// available late in the call stack, when the frame is decoded, while the
// CodecContext is available early when a stream is added. This is why we use
// the CodecContext for pre-allocating batched output tensors (we could
Expand Down
Loading