Skip to content

Commit 390fd7c

Browse files
committed
Refac, simplify
1 parent b7bbfb2 commit 390fd7c

File tree

2 files changed

+19
-111
lines changed

2 files changed

+19
-111
lines changed

src/torchcodec/_core/BetaCudaDeviceInterface.cpp

Lines changed: 10 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ BetaCudaDeviceInterface::BetaCudaDeviceInterface(const torch::Device& device)
144144

145145
BetaCudaDeviceInterface::~BetaCudaDeviceInterface() {
146146
// TODONVDEC P0: we probably need to free the frames that have been decoded by
147-
// NVDEC but not yet "mapped" - i.e. those that are still in frameBuffer_?
147+
// NVDEC but not yet "mapped" - i.e. those that are still in readyFrames_?
148148

149149
if (decoder_) {
150150
NVDECCache::getCache(device_.index())
@@ -328,40 +328,34 @@ int BetaCudaDeviceInterface::frameReadyForDecoding(CUVIDPICPARAMS* picParams) {
328328

329329
// Send frame to be decoded by NVDEC - non-blocking call.
330330
CUresult result = cuvidDecodePicture(*decoder_.get(), picParams);
331-
if (result != CUDA_SUCCESS) {
332-
return 0; // Yes, you're reading that right, 0 means error.
333-
}
334331

335-
frameBuffer_.markAsBeingDecoded(/*slotId=*/picParams->CurrPicIdx);
336-
return 1;
332+
// Yes, you're reading that right, 0 means error, 1 means success
333+
return (result == CUDA_SUCCESS);
337334
}
338335

339336
int BetaCudaDeviceInterface::frameReadyInDisplayOrder(
340337
CUVIDPARSERDISPINFO* dispInfo) {
341-
frameBuffer_.markSlotReadyAndSetInfo(
342-
/*slotId=*/dispInfo->picture_index, dispInfo);
343-
return 1;
338+
readyFrames_.push(*dispInfo);
339+
return 1; // success
344340
}
345341

346342
// Moral equivalent of avcodec_receive_frame().
347343
int BetaCudaDeviceInterface::receiveFrame(UniqueAVFrame& avFrame) {
348-
FrameBuffer::Slot* slot = frameBuffer_.findReadySlotWithLowestPts();
349-
if (slot == nullptr) {
344+
if (readyFrames_.empty()) {
350345
// No frame found, instruct caller to try again later after sending more
351346
// packets.
352347
return AVERROR(EAGAIN);
353348
}
349+
CUVIDPARSERDISPINFO dispInfo = readyFrames_.front();
350+
readyFrames_.pop();
354351

355352
CUVIDPROCPARAMS procParams = {};
356-
CUVIDPARSERDISPINFO dispInfo = slot->dispInfo;
357353
procParams.progressive_frame = dispInfo.progressive_frame;
358354
procParams.top_field_first = dispInfo.top_field_first;
359355
procParams.unpaired_field = dispInfo.repeat_first_field < 0;
360356
CUdeviceptr framePtr = 0;
361357
unsigned int pitch = 0;
362358

363-
frameBuffer_.free(slot->slotId);
364-
365359
// We know the frame we want was sent to the hardware decoder, but now we need
366360
// to "map" it to an "output surface" before we can use its data. This is a
367361
// blocking calls that waits until the frame is fully decoded and ready to be
@@ -478,7 +472,8 @@ void BetaCudaDeviceInterface::flush() {
478472

479473
isFlushing_ = false;
480474

481-
frameBuffer_.clear();
475+
std::queue<CUVIDPARSERDISPINFO> emptyQueue;
476+
std::swap(readyFrames_, emptyQueue);
482477

483478
eofSent_ = false;
484479
}
@@ -512,57 +507,4 @@ void BetaCudaDeviceInterface::convertAVFrameToFrameOutput(
512507
preAllocatedOutputTensor);
513508
}
514509

515-
void BetaCudaDeviceInterface::FrameBuffer::markAsBeingDecoded(int slotId) {
516-
auto it = map_.find(slotId);
517-
TORCH_CHECK(
518-
it == map_.end(),
519-
"Slot ",
520-
slotId,
521-
" is already occupied. This should never happen.");
522-
523-
map_.emplace(slotId, Slot(slotId, SlotState::BEING_DECODED));
524-
}
525-
526-
void BetaCudaDeviceInterface::FrameBuffer::markSlotReadyAndSetInfo(
527-
int slotId,
528-
CUVIDPARSERDISPINFO* dispInfo) {
529-
auto it = map_.find(slotId);
530-
TORCH_CHECK(
531-
it != map_.end(),
532-
"Could not find matching slot with slotId ",
533-
slotId,
534-
". This should never happen.");
535-
536-
TORCH_CHECK(
537-
it->second.state == SlotState::BEING_DECODED,
538-
"Slot ",
539-
slotId,
540-
" is not in BEING_DECODED state. This should never happen.");
541-
it->second.state = SlotState::READY_FOR_OUTPUT;
542-
it->second.dispInfo = *dispInfo;
543-
}
544-
545-
void BetaCudaDeviceInterface::FrameBuffer::free(int slotId) {
546-
auto it = map_.find(slotId);
547-
TORCH_CHECK(
548-
it != map_.end(),
549-
"Tried to free non-existing slot with slotId",
550-
slotId,
551-
". This should never happen.");
552-
map_.erase(it);
553-
}
554-
555-
BetaCudaDeviceInterface::FrameBuffer::Slot*
556-
BetaCudaDeviceInterface::FrameBuffer::findReadySlotWithLowestPts() {
557-
Slot* outputSlot = nullptr;
558-
for (auto& [_, slot] : map_) {
559-
if (slot.state == SlotState::READY_FOR_OUTPUT &&
560-
(outputSlot == nullptr ||
561-
slot.dispInfo.timestamp < outputSlot->dispInfo.timestamp)) {
562-
outputSlot = &slot;
563-
}
564-
}
565-
return outputSlot;
566-
}
567-
568510
} // namespace facebook::torchcodec

src/torchcodec/_core/BetaCudaDeviceInterface.h

Lines changed: 9 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -64,40 +64,6 @@ class BetaCudaDeviceInterface : public DeviceInterface {
6464
// Apply bitstream filter, modifies packet in-place
6565
void applyBSF(ReferenceAVPacket& packet);
6666

67-
class FrameBuffer {
68-
public:
69-
enum class SlotState { BEING_DECODED, READY_FOR_OUTPUT };
70-
71-
struct Slot {
72-
CUVIDPARSERDISPINFO dispInfo;
73-
SlotState state;
74-
int slotId;
75-
76-
explicit Slot(int id, SlotState s) : state(s), slotId(id) {
77-
std::memset(&dispInfo, 0, sizeof(dispInfo));
78-
TORCH_CHECK(
79-
state == SlotState::BEING_DECODED,
80-
"Programmer: are you sure you want to create a slot that is not BEING_DECODED?");
81-
}
82-
};
83-
84-
FrameBuffer() = default;
85-
~FrameBuffer() = default;
86-
87-
void markAsBeingDecoded(int slotId);
88-
void markSlotReadyAndSetInfo(int slotId, CUVIDPARSERDISPINFO* dispInfo);
89-
void free(int slotId);
90-
Slot* findReadySlotWithLowestPts();
91-
92-
void clear() {
93-
map_.clear();
94-
}
95-
96-
private:
97-
// Map of slotId to Slot
98-
std::unordered_map<int, Slot> map_;
99-
};
100-
10167
UniqueAVFrame convertCudaFrameToAVFrame(
10268
CUdeviceptr framePtr,
10369
unsigned int pitch,
@@ -107,7 +73,7 @@ class BetaCudaDeviceInterface : public DeviceInterface {
10773
UniqueCUvideodecoder decoder_;
10874
CUVIDEOFORMAT videoFormat_ = {};
10975

110-
FrameBuffer frameBuffer_;
76+
std::queue<CUVIDPARSERDISPINFO> readyFrames_;
11177

11278
bool eofSent_ = false;
11379

@@ -168,25 +134,25 @@ class BetaCudaDeviceInterface : public DeviceInterface {
168134
// parser has accumulated enough data to decode a frame. We send that frame to
169135
// the NVDEC hardware for **async** decoding. While that frame is being
170136
// decoded, we store a light reference (a Slot) to that frame in the
171-
// frameBuffer_, and mark that slot as BEING_DECODED. The value that uniquely
172-
// identifies that frame in the frameBuffer_ is its "slotId", which is given
137+
// readyFrames_, and mark that slot as BEING_DECODED. The value that uniquely
138+
// identifies that frame in the readyFrames_ is its "slotId", which is given
173139
// to us by NVCUVID in the callback parameter: picParams->CurrPicIdx.
174140
// - frameReadyInDisplayOrder(dispInfo)): triggered **in display order** when a
175141
// frame is ready to be "displayed" (returned). When it is triggered, we look
176-
// up the corresponding frame/slot in the frameBuffer_, using
142+
// up the corresponding frame/slot in the readyFrames_, using
177143
// dispInfo->picture_index to match it against a given BEING_DECODED slotId.
178144
// We mark that frame/slot as READY_FOR_OUTPUT.
179145
// Crucially, this callback also tells us the pts of that frame. We store
180146
// the pts and other relevant info the slot.
181147
//
182-
// Said differently, from the perspective of the frameBuffer_, at any point in
183-
// time a slot/frame in the frameBuffer_ can be in 3 states:
184-
// - empty: no slot for that slotId exists in the frameBuffer_
148+
// Said differently, from the perspective of the readyFrames_, at any point in
149+
// time a slot/frame in the readyFrames_ can be in 3 states:
150+
// - empty: no slot for that slotId exists in the readyFrames_
185151
// - BEING_DECODED: frameReadyForDecoding was triggered for that frame, and the
186152
// frame was sent to NVDEC for async decoding. We don't know its pts because
187153
// the parser didn't trigger frameReadyInDisplayOrder() for that frame yet.
188154
// - READY_FOR_OUTPUT: frameReadyInDisplayOrder was triggered for that frame, it
189-
// is decoded and ready to be mapped and returned. We know its pts.
155+
// is decoded and ready to be mapped and returned. We know its pts.
190156
//
191157
// Because frameReadyInDisplayOrder is triggered in display order, we know that
192158
// if a slot is READY_FOR_OUTPUT, then all frames with a lower pts are also
@@ -196,4 +162,4 @@ class BetaCudaDeviceInterface : public DeviceInterface {
196162
// display order. If no slot is READY_FOR_OUTPUT, then we return EAGAIN to
197163
// indicate the caller should send more packets.
198164
//
199-
// Simple, innit?
165+
// Simple, innit?

0 commit comments

Comments
 (0)