Skip to content

Commit 82f9764

Browse files
committed
Simplify main decoding loop by duplicating logic
1 parent 666b592 commit 82f9764

File tree

2 files changed

+113
-75
lines changed

2 files changed

+113
-75
lines changed

src/torchcodec/_core/SingleStreamDecoder.cpp

Lines changed: 105 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -1172,86 +1172,52 @@ UniqueAVFrame SingleStreamDecoder::decodeAVFrame(
11721172
}
11731173

11741174
StreamInfo& streamInfo = streamInfos_[activeStreamIndex_];
1175-
1176-
// Need to get the next frame or error from PopFrame.
11771175
UniqueAVFrame avFrame(av_frame_alloc());
11781176
AutoAVPacket autoAVPacket;
11791177
int status = AVSUCCESS;
11801178
bool reachedEOF = false;
1181-
while (true) {
1182-
status =
1183-
avcodec_receive_frame(streamInfo.codecContext.get(), avFrame.get());
11841179

1185-
if (status != AVSUCCESS && status != AVERROR(EAGAIN)) {
1186-
// Non-retriable error
1187-
break;
1188-
}
1180+
// TODONVDEC we now have 2 separate main decoding loops: the new one when the
1181+
// device interface can decode packets directly, and the traditional ffmpeg
1182+
// path, as in `main`. This makes things more obvious and explicit for now,
1183+
// but we should consider consolidating and merging both, with the right
1184+
// abstractions/extension points.
1185+
if (deviceInterface_ && deviceInterface_->canDecodePacketDirectly()) {
1186+
while (true) {
1187+
if (reachedEOF) {
1188+
throw SingleStreamDecoder::EndOfFileException(
1189+
"Requested next frame while there are no more frames left to "
1190+
"decode.");
1191+
}
11891192

1190-
decodeStats_.numFramesReceivedByDecoder++;
1191-
// Is this the kind of frame we're looking for?
1192-
if (status == AVSUCCESS && filterFunction(avFrame)) {
1193-
// Yes, this is the frame we'll return; break out of the decoding loop.
1194-
break;
1195-
} else if (status == AVSUCCESS) {
1196-
// No, but we received a valid frame - just not the kind we're looking
1197-
// for. The logic below will read packets and send them to the decoder.
1198-
// But since we did just receive a frame, we should skip reading more
1199-
// packets and sending them to the decoder and just try to receive more
1200-
// frames from the decoder.
1201-
continue;
1202-
}
1193+
// Read packets and send them to the decoder
1194+
ReferenceAVPacket packet(autoAVPacket);
1195+
do {
1196+
status = av_read_frame(formatContext_.get(), packet.get());
1197+
decodeStats_.numPacketsRead++;
12031198

1204-
if (reachedEOF) {
1205-
// We don't have any more packets to receive. So keep on pulling frames
1206-
// from its internal buffers.
1207-
continue;
1208-
}
1199+
if (status == AVERROR_EOF) {
1200+
reachedEOF = true;
1201+
break;
1202+
}
12091203

1210-
// We still haven't found the frame we're looking for. So let's read more
1211-
// packets and send them to the decoder.
1212-
ReferenceAVPacket packet(autoAVPacket);
1213-
do {
1214-
status = av_read_frame(formatContext_.get(), packet.get());
1215-
decodeStats_.numPacketsRead++;
1216-
1217-
if (status == AVERROR_EOF) {
1218-
// End of file reached. We must drain the codec by sending a nullptr
1219-
// packet.
1220-
status = avcodec_send_packet(
1221-
streamInfo.codecContext.get(),
1222-
/*avpkt=*/nullptr);
12231204
TORCH_CHECK(
12241205
status >= AVSUCCESS,
1225-
"Could not flush decoder: ",
1206+
"Could not read frame from input file: ",
12261207
getFFMPEGErrorStringFromErrorCode(status));
12271208

1228-
reachedEOF = true;
1229-
break;
1230-
}
1209+
} while (packet->stream_index != activeStreamIndex_);
12311210

1232-
TORCH_CHECK(
1233-
status >= AVSUCCESS,
1234-
"Could not read frame from input file: ",
1235-
getFFMPEGErrorStringFromErrorCode(status));
1236-
1237-
} while (packet->stream_index != activeStreamIndex_);
1238-
1239-
if (reachedEOF) {
1240-
// We don't have any more packets to send to the decoder. So keep on
1241-
// pulling frames from its internal buffers.
1242-
continue;
1243-
}
1211+
if (reachedEOF) {
1212+
continue;
1213+
}
12441214

1245-
// Check if device interface can handle packet decoding directly
1246-
if (deviceInterface_ && deviceInterface_->canDecodePacketDirectly()) {
12471215
ReferenceAVPacket* packetToSend = &packet;
12481216
AutoAVPacket filteredAutoPacket;
12491217
ReferenceAVPacket filteredPacket(filteredAutoPacket);
12501218

12511219
// Apply bitstream filtering if needed
12521220
if (streamInfo.needsBitstreamFiltering && streamInfo.bitstreamFilter) {
1253-
// printf("Applying bitstream filter to packet\n");
1254-
12551221
// Send packet to bitstream filter
12561222
int retVal = av_bsf_send_packet(streamInfo.bitstreamFilter.get(), packet.get());
12571223
TORCH_CHECK(retVal >= AVSUCCESS, "Failed to send packet to bitstream filter: ",
@@ -1263,22 +1229,86 @@ UniqueAVFrame SingleStreamDecoder::decodeAVFrame(
12631229
getFFMPEGErrorStringFromErrorCode(retVal));
12641230

12651231
packetToSend = &filteredPacket;
1266-
// printf("Bitstream filtering complete: original size=%d, filtered size=%d\n",
1267-
// packet->size, filteredPacket->size);
12681232
}
12691233

12701234
// Use custom packet decoding (e.g., direct NVDEC)
12711235
UniqueAVFrame decodedFrame =
12721236
deviceInterface_->decodePacketDirectly(*packetToSend);
1237+
decodeStats_.numPacketsSentToDecoder++;
1238+
12731239
if (decodedFrame && filterFunction(decodedFrame)) {
12741240
// We got the frame we're looking for from direct decoding
12751241
avFrame = std::move(decodedFrame);
1276-
decodeStats_.numPacketsSentToDecoder++;
12771242
break;
12781243
}
12791244
// If custom decoding didn't produce the desired frame, continue the loop
1280-
decodeStats_.numPacketsSentToDecoder++;
1281-
} else {
1245+
}
1246+
} else {
1247+
// Standard FFmpeg decoding path
1248+
while (true) {
1249+
status =
1250+
avcodec_receive_frame(streamInfo.codecContext.get(), avFrame.get());
1251+
1252+
if (status != AVSUCCESS && status != AVERROR(EAGAIN)) {
1253+
// Non-retriable error
1254+
break;
1255+
}
1256+
1257+
decodeStats_.numFramesReceivedByDecoder++;
1258+
// Is this the kind of frame we're looking for?
1259+
if (status == AVSUCCESS && filterFunction(avFrame)) {
1260+
// Yes, this is the frame we'll return; break out of the decoding loop.
1261+
break;
1262+
} else if (status == AVSUCCESS) {
1263+
// No, but we received a valid frame - just not the kind we're looking
1264+
// for. The logic below will read packets and send them to the decoder.
1265+
// But since we did just receive a frame, we should skip reading more
1266+
// packets and sending them to the decoder and just try to receive more
1267+
// frames from the decoder.
1268+
continue;
1269+
}
1270+
1271+
if (reachedEOF) {
1272+
// We don't have any more packets to receive. So keep on pulling frames
1273+
// from its internal buffers.
1274+
continue;
1275+
}
1276+
1277+
// We still haven't found the frame we're looking for. So let's read more
1278+
// packets and send them to the decoder.
1279+
ReferenceAVPacket packet(autoAVPacket);
1280+
do {
1281+
status = av_read_frame(formatContext_.get(), packet.get());
1282+
decodeStats_.numPacketsRead++;
1283+
1284+
if (status == AVERROR_EOF) {
1285+
// End of file reached. We must drain the codec by sending a nullptr
1286+
// packet.
1287+
status = avcodec_send_packet(
1288+
streamInfo.codecContext.get(),
1289+
/*avpkt=*/nullptr);
1290+
TORCH_CHECK(
1291+
status >= AVSUCCESS,
1292+
"Could not flush decoder: ",
1293+
getFFMPEGErrorStringFromErrorCode(status));
1294+
1295+
reachedEOF = true;
1296+
break;
1297+
}
1298+
1299+
TORCH_CHECK(
1300+
status >= AVSUCCESS,
1301+
"Could not read frame from input file: ",
1302+
getFFMPEGErrorStringFromErrorCode(status));
1303+
1304+
} while (packet->stream_index != activeStreamIndex_);
1305+
1306+
if (reachedEOF) {
1307+
// We don't have any more packets to send to the decoder. So keep on
1308+
// pulling frames from its internal buffers.
1309+
continue;
1310+
}
1311+
12821312
// Use standard FFmpeg decoding path
12831313
status = avcodec_send_packet(streamInfo.codecContext.get(), packet.get());
12841314
TORCH_CHECK(
@@ -1288,18 +1318,18 @@ UniqueAVFrame SingleStreamDecoder::decodeAVFrame(
12881318

12891319
decodeStats_.numPacketsSentToDecoder++;
12901320
}
1291-
}
12921321

1293-
if (status < AVSUCCESS) {
1294-
if (reachedEOF || status == AVERROR_EOF) {
1295-
throw SingleStreamDecoder::EndOfFileException(
1296-
"Requested next frame while there are no more frames left to "
1297-
"decode.");
1322+
if (status < AVSUCCESS) {
1323+
if (reachedEOF || status == AVERROR_EOF) {
1324+
throw SingleStreamDecoder::EndOfFileException(
1325+
"Requested next frame while there are no more frames left to "
1326+
"decode.");
1327+
}
1328+
TORCH_CHECK(
1329+
false,
1330+
"Could not receive frame from decoder: ",
1331+
getFFMPEGErrorStringFromErrorCode(status));
12981332
}
1299-
TORCH_CHECK(
1300-
false,
1301-
"Could not receive frame from decoder: ",
1302-
getFFMPEGErrorStringFromErrorCode(status));
13031333
}
13041334

13051335
// Note that we don't flush the decoder when we reach EOF (even though that's

src/torchcodec/_core/custom_ops.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,14 @@ void _add_video_stream(
253253
". color_conversion_library must be either filtergraph or swscale.");
254254
}
255255
}
256+
// TODONVDEC:
257+
// - All parsing logic of the the 'cuda:0:variant' string should probably
258+
// happen in Python, and the core/C++ code should just accept a clean device
259+
// string with a separate variant parameter.
260+
// - the createTorchDevice function does 2 things: return a clean
261+
// torch::Device, and also validate that a corresponding interface exists. We
262+
// should separate both logics. Note that this pre-exists in main.
263+
// - The variant should be part of the VideoStreamOptions struct when passed to the SingleStreamDecoder.
256264
std::string deviceVariant = "default";
257265
if (device.has_value()) {
258266
std::string deviceStr = std::string(device.value());

0 commit comments

Comments
 (0)