Skip to content

Commit 4269832

Browse files
authored
* Fix accuracy and latency issues with FFmpegFrameGrabber.setVideoFrameNumber() (pull #1734)
1 parent 97aab27 commit 4269832

File tree

4 files changed

+62
-30
lines changed

4 files changed

+62
-30
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11

2+
* Fix accuracy and latency issues with `FFmpegFrameGrabber.setVideoFrameNumber()` ([pull #1734](https://github.com/bytedeco/javacv/pull/1734))
23
* Add new `Frame.pictType` field set to `I`, `P`, `B`, etc. by `FFmpegFrameGrabber` ([pull #1730](https://github.com/bytedeco/javacv/pull/1730))
34
* Set metadata for `AVFrame.opaque` in `FFmpegFrameGrabber` with call to `av_frame_copy_props()` ([issue #1729](https://github.com/bytedeco/javacv/issues/1729))
45
* Add `charset` property to `FrameGrabber` and `FrameRecorder` to use for metadata from FFmpeg ([pull #1720](https://github.com/bytedeco/javacv/pull/1720))

src/main/java/org/bytedeco/javacv/FFmpegFrameGrabber.java

Lines changed: 51 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -385,6 +385,7 @@ static class SeekCallback extends Seek_Pointer_long_int {
385385
private int samples_channels, samples_format, samples_rate;
386386
private boolean frameGrabbed;
387387
private Frame frame;
388+
private int[] streams;
388389

389390
private volatile boolean started = false;
390391

@@ -605,23 +606,23 @@ public double getVideoFrameRate() {
605606
/** Default override of super.setFrameNumber: sets a frame close to
606607
* the video frame having that number. */
607608
@Override public void setFrameNumber(int frameNumber) throws Exception {
608-
if (hasVideo()) setTimestamp(Math.round(1000000L * frameNumber / getFrameRate()));
609+
if (hasVideo()) setTimestamp((long)Math.floor(1000000L * frameNumber / getFrameRate()));
609610
else super.frameNumber = frameNumber;
610611
}
611612

612613
/** If there is a video stream, tries to seek to the video frame with the corresponding timestamp;
613614
* otherwise sets only super.frameNumber, because frameRate==0 if there is no video stream. */
614615
public void setVideoFrameNumber(int frameNumber) throws Exception {
615616
// best guess, AVSEEK_FLAG_FRAME has not been implemented in FFmpeg...
616-
if (hasVideo()) setVideoTimestamp(Math.round(1000000L * frameNumber / getFrameRate()));
617+
if (hasVideo()) setVideoTimestamp((long)Math.floor(1000000L * frameNumber / getFrameRate()));
617618
else super.frameNumber = frameNumber;
618619
}
619620

620621
/** If there is an audio stream, tries to seek to the audio frame with the corresponding timestamp;
621622
* otherwise the call is ignored. */
622623
public void setAudioFrameNumber(int frameNumber) throws Exception {
623624
// best guess, AVSEEK_FLAG_FRAME has not been implemented in FFmpeg...
624-
if (hasAudio()) setAudioTimestamp(Math.round(1000000L * frameNumber / getAudioFrameRate()));
625+
if (hasAudio()) setAudioTimestamp((long)Math.floor(1000000L * frameNumber / getAudioFrameRate()));
625626

626627
}
627628

@@ -755,9 +756,14 @@ else if (frameTypesToSeek.contains(Frame.Type.AUDIO)) {
755756
else if (seekFrame.samples != null && samples_frame != null && getSampleRate() > 0) {
756757
frameDuration = AV_TIME_BASE * samples_frame.nb_samples() / (double)getSampleRate();
757758
}
759+
// if(frameDuration>0.0) {
760+
// maxSeekSteps = (long)(10*(timestamp - initialSeekPosition - frameDuration)/frameDuration);
761+
// if (maxSeekSteps<0) maxSeekSteps = 0;
762+
// }
758763
if(frameDuration>0.0) {
759-
maxSeekSteps = (long)(10*(timestamp - initialSeekPosition - frameDuration)/frameDuration);
760-
if (maxSeekSteps<0) maxSeekSteps = 0;
764+
maxSeekSteps = 0; //no more grab if the distance to the requested timestamp is smaller than frameDuration
765+
if (timestamp - initialSeekPosition + 1 > frameDuration) //allow for a rounding error
766+
maxSeekSteps = (long)(10*(timestamp - initialSeekPosition)/frameDuration);
761767
}
762768
else if (initialSeekPosition < timestamp) maxSeekSteps = 1000;
763769

@@ -768,7 +774,7 @@ else if (seekFrame.samples != null && samples_frame != null && getSampleRate() >
768774
if (seekFrame == null) return; //is it better to throw NullPointerException?
769775

770776
count++;
771-
double ts=this.timestamp;
777+
double ts=seekFrame.timestamp;
772778
frameDuration = 0.0;
773779
if (seekFrame.image != null && this.getFrameRate() > 0)
774780
frameDuration = AV_TIME_BASE / (double)getFrameRate();
@@ -933,10 +939,12 @@ public synchronized void startUnsafe(boolean findStreamInfo) throws Exception {
933939
video_st = audio_st = null;
934940
AVCodecParameters video_par = null, audio_par = null;
935941
int nb_streams = oc.nb_streams();
942+
streams = new int[nb_streams];
936943
for (int i = 0; i < nb_streams; i++) {
937944
AVStream st = oc.streams(i);
938945
// Get a pointer to the codec context for the video or audio stream
939946
AVCodecParameters par = st.codecpar();
947+
streams[i] = par.codec_type();
940948
if (video_st == null && par.codec_type() == AVMEDIA_TYPE_VIDEO && (videoStream < 0 || videoStream == i)) {
941949
video_st = st;
942950
video_par = par;
@@ -1294,7 +1302,7 @@ public synchronized Frame grabFrame(boolean doAudio, boolean doVideo, boolean do
12941302

12951303
if (oc == null || oc.isNull()) {
12961304
throw new Exception("Could not grab: No AVFormatContext. (Has start() been called?)");
1297-
} else if ((!doVideo || video_st == null) && (!doAudio || audio_st == null)) {
1305+
} else if ((!doVideo || video_st == null) && (!doAudio || audio_st == null) && !doData) {
12981306
return null;
12991307
}
13001308
if (!started) {
@@ -1303,19 +1311,8 @@ public synchronized Frame grabFrame(boolean doAudio, boolean doVideo, boolean do
13031311

13041312
boolean videoFrameGrabbed = frameGrabbed && frame.image != null;
13051313
boolean audioFrameGrabbed = frameGrabbed && frame.samples != null;
1314+
boolean dataFrameGrabbed = frameGrabbed && frame.data != null;
13061315
frameGrabbed = false;
1307-
frame.keyFrame = false;
1308-
frame.imageWidth = 0;
1309-
frame.imageHeight = 0;
1310-
frame.imageDepth = 0;
1311-
frame.imageChannels = 0;
1312-
frame.imageStride = 0;
1313-
frame.image = null;
1314-
frame.sampleRate = 0;
1315-
frame.audioChannels = 0;
1316-
frame.samples = null;
1317-
frame.data = null;
1318-
frame.opaque = null;
13191316
if (doVideo && videoFrameGrabbed) {
13201317
if (doProcessing) {
13211318
processImage();
@@ -1328,7 +1325,24 @@ public synchronized Frame grabFrame(boolean doAudio, boolean doVideo, boolean do
13281325
}
13291326
frame.keyFrame = samples_frame.key_frame() != 0;
13301327
return frame;
1328+
} else if (doData && dataFrameGrabbed) {
1329+
return frame;
13311330
}
1331+
1332+
frame.keyFrame = false;
1333+
frame.imageWidth = 0;
1334+
frame.imageHeight = 0;
1335+
frame.imageDepth = 0;
1336+
frame.imageChannels = 0;
1337+
frame.imageStride = 0;
1338+
frame.image = null;
1339+
frame.sampleRate = 0;
1340+
frame.audioChannels = 0;
1341+
frame.samples = null;
1342+
frame.data = null;
1343+
frame.opaque = null;
1344+
frame.type = null;
1345+
13321346
boolean done = false;
13331347
boolean readPacket = pkt.stream_index() == -1;
13341348
while (!done) {
@@ -1355,7 +1369,7 @@ public synchronized Frame grabFrame(boolean doAudio, boolean doVideo, boolean do
13551369
frame.streamIndex = pkt.stream_index();
13561370

13571371
// Is this a packet from the video stream?
1358-
if (doVideo && video_st != null && pkt.stream_index() == video_st.index()
1372+
if (doVideo && video_st != null && frame.streamIndex == video_st.index()
13591373
&& (!keyFrames || pkt.flags() == AV_PKT_FLAG_KEY)) {
13601374
// Decode video frame
13611375
if (readPacket) {
@@ -1393,7 +1407,7 @@ public synchronized Frame grabFrame(boolean doAudio, boolean doVideo, boolean do
13931407
AVRational time_base = video_st.time_base();
13941408
timestamp = 1000000L * pts * time_base.num() / time_base.den();
13951409
// best guess, AVCodecContext.frame_number = number of decoded frames...
1396-
frameNumber = (int)Math.round(timestamp * getFrameRate() / 1000000L);
1410+
frameNumber = (int)Math.floor(timestamp * getFrameRate() / 1000000L);
13971411
frame.image = image_buf;
13981412
if (doProcessing) {
13991413
processImage();
@@ -1404,9 +1418,10 @@ public synchronized Frame grabFrame(boolean doAudio, boolean doVideo, boolean do
14041418
frame.timestamp = timestamp;
14051419
frame.keyFrame = picture.key_frame() != 0;
14061420
frame.pictType = (char)av_get_picture_type_char(picture.pict_type());
1421+
frame.type = Frame.Type.VIDEO;
14071422
}
14081423
}
1409-
} else if (doAudio && audio_st != null && pkt.stream_index() == audio_st.index()) {
1424+
} else if (doAudio && audio_st != null && frame.streamIndex == audio_st.index()) {
14101425
// Decode audio frame
14111426
if (readPacket) {
14121427
ret = avcodec_send_packet(audio_c, pkt);
@@ -1440,15 +1455,24 @@ public synchronized Frame grabFrame(boolean doAudio, boolean doVideo, boolean do
14401455
done = true;
14411456
frame.timestamp = timestamp;
14421457
frame.keyFrame = samples_frame.key_frame() != 0;
1458+
frame.type = Frame.Type.AUDIO;
14431459
}
1444-
} else if (doData) {
1445-
if (!readPacket) {
1446-
readPacket = true;
1447-
continue;
1448-
}
1460+
} else if (readPacket && doData
1461+
&& frame.streamIndex > -1 && frame.streamIndex < streams.length
1462+
&& streams[frame.streamIndex] != AVMEDIA_TYPE_VIDEO && streams[frame.streamIndex] != AVMEDIA_TYPE_AUDIO) {
14491463
// Export the stream byte data for non audio / video frames
14501464
frame.data = pkt.data().position(0).capacity(pkt.size()).asByteBuffer();
1465+
frame.opaque = pkt;
14511466
done = true;
1467+
switch (streams[frame.streamIndex]) {
1468+
case AVMEDIA_TYPE_DATA: frame.type = Frame.Type.DATA; break;
1469+
case AVMEDIA_TYPE_SUBTITLE: frame.type = Frame.Type.SUBTITLE; break;
1470+
case AVMEDIA_TYPE_ATTACHMENT: frame.type = Frame.Type.ATTACHMENT; break;
1471+
default: frame.type = null;
1472+
}
1473+
} else {
1474+
// Current packet is not needed (different stream index required)
1475+
readPacket = true;
14521476
}
14531477
}
14541478
return frame;

src/main/java/org/bytedeco/javacv/FFmpegFrameRecorder.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1302,7 +1302,7 @@ private boolean record(AVFrame frame) throws Exception {
13021302

13031303
private void writePacket(int mediaType, AVPacket avPacket) throws Exception {
13041304

1305-
AVStream avStream = (mediaType == AVMEDIA_TYPE_VIDEO) ? audio_st : (mediaType == AVMEDIA_TYPE_AUDIO) ? video_st : null;
1305+
AVStream avStream = (mediaType == AVMEDIA_TYPE_VIDEO) ? video_st : (mediaType == AVMEDIA_TYPE_AUDIO) ? audio_st : null;
13061306
String mediaTypeStr = (mediaType == AVMEDIA_TYPE_VIDEO) ? "video" : (mediaType == AVMEDIA_TYPE_AUDIO) ? "audio" : "unsupported media stream type";
13071307

13081308
synchronized (oc) {

src/main/java/org/bytedeco/javacv/Frame.java

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,11 +75,13 @@ public class Frame implements AutoCloseable, Indexable {
7575
DEPTH_FLOAT = 32,
7676
DEPTH_DOUBLE = 64;
7777

78-
/** Constants defining data type in the frame*/
78+
/** Constants defining data type in the frame. */
7979
public static enum Type {
8080
VIDEO,
8181
AUDIO,
82-
DATA
82+
DATA,
83+
SUBTITLE,
84+
ATTACHMENT
8385
}
8486

8587
/** Information associated with the {@link #image} field. */
@@ -104,6 +106,9 @@ public static enum Type {
104106
/** Stream number the audio|video|other data is associated with. */
105107
public int streamIndex;
106108

109+
/** The type of the stream. */
110+
public Type type;
111+
107112
/** The underlying data object, for example, Pointer, AVFrame, IplImage, or Mat. */
108113
public Object opaque;
109114

@@ -132,6 +137,7 @@ public Frame(int width, int height, int depth, int channels, int imageStride) {
132137
this.image = new Buffer[1];
133138
this.data = null;
134139
this.streamIndex = -1;
140+
this.type = null;
135141

136142
Pointer pointer = new BytePointer(imageHeight * imageStride * pixelSize(depth));
137143
ByteBuffer buffer = pointer.asByteBuffer();
@@ -222,6 +228,7 @@ public Frame clone() {
222228
newFrame.keyFrame = keyFrame;
223229
newFrame.pictType = pictType;
224230
newFrame.streamIndex = streamIndex;
231+
newFrame.type = type;
225232
newFrame.opaque = new Pointer[3];
226233
if (image != null) {
227234
newFrame.image = new Buffer[image.length];

0 commit comments

Comments
 (0)