Skip to content

Commit b7e99de

Browse files
committed
[common] Allow FrameTimecode to be created from Timecode
This allows us to keep the existing VideoStream interface while still letting backends provide precise timing information. For now this is gated behind a hard-coded feature flag (`_USE_PTS_IN_DEVELOPMENT`, disabled by default), as it still needs to be integrated with the SceneDetector interface.
1 parent c7c82a1 commit b7e99de

File tree

8 files changed

+139
-180
lines changed

8 files changed

+139
-180
lines changed

docs/api.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ Most types/functions are also available directly from the `scenedetect` package
4646

4747
.. code:: python
4848
49-
scenedetect<0.7
49+
scenedetect<0.8
5050
5151
5252
.. _scenedetect-quickstart:

scenedetect/backends/opencv.py

Lines changed: 17 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@
3232
from scenedetect.video_stream import (
3333
FrameRateUnavailable,
3434
SeekError,
35-
VideoFrame,
3635
VideoOpenFailure,
3736
VideoStream,
3837
)
@@ -47,6 +46,8 @@
4746
" ! ", # gstreamer pipe
4847
)
4948

49+
_USE_PTS_IN_DEVELOPMENT = False
50+
5051

5152
def _get_aspect_ratio(cap: cv2.VideoCapture, epsilon: float = 0.0001) -> float:
5253
"""Display/pixel aspect ratio of the VideoCapture as a float (1.0 represents square pixels)."""
@@ -195,6 +196,16 @@ def aspect_ratio(self) -> float:
195196
"""Display/pixel aspect ratio as a float (1.0 represents square pixels)."""
196197
return _get_aspect_ratio(self._cap)
197198

199+
@property
200+
def timecode(self) -> Timecode:
201+
"""Current position within stream as a Timecode. This is not frame accurate."""
202+
# *NOTE*: Although OpenCV has `CAP_PROP_PTS`, it doesn't seem to be reliable. For now, we
203+
# use `CAP_PROP_POS_MSEC` instead, with a time base of 1/1000. Unfortunately this means that
204+
# rounding errors will affect frame accuracy with this backend.
205+
pts = self._cap.get(cv2.CAP_PROP_POS_MSEC)
206+
time_base = Fraction(1, 1000)
207+
return Timecode(pts=round(pts), time_base=time_base)
208+
198209
@property
199210
def position(self) -> FrameTimecode:
200211
"""Current position within stream as FrameTimecode.
@@ -204,6 +215,8 @@ def position(self) -> FrameTimecode:
204215
205216
This method will always return 0 (e.g. be equal to `base_timecode`) if no frames
206217
have been `read`."""
218+
if _USE_PTS_IN_DEVELOPMENT:
219+
return FrameTimecode(timecode=self.timecode, fps=self.frame_rate)
207220
if self.frame_number < 1:
208221
return self.base_timecode
209222
return self.base_timecode + (self.frame_number - 1)
@@ -272,30 +285,6 @@ def reset(self):
272285
self._cap.release()
273286
self._open_capture(self._frame_rate)
274287

275-
def __next__(self):
276-
# NOTE: POS_FRAMES starts from 0 before any frames are read.
277-
read, image = self._cap.read()
278-
if not read:
279-
raise StopIteration()
280-
# We can only query CAP_PROP_PTS if this uses the ffmpeg backend, however it doesn't seem
281-
# to work correctly. Quite frequently consecutive frames return the same PTS. We might need
282-
# to just abandon using PTS with OpenCV and rely on milliseconds. This will still result
283-
# in occasional off-by-one errors for VFR videos, but better than the status quo.
284-
#
285-
# We should also add a config option so users can specify if OpenCV should use fixed or
286-
# variable timing (i.e. if we should use CAP_PROP_POS_MSEC or CAP_PROP_POS_FRAMES for
287-
# timestamp calculation).
288-
USE_PTS = False
289-
if USE_PTS:
290-
pts = self._cap.get(cv2.CAP_PROP_PTS)
291-
time_base = Fraction.from_float(self._cap.get(cv2.CAP_PROP_FPS))
292-
time_base = Fraction(numerator=time_base.denominator, denominator=time_base.numerator)
293-
else:
294-
pts = self._cap.get(cv2.CAP_PROP_POS_MSEC)
295-
time_base = Fraction(1, 1000)
296-
timecode = Timecode(pts=round(pts), time_base=time_base)
297-
return VideoFrame(image=image, timecode=timecode)
298-
299288
def read(self, decode: bool = True, advance: bool = True) -> ty.Union[np.ndarray, bool]:
300289
"""Read and decode the next frame as a np.ndarray. Returns False when video ends,
301290
or the maximum number of decode attempts has passed.
@@ -490,6 +479,8 @@ def frame_size(self) -> ty.Tuple[int, int]:
490479
@property
491480
def duration(self) -> ty.Optional[FrameTimecode]:
492481
"""Duration of the stream as a FrameTimecode, or None if non terminating."""
482+
# TODO(v0.7): This will be incorrect for VFR. See if there is another property we can use
483+
# to estimate the video length correctly.
493484
frame_count = math.trunc(self._cap.get(cv2.CAP_PROP_FRAME_COUNT))
494485
if frame_count > 0:
495486
return self.base_timecode + frame_count
@@ -508,6 +499,7 @@ def position(self) -> FrameTimecode:
508499
509500
This method will always return 0 (e.g. be equal to `base_timecode`) if no frames
510501
have been `read`."""
502+
511503
if self.frame_number < 1:
512504
return self.base_timecode
513505
return self.base_timecode + (self.frame_number - 1)

scenedetect/backends/pyav.py

Lines changed: 7 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,14 @@
2020
from scenedetect.common import Timecode
2121
from scenedetect.frame_timecode import MAX_FPS_DELTA, FrameTimecode
2222
from scenedetect.platform import get_file_name
23-
from scenedetect.video_stream import FrameRateUnavailable, VideoFrame, VideoOpenFailure, VideoStream
23+
from scenedetect.video_stream import FrameRateUnavailable, VideoOpenFailure, VideoStream
2424

2525
logger = getLogger("pyscenedetect")
2626

2727
VALID_THREAD_MODES = ["NONE", "SLICE", "FRAME", "AUTO"]
2828

29+
_USE_PTS_IN_DEVELOPMENT = False
30+
2931

3032
class VideoStreamAv(VideoStream):
3133
"""PyAV `av.InputContainer` backend."""
@@ -80,7 +82,7 @@ def __init__(
8082

8183
self._name = "" if name is None else name
8284
self._path = ""
83-
self._frame = None
85+
self._frame: ty.Optional[av.VideoFrame] = None
8486
self._reopened = True
8587

8688
if threading_mode:
@@ -183,6 +185,9 @@ def position(self) -> FrameTimecode:
183185
184186
This can be interpreted as presentation time stamp, thus frame 1 corresponds
185187
to the presentation time 0. Returns 0 even if `frame_number` is 1."""
188+
if _USE_PTS_IN_DEVELOPMENT:
189+
timecode = Timecode(pts=self._frame.pts, time_base=self._frame.time_base)
190+
return FrameTimecode(timecode=timecode, fps=self.frame_rate)
186191
if self._frame is None:
187192
return self.base_timecode
188193
return FrameTimecode(round(self._frame.time * self.frame_rate), self.frame_rate)
@@ -264,19 +269,6 @@ def reset(self):
264269
except Exception as ex:
265270
raise VideoOpenFailure() from ex
266271

267-
def __next__(self) -> VideoFrame:
268-
# TODO: On the VFR test video, we seem to only decode 1979 frames instead of 1980. See what
269-
# the issue could be.
270-
try:
271-
frame = next(self._container.decode(video=0))
272-
except av.error.EOFError as ex:
273-
if not self._handle_eof():
274-
raise StopIteration() from ex
275-
return next(self) # *NOTE*: self._handle_eof must ensure we won't recurse again.
276-
image = frame.to_ndarray(format="bgr24")
277-
timecode = Timecode(pts=frame.pts, time_base=frame.time_base)
278-
return VideoFrame(image=image, timecode=timecode)
279-
280272
def read(self, decode: bool = True, advance: bool = True) -> ty.Union[np.ndarray, bool]:
281273
"""Read and decode the next frame as a np.ndarray. Returns False when video ends.
282274

0 commit comments

Comments
 (0)