|
| 1 | +import re |
1 | 2 | import gc
|
2 | 3 | import torch
|
3 | 4 | import numpy as np
|
@@ -68,18 +69,34 @@ def _read_from_stream(container, start_offset, end_offset, stream, stream_name):
|
68 | 69 | should_buffer = False
|
69 | 70 | max_buffer_size = 5
|
70 | 71 | if stream.type == "video":
|
71 |
| - # TODO consider also using stream.codec_context.codec.reorder |
72 |
| - # videos with b frames can have out-of-order pts |
| 72 | + # DivX-style packed B-frames can have out-of-order pts (2 frames in a single pkt) |
73 | 73 | # so need to buffer some extra frames to sort everything
|
74 | 74 | # properly
|
75 |
| - should_buffer = stream.codec_context.has_b_frames |
| 75 | + extradata = stream.codec_context.extradata |
| 76 | + # overly complicated way of finding if `divx_packed` is set, following |
| 77 | + # https://github.com/FFmpeg/FFmpeg/commit/d5a21172283572af587b3d939eba0091484d3263 |
| 78 | + if extradata and b"DivX" in extradata: |
| 79 | + # can't use regex directly because of some weird characters sometimes... |
| 80 | + pos = extradata.find(b"DivX") |
| 81 | + d = extradata[pos:] |
| 82 | + o = re.search(br"DivX(\d+)Build(\d+)(\w)", d) |
| 83 | + if o is None: |
| 84 | + o = re.search(br"DivX(\d+)b(\d+)(\w)", d) |
| 85 | + if o is not None: |
| 86 | + should_buffer = o.group(3) == b"p" |
76 | 87 | seek_offset = start_offset
|
| 88 | + # some files don't seek to the right location, so better be safe here |
| 89 | + seek_offset = max(seek_offset - 1, 0) |
77 | 90 | if should_buffer:
|
78 | 91 | # FIXME this is kind of a hack, but we will jump to the previous keyframe
|
79 | 92 | # so this will be safe
|
80 | 93 | seek_offset = max(seek_offset - max_buffer_size, 0)
|
81 |
| - # TODO check if stream needs to always be the video stream here or not |
82 |
| - container.seek(seek_offset, any_frame=False, backward=True, stream=stream) |
| 94 | + try: |
| 95 | + # TODO check if stream needs to always be the video stream here or not |
| 96 | + container.seek(seek_offset, any_frame=False, backward=True, stream=stream) |
| 97 | + except av.AVError: |
| 98 | + print("Corrupted file?", container.name) |
| 99 | + return [] |
83 | 100 | buffer_count = 0
|
84 | 101 | for idx, frame in enumerate(container.decode(**stream_name)):
|
85 | 102 | frames[frame.pts] = frame
|
|
0 commit comments