@@ -56,14 +56,9 @@ void CpuDeviceInterface::initialize(
   timeBase_ = timeBase;
   outputDims_ = outputDims;
 
-  // TODO: rationalize comment below with new stuff.
-  // By default, we want to use swscale for color conversion because it is
-  // faster. However, it has width requirements, so we may need to fall back
-  // to filtergraph. We also need to respect what was requested from the
-  // options; we respect the options unconditionally, so it's possible for
-  // swscale's width requirements to be violated. We don't expose the ability to
-  // choose color conversion library publicly; we only use this ability
-  // internally.
+  // We want to use swscale for color conversion if possible because it is
+  // faster than filtergraph. The following are the conditions we need to meet
+  // to use it.
 
   // We can only use swscale when we have a single resize transform. Note that
   // this means swscale will not support the case of having several,
@@ -76,12 +71,14 @@ void CpuDeviceInterface::initialize(
   // https://stackoverflow.com/questions/74351955/turn-off-sw-scale-conversion-to-planar-yuv-32-byte-alignment-requirements
   bool isWidthSwScaleCompatible = (outputDims_.width % 32) == 0;
 
+  // Note that we do not expose this capability in the public API, only through
+  // the core API.
   bool userRequestedSwScale = videoStreamOptions_.colorConversionLibrary ==
       ColorConversionLibrary::SWSCALE;
 
   // Note that we treat the transform limitation differently from the width
   // limitation. That is, we consider the transforms being compatible with
-  // sws_scale as a hard requirement. If the transforms are not compatiable,
+  // swscale as a hard requirement. If the transforms are not compatible,
   // then we will end up not applying the transforms, and that is wrong.
   //
   // The width requirement, however, is a soft requirement. Even if we don't
@@ -94,7 +91,7 @@ void CpuDeviceInterface::initialize(
     colorConversionLibrary_ = ColorConversionLibrary::SWSCALE;
 
     // We established above that if the transforms are swscale compatible and
-    // non-empty, then they must have only one transforms, and that transform is
+    // non-empty, then they must have only one transform, and that transform is
     // ResizeTransform.
     if (!transforms.empty()) {
       auto resize = dynamic_cast<ResizeTransform*>(transforms[0].get());
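
Note: taken together, the three hunks above pick the color conversion library. Transform compatibility is a hard gate, the 32-pixel width alignment is a soft one, and an explicit user request (available only through the core API) can override the width requirement. The combined condition never appears in the diff, so the following self-contained sketch is an assumption about how the pieces fit, not the commit's code:

    #include <iostream>

    enum class ColorConversionLibrary { SWSCALE, FILTERGRAPH };

    // Assumed shape of the decision; only the individual flags appear in
    // the diff, not this combined expression.
    ColorConversionLibrary chooseLibrary(
        bool areTransformsSwScaleCompatible, // hard requirement
        bool isWidthSwScaleCompatible, // soft requirement
        bool userRequestedSwScale) {
      if (areTransformsSwScaleCompatible &&
          (isWidthSwScaleCompatible || userRequestedSwScale)) {
        return ColorConversionLibrary::SWSCALE;
      }
      return ColorConversionLibrary::FILTERGRAPH;
    }

    int main() {
      // 640 % 32 == 0, so swscale is chosen.
      std::cout << (chooseLibrary(true, (640 % 32) == 0, false) ==
                    ColorConversionLibrary::SWSCALE)
                << "\n"; // prints 1
      // 642 is not 32-aligned; with no user request we fall back.
      std::cout << (chooseLibrary(true, (642 % 32) == 0, false) ==
                    ColorConversionLibrary::FILTERGRAPH)
                << "\n"; // prints 1
    }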
@@ -207,7 +204,7 @@ void CpuDeviceInterface::convertAVFrameToFrameOutput(
         std::make_unique<FilterGraph>(filtersContext, videoStreamOptions_);
     prevFiltersContext_ = std::move(filtersContext);
   }
-  outputTensor = toTensor(filterGraphContext_->convert(avFrame));
+  outputTensor = rgbAVFrameToTensor(filterGraphContext_->convert(avFrame));
 
   // Similarly to above, if this check fails it means the frame wasn't
   // reshaped to its expected dimensions by filtergraph.
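
Note: the check this comment refers to sits outside the excerpt. A hypothetical helper mirroring what such a check plausibly looks like, assuming the output is an HWC uint8 tensor; the name, signature, and message are assumptions, not code from this commit:

    #include <torch/torch.h>

    // Hypothetical; the real assertion lives outside this diff.
    void validateFrameShape(const torch::Tensor& frame, int height, int width) {
      auto shape = frame.sizes();
      TORCH_CHECK(
          shape.size() == 3 && shape[0] == height && shape[1] == width &&
              shape[2] == 3,
          "Expected frame of shape (", height, ", ", width, ", 3), got ",
          shape);
    }

Usage at this point would be something like validateFrameShape(outputTensor, outputDims_.height, outputDims_.width).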
@@ -256,21 +253,6 @@ int CpuDeviceInterface::convertAVFrameToTensorUsingSwScale(
   return resultHeight;
 }
 
-torch::Tensor CpuDeviceInterface::toTensor(const UniqueAVFrame& avFrame) {
-  TORCH_CHECK_EQ(avFrame->format, AV_PIX_FMT_RGB24);
-
-  int height = avFrame->height;
-  int width = avFrame->width;
-  std::vector<int64_t> shape = {height, width, 3};
-  std::vector<int64_t> strides = {avFrame->linesize[0], 3, 1};
-  AVFrame* avFrameClone = av_frame_clone(avFrame.get());
-  auto deleter = [avFrameClone](void*) {
-    UniqueAVFrame avFrameToDelete(avFrameClone);
-  };
-  return torch::from_blob(
-      avFrameClone->data[0], shape, strides, deleter, {torch::kUInt8});
-}
-
 void CpuDeviceInterface::createSwsContext(
     const SwsFrameContext& swsFrameContext,
     const enum AVColorSpace colorspace) {
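
Note: the removed toTensor() (apparently renamed rgbAVFrameToTensor(), per the earlier hunk) relies on a zero-copy pattern the code never spells out: av_frame_clone() takes a new reference on the frame's shared pixel buffers, and the from_blob deleter drops that reference when the tensor's storage dies, so the tensor can safely outlive the decoder's frame. A self-contained sketch of the same pattern; the UniqueAVFrame alias is assumed to match the one used in this file:

    extern "C" {
    #include <libavutil/frame.h>
    }
    #include <torch/torch.h>
    #include <memory>

    // Assumed to match this file's UniqueAVFrame: a unique_ptr that calls
    // av_frame_free on destruction.
    struct AVFrameDeleter {
      void operator()(AVFrame* frame) const { av_frame_free(&frame); }
    };
    using UniqueAVFrame = std::unique_ptr<AVFrame, AVFrameDeleter>;

    torch::Tensor rgbFrameToTensor(const UniqueAVFrame& avFrame) {
      // HWC layout; linesize[0] can exceed width * 3 due to row padding,
      // so the row stride must come from linesize, not from width.
      std::vector<int64_t> shape = {avFrame->height, avFrame->width, 3};
      std::vector<int64_t> strides = {avFrame->linesize[0], 3, 1};

      // Reference-counted copy: shares the pixel buffers and keeps them
      // alive independently of avFrame.
      AVFrame* avFrameClone = av_frame_clone(avFrame.get());

      // When the tensor's storage is released, drop the clone's reference.
      auto deleter = [avFrameClone](void*) {
        AVFrame* toFree = avFrameClone;
        av_frame_free(&toFree);
      };
      return torch::from_blob(
          avFrameClone->data[0], shape, strides, deleter, {torch::kUInt8});
    }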