@@ -46,8 +46,7 @@ void CpuDeviceInterface::initializeVideo(
4646 // We calculate this value during initialization but we don't refer to it until
4747 // getColorConversionLibrary() is called. Calculating this value during
4848 // initialization saves us from having to save all of the transforms.
49- areTransformsSwScaleCompatible_ = transforms.empty () ||
50- (transforms.size () == 1 && transforms[0 ]->isResize ());
49+ areTransformsSwScaleCompatible_ = transforms.empty ();
5150
5251 // Note that we do not expose this capability in the public API, only through
5352 // the core API.
@@ -57,16 +56,6 @@ void CpuDeviceInterface::initializeVideo(
5756 userRequestedSwScale_ = videoStreamOptions_.colorConversionLibrary ==
5857 ColorConversionLibrary::SWSCALE;
5958
60- // We can only use swscale when we have a single resize transform. Note that
61- // we actually decide on whether or not to actually use swscale at the last
62- // possible moment, when we actually convert the frame. This is because we
63- // need to know the actual frame dimensions.
64- if (transforms.size () == 1 && transforms[0 ]->isResize ()) {
65- auto resize = dynamic_cast <ResizeTransform*>(transforms[0 ].get ());
66- TORCH_CHECK (resize != nullptr , " ResizeTransform expected but not found!" )
67- swsFlags_ = resize->getSwsFlags ();
68- }
69-
7059 // If we have any transforms, replace filters_ with the filter strings from
7160 // the transforms. As noted above, we decide between swscale and filtergraph
7261 // when we actually decode a frame.
@@ -83,7 +72,7 @@ void CpuDeviceInterface::initializeVideo(
8372 // Note that we ensure that the transforms come BEFORE the format
8473 // conversion. This means that the transforms are applied in the frame's
8574 // original pixel format and colorspace.
86- filters_ = filters. str () + " ," + filters_ ;
75+ filters_ += " ," + filters. str () ;
8776 }
8877
8978 initialized_ = true ;
@@ -221,6 +210,11 @@ int CpuDeviceInterface::convertAVFrameToTensorUsingSwScale(
221210 enum AVPixelFormat frameFormat =
222211 static_cast <enum AVPixelFormat>(avFrame->format );
223212
213+ TORCH_CHECK (
214+ avFrame->height == outputDims.height &&
215+ avFrame->width == outputDims.width ,
216+ " Input dimensions are not equal to output dimensions; resize for sws_scale() is not yet supported." );
217+
224218 // We need to compare the current frame context with our previous frame
225219 // context. If they are different, then we need to re-create our colorspace
226220 // conversion objects. We create our colorspace conversion objects late so
@@ -237,7 +231,11 @@ int CpuDeviceInterface::convertAVFrameToTensorUsingSwScale(
237231
238232 if (!swsContext_ || prevSwsFrameContext_ != swsFrameContext) {
239233 swsContext_ = createSwsContext (
240- swsFrameContext, avFrame->colorspace , AV_PIX_FMT_RGB24, swsFlags_);
234+ swsFrameContext,
235+ avFrame->colorspace ,
236+ /* outputFormat=*/ AV_PIX_FMT_RGB24,
237+ /* swsFlags=*/ 0 ); // We don't set any flags because we don't yet use
238+ // sws_scale() for resizing.
241239 prevSwsFrameContext_ = swsFrameContext;
242240 }
243241
0 commit comments