Better comments

scotts · scotts · commit 2a391b60182c · 2025-10-31T19:38:15.000-07:00
diff --git a/src/torchcodec/_core/CpuDeviceInterface.cpp b/src/torchcodec/_core/CpuDeviceInterface.cpp
@@ -69,9 +69,9 @@ void CpuDeviceInterface::initializeVideo(
     first = false;
   }
   if (!transforms.empty()) {
-    // Note that we ensure that the transforms come BEFORE the format
-    // conversion. This means that the transforms are applied in the frame's
-    // original pixel format and colorspace.
+    // Note that we ensure that the transforms come AFTER the format conversion.
+    // This means that the transforms are applied in the output pixel format and
+    // colorspace.
     filters_ += "," + filters.str();
   }
 
diff --git a/src/torchcodec/_core/CpuDeviceInterface.h b/src/torchcodec/_core/CpuDeviceInterface.h
@@ -93,33 +93,18 @@ class CpuDeviceInterface : public DeviceInterface {
   // initialization, we convert the user-supplied transforms into this string of
   // filters.
   //
-  // TODO: make sure Scott corrects the below:
   // Note that we start with just the format conversion, and then we ensure that
-  // the user-supplied filters always happen BEFORE the format conversion. We
-  // want the user-supplied filters to operate on frames in their original pixel
+  // the user-supplied filters always happen AFTER the format conversion. We
+  // want the user-supplied filters to operate on frames in the output pixel
   // format and colorspace.
   //
-  // The reason why is not obvious: when users do not need to perform any
-  // transforms, or the only transform they apply is a single resize, we can
-  // sometimes just call swscale directly; see getColorConversionLibrary() for
-  // the full conditions. A single call to swscale's sws_scale() will always do
-  // the scaling (resize) in the frame's original pixel format and colorspace.
-  // In order for calling swscale directly to be an optimization, we must make
-  // sure that the behavior between calling it directly and using filtergraph
-  // is identical.
+  // We apply the transforms on the output pixel format and colorspace because
+  // then decoder-native transforms are as close as possible to returning
+  // untransformed frames and applying TorchVision transforms to them.
   //
-  // If we had to apply transforms in the output pixel format and colorspace,
-  // we could achieve that by calling sws_scale() twice: once to do the resize
-  // and another time to do the format conversion. But that will be slower,
-  // which goes against the whole point of calling sws_scale() directly.
-  //
-  // Further note that we also configure the sink node of the filtergraph to
-  // be AV_PIX_FMT_RGB24. However, the explicit format conversion in the
-  // filters is not redundant. Filtergraph will automatically insert scale
-  // filters that will change the resolution and format of frames to meet the
-  // requirements of downstream filters. If we don't put an explicit format
-  // conversion to rgb24 at the end, filtergraph may automatically insert format
-  // conversions before our filters.
+  // We ensure that the transforms happen on the output pixel format and
+  // colorspace by making sure all of the user-supplied filters happen AFTER
+  // an explicit format conversion.
   std::string filters_ = "format=rgb24";
 
   // The flags we supply to swsContext_, if it used. The flags control the

Original file line number	Diff line number	Diff line change
`@@ -69,9 +69,9 @@ void CpuDeviceInterface::initializeVideo(`
`69`	`69`	`first = false;`
`70`	`70`	`}`
`71`	`71`	`if (!transforms.empty()) {`
`72`		`- // Note that we ensure that the transforms come BEFORE the format`
`73`		`- // conversion. This means that the transforms are applied in the frame's`
`74`		`- // original pixel format and colorspace.`
	`72`	`+ // Note that we ensure that the transforms come AFTER the format conversion.`
	`73`	`+ // This means that the transforms are applied in the output pixel format and`
	`74`	`+ // colorspace.`
`75`	`75`	`filters_ += "," + filters.str();`
`76`	`76`	`}`
`77`	`77`