Allow output tensor re-allocation

NicolasHug · NicolasHug · commit 42c6373da815 · 2025-04-14T14:54:30.000+01:00
diff --git a/src/torchcodec/_core/AVIOBytesContext.cpp b/src/torchcodec/_core/AVIOBytesContext.cpp
@@ -68,16 +68,32 @@ int64_t AVIOBytesContext::seek(void* opaque, int64_t offset, int whence) {
 }
 
 AVIOToTensorContext::AVIOToTensorContext()
-    : dataContext_{torch::empty({OUTPUT_TENSOR_SIZE}, {torch::kUInt8}), 0} {
+    : dataContext_{torch::empty({INITIAL_TENSOR_SIZE}, {torch::kUInt8}), 0} {
   createAVIOContext(nullptr, &write, &seek, &dataContext_);
 }
 
 // The signature of this function is defined by FFMPEG.
 int AVIOToTensorContext::write(void* opaque, uint8_t* buf, int buf_size) {
   auto dataContext = static_cast<DataContext*>(opaque);
+
+  if (dataContext->current + buf_size > dataContext->outputTensor.numel()) {
+    TORCH_CHECK(
+        dataContext->outputTensor.numel() * 2 <= MAX_TENSOR_SIZE,
+        "We tried to allocate an output encoded tensor larger than ",
+        MAX_TENSOR_SIZE,
+        " bytes. If you think this should be supported, please report.");
+
+    // We double the size of the output tensor. Calling cat() may not be the
+    // most efficient, but it's simple.
+    dataContext->outputTensor =
+        torch::cat({dataContext->outputTensor, dataContext->outputTensor});
+  }
+
   TORCH_CHECK(
-      dataContext->current + buf_size <= OUTPUT_TENSOR_SIZE,
-      "Can't encode more, output tensor needs to be re-allocated and this isn't supported yet.");
+      dataContext->current + buf_size <= dataContext->outputTensor.numel(),
+      "Re-allocation of the output tensor didn't work. ",
+      "This should not happen, please report on TorchCodec bug tracker");
+
   uint8_t* outputTensorData = dataContext->outputTensor.data_ptr<uint8_t>();
   std::memcpy(outputTensorData + dataContext->current, buf, buf_size);
   dataContext->current += static_cast<int64_t>(buf_size);
diff --git a/src/torchcodec/_core/AVIOBytesContext.h b/src/torchcodec/_core/AVIOBytesContext.h
@@ -42,7 +42,8 @@ class AVIOToTensorContext : public AVIOContextHolder {
     int64_t current;
   };
 
-  static const int OUTPUT_TENSOR_SIZE = 5'000'000; // TODO-ENCODING handle this
+  static const int INITIAL_TENSOR_SIZE = 10'000'000; // 10MB
+  static const int MAX_TENSOR_SIZE = 320'000'000; // 320 MB
   static int write(void* opaque, uint8_t* buf, int buf_size);
   // We need to expose seek() for some formats like mp3.
   static int64_t seek(void* opaque, int64_t offset, int whence);
diff --git a/test/test_ops.py b/test/test_ops.py
@@ -1224,6 +1224,23 @@ def test_tensor_against_file(self, asset, bit_rate, output_format, tmp_path):
             self.decode(encoded_file), self.decode(encoded_tensor)
         )
 
+    def test_encode_to_tensor_long_output(self):
+        # Check that we support re-allocating the output tensor when the encoded
+        # data is large.
+        samples = torch.rand(1, int(1e7))
+        encoded_tensor = encode_audio_to_tensor(
+            wf=samples,
+            sample_rate=16_000,
+            format="flac",
+            bit_rate=44_000,
+        )
+        # Note: INITIAL_TENSOR_SIZE must stay in sync with its C++ counterpart
+        # in AVIOToTensorContext for the test to be meaningful.
+        INITIAL_TENSOR_SIZE = 10_000_000
+        assert encoded_tensor.numel() > INITIAL_TENSOR_SIZE
+
+        torch.testing.assert_close(self.decode(encoded_tensor), samples)
+
 
 if __name__ == "__main__":
     pytest.main()