Validate encoder sample rate

NicolasHug · NicolasHug · commit fa9e85f6c422 · 2025-04-07T16:02:10.000+01:00
diff --git a/src/torchcodec/_core/Encoder.cpp b/src/torchcodec/_core/Encoder.cpp
@@ -3,15 +3,45 @@
 
 namespace facebook::torchcodec {
 
+namespace {
+
+// Rejects `sampleRate` (via TORCH_CHECK) unless it appears in the codec's
+// 0-terminated supported_samplerates list. A null list skips the check.
+void validateSampleRate(const AVCodec& avCodec, int sampleRate) {
+  const auto* const rates = avCodec.supported_samplerates;
+  if (rates == nullptr) {
+    return;
+  }
+  std::string supportedRates;
+  for (const auto* rate = rates; *rate != 0; ++rate) {
+    if (*rate == sampleRate) {
+      return;
+    }
+    if (rate != rates) {
+      supportedRates += ", ";
+    }
+    supportedRates += std::to_string(*rate);
+  }
+
+  TORCH_CHECK(
+      false,
+      "invalid sample rate=",
+      sampleRate,
+      ". Supported sample rate values are: ",
+      supportedRates);
+}
+
+} // namespace
+
 AudioEncoder::~AudioEncoder() {}
 
 // TODO-ENCODING: disable ffmpeg logs by default
 
 AudioEncoder::AudioEncoder(
     const torch::Tensor wf,
     int sampleRate,
-    std::string_view fileName)
-    : wf_(wf), sampleRate_(sampleRate) {
+    std::string_view fileName)
+    : wf_(wf) {
   TORCH_CHECK(
       wf_.dtype() == torch::kFloat32,
       "waveform must have float32 dtype, got ",
@@ -55,7 +85,8 @@ AudioEncoder::AudioEncoder(
   // TODO-ENCODING Should also let user choose for compressed formats like mp3.
   avCodecContext_->bit_rate = 0;
 
-  avCodecContext_->sample_rate = sampleRate_;
+  validateSampleRate(*avCodec, sampleRate);
+  avCodecContext_->sample_rate = sampleRate;
 
   // Note: This is the format of the **input** waveform. This doesn't determine
   // the output.
diff --git a/src/torchcodec/_core/Encoder.h b/src/torchcodec/_core/Encoder.h
@@ -9,6 +9,10 @@ class AudioEncoder {
 
   AudioEncoder(
       const torch::Tensor wf,
+      // The *output* sample rate. We can't really decide for the user what it
+      // should be. Particularly, the sample rate of the input waveform should
+      // match this, and that's up to the user. If sample rates don't match,
+      // encoding will still work but audio will be distorted.
       int sampleRate,
       std::string_view fileName);
   void encode();
@@ -24,13 +28,5 @@ class AudioEncoder {
   int streamIndex_;
 
   const torch::Tensor wf_;
-  // The *output* sample rate. We can't really decide for the user what it
-  // should be. Particularly, the sample rate of the input waveform should match
-  // this, and that's up to the user. If sample rates don't match, encoding will
-  // still work but audio will be distorted.
-  // We technically could let the user also specify the input sample rate, and
-  // resample the waveform internally to match them, but that's not in scope for
-  // an initial version (if at all).
-  int sampleRate_;
 };
 } // namespace facebook::torchcodec
diff --git a/test/test_ops.py b/test/test_ops.py
@@ -1107,9 +1107,7 @@ def test_bad_input(self, tmp_path):
                 wf=torch.rand(10, 10), sample_rate=10, filename="./file.bad_extension"
             )
 
-        # TODO-ENCODING: raise more informative error message when sample rate
-        # isn't supported
-        with pytest.raises(RuntimeError, match="Invalid argument"):
+        with pytest.raises(RuntimeError, match="invalid sample rate=10"):
             create_audio_encoder(
                 wf=self.decode(NASA_AUDIO_MP3),
                 sample_rate=10,