Commit 11560ef

Merge branch 'main' into remove_libtorio
2 parents 3e6cd31 + bdd9c72

7 files changed: 28 additions, 44 deletions

.github/workflows/build_docs.yml

Lines changed: 3 additions & 13 deletions
@@ -59,12 +59,6 @@ jobs:
 
 echo "::endgroup::"
 echo "::group::Install PyTorch"
-# conda install \
-# --yes \
-# --quiet \
-# -c "pytorch-${CHANNEL}" \
-# -c nvidia "pytorch-${CHANNEL}"::pytorch[build="*${CU_VERSION}*"] \
-# "${CUDATOOLKIT}"
 
 GPU_ARCH_ID=cu126 # This is hard-coded and must be consistent with gpu-arch-version.
 PYTORCH_WHEEL_INDEX="https://download.pytorch.org/whl/${CHANNEL}/${GPU_ARCH_ID}"
@@ -75,19 +69,15 @@ jobs:
 conda install --quiet --yes cmake>=3.18.0 ninja
 pip3 install --progress-bar off -v -e . --no-use-pep517
 
+# TODO: Need to rely on torchcodec instead of building ffmpeg from source.
 echo "::endgroup::"
 echo "::group::Build FFmpeg"
 .github/scripts/ffmpeg/build_gpu.sh
 
 echo "::endgroup::"
 echo "::group::Install other dependencies"
-conda install \
-    --quiet --yes \
-    -c conda-forge \
-    sox libvorbis pandoc doxygen pysoundfile
-pip install --progress-bar off \
-    git+https://github.com/kpu/kenlm/ flashlight-text \
-    -r docs/requirements.txt -r docs/requirements-tutorials.txt
+
+pip install --progress-bar off -r docs/requirements.txt -r docs/requirements-tutorials.txt
 
 echo "::endgroup::"
 echo "::group::Build documentation"

docs/Makefile

Lines changed: 1 addition & 1 deletion
@@ -24,7 +24,7 @@ docset: html
 # Catch-all target: route all unknown targets to Sphinx using the new
 # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
 %: Makefile
-        doxygen source/Doxyfile
+        # doxygen source/Doxyfile
         @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
         @python post_process_dispatcher.py $(BUILDDIR)
 

docs/requirements.txt

Lines changed: 0 additions & 15 deletions
@@ -2,9 +2,6 @@ Jinja2<3.1.0
 matplotlib<=3.8
 pyparsing<3,>=2.0.2
 
-# C++ docs
-breathe==4.34.0
-
 # Note:
 # When changing Sphinx-related packages, make sure that the custom behaviors in the following
 # locations are working as expected.
@@ -17,15 +14,3 @@ sphinxcontrib.katex==0.8.6
 sphinxcontrib.bibtex
 sphinx_gallery==0.11.1
 nbsphinx==0.8.8
-
-# https://github.com/bmcfee/resampy/issues/106
-# Since 2022-07-07 build_docs CI job started to fail.
-# Pinning resampy to 0.2.2 resolves this.
-# The real cause is not know at the moment but the use
-# of librosa seems to cause this
-# https://github.com/bmcfee/resampy/issues/106
-# In our case, the tutorial timed out is online_asr_tutorial,
-# which itself does not use resampy
-# However audio_feature_augmentation_tutorial is executed before that,
-# which uses librosa.
-resampy==0.2.2

docs/source/conf.py

Lines changed: 2 additions & 12 deletions
@@ -53,23 +53,13 @@
     "sphinxcontrib.bibtex",
     "sphinx_gallery.gen_gallery",
     "nbsphinx",
-    "breathe",
 ]
 
-breathe_projects = {"libtorio": "cpp/xml"}
-
-breathe_default_project = "libtorio"
-
-breathe_projects_source = {
-    "libtorio": (
-        "../../src/libtorio/ffmpeg/",
-        ["stream_reader/stream_reader.h", "stream_writer/stream_writer.h"],
-    )
-}
 
 nbsphinx_requirejs_path = ""
 
 autodoc_member_order = "bysource"
+autodoc_mock_imports = ['torchaudio.models.decoder']
 
 # katex options
 #
@@ -121,7 +111,7 @@ def _get_pattern():
     }
 
     ret = {"filename_pattern": "tutorial.py"}
-    no_build = r"/examples/tutorials/asr_inference_with_cuda_ctc_decoder_tutorial.py"
+    no_build = r".*ctc_decoder_tutorial.py"
    if os.getenv("GALLERY_PATTERN"):
        # See https://github.com/pytorch/tutorials/blob/cbf2238df0e78d84c15bd94288966d2f4b2e83ae/conf.py#L75-L83
        ret["ignore_pattern"] = r"(/(?!" + re.escape(os.getenv("GALLERY_PATTERN")) + r")[^/]+$)"

src/torchaudio/functional/functional.py

Lines changed: 7 additions & 2 deletions
@@ -817,7 +817,7 @@ def _get_mask_param(mask_param: int, p: float, axis_length: int) -> int:
 def mask_along_axis_iid(
     specgrams: Tensor,
     mask_param: int,
-    mask_value: float,
+    mask_value: Union[float, Tensor],
     axis: int,
     p: float = 1.0,
 ) -> Tensor:
@@ -874,7 +874,12 @@ def mask_along_axis_iid(
 
     # Per batch example masking
     specgrams = specgrams.transpose(axis, -1)
-    specgrams = specgrams.masked_fill((mask >= mask_start) & (mask < mask_end), mask_value)
+    # this aims to avoid CPU-GPU sync from upstream
+    specgrams = (
+        torch.where((mask >= mask_start) & (mask < mask_end), mask_value.repeat(*specgrams.shape), specgrams)
+        if isinstance(mask_value, Tensor)
+        else specgrams.masked_fill((mask >= mask_start) & (mask < mask_end), mask_value)
+    )
     specgrams = specgrams.transpose(axis, -1)
 
     return specgrams
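
A short usage sketch of the tensor-valued `mask_value` path added above (shapes and parameter values are illustrative, not from the commit):

import torch
import torchaudio.functional as F

# Batch of spectrograms with shape (batch, channel, freq, time).
specgrams = torch.randn(4, 2, 1025, 400)

# Passing mask_value as a 0-dim tensor (same dtype/device as the input) takes
# the new torch.where branch; per the commit's comment, this avoids the
# CPU-GPU sync that a Python float would force for GPU inputs.
mask_value = torch.tensor(0.0)

# Mask up to 80 consecutive time frames (axis=3), independently per example.
masked = F.mask_along_axis_iid(specgrams, mask_param=80, mask_value=mask_value, axis=3)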

src/torchaudio/transforms/_transforms.py

Lines changed: 1 addition & 1 deletion
@@ -1185,7 +1185,7 @@ def __init__(self, mask_param: int, axis: int, iid_masks: bool, p: float = 1.0)
         self.iid_masks = iid_masks
         self.p = p
 
-    def forward(self, specgram: Tensor, mask_value: float = 0.0) -> Tensor:
+    def forward(self, specgram: Tensor, mask_value: Union[float, torch.Tensor] = 0.0) -> Tensor:
         r"""
         Args:
             specgram (Tensor): Tensor of dimension `(..., freq, time)`.
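
The `forward` shown above belongs to the shared masking base class; users reach it through `torchaudio.transforms.FrequencyMasking` and `TimeMasking`. A brief, illustrative sketch of the new call form (values are made up):

import torch
import torchaudio.transforms as T

specgram = torch.randn(4, 2, 201, 400)  # (batch, channel, freq, time)

# With iid_masks=True and a batched input, the transform dispatches to
# mask_along_axis_iid, the function extended above to accept a tensor mask_value.
masking = T.TimeMasking(time_mask_param=80, iid_masks=True)

masked = masking(specgram, mask_value=torch.tensor(0.0))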

test/torchaudio_unittest/functional/functional_impl.py

Lines changed: 14 additions & 0 deletions
@@ -456,6 +456,20 @@ def test_mask_along_axis_iid(self, mask_param, mask_value, axis, p):
         assert mask_specgrams.size() == specgrams.size()
         assert (num_masked_columns < mask_param).sum() == num_masked_columns.numel()
 
+    @parameterized.expand(list(itertools.product([100], [0.0, 30.0], [2, 3], [0.2, 1.0])))
+    def test_mask_along_axis_iid_mask_value(self, mask_param, mask_value, axis, p):
+        specgrams = torch.randn(4, 2, 1025, 400, dtype=self.dtype, device=self.device)
+        mask_value_tensor = torch.tensor(mask_value, dtype=self.dtype, device=self.device)
+        torch.manual_seed(0)
+        # as this operation is random we need to fix the seed for results to match
+        mask_specgrams = F.mask_along_axis_iid(specgrams, mask_param, mask_value_tensor, axis, p=p)
+        torch.manual_seed(0)
+        mask_specgrams_float = F.mask_along_axis_iid(specgrams, mask_param, mask_value, axis, p=p)
+        assert torch.allclose(
+            mask_specgrams, mask_specgrams_float
+        ), f"""Masking with float and tensor should be the same diff = {
+            torch.abs(mask_specgrams - mask_specgrams_float).max()}"""
+
     @parameterized.expand(list(itertools.product([(2, 1025, 400), (1, 201, 100)], [100], [0.0, 30.0], [1, 2])))
     def test_mask_along_axis_preserve(self, shape, mask_param, mask_value, axis):
         """mask_along_axis should not alter original input Tensor
