diff --git a/CMakeLists.txt b/CMakeLists.txt
index ddc6dc15a2..6fada209fe 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -166,10 +166,6 @@ else()
 endif()
 
 add_subdirectory(src/libtorchaudio)
-if (BUILD_SOX)
-  add_subdirectory(third_party/sox)
-  add_subdirectory(src/libtorchaudio/sox)
-endif()
 if (USE_FFMPEG)
   if (DEFINED ENV{FFMPEG_ROOT})
     add_subdirectory(third_party/ffmpeg/single)
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 819f72d813..785341c363 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -102,7 +102,6 @@ model implementations and application components.
    models
    models.decoder
    pipelines
-   sox_effects
    compliance.kaldi
    kaldi_io
    utils
diff --git a/docs/source/installation.rst b/docs/source/installation.rst
index cb0fa190b8..91136d52dd 100644
--- a/docs/source/installation.rst
+++ b/docs/source/installation.rst
@@ -70,27 +70,6 @@ Optional Dependencies
      files you installed follow this naming scheme, (and then make sure
      that they are in one of the directories listed in library search path.)
 
-* `SoX <https://sox.sourceforge.net/>`__
-
-  Required to use ``backend="sox"`` in `I/O functions <./torchaudio.html#i-o>`__.
-
-  Starting version 2.1, TorchAudio requires separately installed libsox.
-
-  If dynamic linking is causing an issue, you can set the environment variable
-  ``TORCHAUDIO_USE_SOX=0``, and TorchAudio won't use SoX.
-
-  .. note::
-
-     TorchAudio looks for a library file with unversioned name, that is ``libsox.so``
-     for Linux, and ``libsox.dylib`` for macOS. Some package managers install the library
-     file with different name. For example, aptitude on Ubuntu installs ``libsox.so.3``.
-     To have TorchAudio link against it, you can create a symbolic link to it with name
-     ``libsox.so`` (and put the symlink in a library search path).
-
-  .. note::
-     TorchAudio is tested on libsox 14.4.2. (And it is unlikely that other
-     versions would work.)
-
 * `SoundFile <https://pypi.org/project/PySoundFile/>`__
 
   Required to use ``backend="soundfile"`` in `I/O functions <./torchaudio.html#i-o>`__.
diff --git a/docs/source/sox_effects.rst b/docs/source/sox_effects.rst
deleted file mode 100644
index a8ee260144..0000000000
--- a/docs/source/sox_effects.rst
+++ /dev/null
@@ -1,34 +0,0 @@
-.. py:module:: torchaudio.sox_effects
-
-torchaudio.sox_effects
-======================
-
-.. currentmodule:: torchaudio.sox_effects
-
-.. warning::
-    Starting with version 2.8, we are refactoring TorchAudio to transition it
-    into a maintenance phase. As a result, the ``sox_effect`` module is
-    deprecated in 2.8 and will be removed in 2.9.
-
-Applying effects
-----------------
-
-Apply SoX effects chain on torch.Tensor or on file and load as torch.Tensor.
-
-.. autosummary::
-   :toctree: generated
-   :nosignatures:
-
-   apply_effects_tensor
-   apply_effects_file
-
-.. minigallery:: torchaudio.sox_effects.apply_effects_tensor
-   
-Utilities
----------
-
-.. autosummary::
-   :toctree: generated
-   :nosignatures:
-
-   effect_names
diff --git a/docs/source/torchaudio.rst b/docs/source/torchaudio.rst
index aa933e84ad..629ffd312a 100644
--- a/docs/source/torchaudio.rst
+++ b/docs/source/torchaudio.rst
@@ -78,14 +78,6 @@ The following table summarizes the backends.
        to retrieve the supported codecs.
 
        This backend Supports various protocols, such as HTTPS and MP4, and file-like objects.
-   * - 2
-     - SoX
-     - Linux, macOS
-     - Use :py:func:`~torchaudio.utils.sox_utils.list_read_formats` and
-       :py:func:`~torchaudio.utils.sox_utils.list_write_formats`
-       to retrieve the supported codecs.
-
-       This backend does *not* support file-like objects.
    * - 3
      - SoundFile
      - Linux, macOS, Windows
diff --git a/examples/libtorchaudio/augmentation/CMakeLists.txt b/examples/libtorchaudio/augmentation/CMakeLists.txt
deleted file mode 100644
index e9bfece93a..0000000000
--- a/examples/libtorchaudio/augmentation/CMakeLists.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-add_executable(augment main.cpp)
-target_link_libraries(augment "${TORCH_LIBRARIES}" "${TORCHAUDIO_LIBRARY}")
-set_property(TARGET augment PROPERTY CXX_STANDARD 14)
diff --git a/examples/libtorchaudio/augmentation/README.md b/examples/libtorchaudio/augmentation/README.md
deleted file mode 100644
index 81c58b3bd6..0000000000
--- a/examples/libtorchaudio/augmentation/README.md
+++ /dev/null
@@ -1,35 +0,0 @@
-# Augmentation
-
-This example demonstrates how you can use torchaudio's I/O features and augmentations in C++ application.
-
-**NOTE**
-This example uses `"sox_io"` backend, thus does not work on Windows.
-
-## Steps
-### 1. Create augmentation pipeline TorchScript file.
-
-First, we implement our data process pipeline as a regular Python, and save it as a TorchScript object.
-We will load and execute it in our C++ application. The C++ code is found in [`main.cpp`](./main.cpp).
-
-```python
-python create_jittable_pipeline.py \
-    --rir-path "../data/rir.wav" \
-    --output-path "./pipeline.zip"
-```
-
-### 2. Build the application
-
-Please refer to [the top level README.md](../README.md)
-
-### 3. Run the application
-
-Now we run the C++ application `augment`, with the TorchScript object we created in Step.1 and an input audio file.
-
-In [the top level directory](../)
-
-```bash
-input_audio_file="./data/input.wav"
-./build/augmentation/augment ./augmentation/pipeline.zip "${input_audio_file}" "output.wav"
-```
-
-When you give a clean speech file, the output audio sounds like it's a phone conversation.
diff --git a/examples/libtorchaudio/augmentation/create_jittable_pipeline.py b/examples/libtorchaudio/augmentation/create_jittable_pipeline.py
deleted file mode 100755
index 79f56819fc..0000000000
--- a/examples/libtorchaudio/augmentation/create_jittable_pipeline.py
+++ /dev/null
@@ -1,79 +0,0 @@
-#!/usr/bin/env python3
-"""
-Create a data preprocess pipeline that can be run with libtorchaudio
-"""
-import argparse
-import os
-
-import torch
-import torchaudio
-
-
-class Pipeline(torch.nn.Module):
-    """Example audio process pipeline.
-
-    This example load waveform from a file then apply effects and save it to a file.
-    """
-
-    def __init__(self, rir_path: str):
-        super().__init__()
-        rir, sample_rate = torchaudio.load(rir_path)
-        self.register_buffer("rir", rir)
-        self.rir_sample_rate: int = sample_rate
-
-    def forward(self, input_path: str, output_path: str):
-        torchaudio.sox_effects.init_sox_effects()
-
-        # 1. load audio
-        waveform, sample_rate = torchaudio.load(input_path)
-
-        # 2. Add background noise
-        alpha = 0.01
-        waveform = alpha * torch.randn_like(waveform) + (1 - alpha) * waveform
-
-        # 3. Reample the RIR filter to much the audio sample rate
-        rir, _ = torchaudio.sox_effects.apply_effects_tensor(
-            self.rir, self.rir_sample_rate, effects=[["rate", str(sample_rate)]]
-        )
-        rir = rir / torch.linalg.vector_norm(rir, ord=2)
-        rir = torch.flip(rir, [1])
-
-        # 4. Apply RIR filter
-        waveform = torch.nn.functional.pad(waveform, (rir.shape[1] - 1, 0))
-        waveform = torch.nn.functional.conv1d(waveform[None, ...], rir[None, ...])[0]
-
-        # Save
-        torchaudio.save(output_path, waveform, sample_rate)
-
-
-def _create_jit_pipeline(rir_path, output_path):
-    module = torch.jit.script(Pipeline(rir_path))
-    print("*" * 40)
-    print("* Pipeline code")
-    print("*" * 40)
-    print()
-    print(module.code)
-    print("*" * 40)
-    module.save(output_path)
-
-
-def _get_path(*paths):
-    return os.path.join(os.path.dirname(__file__), *paths)
-
-
-def _parse_args():
-    parser = argparse.ArgumentParser(description=__doc__)
-    parser.add_argument(
-        "--rir-path", default=_get_path("..", "data", "rir.wav"), help="Audio dara for room impulse response."
-    )
-    parser.add_argument("--output-path", default=_get_path("pipeline.zip"), help="Output JIT file.")
-    return parser.parse_args()
-
-
-def _main():
-    args = _parse_args()
-    _create_jit_pipeline(args.rir_path, args.output_path)
-
-
-if __name__ == "__main__":
-    _main()
diff --git a/examples/libtorchaudio/augmentation/main.cpp b/examples/libtorchaudio/augmentation/main.cpp
deleted file mode 100644
index 921c67972b..0000000000
--- a/examples/libtorchaudio/augmentation/main.cpp
+++ /dev/null
@@ -1,22 +0,0 @@
-#include <torch/script.h>
-
-int main(int argc, char* argv[]) {
-  if (argc != 4) {
-    std::cerr << "Usage: " << argv[0]
-              << " <JIT_OBJECT> <INPUT_FILE> <OUTPUT_FILE>" << std::endl;
-    return -1;
-  }
-
-  torch::jit::script::Module module;
-  std::cout << "Loading module from: " << argv[1] << std::endl;
-  try {
-    module = torch::jit::load(argv[1]);
-  } catch (const c10::Error& error) {
-    std::cerr << "Failed to load the module:" << error.what() << std::endl;
-    return -1;
-  }
-
-  std::cout << "Performing the process ..." << std::endl;
-  module.forward({c10::IValue(argv[2]), c10::IValue(argv[3])});
-  std::cout << "Done." << std::endl;
-}
diff --git a/examples/source_separation/conv_tasnet/train.py b/examples/source_separation/conv_tasnet/train.py
index 133b1f4f5e..72b8f57824 100644
--- a/examples/source_separation/conv_tasnet/train.py
+++ b/examples/source_separation/conv_tasnet/train.py
@@ -189,8 +189,6 @@ def train(args):
     _LG.info("%s", args)
 
     args.save_dir.mkdir(parents=True, exist_ok=True)
-    if "sox_io" in torchaudio.list_audio_backends():
-        torchaudio.set_audio_backend("sox_io")
 
     start_epoch = 1
     if args.resume:
diff --git a/src/libtorchaudio/sox/CMakeLists.txt b/src/libtorchaudio/sox/CMakeLists.txt
deleted file mode 100644
index 5ffe782c82..0000000000
--- a/src/libtorchaudio/sox/CMakeLists.txt
+++ /dev/null
@@ -1,25 +0,0 @@
-set(
-  sources
-  io.cpp
-  utils.cpp
-  effects.cpp
-  effects_chain.cpp
-  types.cpp
-  )
-torchaudio_library(
-  libtorchaudio_sox
-  "${sources}"
-  ""
-  "torch;sox"
-  ""
-  )
-
-if (BUILD_TORCHAUDIO_PYTHON_EXTENSION)
-  torchaudio_extension(
-    _torchaudio_sox
-    "pybind/pybind.cpp;"
-    ""
-    "libtorchaudio_sox"
-    ""
-    )
-endif()
diff --git a/src/libtorchaudio/sox/effects.cpp b/src/libtorchaudio/sox/effects.cpp
deleted file mode 100644
index 947c04e3fc..0000000000
--- a/src/libtorchaudio/sox/effects.cpp
+++ /dev/null
@@ -1,133 +0,0 @@
-#include <libtorchaudio/sox/effects.h>
-#include <libtorchaudio/sox/effects_chain.h>
-#include <libtorchaudio/sox/utils.h>
-#include <sox.h>
-
-namespace torchaudio::sox {
-namespace {
-
-enum SoxEffectsResourceState { NotInitialized, Initialized, ShutDown };
-SoxEffectsResourceState SOX_RESOURCE_STATE = NotInitialized;
-std::mutex SOX_RESOUCE_STATE_MUTEX;
-
-} // namespace
-
-void initialize_sox_effects() {
-  const std::lock_guard<std::mutex> lock(SOX_RESOUCE_STATE_MUTEX);
-
-  switch (SOX_RESOURCE_STATE) {
-    case NotInitialized:
-      TORCH_CHECK(
-          sox_init() == SOX_SUCCESS, "Failed to initialize sox effects.");
-      SOX_RESOURCE_STATE = Initialized;
-      break;
-    case Initialized:
-      break;
-    case ShutDown:
-      TORCH_CHECK(
-          false, "SoX Effects has been shut down. Cannot initialize again.");
-  }
-};
-
-void shutdown_sox_effects() {
-  const std::lock_guard<std::mutex> lock(SOX_RESOUCE_STATE_MUTEX);
-
-  switch (SOX_RESOURCE_STATE) {
-    case NotInitialized:
-      TORCH_CHECK(false, "SoX Effects is not initialized. Cannot shutdown.");
-    case Initialized:
-      TORCH_CHECK(
-          sox_quit() == SOX_SUCCESS, "Failed to initialize sox effects.");
-      SOX_RESOURCE_STATE = ShutDown;
-      break;
-    case ShutDown:
-      break;
-  }
-}
-
-auto apply_effects_tensor(
-    torch::Tensor waveform,
-    int64_t sample_rate,
-    const std::vector<std::vector<std::string>>& effects,
-    bool channels_first) -> std::tuple<torch::Tensor, int64_t> {
-  validate_input_tensor(waveform);
-
-  // Create SoxEffectsChain
-  const auto dtype = waveform.dtype();
-  SoxEffectsChain chain(
-      /*input_encoding=*/get_tensor_encodinginfo(dtype),
-      /*output_encoding=*/get_tensor_encodinginfo(dtype));
-
-  // Prepare output buffer
-  std::vector<sox_sample_t> out_buffer;
-  out_buffer.reserve(waveform.numel());
-
-  // Build and run effects chain
-  chain.addInputTensor(&waveform, sample_rate, channels_first);
-  for (const auto& effect : effects) {
-    chain.addEffect(effect);
-  }
-  chain.addOutputBuffer(&out_buffer);
-  chain.run();
-
-  // Create tensor from buffer
-  auto out_tensor = convert_to_tensor(
-      /*buffer=*/out_buffer.data(),
-      /*num_samples=*/out_buffer.size(),
-      /*num_channels=*/chain.getOutputNumChannels(),
-      dtype,
-      /*normalize=*/false,
-      channels_first);
-
-  return std::tuple<torch::Tensor, int64_t>(
-      out_tensor, chain.getOutputSampleRate());
-}
-
-auto apply_effects_file(
-    const std::string& path,
-    const std::vector<std::vector<std::string>>& effects,
-    std::optional<bool> normalize,
-    std::optional<bool> channels_first,
-    const std::optional<std::string>& format)
-    -> std::tuple<torch::Tensor, int64_t> {
-  // Open input file
-  SoxFormat sf(sox_open_read(
-      path.c_str(),
-      /*signal=*/nullptr,
-      /*encoding=*/nullptr,
-      /*filetype=*/format.has_value() ? format.value().c_str() : nullptr));
-
-  validate_input_file(sf, path);
-
-  const auto dtype = get_dtype(sf->encoding.encoding, sf->signal.precision);
-
-  // Prepare output
-  std::vector<sox_sample_t> out_buffer;
-  out_buffer.reserve(sf->signal.length);
-
-  // Create and run SoxEffectsChain
-  SoxEffectsChain chain(
-      /*input_encoding=*/sf->encoding,
-      /*output_encoding=*/get_tensor_encodinginfo(dtype));
-
-  chain.addInputFile(sf);
-  for (const auto& effect : effects) {
-    chain.addEffect(effect);
-  }
-  chain.addOutputBuffer(&out_buffer);
-  chain.run();
-
-  // Create tensor from buffer
-  bool channels_first_ = channels_first.value_or(true);
-  auto tensor = convert_to_tensor(
-      /*buffer=*/out_buffer.data(),
-      /*num_samples=*/out_buffer.size(),
-      /*num_channels=*/chain.getOutputNumChannels(),
-      dtype,
-      normalize.value_or(true),
-      channels_first_);
-
-  return std::tuple<torch::Tensor, int64_t>(
-      tensor, chain.getOutputSampleRate());
-}
-} // namespace torchaudio::sox
diff --git a/src/libtorchaudio/sox/effects.h b/src/libtorchaudio/sox/effects.h
deleted file mode 100644
index 8b56427c1e..0000000000
--- a/src/libtorchaudio/sox/effects.h
+++ /dev/null
@@ -1,29 +0,0 @@
-#ifndef TORCHAUDIO_SOX_EFFECTS_H
-#define TORCHAUDIO_SOX_EFFECTS_H
-
-#include <libtorchaudio/sox/utils.h>
-#include <torch/script.h>
-
-namespace torchaudio::sox {
-
-void initialize_sox_effects();
-
-void shutdown_sox_effects();
-
-auto apply_effects_tensor(
-    torch::Tensor waveform,
-    int64_t sample_rate,
-    const std::vector<std::vector<std::string>>& effects,
-    bool channels_first) -> std::tuple<torch::Tensor, int64_t>;
-
-auto apply_effects_file(
-    const std::string& path,
-    const std::vector<std::vector<std::string>>& effects,
-    std::optional<bool> normalize,
-    std::optional<bool> channels_first,
-    const std::optional<std::string>& format)
-    -> std::tuple<torch::Tensor, int64_t>;
-
-} // namespace torchaudio::sox
-
-#endif
diff --git a/src/libtorchaudio/sox/effects_chain.cpp b/src/libtorchaudio/sox/effects_chain.cpp
deleted file mode 100644
index 7f6109a343..0000000000
--- a/src/libtorchaudio/sox/effects_chain.cpp
+++ /dev/null
@@ -1,301 +0,0 @@
-#include <libtorchaudio/sox/effects_chain.h>
-#include <libtorchaudio/sox/utils.h>
-#include "c10/util/Exception.h"
-
-using namespace torch::indexing;
-
-namespace torchaudio::sox {
-
-namespace {
-
-/// helper classes for passing the location of input tensor and output buffer
-///
-/// drain/flow callback functions require plaing C style function signature and
-/// the way to pass extra data is to attach data to sox_effect_t::priv pointer.
-/// The following structs will be assigned to sox_effect_t::priv pointer which
-/// gives sox_effect_t an access to input Tensor and output buffer object.
-struct TensorInputPriv {
-  size_t index;
-  torch::Tensor* waveform;
-  int64_t sample_rate;
-  bool channels_first;
-};
-struct TensorOutputPriv {
-  std::vector<sox_sample_t>* buffer;
-};
-struct FileOutputPriv {
-  sox_format_t* sf;
-};
-
-/// Callback function to feed Tensor data to SoxEffectChain.
-int tensor_input_drain(sox_effect_t* effp, sox_sample_t* obuf, size_t* osamp) {
-  // Retrieve the input Tensor and current index
-  auto priv = static_cast<TensorInputPriv*>(effp->priv);
-  auto index = priv->index;
-  auto tensor = *(priv->waveform);
-  auto num_channels = effp->out_signal.channels;
-
-  // Adjust the number of samples to read
-  const size_t num_samples = tensor.numel();
-  if (index + *osamp > num_samples) {
-    *osamp = num_samples - index;
-  }
-  // Ensure that it's a multiple of the number of channels
-  *osamp -= *osamp % num_channels;
-
-  // Slice the input Tensor
-  auto chunk = [&]() {
-    auto i_frame = index / num_channels;
-    auto num_frames = *osamp / num_channels;
-    auto t = (priv->channels_first)
-        ? tensor.index({Slice(), Slice(i_frame, i_frame + num_frames)}).t()
-        : tensor.index({Slice(i_frame, i_frame + num_frames), Slice()});
-    return t.reshape({-1});
-  }();
-
-  // Convert to sox_sample_t (int32_t)
-  switch (chunk.dtype().toScalarType()) {
-    case c10::ScalarType::Float: {
-      // Need to convert to 64-bit precision so that
-      // values around INT32_MIN/MAX are handled correctly.
-      chunk = chunk.to(c10::ScalarType::Double);
-      chunk *= 2147483648.;
-      chunk.clamp_(INT32_MIN, INT32_MAX);
-      chunk = chunk.to(c10::ScalarType::Int);
-      break;
-    }
-    case c10::ScalarType::Int: {
-      break;
-    }
-    case c10::ScalarType::Short: {
-      chunk = chunk.to(c10::ScalarType::Int);
-      chunk *= 65536;
-      break;
-    }
-    case c10::ScalarType::Byte: {
-      chunk = chunk.to(c10::ScalarType::Int);
-      chunk -= 128;
-      chunk *= 16777216;
-      break;
-    }
-    default:
-      TORCH_CHECK(false, "Unexpected dtype: ", chunk.dtype());
-  }
-  // Write to buffer
-  chunk = chunk.contiguous();
-  memcpy(obuf, chunk.data_ptr<int32_t>(), *osamp * 4);
-  priv->index += *osamp;
-  return (priv->index == num_samples) ? SOX_EOF : SOX_SUCCESS;
-}
-
-/// Callback function to fetch data from SoxEffectChain.
-int tensor_output_flow(
-    sox_effect_t* effp,
-    sox_sample_t const* ibuf,
-    sox_sample_t* obuf LSX_UNUSED,
-    size_t* isamp,
-    size_t* osamp) {
-  *osamp = 0;
-  // Get output buffer
-  auto out_buffer = static_cast<TensorOutputPriv*>(effp->priv)->buffer;
-  // Append at the end
-  out_buffer->insert(out_buffer->end(), ibuf, ibuf + *isamp);
-  return SOX_SUCCESS;
-}
-
-int file_output_flow(
-    sox_effect_t* effp,
-    sox_sample_t const* ibuf,
-    sox_sample_t* obuf LSX_UNUSED,
-    size_t* isamp,
-    size_t* osamp) {
-  *osamp = 0;
-  if (*isamp) {
-    auto sf = static_cast<FileOutputPriv*>(effp->priv)->sf;
-    if (sox_write(sf, ibuf, *isamp) != *isamp) {
-      TORCH_CHECK(
-          !sf->sox_errno,
-          sf->sox_errstr,
-          " ",
-          sox_strerror(sf->sox_errno),
-          " ",
-          sf->filename);
-      return SOX_EOF;
-    }
-  }
-  return SOX_SUCCESS;
-}
-
-sox_effect_handler_t* get_tensor_input_handler() {
-  static sox_effect_handler_t handler{
-      /*name=*/"input_tensor",
-      /*usage=*/nullptr,
-      /*flags=*/SOX_EFF_MCHAN,
-      /*getopts=*/nullptr,
-      /*start=*/nullptr,
-      /*flow=*/nullptr,
-      /*drain=*/tensor_input_drain,
-      /*stop=*/nullptr,
-      /*kill=*/nullptr,
-      /*priv_size=*/sizeof(TensorInputPriv)};
-  return &handler;
-}
-
-sox_effect_handler_t* get_tensor_output_handler() {
-  static sox_effect_handler_t handler{
-      /*name=*/"output_tensor",
-      /*usage=*/nullptr,
-      /*flags=*/SOX_EFF_MCHAN,
-      /*getopts=*/nullptr,
-      /*start=*/nullptr,
-      /*flow=*/tensor_output_flow,
-      /*drain=*/nullptr,
-      /*stop=*/nullptr,
-      /*kill=*/nullptr,
-      /*priv_size=*/sizeof(TensorOutputPriv)};
-  return &handler;
-}
-
-sox_effect_handler_t* get_file_output_handler() {
-  static sox_effect_handler_t handler{
-      /*name=*/"output_file",
-      /*usage=*/nullptr,
-      /*flags=*/SOX_EFF_MCHAN,
-      /*getopts=*/nullptr,
-      /*start=*/nullptr,
-      /*flow=*/file_output_flow,
-      /*drain=*/nullptr,
-      /*stop=*/nullptr,
-      /*kill=*/nullptr,
-      /*priv_size=*/sizeof(FileOutputPriv)};
-  return &handler;
-}
-
-} // namespace
-
-SoxEffect::SoxEffect(sox_effect_t* se) noexcept : se_(se) {}
-
-SoxEffect::~SoxEffect() {
-  if (se_ != nullptr) {
-    free(se_);
-  }
-}
-
-SoxEffect::operator sox_effect_t*() const {
-  return se_;
-}
-
-auto SoxEffect::operator->() noexcept -> sox_effect_t* {
-  return se_;
-}
-
-SoxEffectsChain::SoxEffectsChain(
-    sox_encodinginfo_t input_encoding,
-    sox_encodinginfo_t output_encoding)
-    : in_enc_(input_encoding),
-      out_enc_(output_encoding),
-      in_sig_(),
-      interm_sig_(),
-      out_sig_(),
-      sec_(sox_create_effects_chain(&in_enc_, &out_enc_)) {
-  TORCH_CHECK(sec_, "Failed to create effect chain.");
-}
-
-SoxEffectsChain::~SoxEffectsChain() {
-  if (sec_ != nullptr) {
-    sox_delete_effects_chain(sec_);
-  }
-}
-
-void SoxEffectsChain::run() {
-  sox_flow_effects(sec_, nullptr, nullptr);
-}
-
-void SoxEffectsChain::addInputTensor(
-    torch::Tensor* waveform,
-    int64_t sample_rate,
-    bool channels_first) {
-  in_sig_ = get_signalinfo(waveform, sample_rate, "wav", channels_first);
-  interm_sig_ = in_sig_;
-  SoxEffect e(sox_create_effect(get_tensor_input_handler()));
-  auto priv = static_cast<TensorInputPriv*>(e->priv);
-  priv->index = 0;
-  priv->waveform = waveform;
-  priv->sample_rate = sample_rate;
-  priv->channels_first = channels_first;
-  TORCH_CHECK(
-      sox_add_effect(sec_, e, &interm_sig_, &in_sig_) == SOX_SUCCESS,
-      "Internal Error: Failed to add effect: input_tensor");
-}
-
-void SoxEffectsChain::addOutputBuffer(
-    std::vector<sox_sample_t>* output_buffer) {
-  SoxEffect e(sox_create_effect(get_tensor_output_handler()));
-  static_cast<TensorOutputPriv*>(e->priv)->buffer = output_buffer;
-  TORCH_CHECK(
-      sox_add_effect(sec_, e, &interm_sig_, &in_sig_) == SOX_SUCCESS,
-      "Internal Error: Failed to add effect: output_tensor");
-}
-
-void SoxEffectsChain::addInputFile(sox_format_t* sf) {
-  in_sig_ = sf->signal;
-  interm_sig_ = in_sig_;
-  SoxEffect e(sox_create_effect(sox_find_effect("input")));
-  char* opts[] = {(char*)sf};
-  sox_effect_options(e, 1, opts);
-  TORCH_CHECK(
-      sox_add_effect(sec_, e, &interm_sig_, &in_sig_) == SOX_SUCCESS,
-      "Internal Error: Failed to add effect: input ",
-      sf->filename);
-}
-
-void SoxEffectsChain::addOutputFile(sox_format_t* sf) {
-  out_sig_ = sf->signal;
-  SoxEffect e(sox_create_effect(get_file_output_handler()));
-  static_cast<FileOutputPriv*>(e->priv)->sf = sf;
-  TORCH_CHECK(
-      sox_add_effect(sec_, e, &interm_sig_, &out_sig_) == SOX_SUCCESS,
-      "Internal Error: Failed to add effect: output ",
-      sf->filename);
-}
-
-void SoxEffectsChain::addEffect(const std::vector<std::string>& effect) {
-  const auto num_args = effect.size();
-  TORCH_CHECK(num_args != 0, "Invalid argument: empty effect.");
-  const auto name = effect[0];
-  TORCH_CHECK(
-      UNSUPPORTED_EFFECTS.find(name) == UNSUPPORTED_EFFECTS.end(),
-      "Unsupported effect: ",
-      name)
-
-  auto returned_effect = sox_find_effect(name.c_str());
-  TORCH_CHECK(returned_effect, "Unsupported effect: ", name)
-
-  SoxEffect e(sox_create_effect(returned_effect));
-  const auto num_options = num_args - 1;
-
-  std::vector<char*> opts;
-  for (size_t i = 1; i < num_args; ++i) {
-    opts.push_back((char*)effect[i].c_str());
-  }
-  TORCH_CHECK(
-      sox_effect_options(e, num_options, num_options ? opts.data() : nullptr) ==
-          SOX_SUCCESS,
-      "Invalid effect option: ",
-      c10::Join(" ", effect))
-  TORCH_CHECK(
-      sox_add_effect(sec_, e, &interm_sig_, &in_sig_) == SOX_SUCCESS,
-      "Internal Error: Failed to add effect: \"",
-      c10::Join(" ", effect),
-      "\"");
-}
-
-int64_t SoxEffectsChain::getOutputNumChannels() {
-  return interm_sig_.channels;
-}
-
-int64_t SoxEffectsChain::getOutputSampleRate() {
-  return interm_sig_.rate;
-}
-
-} // namespace torchaudio::sox
diff --git a/src/libtorchaudio/sox/effects_chain.h b/src/libtorchaudio/sox/effects_chain.h
deleted file mode 100644
index e6a892b5e8..0000000000
--- a/src/libtorchaudio/sox/effects_chain.h
+++ /dev/null
@@ -1,61 +0,0 @@
-#ifndef TORCHAUDIO_SOX_EFFECTS_CHAIN_H
-#define TORCHAUDIO_SOX_EFFECTS_CHAIN_H
-
-#include <libtorchaudio/sox/utils.h>
-#include <sox.h>
-
-namespace torchaudio::sox {
-
-// Helper struct to safely close sox_effect_t* pointer returned by
-// sox_create_effect
-
-struct SoxEffect {
-  explicit SoxEffect(sox_effect_t* se) noexcept;
-  SoxEffect(const SoxEffect& other) = delete;
-  SoxEffect(SoxEffect&& other) = delete;
-  auto operator=(const SoxEffect& other) -> SoxEffect& = delete;
-  auto operator=(SoxEffect&& other) -> SoxEffect& = delete;
-  ~SoxEffect();
-  operator sox_effect_t*() const;
-  auto operator->() noexcept -> sox_effect_t*;
-
- private:
-  sox_effect_t* se_;
-};
-
-// Helper struct to safely close sox_effects_chain_t with handy methods
-class SoxEffectsChain {
-  const sox_encodinginfo_t in_enc_;
-  const sox_encodinginfo_t out_enc_;
-
- protected:
-  sox_signalinfo_t in_sig_;
-  sox_signalinfo_t interm_sig_;
-  sox_signalinfo_t out_sig_;
-  sox_effects_chain_t* sec_;
-
- public:
-  explicit SoxEffectsChain(
-      sox_encodinginfo_t input_encoding,
-      sox_encodinginfo_t output_encoding);
-  SoxEffectsChain(const SoxEffectsChain& other) = delete;
-  SoxEffectsChain(SoxEffectsChain&& other) = delete;
-  SoxEffectsChain& operator=(const SoxEffectsChain& other) = delete;
-  SoxEffectsChain& operator=(SoxEffectsChain&& other) = delete;
-  ~SoxEffectsChain();
-  void run();
-  void addInputTensor(
-      torch::Tensor* waveform,
-      int64_t sample_rate,
-      bool channels_first);
-  void addInputFile(sox_format_t* sf);
-  void addOutputBuffer(std::vector<sox_sample_t>* output_buffer);
-  void addOutputFile(sox_format_t* sf);
-  void addEffect(const std::vector<std::string>& effect);
-  int64_t getOutputNumChannels();
-  int64_t getOutputSampleRate();
-};
-
-} // namespace torchaudio::sox
-
-#endif
diff --git a/src/libtorchaudio/sox/io.cpp b/src/libtorchaudio/sox/io.cpp
deleted file mode 100644
index 474726ad1c..0000000000
--- a/src/libtorchaudio/sox/io.cpp
+++ /dev/null
@@ -1,128 +0,0 @@
-#include <libtorchaudio/sox/effects.h>
-#include <libtorchaudio/sox/effects_chain.h>
-#include <libtorchaudio/sox/io.h>
-#include <libtorchaudio/sox/types.h>
-#include <libtorchaudio/sox/utils.h>
-
-using namespace torch::indexing;
-
-namespace torchaudio::sox {
-
-std::tuple<int64_t, int64_t, int64_t, int64_t, std::string> get_info_file(
-    const std::string& path,
-    const std::optional<std::string>& format) {
-  SoxFormat sf(sox_open_read(
-      path.c_str(),
-      /*signal=*/nullptr,
-      /*encoding=*/nullptr,
-      /*filetype=*/format.has_value() ? format.value().c_str() : nullptr));
-
-  validate_input_file(sf, path);
-
-  return std::make_tuple(
-      static_cast<int64_t>(sf->signal.rate),
-      static_cast<int64_t>(sf->signal.length / sf->signal.channels),
-      static_cast<int64_t>(sf->signal.channels),
-      static_cast<int64_t>(sf->encoding.bits_per_sample),
-      get_encoding(sf->encoding.encoding));
-}
-
-std::vector<std::vector<std::string>> get_effects(
-    const std::optional<int64_t>& frame_offset,
-    const std::optional<int64_t>& num_frames) {
-  const auto offset = frame_offset.value_or(0);
-  TORCH_CHECK(
-      offset >= 0,
-      "Invalid argument: frame_offset must be non-negative. Found: ",
-      offset);
-  const auto frames = num_frames.value_or(-1);
-  TORCH_CHECK(
-      frames > 0 || frames == -1,
-      "Invalid argument: num_frames must be -1 or greater than 0.");
-
-  std::vector<std::vector<std::string>> effects;
-  if (frames != -1) {
-    std::ostringstream os_offset, os_frames;
-    os_offset << offset << "s";
-    os_frames << "+" << frames << "s";
-    effects.emplace_back(
-        std::vector<std::string>{"trim", os_offset.str(), os_frames.str()});
-  } else if (offset != 0) {
-    std::ostringstream os_offset;
-    os_offset << offset << "s";
-    effects.emplace_back(std::vector<std::string>{"trim", os_offset.str()});
-  }
-  return effects;
-}
-
-std::tuple<torch::Tensor, int64_t> load_audio_file(
-    const std::string& path,
-    const std::optional<int64_t>& frame_offset,
-    const std::optional<int64_t>& num_frames,
-    std::optional<bool> normalize,
-    std::optional<bool> channels_first,
-    const std::optional<std::string>& format) {
-  auto effects = get_effects(frame_offset, num_frames);
-  return apply_effects_file(path, effects, normalize, channels_first, format);
-}
-
-void save_audio_file(
-    const std::string& path,
-    torch::Tensor tensor,
-    int64_t sample_rate,
-    bool channels_first,
-    std::optional<double> compression,
-    std::optional<std::string> format,
-    std::optional<std::string> encoding,
-    std::optional<int64_t> bits_per_sample) {
-  validate_input_tensor(tensor);
-
-  const auto filetype = [&]() {
-    if (format.has_value()) {
-      return format.value();
-    }
-    return get_filetype(path);
-  }();
-
-  if (filetype == "amr-nb") {
-    const auto num_channels = tensor.size(channels_first ? 0 : 1);
-    TORCH_CHECK(
-        num_channels == 1, "amr-nb format only supports single channel audio.");
-  } else if (filetype == "htk") {
-    const auto num_channels = tensor.size(channels_first ? 0 : 1);
-    TORCH_CHECK(
-        num_channels == 1, "htk format only supports single channel audio.");
-  } else if (filetype == "gsm") {
-    const auto num_channels = tensor.size(channels_first ? 0 : 1);
-    TORCH_CHECK(
-        num_channels == 1, "gsm format only supports single channel audio.");
-    TORCH_CHECK(
-        sample_rate == 8000,
-        "gsm format only supports a sampling rate of 8kHz.");
-  }
-  const auto signal_info =
-      get_signalinfo(&tensor, sample_rate, filetype, channels_first);
-  const auto encoding_info = get_encodinginfo_for_save(
-      filetype, tensor.dtype(), compression, encoding, bits_per_sample);
-
-  SoxFormat sf(sox_open_write(
-      path.c_str(),
-      &signal_info,
-      &encoding_info,
-      /*filetype=*/filetype.c_str(),
-      /*oob=*/nullptr,
-      /*overwrite_permitted=*/nullptr));
-
-  TORCH_CHECK(
-      static_cast<sox_format_t*>(sf) != nullptr,
-      "Error saving audio file: failed to open file ",
-      path);
-
-  SoxEffectsChain chain(
-      /*input_encoding=*/get_tensor_encodinginfo(tensor.dtype()),
-      /*output_encoding=*/sf->encoding);
-  chain.addInputTensor(&tensor, sample_rate, channels_first);
-  chain.addOutputFile(sf);
-  chain.run();
-}
-} // namespace torchaudio::sox
diff --git a/src/libtorchaudio/sox/io.h b/src/libtorchaudio/sox/io.h
deleted file mode 100644
index b011ef59be..0000000000
--- a/src/libtorchaudio/sox/io.h
+++ /dev/null
@@ -1,38 +0,0 @@
-#ifndef TORCHAUDIO_SOX_IO_H
-#define TORCHAUDIO_SOX_IO_H
-
-#include <libtorchaudio/sox/utils.h>
-#include <torch/script.h>
-
-namespace torchaudio::sox {
-
-auto get_effects(
-    const std::optional<int64_t>& frame_offset,
-    const std::optional<int64_t>& num_frames)
-    -> std::vector<std::vector<std::string>>;
-
-std::tuple<int64_t, int64_t, int64_t, int64_t, std::string> get_info_file(
-    const std::string& path,
-    const std::optional<std::string>& format);
-
-std::tuple<torch::Tensor, int64_t> load_audio_file(
-    const std::string& path,
-    const std::optional<int64_t>& frame_offset,
-    const std::optional<int64_t>& num_frames,
-    std::optional<bool> normalize,
-    std::optional<bool> channels_first,
-    const std::optional<std::string>& format);
-
-void save_audio_file(
-    const std::string& path,
-    torch::Tensor tensor,
-    int64_t sample_rate,
-    bool channels_first,
-    std::optional<double> compression,
-    std::optional<std::string> format,
-    std::optional<std::string> encoding,
-    std::optional<int64_t> bits_per_sample);
-
-} // namespace torchaudio::sox
-
-#endif
diff --git a/src/libtorchaudio/sox/pybind/pybind.cpp b/src/libtorchaudio/sox/pybind/pybind.cpp
deleted file mode 100644
index bd9c82c349..0000000000
--- a/src/libtorchaudio/sox/pybind/pybind.cpp
+++ /dev/null
@@ -1,39 +0,0 @@
-#include <libtorchaudio/sox/effects.h>
-#include <libtorchaudio/sox/io.h>
-#include <libtorchaudio/sox/utils.h>
-#include <torch/extension.h>
-
-namespace torchaudio {
-namespace sox {
-namespace {
-
-TORCH_LIBRARY(torchaudio_sox, m) {
-  m.def("torchaudio_sox::get_info", &get_info_file);
-  m.def("torchaudio_sox::load_audio_file", &load_audio_file);
-  m.def("torchaudio_sox::save_audio_file", &save_audio_file);
-  m.def("torchaudio_sox::initialize_sox_effects", &initialize_sox_effects);
-  m.def("torchaudio_sox::shutdown_sox_effects", &shutdown_sox_effects);
-  m.def("torchaudio_sox::apply_effects_tensor", &apply_effects_tensor);
-  m.def("torchaudio_sox::apply_effects_file", &apply_effects_file);
-}
-
-PYBIND11_MODULE(_torchaudio_sox, m) {
-  m.def("set_seed", &set_seed, "Set random seed.");
-  m.def("set_verbosity", &set_verbosity, "Set verbosity.");
-  m.def("set_use_threads", &set_use_threads, "Set threading.");
-  m.def("set_buffer_size", &set_buffer_size, "Set buffer size.");
-  m.def("get_buffer_size", &get_buffer_size, "Get buffer size.");
-  m.def("list_effects", &list_effects, "List available effects.");
-  m.def(
-      "list_read_formats",
-      &list_read_formats,
-      "List supported formats for decoding.");
-  m.def(
-      "list_write_formats",
-      &list_write_formats,
-      "List supported formats for encoding.");
-}
-
-} // namespace
-} // namespace sox
-} // namespace torchaudio
diff --git a/src/libtorchaudio/sox/types.cpp b/src/libtorchaudio/sox/types.cpp
deleted file mode 100644
index 12bd070105..0000000000
--- a/src/libtorchaudio/sox/types.cpp
+++ /dev/null
@@ -1,148 +0,0 @@
-#include <libtorchaudio/sox/types.h>
-
-namespace torchaudio::sox {
-
-Format get_format_from_string(const std::string& format) {
-  if (format == "wav") {
-    return Format::WAV;
-  }
-  if (format == "mp3") {
-    return Format::MP3;
-  }
-  if (format == "flac") {
-    return Format::FLAC;
-  }
-  if (format == "ogg" || format == "vorbis") {
-    return Format::VORBIS;
-  }
-  if (format == "amr-nb") {
-    return Format::AMR_NB;
-  }
-  if (format == "amr-wb") {
-    return Format::AMR_WB;
-  }
-  if (format == "amb") {
-    return Format::AMB;
-  }
-  if (format == "sph") {
-    return Format::SPHERE;
-  }
-  if (format == "htk") {
-    return Format::HTK;
-  }
-  if (format == "gsm") {
-    return Format::GSM;
-  }
-  TORCH_CHECK(false, "Internal Error: unexpected format value: ", format);
-}
-
-std::string to_string(Encoding v) {
-  switch (v) {
-    case Encoding::UNKNOWN:
-      return "UNKNOWN";
-    case Encoding::PCM_SIGNED:
-      return "PCM_S";
-    case Encoding::PCM_UNSIGNED:
-      return "PCM_U";
-    case Encoding::PCM_FLOAT:
-      return "PCM_F";
-    case Encoding::FLAC:
-      return "FLAC";
-    case Encoding::ULAW:
-      return "ULAW";
-    case Encoding::ALAW:
-      return "ALAW";
-    case Encoding::MP3:
-      return "MP3";
-    case Encoding::VORBIS:
-      return "VORBIS";
-    case Encoding::AMR_WB:
-      return "AMR_WB";
-    case Encoding::AMR_NB:
-      return "AMR_NB";
-    case Encoding::OPUS:
-      return "OPUS";
-    default:
-      TORCH_CHECK(false, "Internal Error: unexpected encoding.");
-  }
-}
-
-Encoding get_encoding_from_option(const std::optional<std::string>& encoding) {
-  if (!encoding.has_value()) {
-    return Encoding::NOT_PROVIDED;
-  }
-  std::string v = encoding.value();
-  if (v == "PCM_S") {
-    return Encoding::PCM_SIGNED;
-  }
-  if (v == "PCM_U") {
-    return Encoding::PCM_UNSIGNED;
-  }
-  if (v == "PCM_F") {
-    return Encoding::PCM_FLOAT;
-  }
-  if (v == "ULAW") {
-    return Encoding::ULAW;
-  }
-  if (v == "ALAW") {
-    return Encoding::ALAW;
-  }
-  TORCH_CHECK(false, "Internal Error: unexpected encoding value: ", v);
-}
-
-BitDepth get_bit_depth_from_option(const std::optional<int64_t>& bit_depth) {
-  if (!bit_depth.has_value()) {
-    return BitDepth::NOT_PROVIDED;
-  }
-  int64_t v = bit_depth.value();
-  switch (v) {
-    case 8:
-      return BitDepth::B8;
-    case 16:
-      return BitDepth::B16;
-    case 24:
-      return BitDepth::B24;
-    case 32:
-      return BitDepth::B32;
-    case 64:
-      return BitDepth::B64;
-    default: {
-      TORCH_CHECK(false, "Internal Error: unexpected bit depth value: ", v);
-    }
-  }
-}
-
-std::string get_encoding(sox_encoding_t encoding) {
-  switch (encoding) {
-    case SOX_ENCODING_UNKNOWN:
-      return "UNKNOWN";
-    case SOX_ENCODING_SIGN2:
-      return "PCM_S";
-    case SOX_ENCODING_UNSIGNED:
-      return "PCM_U";
-    case SOX_ENCODING_FLOAT:
-      return "PCM_F";
-    case SOX_ENCODING_FLAC:
-      return "FLAC";
-    case SOX_ENCODING_ULAW:
-      return "ULAW";
-    case SOX_ENCODING_ALAW:
-      return "ALAW";
-    case SOX_ENCODING_MP3:
-      return "MP3";
-    case SOX_ENCODING_VORBIS:
-      return "VORBIS";
-    case SOX_ENCODING_AMR_WB:
-      return "AMR_WB";
-    case SOX_ENCODING_AMR_NB:
-      return "AMR_NB";
-    case SOX_ENCODING_OPUS:
-      return "OPUS";
-    case SOX_ENCODING_GSM:
-      return "GSM";
-    default:
-      return "UNKNOWN";
-  }
-}
-
-} // namespace torchaudio::sox
diff --git a/src/libtorchaudio/sox/types.h b/src/libtorchaudio/sox/types.h
deleted file mode 100644
index 714d303313..0000000000
--- a/src/libtorchaudio/sox/types.h
+++ /dev/null
@@ -1,58 +0,0 @@
-#ifndef TORCHAUDIO_SOX_TYPES_H
-#define TORCHAUDIO_SOX_TYPES_H
-
-#include <sox.h>
-#include <torch/script.h>
-
-namespace torchaudio::sox {
-
-enum class Format {
-  WAV,
-  MP3,
-  FLAC,
-  VORBIS,
-  AMR_NB,
-  AMR_WB,
-  AMB,
-  SPHERE,
-  GSM,
-  HTK,
-};
-
-Format get_format_from_string(const std::string& format);
-
-enum class Encoding {
-  NOT_PROVIDED,
-  UNKNOWN,
-  PCM_SIGNED,
-  PCM_UNSIGNED,
-  PCM_FLOAT,
-  FLAC,
-  ULAW,
-  ALAW,
-  MP3,
-  VORBIS,
-  AMR_WB,
-  AMR_NB,
-  OPUS,
-};
-
-std::string to_string(Encoding v);
-Encoding get_encoding_from_option(const std::optional<std::string>& encoding);
-
-enum class BitDepth : unsigned {
-  NOT_PROVIDED = 0,
-  B8 = 8,
-  B16 = 16,
-  B24 = 24,
-  B32 = 32,
-  B64 = 64,
-};
-
-BitDepth get_bit_depth_from_option(const std::optional<int64_t>& bit_depth);
-
-std::string get_encoding(sox_encoding_t encoding);
-
-} // namespace torchaudio::sox
-
-#endif
diff --git a/src/libtorchaudio/sox/utils.cpp b/src/libtorchaudio/sox/utils.cpp
deleted file mode 100644
index 94748c5209..0000000000
--- a/src/libtorchaudio/sox/utils.cpp
+++ /dev/null
@@ -1,509 +0,0 @@
-#include <c10/core/ScalarType.h>
-#include <libtorchaudio/sox/types.h>
-#include <libtorchaudio/sox/utils.h>
-#include <sox.h>
-
-namespace torchaudio::sox {
-
-const std::unordered_set<std::string> UNSUPPORTED_EFFECTS{
-    "input",
-    "output",
-    "spectrogram",
-    "noiseprof",
-    "noisered",
-    "splice"};
-
-void set_seed(const int64_t seed) {
-  sox_get_globals()->ranqd1 = static_cast<sox_int32_t>(seed);
-}
-
-void set_verbosity(const int64_t verbosity) {
-  sox_get_globals()->verbosity = static_cast<unsigned>(verbosity);
-}
-
-void set_use_threads(const bool use_threads) {
-  sox_get_globals()->use_threads = static_cast<sox_bool>(use_threads);
-}
-
-void set_buffer_size(const int64_t buffer_size) {
-  sox_get_globals()->bufsiz = static_cast<size_t>(buffer_size);
-}
-
-int64_t get_buffer_size() {
-  return sox_get_globals()->bufsiz;
-}
-
-std::vector<std::vector<std::string>> list_effects() {
-  std::vector<std::vector<std::string>> effects;
-  for (const sox_effect_fn_t* fns = sox_get_effect_fns(); *fns; ++fns) {
-    const sox_effect_handler_t* handler = (*fns)();
-    if (handler && handler->name) {
-      if (UNSUPPORTED_EFFECTS.find(handler->name) ==
-          UNSUPPORTED_EFFECTS.end()) {
-        effects.emplace_back(std::vector<std::string>{
-            handler->name,
-            handler->usage ? std::string(handler->usage) : std::string("")});
-      }
-    }
-  }
-  return effects;
-}
-
-std::vector<std::string> list_write_formats() {
-  std::vector<std::string> formats;
-  for (const sox_format_tab_t* fns = sox_get_format_fns(); fns->fn; ++fns) {
-    const sox_format_handler_t* handler = fns->fn();
-    for (const char* const* names = handler->names; *names; ++names) {
-      if (!strchr(*names, '/') && handler->write) {
-        formats.emplace_back(*names);
-      }
-    }
-  }
-  return formats;
-}
-
-std::vector<std::string> list_read_formats() {
-  std::vector<std::string> formats;
-  for (const sox_format_tab_t* fns = sox_get_format_fns(); fns->fn; ++fns) {
-    const sox_format_handler_t* handler = fns->fn();
-    for (const char* const* names = handler->names; *names; ++names) {
-      if (!strchr(*names, '/') && handler->read) {
-        formats.emplace_back(*names);
-      }
-    }
-  }
-  return formats;
-}
-
-SoxFormat::SoxFormat(sox_format_t* fd) noexcept : fd_(fd) {}
-SoxFormat::~SoxFormat() {
-  close();
-}
-
-sox_format_t* SoxFormat::operator->() const noexcept {
-  return fd_;
-}
-SoxFormat::operator sox_format_t*() const noexcept {
-  return fd_;
-}
-
-void SoxFormat::close() {
-  if (fd_ != nullptr) {
-    sox_close(fd_);
-    fd_ = nullptr;
-  }
-}
-
-void validate_input_file(const SoxFormat& sf, const std::string& path) {
-  TORCH_CHECK(
-      static_cast<sox_format_t*>(sf) != nullptr,
-      "Error loading audio file: failed to open file " + path);
-  TORCH_CHECK(
-      sf->encoding.encoding != SOX_ENCODING_UNKNOWN,
-      "Error loading audio file: unknown encoding.");
-}
-
-void validate_input_tensor(const torch::Tensor& tensor) {
-  TORCH_CHECK(tensor.device().is_cpu(), "Input tensor has to be on CPU.");
-
-  TORCH_CHECK(tensor.ndimension() == 2, "Input tensor has to be 2D.");
-
-  switch (tensor.dtype().toScalarType()) {
-    case c10::ScalarType::Byte:
-    case c10::ScalarType::Short:
-    case c10::ScalarType::Int:
-    case c10::ScalarType::Float:
-      break;
-    default:
-      TORCH_CHECK(
-          false,
-          "Input tensor has to be one of float32, int32, int16 or uint8 type.");
-  }
-}
-
-caffe2::TypeMeta get_dtype(
-    const sox_encoding_t encoding,
-    const unsigned precision) {
-  const auto dtype = [&]() {
-    switch (encoding) {
-      case SOX_ENCODING_UNSIGNED: // 8-bit PCM WAV
-        return torch::kUInt8;
-      case SOX_ENCODING_SIGN2: // 16-bit, 24-bit, or 32-bit PCM WAV
-        switch (precision) {
-          case 16:
-            return torch::kInt16;
-          case 24: // Cast 24-bit to 32-bit.
-          case 32:
-            return torch::kInt32;
-          default:
-            TORCH_CHECK(
-                false,
-                "Only 16, 24, and 32 bits are supported for signed PCM.");
-        }
-      default:
-        // default to float32 for the other formats, including
-        // 32-bit flaoting-point WAV,
-        // MP3,
-        // FLAC,
-        // VORBIS etc...
-        return torch::kFloat32;
-    }
-  }();
-  return c10::scalarTypeToTypeMeta(dtype);
-}
-
-torch::Tensor convert_to_tensor(
-    sox_sample_t* buffer,
-    const int32_t num_samples,
-    const int32_t num_channels,
-    const caffe2::TypeMeta dtype,
-    const bool normalize,
-    const bool channels_first) {
-  torch::Tensor t;
-  uint64_t dummy = 0;
-  SOX_SAMPLE_LOCALS;
-  if (normalize || dtype == torch::kFloat32) {
-    t = torch::empty(
-        {num_samples / num_channels, num_channels}, torch::kFloat32);
-    auto ptr = t.data_ptr<float_t>();
-    for (int32_t i = 0; i < num_samples; ++i) {
-      ptr[i] = SOX_SAMPLE_TO_FLOAT_32BIT(buffer[i], dummy);
-    }
-  } else if (dtype == torch::kInt32) {
-    t = torch::from_blob(
-            buffer, {num_samples / num_channels, num_channels}, torch::kInt32)
-            .clone();
-  } else if (dtype == torch::kInt16) {
-    t = torch::empty({num_samples / num_channels, num_channels}, torch::kInt16);
-    auto ptr = t.data_ptr<int16_t>();
-    for (int32_t i = 0; i < num_samples; ++i) {
-      ptr[i] = SOX_SAMPLE_TO_SIGNED_16BIT(buffer[i], dummy);
-    }
-  } else if (dtype == torch::kUInt8) {
-    t = torch::empty({num_samples / num_channels, num_channels}, torch::kUInt8);
-    auto ptr = t.data_ptr<uint8_t>();
-    for (int32_t i = 0; i < num_samples; ++i) {
-      ptr[i] = SOX_SAMPLE_TO_UNSIGNED_8BIT(buffer[i], dummy);
-    }
-  } else {
-    TORCH_CHECK(false, "Unsupported dtype: ", dtype);
-  }
-  if (channels_first) {
-    t = t.transpose(1, 0);
-  }
-  return t.contiguous();
-}
-
-const std::string get_filetype(const std::string& path) {
-  std::string ext = path.substr(path.find_last_of('.') + 1);
-  std::transform(ext.begin(), ext.end(), ext.begin(), ::tolower);
-  return ext;
-}
-
-namespace {
-
-std::tuple<sox_encoding_t, unsigned> get_save_encoding_for_wav(
-    const std::string& format,
-    caffe2::TypeMeta dtype,
-    const Encoding& encoding,
-    const BitDepth& bits_per_sample) {
-  switch (encoding) {
-    case Encoding::NOT_PROVIDED:
-      switch (bits_per_sample) {
-        case BitDepth::NOT_PROVIDED:
-          switch (dtype.toScalarType()) {
-            case c10::ScalarType::Float:
-              return std::make_tuple<>(SOX_ENCODING_FLOAT, 32);
-            case c10::ScalarType::Int:
-              return std::make_tuple<>(SOX_ENCODING_SIGN2, 32);
-            case c10::ScalarType::Short:
-              return std::make_tuple<>(SOX_ENCODING_SIGN2, 16);
-            case c10::ScalarType::Byte:
-              return std::make_tuple<>(SOX_ENCODING_UNSIGNED, 8);
-            default:
-              TORCH_CHECK(false, "Internal Error: Unexpected dtype: ", dtype);
-          }
-        case BitDepth::B8:
-          return std::make_tuple<>(SOX_ENCODING_UNSIGNED, 8);
-        default:
-          return std::make_tuple<>(
-              SOX_ENCODING_SIGN2, static_cast<unsigned>(bits_per_sample));
-      }
-    case Encoding::PCM_SIGNED:
-      switch (bits_per_sample) {
-        case BitDepth::NOT_PROVIDED:
-          return std::make_tuple<>(SOX_ENCODING_SIGN2, 16);
-        case BitDepth::B8:
-          TORCH_CHECK(
-              false, format, " does not support 8-bit signed PCM encoding.");
-        default:
-          return std::make_tuple<>(
-              SOX_ENCODING_SIGN2, static_cast<unsigned>(bits_per_sample));
-      }
-    case Encoding::PCM_UNSIGNED:
-      switch (bits_per_sample) {
-        case BitDepth::NOT_PROVIDED:
-        case BitDepth::B8:
-          return std::make_tuple<>(SOX_ENCODING_UNSIGNED, 8);
-        default:
-          TORCH_CHECK(
-              false, format, " only supports 8-bit for unsigned PCM encoding.");
-      }
-    case Encoding::PCM_FLOAT:
-      switch (bits_per_sample) {
-        case BitDepth::NOT_PROVIDED:
-        case BitDepth::B32:
-          return std::make_tuple<>(SOX_ENCODING_FLOAT, 32);
-        case BitDepth::B64:
-          return std::make_tuple<>(SOX_ENCODING_FLOAT, 64);
-        default:
-          TORCH_CHECK(
-              false,
-              format,
-              " only supports 32-bit or 64-bit for floating-point PCM encoding.");
-      }
-    case Encoding::ULAW:
-      switch (bits_per_sample) {
-        case BitDepth::NOT_PROVIDED:
-        case BitDepth::B8:
-          return std::make_tuple<>(SOX_ENCODING_ULAW, 8);
-        default:
-          TORCH_CHECK(
-              false, format, " only supports 8-bit for mu-law encoding.");
-      }
-    case Encoding::ALAW:
-      switch (bits_per_sample) {
-        case BitDepth::NOT_PROVIDED:
-        case BitDepth::B8:
-          return std::make_tuple<>(SOX_ENCODING_ALAW, 8);
-        default:
-          TORCH_CHECK(
-              false, format, " only supports 8-bit for a-law encoding.");
-      }
-    default:
-      TORCH_CHECK(
-          false, format, " does not support encoding: " + to_string(encoding));
-  }
-}
-
-std::tuple<sox_encoding_t, unsigned> get_save_encoding(
-    const std::string& format,
-    const caffe2::TypeMeta& dtype,
-    const std::optional<std::string>& encoding,
-    const std::optional<int64_t>& bits_per_sample) {
-  const Format fmt = get_format_from_string(format);
-  const Encoding enc = get_encoding_from_option(encoding);
-  const BitDepth bps = get_bit_depth_from_option(bits_per_sample);
-
-  switch (fmt) {
-    case Format::WAV:
-    case Format::AMB:
-      return get_save_encoding_for_wav(format, dtype, enc, bps);
-    case Format::MP3:
-      TORCH_CHECK(
-          enc == Encoding::NOT_PROVIDED,
-          "mp3 does not support `encoding` option.");
-      TORCH_CHECK(
-          bps == BitDepth::NOT_PROVIDED,
-          "mp3 does not support `bits_per_sample` option.");
-      return std::make_tuple<>(SOX_ENCODING_MP3, 16);
-    case Format::HTK:
-      TORCH_CHECK(
-          enc == Encoding::NOT_PROVIDED,
-          "htk does not support `encoding` option.");
-      TORCH_CHECK(
-          bps == BitDepth::NOT_PROVIDED,
-          "htk does not support `bits_per_sample` option.");
-      return std::make_tuple<>(SOX_ENCODING_SIGN2, 16);
-    case Format::VORBIS:
-      TORCH_CHECK(
-          enc == Encoding::NOT_PROVIDED,
-          "vorbis does not support `encoding` option.");
-      TORCH_CHECK(
-          bps == BitDepth::NOT_PROVIDED,
-          "vorbis does not support `bits_per_sample` option.");
-      return std::make_tuple<>(SOX_ENCODING_VORBIS, 0);
-    case Format::AMR_NB:
-      TORCH_CHECK(
-          enc == Encoding::NOT_PROVIDED,
-          "amr-nb does not support `encoding` option.");
-      TORCH_CHECK(
-          bps == BitDepth::NOT_PROVIDED,
-          "amr-nb does not support `bits_per_sample` option.");
-      return std::make_tuple<>(SOX_ENCODING_AMR_NB, 16);
-    case Format::FLAC:
-      TORCH_CHECK(
-          enc == Encoding::NOT_PROVIDED,
-          "flac does not support `encoding` option.");
-      switch (bps) {
-        case BitDepth::B32:
-        case BitDepth::B64:
-          TORCH_CHECK(
-              false, "flac does not support `bits_per_sample` larger than 24.");
-        default:
-          return std::make_tuple<>(
-              SOX_ENCODING_FLAC, static_cast<unsigned>(bps));
-      }
-    case Format::SPHERE:
-      switch (enc) {
-        case Encoding::NOT_PROVIDED:
-        case Encoding::PCM_SIGNED:
-          switch (bps) {
-            case BitDepth::NOT_PROVIDED:
-              return std::make_tuple<>(SOX_ENCODING_SIGN2, 32);
-            default:
-              return std::make_tuple<>(
-                  SOX_ENCODING_SIGN2, static_cast<unsigned>(bps));
-          }
-        case Encoding::PCM_UNSIGNED:
-          TORCH_CHECK(false, "sph does not support unsigned integer PCM.");
-        case Encoding::PCM_FLOAT:
-          TORCH_CHECK(false, "sph does not support floating point PCM.");
-        case Encoding::ULAW:
-          switch (bps) {
-            case BitDepth::NOT_PROVIDED:
-            case BitDepth::B8:
-              return std::make_tuple<>(SOX_ENCODING_ULAW, 8);
-            default:
-              TORCH_CHECK(
-                  false, "sph only supports 8-bit for mu-law encoding.");
-          }
-        case Encoding::ALAW:
-          switch (bps) {
-            case BitDepth::NOT_PROVIDED:
-            case BitDepth::B8:
-              return std::make_tuple<>(SOX_ENCODING_ALAW, 8);
-            default:
-              return std::make_tuple<>(
-                  SOX_ENCODING_ALAW, static_cast<unsigned>(bps));
-          }
-        default:
-          TORCH_CHECK(
-              false, "sph does not support encoding: ", encoding.value());
-      }
-    case Format::GSM:
-      TORCH_CHECK(
-          enc == Encoding::NOT_PROVIDED,
-          "gsm does not support `encoding` option.");
-      TORCH_CHECK(
-          bps == BitDepth::NOT_PROVIDED,
-          "gsm does not support `bits_per_sample` option.");
-      return std::make_tuple<>(SOX_ENCODING_GSM, 16);
-
-    default:
-      TORCH_CHECK(false, "Unsupported format: " + format);
-  }
-}
-
-unsigned get_precision(const std::string& filetype, caffe2::TypeMeta dtype) {
-  if (filetype == "mp3") {
-    return SOX_UNSPEC;
-  }
-  if (filetype == "flac") {
-    return 24;
-  }
-  if (filetype == "ogg" || filetype == "vorbis") {
-    return SOX_UNSPEC;
-  }
-  if (filetype == "wav" || filetype == "amb") {
-    switch (dtype.toScalarType()) {
-      case c10::ScalarType::Byte:
-        return 8;
-      case c10::ScalarType::Short:
-        return 16;
-      case c10::ScalarType::Int:
-        return 32;
-      case c10::ScalarType::Float:
-        return 32;
-      default:
-        TORCH_CHECK(false, "Unsupported dtype: ", dtype);
-    }
-  }
-  if (filetype == "sph") {
-    return 32;
-  }
-  if (filetype == "amr-nb") {
-    return 16;
-  }
-  if (filetype == "gsm") {
-    return 16;
-  }
-  if (filetype == "htk") {
-    return 16;
-  }
-  TORCH_CHECK(false, "Unsupported file type: ", filetype);
-}
-
-} // namespace
-
-sox_signalinfo_t get_signalinfo(
-    const torch::Tensor* waveform,
-    const int64_t sample_rate,
-    const std::string& filetype,
-    const bool channels_first) {
-  return sox_signalinfo_t{
-      /*rate=*/static_cast<sox_rate_t>(sample_rate),
-      /*channels=*/
-      static_cast<unsigned>(waveform->size(channels_first ? 0 : 1)),
-      /*precision=*/get_precision(filetype, waveform->dtype()),
-      /*length=*/static_cast<uint64_t>(waveform->numel()),
-      nullptr};
-}
-
-sox_encodinginfo_t get_tensor_encodinginfo(caffe2::TypeMeta dtype) {
-  sox_encoding_t encoding = [&]() {
-    switch (dtype.toScalarType()) {
-      case c10::ScalarType::Byte:
-        return SOX_ENCODING_UNSIGNED;
-      case c10::ScalarType::Short:
-        return SOX_ENCODING_SIGN2;
-      case c10::ScalarType::Int:
-        return SOX_ENCODING_SIGN2;
-      case c10::ScalarType::Float:
-        return SOX_ENCODING_FLOAT;
-      default:
-        TORCH_CHECK(false, "Unsupported dtype: ", dtype);
-    }
-  }();
-  unsigned bits_per_sample = [&]() {
-    switch (dtype.toScalarType()) {
-      case c10::ScalarType::Byte:
-        return 8;
-      case c10::ScalarType::Short:
-        return 16;
-      case c10::ScalarType::Int:
-        return 32;
-      case c10::ScalarType::Float:
-        return 32;
-      default:
-        TORCH_CHECK(false, "Unsupported dtype: ", dtype);
-    }
-  }();
-  return sox_encodinginfo_t{
-      /*encoding=*/encoding,
-      /*bits_per_sample=*/bits_per_sample,
-      /*compression=*/HUGE_VAL,
-      /*reverse_bytes=*/sox_option_default,
-      /*reverse_nibbles=*/sox_option_default,
-      /*reverse_bits=*/sox_option_default,
-      /*opposite_endian=*/sox_false};
-}
-
-sox_encodinginfo_t get_encodinginfo_for_save(
-    const std::string& format,
-    const caffe2::TypeMeta& dtype,
-    const std::optional<double>& compression,
-    const std::optional<std::string>& encoding,
-    const std::optional<int64_t>& bits_per_sample) {
-  auto enc = get_save_encoding(format, dtype, encoding, bits_per_sample);
-  return sox_encodinginfo_t{
-      /*encoding=*/std::get<0>(enc),
-      /*bits_per_sample=*/std::get<1>(enc),
-      /*compression=*/compression.value_or(HUGE_VAL),
-      /*reverse_bytes=*/sox_option_default,
-      /*reverse_nibbles=*/sox_option_default,
-      /*reverse_bits=*/sox_option_default,
-      /*opposite_endian=*/sox_false};
-}
-
-} // namespace torchaudio::sox
diff --git a/src/libtorchaudio/sox/utils.h b/src/libtorchaudio/sox/utils.h
deleted file mode 100644
index b26e25f65e..0000000000
--- a/src/libtorchaudio/sox/utils.h
+++ /dev/null
@@ -1,112 +0,0 @@
-#ifndef TORCHAUDIO_SOX_UTILS_H
-#define TORCHAUDIO_SOX_UTILS_H
-
-#include <sox.h>
-#include <torch/types.h>
-
-namespace torchaudio::sox {
-
-////////////////////////////////////////////////////////////////////////////////
-// APIs for Python interaction
-////////////////////////////////////////////////////////////////////////////////
-
-/// Set sox global options
-void set_seed(const int64_t seed);
-
-void set_verbosity(const int64_t verbosity);
-
-void set_use_threads(const bool use_threads);
-
-void set_buffer_size(const int64_t buffer_size);
-
-int64_t get_buffer_size();
-
-std::vector<std::vector<std::string>> list_effects();
-
-std::vector<std::string> list_read_formats();
-
-std::vector<std::string> list_write_formats();
-
-////////////////////////////////////////////////////////////////////////////////
-// Utilities for sox_io / sox_effects implementations
-////////////////////////////////////////////////////////////////////////////////
-
-extern const std::unordered_set<std::string> UNSUPPORTED_EFFECTS;
-
-/// helper class to automatically close sox_format_t*
-struct SoxFormat {
-  explicit SoxFormat(sox_format_t* fd) noexcept;
-  SoxFormat(const SoxFormat& other) = delete;
-  SoxFormat(SoxFormat&& other) = delete;
-  SoxFormat& operator=(const SoxFormat& other) = delete;
-  SoxFormat& operator=(SoxFormat&& other) = delete;
-  ~SoxFormat();
-  sox_format_t* operator->() const noexcept;
-  operator sox_format_t*() const noexcept;
-
-  void close();
-
- private:
-  sox_format_t* fd_;
-};
-
-///
-/// Verify that input file is found, has known encoding, and not empty
-void validate_input_file(const SoxFormat& sf, const std::string& path);
-
-///
-/// Verify that input Tensor is 2D, CPU and either uin8, int16, int32 or float32
-void validate_input_tensor(const torch::Tensor&);
-
-///
-/// Get target dtype for the given encoding and precision.
-caffe2::TypeMeta get_dtype(
-    const sox_encoding_t encoding,
-    const unsigned precision);
-
-///
-/// Convert sox_sample_t buffer to uint8/int16/int32/float32 Tensor
-/// NOTE: This function might modify the values in the input buffer to
-/// reduce the number of memory copy.
-/// @param buffer Pointer to buffer that contains audio data.
-/// @param num_samples The number of samples to read.
-/// @param num_channels The number of channels. Used to reshape the resulting
-/// Tensor.
-/// @param dtype Target dtype. Determines the output dtype and value range in
-/// conjunction with normalization.
-/// @param noramlize Perform normalization. Only effective when dtype is not
-/// kFloat32. When effective, the output tensor is kFloat32 type and value range
-/// is [-1.0, 1.0]
-/// @param channels_first When True, output Tensor has shape of [num_channels,
-/// num_frames].
-torch::Tensor convert_to_tensor(
-    sox_sample_t* buffer,
-    const int32_t num_samples,
-    const int32_t num_channels,
-    const caffe2::TypeMeta dtype,
-    const bool normalize,
-    const bool channels_first);
-
-/// Extract extension from file path
-const std::string get_filetype(const std::string& path);
-
-/// Get sox_signalinfo_t for passing a torch::Tensor object.
-sox_signalinfo_t get_signalinfo(
-    const torch::Tensor* waveform,
-    const int64_t sample_rate,
-    const std::string& filetype,
-    const bool channels_first);
-
-/// Get sox_encodinginfo_t for Tensor I/O
-sox_encodinginfo_t get_tensor_encodinginfo(const caffe2::TypeMeta dtype);
-
-/// Get sox_encodinginfo_t for saving to file/file object
-sox_encodinginfo_t get_encodinginfo_for_save(
-    const std::string& format,
-    const caffe2::TypeMeta& dtype,
-    const std::optional<double>& compression,
-    const std::optional<std::string>& encoding,
-    const std::optional<int64_t>& bits_per_sample);
-
-} // namespace torchaudio::sox
-#endif
diff --git a/src/torchaudio/__init__.py b/src/torchaudio/__init__.py
index f57572e5c8..f21454226c 100644
--- a/src/torchaudio/__init__.py
+++ b/src/torchaudio/__init__.py
@@ -15,7 +15,6 @@
     kaldi_io,
     models,
     pipelines,
-    sox_effects,
     transforms,
     utils,
 )
@@ -205,6 +204,5 @@ def save(
     "pipelines",
     "kaldi_io",
     "utils",
-    "sox_effects",
     "transforms",
 ]
diff --git a/src/torchaudio/_extension/__init__.py b/src/torchaudio/_extension/__init__.py
index 5c2ff55583..11f7c6deec 100644
--- a/src/torchaudio/_extension/__init__.py
+++ b/src/torchaudio/_extension/__init__.py
@@ -4,7 +4,7 @@
 
 from torchaudio._internal.module_utils import fail_with_message, is_module_available, no_op
 
-from .utils import _check_cuda_version, _init_dll_path, _init_sox, _LazyImporter, _load_lib
+from .utils import _check_cuda_version, _init_dll_path, _LazyImporter, _load_lib
 
 _LG = logging.getLogger(__name__)
 
@@ -17,7 +17,6 @@
     "_check_cuda_version",
     "_IS_TORCHAUDIO_EXT_AVAILABLE",
     "_IS_RIR_AVAILABLE",
-    "lazy_import_sox_ext",
 ]
 
 
@@ -44,17 +43,6 @@
     _IS_ALIGN_AVAILABLE = torchaudio.lib._torchaudio.is_align_available()
 
 
-_SOX_EXT = None
-
-
-def lazy_import_sox_ext():
-    """Load SoX integration based on availability in lazy manner"""
-
-    global _SOX_EXT
-    if _SOX_EXT is None:
-        _SOX_EXT = _LazyImporter("_torchaudio_sox", _init_sox)
-    return _SOX_EXT
-
 
 fail_if_no_rir = (
     no_op
diff --git a/src/torchaudio/_extension/utils.py b/src/torchaudio/_extension/utils.py
index c5660a1e22..1cbe3d93e5 100644
--- a/src/torchaudio/_extension/utils.py
+++ b/src/torchaudio/_extension/utils.py
@@ -61,51 +61,6 @@ def _load_lib(lib: str) -> bool:
     return True
 
 
-def _import_sox_ext():
-    if os.name == "nt":
-        raise RuntimeError("sox extension is not supported on Windows")
-    if not eval_env("TORCHAUDIO_USE_SOX", True):
-        raise RuntimeError("sox extension is disabled. (TORCHAUDIO_USE_SOX=0)")
-
-    ext = "torchaudio.lib._torchaudio_sox"
-
-    if not importlib.util.find_spec(ext):
-        raise RuntimeError(
-            # fmt: off
-            "TorchAudio is not built with sox extension. "
-            "Please build TorchAudio with libsox support. (BUILD_SOX=1)"
-            # fmt: on
-        )
-
-    _load_lib("libtorchaudio_sox")
-    return importlib.import_module(ext)
-
-
-def _init_sox():
-    ext = _import_sox_ext()
-    ext.set_verbosity(0)
-
-    import atexit
-
-    torch.ops.torchaudio_sox.initialize_sox_effects()
-    atexit.register(torch.ops.torchaudio_sox.shutdown_sox_effects)
-
-    # Bundle functions registered with TORCH_LIBRARY into extension
-    # so that they can also be accessed in the same (lazy) manner
-    # from the extension.
-    keys = [
-        "get_info",
-        "load_audio_file",
-        "save_audio_file",
-        "apply_effects_tensor",
-        "apply_effects_file",
-    ]
-    for key in keys:
-        setattr(ext, key, getattr(torch.ops.torchaudio_sox, key))
-
-    return ext
-
-
 class _LazyImporter(types.ModuleType):
     """Lazily import module/extension."""
 
diff --git a/src/torchaudio/_internal/module_utils.py b/src/torchaudio/_internal/module_utils.py
index 45956cb175..2201055954 100644
--- a/src/torchaudio/_internal/module_utils.py
+++ b/src/torchaudio/_internal/module_utils.py
@@ -97,10 +97,6 @@ def decorator(func):
     {func.__doc__}
     """
 
-        # This is a temporary fix to avoid depending on sox during testing.
-        # It will be removed once the sox dependency is removed from the rest of the codebase.
-        if 'sox' not in func.__module__:
-            UNSUPPORTED.append(wrapped)
         return wrapped
 
     return decorator
diff --git a/src/torchaudio/sox_effects/__init__.py b/src/torchaudio/sox_effects/__init__.py
deleted file mode 100644
index 93c63cae1d..0000000000
--- a/src/torchaudio/sox_effects/__init__.py
+++ /dev/null
@@ -1,10 +0,0 @@
-from .sox_effects import apply_effects_file, apply_effects_tensor, effect_names, init_sox_effects, shutdown_sox_effects
-
-
-__all__ = [
-    "init_sox_effects",
-    "shutdown_sox_effects",
-    "effect_names",
-    "apply_effects_tensor",
-    "apply_effects_file",
-]
diff --git a/src/torchaudio/sox_effects/sox_effects.py b/src/torchaudio/sox_effects/sox_effects.py
deleted file mode 100644
index 256c461edc..0000000000
--- a/src/torchaudio/sox_effects/sox_effects.py
+++ /dev/null
@@ -1,275 +0,0 @@
-import os
-from typing import List, Optional, Tuple
-
-import torch
-import torchaudio
-from torchaudio._internal.module_utils import deprecated, dropping_support
-from torchaudio.utils.sox_utils import list_effects
-
-
-sox_ext = torchaudio._extension.lazy_import_sox_ext()
-
-
-@deprecated("Please remove the call. This function is called automatically.")
-def init_sox_effects():
-    """Initialize resources required to use sox effects.
-
-    Note:
-        You do not need to call this function manually. It is called automatically.
-
-    Once initialized, you do not need to call this function again across the multiple uses of
-    sox effects though it is safe to do so as long as :func:`shutdown_sox_effects` is not called yet.
-    Once :func:`shutdown_sox_effects` is called, you can no longer use SoX effects and initializing
-    again will result in error.
-    """
-    pass
-
-
-@deprecated("Please remove the call. This function is called automatically.")
-def shutdown_sox_effects():
-    """Clean up resources required to use sox effects.
-
-    Note:
-        You do not need to call this function manually. It is called automatically.
-
-    It is safe to call this function multiple times.
-    Once :py:func:`shutdown_sox_effects` is called, you can no longer use SoX effects and
-    initializing again will result in error.
-    """
-    pass
-
-
-@dropping_support
-def effect_names() -> List[str]:
-    """Gets list of valid sox effect names
-
-    Returns:
-        List[str]: list of available effect names.
-
-    Example
-        >>> torchaudio.sox_effects.effect_names()
-        ['allpass', 'band', 'bandpass', ... ]
-    """
-    return list(list_effects().keys())
-
-
-@dropping_support
-def apply_effects_tensor(
-    tensor: torch.Tensor,
-    sample_rate: int,
-    effects: List[List[str]],
-    channels_first: bool = True,
-) -> Tuple[torch.Tensor, int]:
-    """Apply sox effects to given Tensor
-
-    .. devices:: CPU
-
-    .. properties:: TorchScript
-
-    Note:
-        This function only works on CPU Tensors.
-        This function works in the way very similar to ``sox`` command, however there are slight
-        differences. For example, ``sox`` command adds certain effects automatically (such as
-        ``rate`` effect after ``speed`` and ``pitch`` and other effects), but this function does
-        only applies the given effects. (Therefore, to actually apply ``speed`` effect, you also
-        need to give ``rate`` effect with desired sampling rate.).
-
-    Args:
-        tensor (torch.Tensor): Input 2D CPU Tensor.
-        sample_rate (int): Sample rate
-        effects (List[List[str]]): List of effects.
-        channels_first (bool, optional): Indicates if the input Tensor's dimension is
-            `[channels, time]` or `[time, channels]`
-
-    Returns:
-        (Tensor, int): Resulting Tensor and sample rate.
-        The resulting Tensor has the same ``dtype`` as the input Tensor, and
-        the same channels order. The shape of the Tensor can be different based on the
-        effects applied. Sample rate can also be different based on the effects applied.
-
-    Example - Basic usage
-        >>>
-        >>> # Defines the effects to apply
-        >>> effects = [
-        ...     ['gain', '-n'],  # normalises to 0dB
-        ...     ['pitch', '5'],  # 5 cent pitch shift
-        ...     ['rate', '8000'],  # resample to 8000 Hz
-        ... ]
-        >>>
-        >>> # Generate pseudo wave:
-        >>> # normalized, channels first, 2ch, sampling rate 16000, 1 second
-        >>> sample_rate = 16000
-        >>> waveform = 2 * torch.rand([2, sample_rate * 1]) - 1
-        >>> waveform.shape
-        torch.Size([2, 16000])
-        >>> waveform
-        tensor([[ 0.3138,  0.7620, -0.9019,  ..., -0.7495, -0.4935,  0.5442],
-                [-0.0832,  0.0061,  0.8233,  ..., -0.5176, -0.9140, -0.2434]])
-        >>>
-        >>> # Apply effects
-        >>> waveform, sample_rate = apply_effects_tensor(
-        ...     wave_form, sample_rate, effects, channels_first=True)
-        >>>
-        >>> # Check the result
-        >>> # The new waveform is sampling rate 8000, 1 second.
-        >>> # normalization and channel order are preserved
-        >>> waveform.shape
-        torch.Size([2, 8000])
-        >>> waveform
-        tensor([[ 0.5054, -0.5518, -0.4800,  ..., -0.0076,  0.0096, -0.0110],
-                [ 0.1331,  0.0436, -0.3783,  ..., -0.0035,  0.0012,  0.0008]])
-        >>> sample_rate
-        8000
-
-    Example - Torchscript-able transform
-        >>>
-        >>> # Use `apply_effects_tensor` in `torch.nn.Module` and dump it to file,
-        >>> # then run sox effect via Torchscript runtime.
-        >>>
-        >>> class SoxEffectTransform(torch.nn.Module):
-        ...     effects: List[List[str]]
-        ...
-        ...     def __init__(self, effects: List[List[str]]):
-        ...         super().__init__()
-        ...         self.effects = effects
-        ...
-        ...     def forward(self, tensor: torch.Tensor, sample_rate: int):
-        ...         return sox_effects.apply_effects_tensor(
-        ...             tensor, sample_rate, self.effects)
-        ...
-        ...
-        >>> # Create transform object
-        >>> effects = [
-        ...     ["lowpass", "-1", "300"],  # apply single-pole lowpass filter
-        ...     ["rate", "8000"],  # change sample rate to 8000
-        ... ]
-        >>> transform = SoxEffectTensorTransform(effects, input_sample_rate)
-        >>>
-        >>> # Dump it to file and load
-        >>> path = 'sox_effect.zip'
-        >>> torch.jit.script(trans).save(path)
-        >>> transform = torch.jit.load(path)
-        >>>
-        >>>> # Run transform
-        >>> waveform, input_sample_rate = torchaudio.load("input.wav")
-        >>> waveform, sample_rate = transform(waveform, input_sample_rate)
-        >>> assert sample_rate == 8000
-    """
-    return sox_ext.apply_effects_tensor(tensor, sample_rate, effects, channels_first)
-
-
-@dropping_support
-def apply_effects_file(
-    path: str,
-    effects: List[List[str]],
-    normalize: bool = True,
-    channels_first: bool = True,
-    format: Optional[str] = None,
-) -> Tuple[torch.Tensor, int]:
-    """Apply sox effects to the audio file and load the resulting data as Tensor
-
-    .. devices:: CPU
-
-    .. properties:: TorchScript
-
-    Note:
-        This function works in the way very similar to ``sox`` command, however there are slight
-        differences. For example, ``sox`` commnad adds certain effects automatically (such as
-        ``rate`` effect after ``speed``, ``pitch`` etc), but this function only applies the given
-        effects. Therefore, to actually apply ``speed`` effect, you also need to give ``rate``
-        effect with desired sampling rate, because internally, ``speed`` effects only alter sampling
-        rate and leave samples untouched.
-
-    Args:
-        path (path-like object):
-            Source of audio data.
-        effects (List[List[str]]): List of effects.
-        normalize (bool, optional):
-            When ``True``, this function converts the native sample type to ``float32``.
-            Default: ``True``.
-
-            If input file is integer WAV, giving ``False`` will change the resulting Tensor type to
-            integer type.
-            This argument has no effect for formats other than integer WAV type.
-
-        channels_first (bool, optional): When True, the returned Tensor has dimension `[channel, time]`.
-            Otherwise, the returned Tensor's dimension is `[time, channel]`.
-        format (str or None, optional):
-            Override the format detection with the given format.
-            Providing the argument might help when libsox can not infer the format
-            from header or extension,
-
-    Returns:
-        (Tensor, int): Resulting Tensor and sample rate.
-        If ``normalize=True``, the resulting Tensor is always ``float32`` type.
-        If ``normalize=False`` and the input audio file is of integer WAV file, then the
-        resulting Tensor has corresponding integer type. (Note 24 bit integer type is not supported)
-        If ``channels_first=True``, the resulting Tensor has dimension `[channel, time]`,
-        otherwise `[time, channel]`.
-
-    Example - Basic usage
-        >>>
-        >>> # Defines the effects to apply
-        >>> effects = [
-        ...     ['gain', '-n'],  # normalises to 0dB
-        ...     ['pitch', '5'],  # 5 cent pitch shift
-        ...     ['rate', '8000'],  # resample to 8000 Hz
-        ... ]
-        >>>
-        >>> # Apply effects and load data with channels_first=True
-        >>> waveform, sample_rate = apply_effects_file("data.wav", effects, channels_first=True)
-        >>>
-        >>> # Check the result
-        >>> waveform.shape
-        torch.Size([2, 8000])
-        >>> waveform
-        tensor([[ 5.1151e-03,  1.8073e-02,  2.2188e-02,  ...,  1.0431e-07,
-                 -1.4761e-07,  1.8114e-07],
-                [-2.6924e-03,  2.1860e-03,  1.0650e-02,  ...,  6.4122e-07,
-                 -5.6159e-07,  4.8103e-07]])
-        >>> sample_rate
-        8000
-
-    Example - Apply random speed perturbation to dataset
-        >>>
-        >>> # Load data from file, apply random speed perturbation
-        >>> class RandomPerturbationFile(torch.utils.data.Dataset):
-        ...     \"\"\"Given flist, apply random speed perturbation
-        ...
-        ...     Suppose all the input files are at least one second long.
-        ...     \"\"\"
-        ...     def __init__(self, flist: List[str], sample_rate: int):
-        ...         super().__init__()
-        ...         self.flist = flist
-        ...         self.sample_rate = sample_rate
-        ...
-        ...     def __getitem__(self, index):
-        ...         speed = 0.5 + 1.5 * random.randn()
-        ...         effects = [
-        ...             ['gain', '-n', '-10'],  # apply 10 db attenuation
-        ...             ['remix', '-'],  # merge all the channels
-        ...             ['speed', f'{speed:.5f}'],  # duration is now 0.5 ~ 2.0 seconds.
-        ...             ['rate', f'{self.sample_rate}'],
-        ...             ['pad', '0', '1.5'],  # add 1.5 seconds silence at the end
-        ...             ['trim', '0', '2'],  # get the first 2 seconds
-        ...         ]
-        ...         waveform, _ = torchaudio.sox_effects.apply_effects_file(
-        ...             self.flist[index], effects)
-        ...         return waveform
-        ...
-        ...     def __len__(self):
-        ...         return len(self.flist)
-        ...
-        >>> dataset = RandomPerturbationFile(file_list, sample_rate=8000)
-        >>> loader = torch.utils.data.DataLoader(dataset, batch_size=32)
-        >>> for batch in loader:
-        >>>     pass
-    """
-    if not torch.jit.is_scripting():
-        if hasattr(path, "read"):
-            raise RuntimeError(
-                "apply_effects_file function does not support file-like object. "
-                "Please use torchaudio.io.AudioEffector."
-            )
-        path = os.fspath(path)
-    return sox_ext.apply_effects_file(path, effects, normalize, channels_first, format)
diff --git a/src/torchaudio/utils/__init__.py b/src/torchaudio/utils/__init__.py
index 9d4dd2dd72..be1f0bad21 100644
--- a/src/torchaudio/utils/__init__.py
+++ b/src/torchaudio/utils/__init__.py
@@ -1,10 +1,8 @@
 from torio.utils import ffmpeg_utils
 
-from . import sox_utils
 from .download import _download_asset
 
 
 __all__ = [
-    "sox_utils",
     "ffmpeg_utils",
 ]
diff --git a/src/torchaudio/utils/sox_utils.py b/src/torchaudio/utils/sox_utils.py
deleted file mode 100644
index 8cc68361d5..0000000000
--- a/src/torchaudio/utils/sox_utils.py
+++ /dev/null
@@ -1,118 +0,0 @@
-"""Module to change the configuration of libsox, which is used by I/O functions like
-:py:mod:`~torchaudio.backend.sox_io_backend` and :py:mod:`~torchaudio.sox_effects`.
-
-.. warning::
-    Starting with version 2.8, we are refactoring TorchAudio to transition it
-    into a maintenance phase. As a result:
-
-    - Some APIs are deprecated in 2.8 and will be removed in 2.9.
-    - The decoding and encoding capabilities of PyTorch for both audio and video
-      are being consolidated into TorchCodec.
-
-    Please see https://github.com/pytorch/audio/issues/3902 for more information.
-"""
-
-from typing import Dict, List
-
-import torchaudio
-
-sox_ext = torchaudio._extension.lazy_import_sox_ext()
-
-from torchaudio._internal.module_utils import dropping_support
-
-@dropping_support
-def set_seed(seed: int):
-    """Set libsox's PRNG
-
-    Args:
-        seed (int): seed value. valid range is int32.
-
-    See Also:
-        http://sox.sourceforge.net/sox.html
-    """
-    sox_ext.set_seed(seed)
-
-
-@dropping_support
-def set_verbosity(verbosity: int):
-    """Set libsox's verbosity
-
-    Args:
-        verbosity (int): Set verbosity level of libsox.
-
-            * ``1`` failure messages
-            * ``2`` warnings
-            * ``3`` details of processing
-            * ``4``-``6`` increasing levels of debug messages
-
-    See Also:
-        http://sox.sourceforge.net/sox.html
-    """
-    sox_ext.set_verbosity(verbosity)
-
-
-@dropping_support
-def set_buffer_size(buffer_size: int):
-    """Set buffer size for sox effect chain
-
-    Args:
-        buffer_size (int): Set the size in bytes of the buffers used for processing audio.
-
-    See Also:
-        http://sox.sourceforge.net/sox.html
-    """
-    sox_ext.set_buffer_size(buffer_size)
-
-
-@dropping_support
-def set_use_threads(use_threads: bool):
-    """Set multithread option for sox effect chain
-
-    Args:
-        use_threads (bool): When ``True``, enables ``libsox``'s parallel effects channels processing.
-            To use mutlithread, the underlying ``libsox`` has to be compiled with OpenMP support.
-
-    See Also:
-        http://sox.sourceforge.net/sox.html
-    """
-    sox_ext.set_use_threads(use_threads)
-
-
-@dropping_support
-def list_effects() -> Dict[str, str]:
-    """List the available sox effect names
-
-    Returns:
-        Dict[str, str]: Mapping from ``effect name`` to ``usage``
-    """
-    return dict(sox_ext.list_effects())
-
-
-@dropping_support
-def list_read_formats() -> List[str]:
-    """List the supported audio formats for read
-
-    Returns:
-        List[str]: List of supported audio formats
-    """
-    return sox_ext.list_read_formats()
-
-
-@dropping_support
-def list_write_formats() -> List[str]:
-    """List the supported audio formats for write
-
-    Returns:
-        List[str]: List of supported audio formats
-    """
-    return sox_ext.list_write_formats()
-
-
-@dropping_support
-def get_buffer_size() -> int:
-    """Get buffer size for sox effect chain
-
-    Returns:
-        int: size in bytes of buffers used for processing audio.
-    """
-    return sox_ext.get_buffer_size()
diff --git a/test/torchaudio_unittest/common_utils/case_utils.py b/test/torchaudio_unittest/common_utils/case_utils.py
index b99b96f5b0..7ce9c89dd3 100644
--- a/test/torchaudio_unittest/common_utils/case_utils.py
+++ b/test/torchaudio_unittest/common_utils/case_utils.py
@@ -109,7 +109,6 @@ class TorchaudioTestCase(TestBaseMixin, PytorchTestCase):
 
 
 _IS_FFMPEG_AVAILABLE = torio._extension.lazy_import_ffmpeg_ext().is_available()
-_IS_SOX_AVAILABLE = torchaudio._extension.lazy_import_sox_ext().is_available()
 _IS_CTC_DECODER_AVAILABLE = None
 _IS_CUDA_CTC_DECODER_AVAILABLE = None
 
diff --git a/third_party/sox/CMakeLists.txt b/third_party/sox/CMakeLists.txt
deleted file mode 100644
index db96f05faf..0000000000
--- a/third_party/sox/CMakeLists.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-include(FetchContent)
-
-FetchContent_Declare(
-  sox_src
-  URL https://downloads.sourceforge.net/project/sox/sox/14.4.2/sox-14.4.2.tar.bz2
-  URL_HASH SHA256=81a6956d4330e75b5827316e44ae381e6f1e8928003c6aa45896da9041ea149c
-  PATCH_COMMAND ""
-  CONFIGURE_COMMAND ""
-  BUILD_COMMAND ""
-  )
-# FetchContent_MakeAvailable will parse the downloaded content and setup the targets.
-# We want to only download and not build, so we run Populate manually.
-if(NOT sox_src_POPULATED)
-  FetchContent_Populate(sox_src)
-endif()
-
-add_library(sox SHARED stub.c)
-if(APPLE)
-  set_target_properties(sox PROPERTIES SUFFIX .dylib)
-endif(APPLE)
-target_include_directories(sox PUBLIC ${sox_src_SOURCE_DIR}/src)
diff --git a/third_party/sox/stub.c b/third_party/sox/stub.c
deleted file mode 100644
index 4e668caf37..0000000000
--- a/third_party/sox/stub.c
+++ /dev/null
@@ -1,85 +0,0 @@
-#include <sox.h>
-
-int sox_add_effect(
-    sox_effects_chain_t* chain,
-    sox_effect_t* effp,
-    sox_signalinfo_t* in,
-    sox_signalinfo_t const* out) {
-  return -1;
-}
-int sox_close(sox_format_t* ft) {
-  return -1;
-}
-
-sox_effect_t* sox_create_effect(sox_effect_handler_t const* eh) {
-  return NULL;
-}
-
-sox_effects_chain_t* sox_create_effects_chain(
-    sox_encodinginfo_t const* in_enc,
-    sox_encodinginfo_t const* out_enc) {
-  return NULL;
-}
-
-void sox_delete_effect(sox_effect_t* effp) {}
-void sox_delete_effects_chain(sox_effects_chain_t* ecp) {}
-
-int sox_effect_options(sox_effect_t* effp, int argc, char* const argv[]) {
-  return -1;
-}
-
-const sox_effect_handler_t* sox_find_effect(char const* name) {
-  return NULL;
-}
-
-int sox_flow_effects(
-    sox_effects_chain_t* chain,
-    int callback(sox_bool all_done, void* client_data),
-    void* client_data) {
-  return -1;
-}
-
-const sox_effect_fn_t* sox_get_effect_fns(void) {
-  return NULL;
-}
-
-const sox_format_tab_t* sox_get_format_fns(void) {
-  return NULL;
-}
-
-sox_globals_t* sox_get_globals(void) {
-  return NULL;
-}
-
-sox_format_t* sox_open_read(
-    char const* path,
-    sox_signalinfo_t const* signal,
-    sox_encodinginfo_t const* encoding,
-    char const* filetype) {
-  return NULL;
-}
-
-sox_format_t* sox_open_write(
-    char const* path,
-    sox_signalinfo_t const* signal,
-    sox_encodinginfo_t const* encoding,
-    char const* filetype,
-    sox_oob_t const* oob,
-    sox_bool overwrite_permitted(char const* filename)) {
-  return NULL;
-}
-
-const char* sox_strerror(int sox_errno) {
-  return NULL;
-}
-
-size_t sox_write(sox_format_t* ft, const sox_sample_t* buf, size_t len) {
-  return 0;
-}
-
-int sox_init() {
-  return -1;
-};
-int sox_quit() {
-  return -1;
-};
diff --git a/tools/setup_helpers/extension.py b/tools/setup_helpers/extension.py
index 58f5087854..b322541e36 100644
--- a/tools/setup_helpers/extension.py
+++ b/tools/setup_helpers/extension.py
@@ -51,13 +51,6 @@ def get_ext_modules():
         Extension(name="torchaudio.lib.libtorchaudio", sources=[]),
         Extension(name="torchaudio.lib._torchaudio", sources=[]),
     ]
-    if _BUILD_SOX:
-        modules.extend(
-            [
-                Extension(name="torchaudio.lib.libtorchaudio_sox", sources=[]),
-                Extension(name="torchaudio.lib._torchaudio_sox", sources=[]),
-            ]
-        )
     if _BUILD_CUDA_CTC_DECODER:
         modules.extend(
             [