diff --git a/CMakeLists.txt b/CMakeLists.txt
index ddc6dc15a2..6fada209fe 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -166,10 +166,6 @@ else()
 endif()
 
 add_subdirectory(src/libtorchaudio)
-if (BUILD_SOX)
-  add_subdirectory(third_party/sox)
-  add_subdirectory(src/libtorchaudio/sox)
-endif()
 if (USE_FFMPEG)
   if (DEFINED ENV{FFMPEG_ROOT})
     add_subdirectory(third_party/ffmpeg/single)
diff --git a/docs/source/functional.rst b/docs/source/functional.rst
index f58a6730b8..158ae54869 100644
--- a/docs/source/functional.rst
+++ b/docs/source/functional.rst
@@ -23,7 +23,6 @@ Utility
    mask_along_axis_iid
    mu_law_encoding
    mu_law_decoding
-   apply_codec
    resample
    loudness
    convolve
diff --git a/docs/source/sox_effects.rst b/docs/source/sox_effects.rst
deleted file mode 100644
index a8ee260144..0000000000
--- a/docs/source/sox_effects.rst
+++ /dev/null
@@ -1,34 +0,0 @@
-.. py:module:: torchaudio.sox_effects
-
-torchaudio.sox_effects
-======================
-
-.. currentmodule:: torchaudio.sox_effects
-
-.. warning::
-    Starting with version 2.8, we are refactoring TorchAudio to transition it
-    into a maintenance phase. As a result, the ``sox_effect`` module is
-    deprecated in 2.8 and will be removed in 2.9.
-
-Applying effects
-----------------
-
-Apply SoX effects chain on torch.Tensor or on file and load as torch.Tensor.
-
-.. autosummary::
-   :toctree: generated
-   :nosignatures:
-
-   apply_effects_tensor
-   apply_effects_file
-
-.. minigallery:: torchaudio.sox_effects.apply_effects_tensor
-   
-Utilities
----------
-
-.. autosummary::
-   :toctree: generated
-   :nosignatures:
-
-   effect_names
diff --git a/docs/source/torchaudio.rst b/docs/source/torchaudio.rst
index aa933e84ad..629ffd312a 100644
--- a/docs/source/torchaudio.rst
+++ b/docs/source/torchaudio.rst
@@ -78,14 +78,6 @@ The following table summarizes the backends.
        to retrieve the supported codecs.
 
        This backend Supports various protocols, such as HTTPS and MP4, and file-like objects.
-   * - 2
-     - SoX
-     - Linux, macOS
-     - Use :py:func:`~torchaudio.utils.sox_utils.list_read_formats` and
-       :py:func:`~torchaudio.utils.sox_utils.list_write_formats`
-       to retrieve the supported codecs.
-
-       This backend does *not* support file-like objects.
    * - 3
      - SoundFile
      - Linux, macOS, Windows
diff --git a/docs/source/utils.rst b/docs/source/utils.rst
index af42445765..70d29f3093 100644
--- a/docs/source/utils.rst
+++ b/docs/source/utils.rst
@@ -8,7 +8,7 @@ torchaudio.utils
 .. warning::
     Starting with version 2.8, we are refactoring TorchAudio to transition it
     into a maintenance phase. As a result:
-    - ``sox_utils`` and `ffmpeg_utils`` are deprecated in 2.8 and will be removed in 2.9.
+    - ``ffmpeg_utils`` is deprecated in 2.8 and will be removed in 2.9.
     - The decoding and encoding capabilities of PyTorch for both audio and video
       are being consolidated into TorchCodec.
     Please see https://github.com/pytorch/audio/issues/3902 for more information.
diff --git a/examples/libtorchaudio/CMakeLists.txt b/examples/libtorchaudio/CMakeLists.txt
index b4cf58b375..e540f88044 100644
--- a/examples/libtorchaudio/CMakeLists.txt
+++ b/examples/libtorchaudio/CMakeLists.txt
@@ -2,8 +2,6 @@ cmake_minimum_required(VERSION 3.5)
 
 project(libtorchaudio-cpp-example)
 
-SET(BUILD_SOX ON CACHE BOOL "Build libsox into libtorchaudio")
-
 SET(BUILD_KALDI OFF CACHE BOOL "Build Kaldi into libtorchaudio")
 SET(BUILD_RNNT ON CACHE BOOL "Build RNN transducer into libtorchaudio")
 SET(BUILD_TORCHAUDIO_PYTHON_EXTENSION OFF CACHE BOOL "Build Python binding")
diff --git a/src/libtorchaudio/sox/CMakeLists.txt b/src/libtorchaudio/sox/CMakeLists.txt
deleted file mode 100644
index 5ffe782c82..0000000000
--- a/src/libtorchaudio/sox/CMakeLists.txt
+++ /dev/null
@@ -1,25 +0,0 @@
-set(
-  sources
-  io.cpp
-  utils.cpp
-  effects.cpp
-  effects_chain.cpp
-  types.cpp
-  )
-torchaudio_library(
-  libtorchaudio_sox
-  "${sources}"
-  ""
-  "torch;sox"
-  ""
-  )
-
-if (BUILD_TORCHAUDIO_PYTHON_EXTENSION)
-  torchaudio_extension(
-    _torchaudio_sox
-    "pybind/pybind.cpp;"
-    ""
-    "libtorchaudio_sox"
-    ""
-    )
-endif()
diff --git a/src/libtorchaudio/sox/effects.cpp b/src/libtorchaudio/sox/effects.cpp
deleted file mode 100644
index 947c04e3fc..0000000000
--- a/src/libtorchaudio/sox/effects.cpp
+++ /dev/null
@@ -1,133 +0,0 @@
-#include <libtorchaudio/sox/effects.h>
-#include <libtorchaudio/sox/effects_chain.h>
-#include <libtorchaudio/sox/utils.h>
-#include <sox.h>
-
-namespace torchaudio::sox {
-namespace {
-
-enum SoxEffectsResourceState { NotInitialized, Initialized, ShutDown };
-SoxEffectsResourceState SOX_RESOURCE_STATE = NotInitialized;
-std::mutex SOX_RESOUCE_STATE_MUTEX;
-
-} // namespace
-
-void initialize_sox_effects() {
-  const std::lock_guard<std::mutex> lock(SOX_RESOUCE_STATE_MUTEX);
-
-  switch (SOX_RESOURCE_STATE) {
-    case NotInitialized:
-      TORCH_CHECK(
-          sox_init() == SOX_SUCCESS, "Failed to initialize sox effects.");
-      SOX_RESOURCE_STATE = Initialized;
-      break;
-    case Initialized:
-      break;
-    case ShutDown:
-      TORCH_CHECK(
-          false, "SoX Effects has been shut down. Cannot initialize again.");
-  }
-};
-
-void shutdown_sox_effects() {
-  const std::lock_guard<std::mutex> lock(SOX_RESOUCE_STATE_MUTEX);
-
-  switch (SOX_RESOURCE_STATE) {
-    case NotInitialized:
-      TORCH_CHECK(false, "SoX Effects is not initialized. Cannot shutdown.");
-    case Initialized:
-      TORCH_CHECK(
-          sox_quit() == SOX_SUCCESS, "Failed to initialize sox effects.");
-      SOX_RESOURCE_STATE = ShutDown;
-      break;
-    case ShutDown:
-      break;
-  }
-}
-
-auto apply_effects_tensor(
-    torch::Tensor waveform,
-    int64_t sample_rate,
-    const std::vector<std::vector<std::string>>& effects,
-    bool channels_first) -> std::tuple<torch::Tensor, int64_t> {
-  validate_input_tensor(waveform);
-
-  // Create SoxEffectsChain
-  const auto dtype = waveform.dtype();
-  SoxEffectsChain chain(
-      /*input_encoding=*/get_tensor_encodinginfo(dtype),
-      /*output_encoding=*/get_tensor_encodinginfo(dtype));
-
-  // Prepare output buffer
-  std::vector<sox_sample_t> out_buffer;
-  out_buffer.reserve(waveform.numel());
-
-  // Build and run effects chain
-  chain.addInputTensor(&waveform, sample_rate, channels_first);
-  for (const auto& effect : effects) {
-    chain.addEffect(effect);
-  }
-  chain.addOutputBuffer(&out_buffer);
-  chain.run();
-
-  // Create tensor from buffer
-  auto out_tensor = convert_to_tensor(
-      /*buffer=*/out_buffer.data(),
-      /*num_samples=*/out_buffer.size(),
-      /*num_channels=*/chain.getOutputNumChannels(),
-      dtype,
-      /*normalize=*/false,
-      channels_first);
-
-  return std::tuple<torch::Tensor, int64_t>(
-      out_tensor, chain.getOutputSampleRate());
-}
-
-auto apply_effects_file(
-    const std::string& path,
-    const std::vector<std::vector<std::string>>& effects,
-    std::optional<bool> normalize,
-    std::optional<bool> channels_first,
-    const std::optional<std::string>& format)
-    -> std::tuple<torch::Tensor, int64_t> {
-  // Open input file
-  SoxFormat sf(sox_open_read(
-      path.c_str(),
-      /*signal=*/nullptr,
-      /*encoding=*/nullptr,
-      /*filetype=*/format.has_value() ? format.value().c_str() : nullptr));
-
-  validate_input_file(sf, path);
-
-  const auto dtype = get_dtype(sf->encoding.encoding, sf->signal.precision);
-
-  // Prepare output
-  std::vector<sox_sample_t> out_buffer;
-  out_buffer.reserve(sf->signal.length);
-
-  // Create and run SoxEffectsChain
-  SoxEffectsChain chain(
-      /*input_encoding=*/sf->encoding,
-      /*output_encoding=*/get_tensor_encodinginfo(dtype));
-
-  chain.addInputFile(sf);
-  for (const auto& effect : effects) {
-    chain.addEffect(effect);
-  }
-  chain.addOutputBuffer(&out_buffer);
-  chain.run();
-
-  // Create tensor from buffer
-  bool channels_first_ = channels_first.value_or(true);
-  auto tensor = convert_to_tensor(
-      /*buffer=*/out_buffer.data(),
-      /*num_samples=*/out_buffer.size(),
-      /*num_channels=*/chain.getOutputNumChannels(),
-      dtype,
-      normalize.value_or(true),
-      channels_first_);
-
-  return std::tuple<torch::Tensor, int64_t>(
-      tensor, chain.getOutputSampleRate());
-}
-} // namespace torchaudio::sox
diff --git a/src/libtorchaudio/sox/effects.h b/src/libtorchaudio/sox/effects.h
deleted file mode 100644
index 8b56427c1e..0000000000
--- a/src/libtorchaudio/sox/effects.h
+++ /dev/null
@@ -1,29 +0,0 @@
-#ifndef TORCHAUDIO_SOX_EFFECTS_H
-#define TORCHAUDIO_SOX_EFFECTS_H
-
-#include <libtorchaudio/sox/utils.h>
-#include <torch/script.h>
-
-namespace torchaudio::sox {
-
-void initialize_sox_effects();
-
-void shutdown_sox_effects();
-
-auto apply_effects_tensor(
-    torch::Tensor waveform,
-    int64_t sample_rate,
-    const std::vector<std::vector<std::string>>& effects,
-    bool channels_first) -> std::tuple<torch::Tensor, int64_t>;
-
-auto apply_effects_file(
-    const std::string& path,
-    const std::vector<std::vector<std::string>>& effects,
-    std::optional<bool> normalize,
-    std::optional<bool> channels_first,
-    const std::optional<std::string>& format)
-    -> std::tuple<torch::Tensor, int64_t>;
-
-} // namespace torchaudio::sox
-
-#endif
diff --git a/src/libtorchaudio/sox/effects_chain.cpp b/src/libtorchaudio/sox/effects_chain.cpp
deleted file mode 100644
index 7f6109a343..0000000000
--- a/src/libtorchaudio/sox/effects_chain.cpp
+++ /dev/null
@@ -1,301 +0,0 @@
-#include <libtorchaudio/sox/effects_chain.h>
-#include <libtorchaudio/sox/utils.h>
-#include "c10/util/Exception.h"
-
-using namespace torch::indexing;
-
-namespace torchaudio::sox {
-
-namespace {
-
-/// helper classes for passing the location of input tensor and output buffer
-///
-/// drain/flow callback functions require plaing C style function signature and
-/// the way to pass extra data is to attach data to sox_effect_t::priv pointer.
-/// The following structs will be assigned to sox_effect_t::priv pointer which
-/// gives sox_effect_t an access to input Tensor and output buffer object.
-struct TensorInputPriv {
-  size_t index;
-  torch::Tensor* waveform;
-  int64_t sample_rate;
-  bool channels_first;
-};
-struct TensorOutputPriv {
-  std::vector<sox_sample_t>* buffer;
-};
-struct FileOutputPriv {
-  sox_format_t* sf;
-};
-
-/// Callback function to feed Tensor data to SoxEffectChain.
-int tensor_input_drain(sox_effect_t* effp, sox_sample_t* obuf, size_t* osamp) {
-  // Retrieve the input Tensor and current index
-  auto priv = static_cast<TensorInputPriv*>(effp->priv);
-  auto index = priv->index;
-  auto tensor = *(priv->waveform);
-  auto num_channels = effp->out_signal.channels;
-
-  // Adjust the number of samples to read
-  const size_t num_samples = tensor.numel();
-  if (index + *osamp > num_samples) {
-    *osamp = num_samples - index;
-  }
-  // Ensure that it's a multiple of the number of channels
-  *osamp -= *osamp % num_channels;
-
-  // Slice the input Tensor
-  auto chunk = [&]() {
-    auto i_frame = index / num_channels;
-    auto num_frames = *osamp / num_channels;
-    auto t = (priv->channels_first)
-        ? tensor.index({Slice(), Slice(i_frame, i_frame + num_frames)}).t()
-        : tensor.index({Slice(i_frame, i_frame + num_frames), Slice()});
-    return t.reshape({-1});
-  }();
-
-  // Convert to sox_sample_t (int32_t)
-  switch (chunk.dtype().toScalarType()) {
-    case c10::ScalarType::Float: {
-      // Need to convert to 64-bit precision so that
-      // values around INT32_MIN/MAX are handled correctly.
-      chunk = chunk.to(c10::ScalarType::Double);
-      chunk *= 2147483648.;
-      chunk.clamp_(INT32_MIN, INT32_MAX);
-      chunk = chunk.to(c10::ScalarType::Int);
-      break;
-    }
-    case c10::ScalarType::Int: {
-      break;
-    }
-    case c10::ScalarType::Short: {
-      chunk = chunk.to(c10::ScalarType::Int);
-      chunk *= 65536;
-      break;
-    }
-    case c10::ScalarType::Byte: {
-      chunk = chunk.to(c10::ScalarType::Int);
-      chunk -= 128;
-      chunk *= 16777216;
-      break;
-    }
-    default:
-      TORCH_CHECK(false, "Unexpected dtype: ", chunk.dtype());
-  }
-  // Write to buffer
-  chunk = chunk.contiguous();
-  memcpy(obuf, chunk.data_ptr<int32_t>(), *osamp * 4);
-  priv->index += *osamp;
-  return (priv->index == num_samples) ? SOX_EOF : SOX_SUCCESS;
-}
-
-/// Callback function to fetch data from SoxEffectChain.
-int tensor_output_flow(
-    sox_effect_t* effp,
-    sox_sample_t const* ibuf,
-    sox_sample_t* obuf LSX_UNUSED,
-    size_t* isamp,
-    size_t* osamp) {
-  *osamp = 0;
-  // Get output buffer
-  auto out_buffer = static_cast<TensorOutputPriv*>(effp->priv)->buffer;
-  // Append at the end
-  out_buffer->insert(out_buffer->end(), ibuf, ibuf + *isamp);
-  return SOX_SUCCESS;
-}
-
-int file_output_flow(
-    sox_effect_t* effp,
-    sox_sample_t const* ibuf,
-    sox_sample_t* obuf LSX_UNUSED,
-    size_t* isamp,
-    size_t* osamp) {
-  *osamp = 0;
-  if (*isamp) {
-    auto sf = static_cast<FileOutputPriv*>(effp->priv)->sf;
-    if (sox_write(sf, ibuf, *isamp) != *isamp) {
-      TORCH_CHECK(
-          !sf->sox_errno,
-          sf->sox_errstr,
-          " ",
-          sox_strerror(sf->sox_errno),
-          " ",
-          sf->filename);
-      return SOX_EOF;
-    }
-  }
-  return SOX_SUCCESS;
-}
-
-sox_effect_handler_t* get_tensor_input_handler() {
-  static sox_effect_handler_t handler{
-      /*name=*/"input_tensor",
-      /*usage=*/nullptr,
-      /*flags=*/SOX_EFF_MCHAN,
-      /*getopts=*/nullptr,
-      /*start=*/nullptr,
-      /*flow=*/nullptr,
-      /*drain=*/tensor_input_drain,
-      /*stop=*/nullptr,
-      /*kill=*/nullptr,
-      /*priv_size=*/sizeof(TensorInputPriv)};
-  return &handler;
-}
-
-sox_effect_handler_t* get_tensor_output_handler() {
-  static sox_effect_handler_t handler{
-      /*name=*/"output_tensor",
-      /*usage=*/nullptr,
-      /*flags=*/SOX_EFF_MCHAN,
-      /*getopts=*/nullptr,
-      /*start=*/nullptr,
-      /*flow=*/tensor_output_flow,
-      /*drain=*/nullptr,
-      /*stop=*/nullptr,
-      /*kill=*/nullptr,
-      /*priv_size=*/sizeof(TensorOutputPriv)};
-  return &handler;
-}
-
-sox_effect_handler_t* get_file_output_handler() {
-  static sox_effect_handler_t handler{
-      /*name=*/"output_file",
-      /*usage=*/nullptr,
-      /*flags=*/SOX_EFF_MCHAN,
-      /*getopts=*/nullptr,
-      /*start=*/nullptr,
-      /*flow=*/file_output_flow,
-      /*drain=*/nullptr,
-      /*stop=*/nullptr,
-      /*kill=*/nullptr,
-      /*priv_size=*/sizeof(FileOutputPriv)};
-  return &handler;
-}
-
-} // namespace
-
-SoxEffect::SoxEffect(sox_effect_t* se) noexcept : se_(se) {}
-
-SoxEffect::~SoxEffect() {
-  if (se_ != nullptr) {
-    free(se_);
-  }
-}
-
-SoxEffect::operator sox_effect_t*() const {
-  return se_;
-}
-
-auto SoxEffect::operator->() noexcept -> sox_effect_t* {
-  return se_;
-}
-
-SoxEffectsChain::SoxEffectsChain(
-    sox_encodinginfo_t input_encoding,
-    sox_encodinginfo_t output_encoding)
-    : in_enc_(input_encoding),
-      out_enc_(output_encoding),
-      in_sig_(),
-      interm_sig_(),
-      out_sig_(),
-      sec_(sox_create_effects_chain(&in_enc_, &out_enc_)) {
-  TORCH_CHECK(sec_, "Failed to create effect chain.");
-}
-
-SoxEffectsChain::~SoxEffectsChain() {
-  if (sec_ != nullptr) {
-    sox_delete_effects_chain(sec_);
-  }
-}
-
-void SoxEffectsChain::run() {
-  sox_flow_effects(sec_, nullptr, nullptr);
-}
-
-void SoxEffectsChain::addInputTensor(
-    torch::Tensor* waveform,
-    int64_t sample_rate,
-    bool channels_first) {
-  in_sig_ = get_signalinfo(waveform, sample_rate, "wav", channels_first);
-  interm_sig_ = in_sig_;
-  SoxEffect e(sox_create_effect(get_tensor_input_handler()));
-  auto priv = static_cast<TensorInputPriv*>(e->priv);
-  priv->index = 0;
-  priv->waveform = waveform;
-  priv->sample_rate = sample_rate;
-  priv->channels_first = channels_first;
-  TORCH_CHECK(
-      sox_add_effect(sec_, e, &interm_sig_, &in_sig_) == SOX_SUCCESS,
-      "Internal Error: Failed to add effect: input_tensor");
-}
-
-void SoxEffectsChain::addOutputBuffer(
-    std::vector<sox_sample_t>* output_buffer) {
-  SoxEffect e(sox_create_effect(get_tensor_output_handler()));
-  static_cast<TensorOutputPriv*>(e->priv)->buffer = output_buffer;
-  TORCH_CHECK(
-      sox_add_effect(sec_, e, &interm_sig_, &in_sig_) == SOX_SUCCESS,
-      "Internal Error: Failed to add effect: output_tensor");
-}
-
-void SoxEffectsChain::addInputFile(sox_format_t* sf) {
-  in_sig_ = sf->signal;
-  interm_sig_ = in_sig_;
-  SoxEffect e(sox_create_effect(sox_find_effect("input")));
-  char* opts[] = {(char*)sf};
-  sox_effect_options(e, 1, opts);
-  TORCH_CHECK(
-      sox_add_effect(sec_, e, &interm_sig_, &in_sig_) == SOX_SUCCESS,
-      "Internal Error: Failed to add effect: input ",
-      sf->filename);
-}
-
-void SoxEffectsChain::addOutputFile(sox_format_t* sf) {
-  out_sig_ = sf->signal;
-  SoxEffect e(sox_create_effect(get_file_output_handler()));
-  static_cast<FileOutputPriv*>(e->priv)->sf = sf;
-  TORCH_CHECK(
-      sox_add_effect(sec_, e, &interm_sig_, &out_sig_) == SOX_SUCCESS,
-      "Internal Error: Failed to add effect: output ",
-      sf->filename);
-}
-
-void SoxEffectsChain::addEffect(const std::vector<std::string>& effect) {
-  const auto num_args = effect.size();
-  TORCH_CHECK(num_args != 0, "Invalid argument: empty effect.");
-  const auto name = effect[0];
-  TORCH_CHECK(
-      UNSUPPORTED_EFFECTS.find(name) == UNSUPPORTED_EFFECTS.end(),
-      "Unsupported effect: ",
-      name)
-
-  auto returned_effect = sox_find_effect(name.c_str());
-  TORCH_CHECK(returned_effect, "Unsupported effect: ", name)
-
-  SoxEffect e(sox_create_effect(returned_effect));
-  const auto num_options = num_args - 1;
-
-  std::vector<char*> opts;
-  for (size_t i = 1; i < num_args; ++i) {
-    opts.push_back((char*)effect[i].c_str());
-  }
-  TORCH_CHECK(
-      sox_effect_options(e, num_options, num_options ? opts.data() : nullptr) ==
-          SOX_SUCCESS,
-      "Invalid effect option: ",
-      c10::Join(" ", effect))
-  TORCH_CHECK(
-      sox_add_effect(sec_, e, &interm_sig_, &in_sig_) == SOX_SUCCESS,
-      "Internal Error: Failed to add effect: \"",
-      c10::Join(" ", effect),
-      "\"");
-}
-
-int64_t SoxEffectsChain::getOutputNumChannels() {
-  return interm_sig_.channels;
-}
-
-int64_t SoxEffectsChain::getOutputSampleRate() {
-  return interm_sig_.rate;
-}
-
-} // namespace torchaudio::sox
diff --git a/src/libtorchaudio/sox/effects_chain.h b/src/libtorchaudio/sox/effects_chain.h
deleted file mode 100644
index e6a892b5e8..0000000000
--- a/src/libtorchaudio/sox/effects_chain.h
+++ /dev/null
@@ -1,61 +0,0 @@
-#ifndef TORCHAUDIO_SOX_EFFECTS_CHAIN_H
-#define TORCHAUDIO_SOX_EFFECTS_CHAIN_H
-
-#include <libtorchaudio/sox/utils.h>
-#include <sox.h>
-
-namespace torchaudio::sox {
-
-// Helper struct to safely close sox_effect_t* pointer returned by
-// sox_create_effect
-
-struct SoxEffect {
-  explicit SoxEffect(sox_effect_t* se) noexcept;
-  SoxEffect(const SoxEffect& other) = delete;
-  SoxEffect(SoxEffect&& other) = delete;
-  auto operator=(const SoxEffect& other) -> SoxEffect& = delete;
-  auto operator=(SoxEffect&& other) -> SoxEffect& = delete;
-  ~SoxEffect();
-  operator sox_effect_t*() const;
-  auto operator->() noexcept -> sox_effect_t*;
-
- private:
-  sox_effect_t* se_;
-};
-
-// Helper struct to safely close sox_effects_chain_t with handy methods
-class SoxEffectsChain {
-  const sox_encodinginfo_t in_enc_;
-  const sox_encodinginfo_t out_enc_;
-
- protected:
-  sox_signalinfo_t in_sig_;
-  sox_signalinfo_t interm_sig_;
-  sox_signalinfo_t out_sig_;
-  sox_effects_chain_t* sec_;
-
- public:
-  explicit SoxEffectsChain(
-      sox_encodinginfo_t input_encoding,
-      sox_encodinginfo_t output_encoding);
-  SoxEffectsChain(const SoxEffectsChain& other) = delete;
-  SoxEffectsChain(SoxEffectsChain&& other) = delete;
-  SoxEffectsChain& operator=(const SoxEffectsChain& other) = delete;
-  SoxEffectsChain& operator=(SoxEffectsChain&& other) = delete;
-  ~SoxEffectsChain();
-  void run();
-  void addInputTensor(
-      torch::Tensor* waveform,
-      int64_t sample_rate,
-      bool channels_first);
-  void addInputFile(sox_format_t* sf);
-  void addOutputBuffer(std::vector<sox_sample_t>* output_buffer);
-  void addOutputFile(sox_format_t* sf);
-  void addEffect(const std::vector<std::string>& effect);
-  int64_t getOutputNumChannels();
-  int64_t getOutputSampleRate();
-};
-
-} // namespace torchaudio::sox
-
-#endif
diff --git a/src/libtorchaudio/sox/io.cpp b/src/libtorchaudio/sox/io.cpp
deleted file mode 100644
index 474726ad1c..0000000000
--- a/src/libtorchaudio/sox/io.cpp
+++ /dev/null
@@ -1,128 +0,0 @@
-#include <libtorchaudio/sox/effects.h>
-#include <libtorchaudio/sox/effects_chain.h>
-#include <libtorchaudio/sox/io.h>
-#include <libtorchaudio/sox/types.h>
-#include <libtorchaudio/sox/utils.h>
-
-using namespace torch::indexing;
-
-namespace torchaudio::sox {
-
-std::tuple<int64_t, int64_t, int64_t, int64_t, std::string> get_info_file(
-    const std::string& path,
-    const std::optional<std::string>& format) {
-  SoxFormat sf(sox_open_read(
-      path.c_str(),
-      /*signal=*/nullptr,
-      /*encoding=*/nullptr,
-      /*filetype=*/format.has_value() ? format.value().c_str() : nullptr));
-
-  validate_input_file(sf, path);
-
-  return std::make_tuple(
-      static_cast<int64_t>(sf->signal.rate),
-      static_cast<int64_t>(sf->signal.length / sf->signal.channels),
-      static_cast<int64_t>(sf->signal.channels),
-      static_cast<int64_t>(sf->encoding.bits_per_sample),
-      get_encoding(sf->encoding.encoding));
-}
-
-std::vector<std::vector<std::string>> get_effects(
-    const std::optional<int64_t>& frame_offset,
-    const std::optional<int64_t>& num_frames) {
-  const auto offset = frame_offset.value_or(0);
-  TORCH_CHECK(
-      offset >= 0,
-      "Invalid argument: frame_offset must be non-negative. Found: ",
-      offset);
-  const auto frames = num_frames.value_or(-1);
-  TORCH_CHECK(
-      frames > 0 || frames == -1,
-      "Invalid argument: num_frames must be -1 or greater than 0.");
-
-  std::vector<std::vector<std::string>> effects;
-  if (frames != -1) {
-    std::ostringstream os_offset, os_frames;
-    os_offset << offset << "s";
-    os_frames << "+" << frames << "s";
-    effects.emplace_back(
-        std::vector<std::string>{"trim", os_offset.str(), os_frames.str()});
-  } else if (offset != 0) {
-    std::ostringstream os_offset;
-    os_offset << offset << "s";
-    effects.emplace_back(std::vector<std::string>{"trim", os_offset.str()});
-  }
-  return effects;
-}
-
-std::tuple<torch::Tensor, int64_t> load_audio_file(
-    const std::string& path,
-    const std::optional<int64_t>& frame_offset,
-    const std::optional<int64_t>& num_frames,
-    std::optional<bool> normalize,
-    std::optional<bool> channels_first,
-    const std::optional<std::string>& format) {
-  auto effects = get_effects(frame_offset, num_frames);
-  return apply_effects_file(path, effects, normalize, channels_first, format);
-}
-
-void save_audio_file(
-    const std::string& path,
-    torch::Tensor tensor,
-    int64_t sample_rate,
-    bool channels_first,
-    std::optional<double> compression,
-    std::optional<std::string> format,
-    std::optional<std::string> encoding,
-    std::optional<int64_t> bits_per_sample) {
-  validate_input_tensor(tensor);
-
-  const auto filetype = [&]() {
-    if (format.has_value()) {
-      return format.value();
-    }
-    return get_filetype(path);
-  }();
-
-  if (filetype == "amr-nb") {
-    const auto num_channels = tensor.size(channels_first ? 0 : 1);
-    TORCH_CHECK(
-        num_channels == 1, "amr-nb format only supports single channel audio.");
-  } else if (filetype == "htk") {
-    const auto num_channels = tensor.size(channels_first ? 0 : 1);
-    TORCH_CHECK(
-        num_channels == 1, "htk format only supports single channel audio.");
-  } else if (filetype == "gsm") {
-    const auto num_channels = tensor.size(channels_first ? 0 : 1);
-    TORCH_CHECK(
-        num_channels == 1, "gsm format only supports single channel audio.");
-    TORCH_CHECK(
-        sample_rate == 8000,
-        "gsm format only supports a sampling rate of 8kHz.");
-  }
-  const auto signal_info =
-      get_signalinfo(&tensor, sample_rate, filetype, channels_first);
-  const auto encoding_info = get_encodinginfo_for_save(
-      filetype, tensor.dtype(), compression, encoding, bits_per_sample);
-
-  SoxFormat sf(sox_open_write(
-      path.c_str(),
-      &signal_info,
-      &encoding_info,
-      /*filetype=*/filetype.c_str(),
-      /*oob=*/nullptr,
-      /*overwrite_permitted=*/nullptr));
-
-  TORCH_CHECK(
-      static_cast<sox_format_t*>(sf) != nullptr,
-      "Error saving audio file: failed to open file ",
-      path);
-
-  SoxEffectsChain chain(
-      /*input_encoding=*/get_tensor_encodinginfo(tensor.dtype()),
-      /*output_encoding=*/sf->encoding);
-  chain.addInputTensor(&tensor, sample_rate, channels_first);
-  chain.addOutputFile(sf);
-  chain.run();
-}
-} // namespace torchaudio::sox
diff --git a/src/libtorchaudio/sox/io.h b/src/libtorchaudio/sox/io.h
deleted file mode 100644
index b011ef59be..0000000000
--- a/src/libtorchaudio/sox/io.h
+++ /dev/null
@@ -1,38 +0,0 @@
-#ifndef TORCHAUDIO_SOX_IO_H
-#define TORCHAUDIO_SOX_IO_H
-
-#include <libtorchaudio/sox/utils.h>
-#include <torch/script.h>
-
-namespace torchaudio::sox {
-
-auto get_effects(
-    const std::optional<int64_t>& frame_offset,
-    const std::optional<int64_t>& num_frames)
-    -> std::vector<std::vector<std::string>>;
-
-std::tuple<int64_t, int64_t, int64_t, int64_t, std::string> get_info_file(
-    const std::string& path,
-    const std::optional<std::string>& format);
-
-std::tuple<torch::Tensor, int64_t> load_audio_file(
-    const std::string& path,
-    const std::optional<int64_t>& frame_offset,
-    const std::optional<int64_t>& num_frames,
-    std::optional<bool> normalize,
-    std::optional<bool> channels_first,
-    const std::optional<std::string>& format);
-
-void save_audio_file(
-    const std::string& path,
-    torch::Tensor tensor,
-    int64_t sample_rate,
-    bool channels_first,
-    std::optional<double> compression,
-    std::optional<std::string> format,
-    std::optional<std::string> encoding,
-    std::optional<int64_t> bits_per_sample);
-
-} // namespace torchaudio::sox
-
-#endif
diff --git a/src/libtorchaudio/sox/pybind/pybind.cpp b/src/libtorchaudio/sox/pybind/pybind.cpp
deleted file mode 100644
index bd9c82c349..0000000000
--- a/src/libtorchaudio/sox/pybind/pybind.cpp
+++ /dev/null
@@ -1,39 +0,0 @@
-#include <libtorchaudio/sox/effects.h>
-#include <libtorchaudio/sox/io.h>
-#include <libtorchaudio/sox/utils.h>
-#include <torch/extension.h>
-
-namespace torchaudio {
-namespace sox {
-namespace {
-
-TORCH_LIBRARY(torchaudio_sox, m) {
-  m.def("torchaudio_sox::get_info", &get_info_file);
-  m.def("torchaudio_sox::load_audio_file", &load_audio_file);
-  m.def("torchaudio_sox::save_audio_file", &save_audio_file);
-  m.def("torchaudio_sox::initialize_sox_effects", &initialize_sox_effects);
-  m.def("torchaudio_sox::shutdown_sox_effects", &shutdown_sox_effects);
-  m.def("torchaudio_sox::apply_effects_tensor", &apply_effects_tensor);
-  m.def("torchaudio_sox::apply_effects_file", &apply_effects_file);
-}
-
-PYBIND11_MODULE(_torchaudio_sox, m) {
-  m.def("set_seed", &set_seed, "Set random seed.");
-  m.def("set_verbosity", &set_verbosity, "Set verbosity.");
-  m.def("set_use_threads", &set_use_threads, "Set threading.");
-  m.def("set_buffer_size", &set_buffer_size, "Set buffer size.");
-  m.def("get_buffer_size", &get_buffer_size, "Get buffer size.");
-  m.def("list_effects", &list_effects, "List available effects.");
-  m.def(
-      "list_read_formats",
-      &list_read_formats,
-      "List supported formats for decoding.");
-  m.def(
-      "list_write_formats",
-      &list_write_formats,
-      "List supported formats for encoding.");
-}
-
-} // namespace
-} // namespace sox
-} // namespace torchaudio
diff --git a/src/libtorchaudio/sox/types.cpp b/src/libtorchaudio/sox/types.cpp
deleted file mode 100644
index 12bd070105..0000000000
--- a/src/libtorchaudio/sox/types.cpp
+++ /dev/null
@@ -1,148 +0,0 @@
-#include <libtorchaudio/sox/types.h>
-
-namespace torchaudio::sox {
-
-Format get_format_from_string(const std::string& format) {
-  if (format == "wav") {
-    return Format::WAV;
-  }
-  if (format == "mp3") {
-    return Format::MP3;
-  }
-  if (format == "flac") {
-    return Format::FLAC;
-  }
-  if (format == "ogg" || format == "vorbis") {
-    return Format::VORBIS;
-  }
-  if (format == "amr-nb") {
-    return Format::AMR_NB;
-  }
-  if (format == "amr-wb") {
-    return Format::AMR_WB;
-  }
-  if (format == "amb") {
-    return Format::AMB;
-  }
-  if (format == "sph") {
-    return Format::SPHERE;
-  }
-  if (format == "htk") {
-    return Format::HTK;
-  }
-  if (format == "gsm") {
-    return Format::GSM;
-  }
-  TORCH_CHECK(false, "Internal Error: unexpected format value: ", format);
-}
-
-std::string to_string(Encoding v) {
-  switch (v) {
-    case Encoding::UNKNOWN:
-      return "UNKNOWN";
-    case Encoding::PCM_SIGNED:
-      return "PCM_S";
-    case Encoding::PCM_UNSIGNED:
-      return "PCM_U";
-    case Encoding::PCM_FLOAT:
-      return "PCM_F";
-    case Encoding::FLAC:
-      return "FLAC";
-    case Encoding::ULAW:
-      return "ULAW";
-    case Encoding::ALAW:
-      return "ALAW";
-    case Encoding::MP3:
-      return "MP3";
-    case Encoding::VORBIS:
-      return "VORBIS";
-    case Encoding::AMR_WB:
-      return "AMR_WB";
-    case Encoding::AMR_NB:
-      return "AMR_NB";
-    case Encoding::OPUS:
-      return "OPUS";
-    default:
-      TORCH_CHECK(false, "Internal Error: unexpected encoding.");
-  }
-}
-
-Encoding get_encoding_from_option(const std::optional<std::string>& encoding) {
-  if (!encoding.has_value()) {
-    return Encoding::NOT_PROVIDED;
-  }
-  std::string v = encoding.value();
-  if (v == "PCM_S") {
-    return Encoding::PCM_SIGNED;
-  }
-  if (v == "PCM_U") {
-    return Encoding::PCM_UNSIGNED;
-  }
-  if (v == "PCM_F") {
-    return Encoding::PCM_FLOAT;
-  }
-  if (v == "ULAW") {
-    return Encoding::ULAW;
-  }
-  if (v == "ALAW") {
-    return Encoding::ALAW;
-  }
-  TORCH_CHECK(false, "Internal Error: unexpected encoding value: ", v);
-}
-
-BitDepth get_bit_depth_from_option(const std::optional<int64_t>& bit_depth) {
-  if (!bit_depth.has_value()) {
-    return BitDepth::NOT_PROVIDED;
-  }
-  int64_t v = bit_depth.value();
-  switch (v) {
-    case 8:
-      return BitDepth::B8;
-    case 16:
-      return BitDepth::B16;
-    case 24:
-      return BitDepth::B24;
-    case 32:
-      return BitDepth::B32;
-    case 64:
-      return BitDepth::B64;
-    default: {
-      TORCH_CHECK(false, "Internal Error: unexpected bit depth value: ", v);
-    }
-  }
-}
-
-std::string get_encoding(sox_encoding_t encoding) {
-  switch (encoding) {
-    case SOX_ENCODING_UNKNOWN:
-      return "UNKNOWN";
-    case SOX_ENCODING_SIGN2:
-      return "PCM_S";
-    case SOX_ENCODING_UNSIGNED:
-      return "PCM_U";
-    case SOX_ENCODING_FLOAT:
-      return "PCM_F";
-    case SOX_ENCODING_FLAC:
-      return "FLAC";
-    case SOX_ENCODING_ULAW:
-      return "ULAW";
-    case SOX_ENCODING_ALAW:
-      return "ALAW";
-    case SOX_ENCODING_MP3:
-      return "MP3";
-    case SOX_ENCODING_VORBIS:
-      return "VORBIS";
-    case SOX_ENCODING_AMR_WB:
-      return "AMR_WB";
-    case SOX_ENCODING_AMR_NB:
-      return "AMR_NB";
-    case SOX_ENCODING_OPUS:
-      return "OPUS";
-    case SOX_ENCODING_GSM:
-      return "GSM";
-    default:
-      return "UNKNOWN";
-  }
-}
-
-} // namespace torchaudio::sox
diff --git a/src/libtorchaudio/sox/types.h b/src/libtorchaudio/sox/types.h
deleted file mode 100644
index 714d303313..0000000000
--- a/src/libtorchaudio/sox/types.h
+++ /dev/null
@@ -1,58 +0,0 @@
-#ifndef TORCHAUDIO_SOX_TYPES_H
-#define TORCHAUDIO_SOX_TYPES_H
-
-#include <sox.h>
-#include <torch/script.h>
-
-namespace torchaudio::sox {
-
-enum class Format {
-  WAV,
-  MP3,
-  FLAC,
-  VORBIS,
-  AMR_NB,
-  AMR_WB,
-  AMB,
-  SPHERE,
-  GSM,
-  HTK,
-};
-
-Format get_format_from_string(const std::string& format);
-
-enum class Encoding {
-  NOT_PROVIDED,
-  UNKNOWN,
-  PCM_SIGNED,
-  PCM_UNSIGNED,
-  PCM_FLOAT,
-  FLAC,
-  ULAW,
-  ALAW,
-  MP3,
-  VORBIS,
-  AMR_WB,
-  AMR_NB,
-  OPUS,
-};
-
-std::string to_string(Encoding v);
-Encoding get_encoding_from_option(const std::optional<std::string>& encoding);
-
-enum class BitDepth : unsigned {
-  NOT_PROVIDED = 0,
-  B8 = 8,
-  B16 = 16,
-  B24 = 24,
-  B32 = 32,
-  B64 = 64,
-};
-
-BitDepth get_bit_depth_from_option(const std::optional<int64_t>& bit_depth);
-
-std::string get_encoding(sox_encoding_t encoding);
-
-} // namespace torchaudio::sox
-
-#endif
diff --git a/src/libtorchaudio/sox/utils.cpp b/src/libtorchaudio/sox/utils.cpp
deleted file mode 100644
index 94748c5209..0000000000
--- a/src/libtorchaudio/sox/utils.cpp
+++ /dev/null
@@ -1,509 +0,0 @@
-#include <c10/core/ScalarType.h>
-#include <libtorchaudio/sox/types.h>
-#include <libtorchaudio/sox/utils.h>
-#include <sox.h>
-
-namespace torchaudio::sox {
-
-const std::unordered_set<std::string> UNSUPPORTED_EFFECTS{
-    "input",
-    "output",
-    "spectrogram",
-    "noiseprof",
-    "noisered",
-    "splice"};
-
-void set_seed(const int64_t seed) {
-  sox_get_globals()->ranqd1 = static_cast<sox_int32_t>(seed);
-}
-
-void set_verbosity(const int64_t verbosity) {
-  sox_get_globals()->verbosity = static_cast<unsigned>(verbosity);
-}
-
-void set_use_threads(const bool use_threads) {
-  sox_get_globals()->use_threads = static_cast<sox_bool>(use_threads);
-}
-
-void set_buffer_size(const int64_t buffer_size) {
-  sox_get_globals()->bufsiz = static_cast<size_t>(buffer_size);
-}
-
-int64_t get_buffer_size() {
-  return sox_get_globals()->bufsiz;
-}
-
-std::vector<std::vector<std::string>> list_effects() {
-  std::vector<std::vector<std::string>> effects;
-  for (const sox_effect_fn_t* fns = sox_get_effect_fns(); *fns; ++fns) {
-    const sox_effect_handler_t* handler = (*fns)();
-    if (handler && handler->name) {
-      if (UNSUPPORTED_EFFECTS.find(handler->name) ==
-          UNSUPPORTED_EFFECTS.end()) {
-        effects.emplace_back(std::vector<std::string>{
-            handler->name,
-            handler->usage ? std::string(handler->usage) : std::string("")});
-      }
-    }
-  }
-  return effects;
-}
-
-std::vector<std::string> list_write_formats() {
-  std::vector<std::string> formats;
-  for (const sox_format_tab_t* fns = sox_get_format_fns(); fns->fn; ++fns) {
-    const sox_format_handler_t* handler = fns->fn();
-    for (const char* const* names = handler->names; *names; ++names) {
-      if (!strchr(*names, '/') && handler->write) {
-        formats.emplace_back(*names);
-      }
-    }
-  }
-  return formats;
-}
-
-std::vector<std::string> list_read_formats() {
-  std::vector<std::string> formats;
-  for (const sox_format_tab_t* fns = sox_get_format_fns(); fns->fn; ++fns) {
-    const sox_format_handler_t* handler = fns->fn();
-    for (const char* const* names = handler->names; *names; ++names) {
-      if (!strchr(*names, '/') && handler->read) {
-        formats.emplace_back(*names);
-      }
-    }
-  }
-  return formats;
-}
-
-SoxFormat::SoxFormat(sox_format_t* fd) noexcept : fd_(fd) {}
-SoxFormat::~SoxFormat() {
-  close();
-}
-
-sox_format_t* SoxFormat::operator->() const noexcept {
-  return fd_;
-}
-SoxFormat::operator sox_format_t*() const noexcept {
-  return fd_;
-}
-
-void SoxFormat::close() {
-  if (fd_ != nullptr) {
-    sox_close(fd_);
-    fd_ = nullptr;
-  }
-}
-
-void validate_input_file(const SoxFormat& sf, const std::string& path) {
-  TORCH_CHECK(
-      static_cast<sox_format_t*>(sf) != nullptr,
-      "Error loading audio file: failed to open file " + path);
-  TORCH_CHECK(
-      sf->encoding.encoding != SOX_ENCODING_UNKNOWN,
-      "Error loading audio file: unknown encoding.");
-}
-
-void validate_input_tensor(const torch::Tensor& tensor) {
-  TORCH_CHECK(tensor.device().is_cpu(), "Input tensor has to be on CPU.");
-
-  TORCH_CHECK(tensor.ndimension() == 2, "Input tensor has to be 2D.");
-
-  switch (tensor.dtype().toScalarType()) {
-    case c10::ScalarType::Byte:
-    case c10::ScalarType::Short:
-    case c10::ScalarType::Int:
-    case c10::ScalarType::Float:
-      break;
-    default:
-      TORCH_CHECK(
-          false,
-          "Input tensor has to be one of float32, int32, int16 or uint8 type.");
-  }
-}
-
-caffe2::TypeMeta get_dtype(
-    const sox_encoding_t encoding,
-    const unsigned precision) {
-  const auto dtype = [&]() {
-    switch (encoding) {
-      case SOX_ENCODING_UNSIGNED: // 8-bit PCM WAV
-        return torch::kUInt8;
-      case SOX_ENCODING_SIGN2: // 16-bit, 24-bit, or 32-bit PCM WAV
-        switch (precision) {
-          case 16:
-            return torch::kInt16;
-          case 24: // Cast 24-bit to 32-bit.
-          case 32:
-            return torch::kInt32;
-          default:
-            TORCH_CHECK(
-                false,
-                "Only 16, 24, and 32 bits are supported for signed PCM.");
-        }
-      default:
-        // default to float32 for the other formats, including
-        // 32-bit flaoting-point WAV,
-        // MP3,
-        // FLAC,
-        // VORBIS etc...
-        return torch::kFloat32;
-    }
-  }();
-  return c10::scalarTypeToTypeMeta(dtype);
-}
-
-torch::Tensor convert_to_tensor(
-    sox_sample_t* buffer,
-    const int32_t num_samples,
-    const int32_t num_channels,
-    const caffe2::TypeMeta dtype,
-    const bool normalize,
-    const bool channels_first) {
-  torch::Tensor t;
-  uint64_t dummy = 0;
-  SOX_SAMPLE_LOCALS;
-  if (normalize || dtype == torch::kFloat32) {
-    t = torch::empty(
-        {num_samples / num_channels, num_channels}, torch::kFloat32);
-    auto ptr = t.data_ptr<float_t>();
-    for (int32_t i = 0; i < num_samples; ++i) {
-      ptr[i] = SOX_SAMPLE_TO_FLOAT_32BIT(buffer[i], dummy);
-    }
-  } else if (dtype == torch::kInt32) {
-    t = torch::from_blob(
-            buffer, {num_samples / num_channels, num_channels}, torch::kInt32)
-            .clone();
-  } else if (dtype == torch::kInt16) {
-    t = torch::empty({num_samples / num_channels, num_channels}, torch::kInt16);
-    auto ptr = t.data_ptr<int16_t>();
-    for (int32_t i = 0; i < num_samples; ++i) {
-      ptr[i] = SOX_SAMPLE_TO_SIGNED_16BIT(buffer[i], dummy);
-    }
-  } else if (dtype == torch::kUInt8) {
-    t = torch::empty({num_samples / num_channels, num_channels}, torch::kUInt8);
-    auto ptr = t.data_ptr<uint8_t>();
-    for (int32_t i = 0; i < num_samples; ++i) {
-      ptr[i] = SOX_SAMPLE_TO_UNSIGNED_8BIT(buffer[i], dummy);
-    }
-  } else {
-    TORCH_CHECK(false, "Unsupported dtype: ", dtype);
-  }
-  if (channels_first) {
-    t = t.transpose(1, 0);
-  }
-  return t.contiguous();
-}
-
-const std::string get_filetype(const std::string& path) {
-  std::string ext = path.substr(path.find_last_of('.') + 1);
-  std::transform(ext.begin(), ext.end(), ext.begin(), ::tolower);
-  return ext;
-}
-
-namespace {
-
-std::tuple<sox_encoding_t, unsigned> get_save_encoding_for_wav(
-    const std::string& format,
-    caffe2::TypeMeta dtype,
-    const Encoding& encoding,
-    const BitDepth& bits_per_sample) {
-  switch (encoding) {
-    case Encoding::NOT_PROVIDED:
-      switch (bits_per_sample) {
-        case BitDepth::NOT_PROVIDED:
-          switch (dtype.toScalarType()) {
-            case c10::ScalarType::Float:
-              return std::make_tuple<>(SOX_ENCODING_FLOAT, 32);
-            case c10::ScalarType::Int:
-              return std::make_tuple<>(SOX_ENCODING_SIGN2, 32);
-            case c10::ScalarType::Short:
-              return std::make_tuple<>(SOX_ENCODING_SIGN2, 16);
-            case c10::ScalarType::Byte:
-              return std::make_tuple<>(SOX_ENCODING_UNSIGNED, 8);
-            default:
-              TORCH_CHECK(false, "Internal Error: Unexpected dtype: ", dtype);
-          }
-        case BitDepth::B8:
-          return std::make_tuple<>(SOX_ENCODING_UNSIGNED, 8);
-        default:
-          return std::make_tuple<>(
-              SOX_ENCODING_SIGN2, static_cast<unsigned>(bits_per_sample));
-      }
-    case Encoding::PCM_SIGNED:
-      switch (bits_per_sample) {
-        case BitDepth::NOT_PROVIDED:
-          return std::make_tuple<>(SOX_ENCODING_SIGN2, 16);
-        case BitDepth::B8:
-          TORCH_CHECK(
-              false, format, " does not support 8-bit signed PCM encoding.");
-        default:
-          return std::make_tuple<>(
-              SOX_ENCODING_SIGN2, static_cast<unsigned>(bits_per_sample));
-      }
-    case Encoding::PCM_UNSIGNED:
-      switch (bits_per_sample) {
-        case BitDepth::NOT_PROVIDED:
-        case BitDepth::B8:
-          return std::make_tuple<>(SOX_ENCODING_UNSIGNED, 8);
-        default:
-          TORCH_CHECK(
-              false, format, " only supports 8-bit for unsigned PCM encoding.");
-      }
-    case Encoding::PCM_FLOAT:
-      switch (bits_per_sample) {
-        case BitDepth::NOT_PROVIDED:
-        case BitDepth::B32:
-          return std::make_tuple<>(SOX_ENCODING_FLOAT, 32);
-        case BitDepth::B64:
-          return std::make_tuple<>(SOX_ENCODING_FLOAT, 64);
-        default:
-          TORCH_CHECK(
-              false,
-              format,
-              " only supports 32-bit or 64-bit for floating-point PCM encoding.");
-      }
-    case Encoding::ULAW:
-      switch (bits_per_sample) {
-        case BitDepth::NOT_PROVIDED:
-        case BitDepth::B8:
-          return std::make_tuple<>(SOX_ENCODING_ULAW, 8);
-        default:
-          TORCH_CHECK(
-              false, format, " only supports 8-bit for mu-law encoding.");
-      }
-    case Encoding::ALAW:
-      switch (bits_per_sample) {
-        case BitDepth::NOT_PROVIDED:
-        case BitDepth::B8:
-          return std::make_tuple<>(SOX_ENCODING_ALAW, 8);
-        default:
-          TORCH_CHECK(
-              false, format, " only supports 8-bit for a-law encoding.");
-      }
-    default:
-      TORCH_CHECK(
-          false, format, " does not support encoding: " + to_string(encoding));
-  }
-}
-
-std::tuple<sox_encoding_t, unsigned> get_save_encoding(
-    const std::string& format,
-    const caffe2::TypeMeta& dtype,
-    const std::optional<std::string>& encoding,
-    const std::optional<int64_t>& bits_per_sample) {
-  const Format fmt = get_format_from_string(format);
-  const Encoding enc = get_encoding_from_option(encoding);
-  const BitDepth bps = get_bit_depth_from_option(bits_per_sample);
-
-  switch (fmt) {
-    case Format::WAV:
-    case Format::AMB:
-      return get_save_encoding_for_wav(format, dtype, enc, bps);
-    case Format::MP3:
-      TORCH_CHECK(
-          enc == Encoding::NOT_PROVIDED,
-          "mp3 does not support `encoding` option.");
-      TORCH_CHECK(
-          bps == BitDepth::NOT_PROVIDED,
-          "mp3 does not support `bits_per_sample` option.");
-      return std::make_tuple<>(SOX_ENCODING_MP3, 16);
-    case Format::HTK:
-      TORCH_CHECK(
-          enc == Encoding::NOT_PROVIDED,
-          "htk does not support `encoding` option.");
-      TORCH_CHECK(
-          bps == BitDepth::NOT_PROVIDED,
-          "htk does not support `bits_per_sample` option.");
-      return std::make_tuple<>(SOX_ENCODING_SIGN2, 16);
-    case Format::VORBIS:
-      TORCH_CHECK(
-          enc == Encoding::NOT_PROVIDED,
-          "vorbis does not support `encoding` option.");
-      TORCH_CHECK(
-          bps == BitDepth::NOT_PROVIDED,
-          "vorbis does not support `bits_per_sample` option.");
-      return std::make_tuple<>(SOX_ENCODING_VORBIS, 0);
-    case Format::AMR_NB:
-      TORCH_CHECK(
-          enc == Encoding::NOT_PROVIDED,
-          "amr-nb does not support `encoding` option.");
-      TORCH_CHECK(
-          bps == BitDepth::NOT_PROVIDED,
-          "amr-nb does not support `bits_per_sample` option.");
-      return std::make_tuple<>(SOX_ENCODING_AMR_NB, 16);
-    case Format::FLAC:
-      TORCH_CHECK(
-          enc == Encoding::NOT_PROVIDED,
-          "flac does not support `encoding` option.");
-      switch (bps) {
-        case BitDepth::B32:
-        case BitDepth::B64:
-          TORCH_CHECK(
-              false, "flac does not support `bits_per_sample` larger than 24.");
-        default:
-          return std::make_tuple<>(
-              SOX_ENCODING_FLAC, static_cast<unsigned>(bps));
-      }
-    case Format::SPHERE:
-      switch (enc) {
-        case Encoding::NOT_PROVIDED:
-        case Encoding::PCM_SIGNED:
-          switch (bps) {
-            case BitDepth::NOT_PROVIDED:
-              return std::make_tuple<>(SOX_ENCODING_SIGN2, 32);
-            default:
-              return std::make_tuple<>(
-                  SOX_ENCODING_SIGN2, static_cast<unsigned>(bps));
-          }
-        case Encoding::PCM_UNSIGNED:
-          TORCH_CHECK(false, "sph does not support unsigned integer PCM.");
-        case Encoding::PCM_FLOAT:
-          TORCH_CHECK(false, "sph does not support floating point PCM.");
-        case Encoding::ULAW:
-          switch (bps) {
-            case BitDepth::NOT_PROVIDED:
-            case BitDepth::B8:
-              return std::make_tuple<>(SOX_ENCODING_ULAW, 8);
-            default:
-              TORCH_CHECK(
-                  false, "sph only supports 8-bit for mu-law encoding.");
-          }
-        case Encoding::ALAW:
-          switch (bps) {
-            case BitDepth::NOT_PROVIDED:
-            case BitDepth::B8:
-              return std::make_tuple<>(SOX_ENCODING_ALAW, 8);
-            default:
-              return std::make_tuple<>(
-                  SOX_ENCODING_ALAW, static_cast<unsigned>(bps));
-          }
-        default:
-          TORCH_CHECK(
-              false, "sph does not support encoding: ", encoding.value());
-      }
-    case Format::GSM:
-      TORCH_CHECK(
-          enc == Encoding::NOT_PROVIDED,
-          "gsm does not support `encoding` option.");
-      TORCH_CHECK(
-          bps == BitDepth::NOT_PROVIDED,
-          "gsm does not support `bits_per_sample` option.");
-      return std::make_tuple<>(SOX_ENCODING_GSM, 16);
-
-    default:
-      TORCH_CHECK(false, "Unsupported format: " + format);
-  }
-}
-
-unsigned get_precision(const std::string& filetype, caffe2::TypeMeta dtype) {
-  if (filetype == "mp3") {
-    return SOX_UNSPEC;
-  }
-  if (filetype == "flac") {
-    return 24;
-  }
-  if (filetype == "ogg" || filetype == "vorbis") {
-    return SOX_UNSPEC;
-  }
-  if (filetype == "wav" || filetype == "amb") {
-    switch (dtype.toScalarType()) {
-      case c10::ScalarType::Byte:
-        return 8;
-      case c10::ScalarType::Short:
-        return 16;
-      case c10::ScalarType::Int:
-        return 32;
-      case c10::ScalarType::Float:
-        return 32;
-      default:
-        TORCH_CHECK(false, "Unsupported dtype: ", dtype);
-    }
-  }
-  if (filetype == "sph") {
-    return 32;
-  }
-  if (filetype == "amr-nb") {
-    return 16;
-  }
-  if (filetype == "gsm") {
-    return 16;
-  }
-  if (filetype == "htk") {
-    return 16;
-  }
-  TORCH_CHECK(false, "Unsupported file type: ", filetype);
-}
-
-} // namespace
-
-sox_signalinfo_t get_signalinfo(
-    const torch::Tensor* waveform,
-    const int64_t sample_rate,
-    const std::string& filetype,
-    const bool channels_first) {
-  return sox_signalinfo_t{
-      /*rate=*/static_cast<sox_rate_t>(sample_rate),
-      /*channels=*/
-      static_cast<unsigned>(waveform->size(channels_first ? 0 : 1)),
-      /*precision=*/get_precision(filetype, waveform->dtype()),
-      /*length=*/static_cast<uint64_t>(waveform->numel()),
-      nullptr};
-}
-
-sox_encodinginfo_t get_tensor_encodinginfo(caffe2::TypeMeta dtype) {
-  sox_encoding_t encoding = [&]() {
-    switch (dtype.toScalarType()) {
-      case c10::ScalarType::Byte:
-        return SOX_ENCODING_UNSIGNED;
-      case c10::ScalarType::Short:
-        return SOX_ENCODING_SIGN2;
-      case c10::ScalarType::Int:
-        return SOX_ENCODING_SIGN2;
-      case c10::ScalarType::Float:
-        return SOX_ENCODING_FLOAT;
-      default:
-        TORCH_CHECK(false, "Unsupported dtype: ", dtype);
-    }
-  }();
-  unsigned bits_per_sample = [&]() {
-    switch (dtype.toScalarType()) {
-      case c10::ScalarType::Byte:
-        return 8;
-      case c10::ScalarType::Short:
-        return 16;
-      case c10::ScalarType::Int:
-        return 32;
-      case c10::ScalarType::Float:
-        return 32;
-      default:
-        TORCH_CHECK(false, "Unsupported dtype: ", dtype);
-    }
-  }();
-  return sox_encodinginfo_t{
-      /*encoding=*/encoding,
-      /*bits_per_sample=*/bits_per_sample,
-      /*compression=*/HUGE_VAL,
-      /*reverse_bytes=*/sox_option_default,
-      /*reverse_nibbles=*/sox_option_default,
-      /*reverse_bits=*/sox_option_default,
-      /*opposite_endian=*/sox_false};
-}
-
-sox_encodinginfo_t get_encodinginfo_for_save(
-    const std::string& format,
-    const caffe2::TypeMeta& dtype,
-    const std::optional<double>& compression,
-    const std::optional<std::string>& encoding,
-    const std::optional<int64_t>& bits_per_sample) {
-  auto enc = get_save_encoding(format, dtype, encoding, bits_per_sample);
-  return sox_encodinginfo_t{
-      /*encoding=*/std::get<0>(enc),
-      /*bits_per_sample=*/std::get<1>(enc),
-      /*compression=*/compression.value_or(HUGE_VAL),
-      /*reverse_bytes=*/sox_option_default,
-      /*reverse_nibbles=*/sox_option_default,
-      /*reverse_bits=*/sox_option_default,
-      /*opposite_endian=*/sox_false};
-}
-
-} // namespace torchaudio::sox
diff --git a/src/libtorchaudio/sox/utils.h b/src/libtorchaudio/sox/utils.h
deleted file mode 100644
index b26e25f65e..0000000000
--- a/src/libtorchaudio/sox/utils.h
+++ /dev/null
@@ -1,112 +0,0 @@
-#ifndef TORCHAUDIO_SOX_UTILS_H
-#define TORCHAUDIO_SOX_UTILS_H
-
-#include <sox.h>
-#include <torch/types.h>
-
-namespace torchaudio::sox {
-
-////////////////////////////////////////////////////////////////////////////////
-// APIs for Python interaction
-////////////////////////////////////////////////////////////////////////////////
-
-/// Set sox global options
-void set_seed(const int64_t seed);
-
-void set_verbosity(const int64_t verbosity);
-
-void set_use_threads(const bool use_threads);
-
-void set_buffer_size(const int64_t buffer_size);
-
-int64_t get_buffer_size();
-
-std::vector<std::vector<std::string>> list_effects();
-
-std::vector<std::string> list_read_formats();
-
-std::vector<std::string> list_write_formats();
-
-////////////////////////////////////////////////////////////////////////////////
-// Utilities for sox_io / sox_effects implementations
-////////////////////////////////////////////////////////////////////////////////
-
-extern const std::unordered_set<std::string> UNSUPPORTED_EFFECTS;
-
-/// helper class to automatically close sox_format_t*
-struct SoxFormat {
-  explicit SoxFormat(sox_format_t* fd) noexcept;
-  SoxFormat(const SoxFormat& other) = delete;
-  SoxFormat(SoxFormat&& other) = delete;
-  SoxFormat& operator=(const SoxFormat& other) = delete;
-  SoxFormat& operator=(SoxFormat&& other) = delete;
-  ~SoxFormat();
-  sox_format_t* operator->() const noexcept;
-  operator sox_format_t*() const noexcept;
-
-  void close();
-
- private:
-  sox_format_t* fd_;
-};
-
-///
-/// Verify that input file is found, has known encoding, and not empty
-void validate_input_file(const SoxFormat& sf, const std::string& path);
-
-///
-/// Verify that input Tensor is 2D, CPU and either uin8, int16, int32 or float32
-void validate_input_tensor(const torch::Tensor&);
-
-///
-/// Get target dtype for the given encoding and precision.
-caffe2::TypeMeta get_dtype(
-    const sox_encoding_t encoding,
-    const unsigned precision);
-
-///
-/// Convert sox_sample_t buffer to uint8/int16/int32/float32 Tensor
-/// NOTE: This function might modify the values in the input buffer to
-/// reduce the number of memory copy.
-/// @param buffer Pointer to buffer that contains audio data.
-/// @param num_samples The number of samples to read.
-/// @param num_channels The number of channels. Used to reshape the resulting
-/// Tensor.
-/// @param dtype Target dtype. Determines the output dtype and value range in
-/// conjunction with normalization.
-/// @param noramlize Perform normalization. Only effective when dtype is not
-/// kFloat32. When effective, the output tensor is kFloat32 type and value range
-/// is [-1.0, 1.0]
-/// @param channels_first When True, output Tensor has shape of [num_channels,
-/// num_frames].
-torch::Tensor convert_to_tensor(
-    sox_sample_t* buffer,
-    const int32_t num_samples,
-    const int32_t num_channels,
-    const caffe2::TypeMeta dtype,
-    const bool normalize,
-    const bool channels_first);
-
-/// Extract extension from file path
-const std::string get_filetype(const std::string& path);
-
-/// Get sox_signalinfo_t for passing a torch::Tensor object.
-sox_signalinfo_t get_signalinfo(
-    const torch::Tensor* waveform,
-    const int64_t sample_rate,
-    const std::string& filetype,
-    const bool channels_first);
-
-/// Get sox_encodinginfo_t for Tensor I/O
-sox_encodinginfo_t get_tensor_encodinginfo(const caffe2::TypeMeta dtype);
-
-/// Get sox_encodinginfo_t for saving to file/file object
-sox_encodinginfo_t get_encodinginfo_for_save(
-    const std::string& format,
-    const caffe2::TypeMeta& dtype,
-    const std::optional<double>& compression,
-    const std::optional<std::string>& encoding,
-    const std::optional<int64_t>& bits_per_sample);
-
-} // namespace torchaudio::sox
-#endif
diff --git a/src/torchaudio/__init__.py b/src/torchaudio/__init__.py
index e533cafe9d..db3ca5a293 100644
--- a/src/torchaudio/__init__.py
+++ b/src/torchaudio/__init__.py
@@ -27,7 +27,6 @@
     kaldi_io,
     models,
     pipelines,
-    sox_effects,
     transforms,
     utils,
 )
@@ -56,7 +55,6 @@
     "pipelines",
     "kaldi_io",
     "utils",
-    "sox_effects",
     "transforms",
     "list_audio_backends",
     "get_audio_backend",
diff --git a/src/torchaudio/_backend/sox.py b/src/torchaudio/_backend/sox.py
deleted file mode 100644
index f26ce83ca0..0000000000
--- a/src/torchaudio/_backend/sox.py
+++ /dev/null
@@ -1,91 +0,0 @@
-import os
-from typing import BinaryIO, Optional, Tuple, Union
-
-import torch
-import torchaudio
-
-from .backend import Backend
-from .common import AudioMetaData
-
-sox_ext = torchaudio._extension.lazy_import_sox_ext()
-
-
-class SoXBackend(Backend):
-    @staticmethod
-    def info(uri: Union[BinaryIO, str, os.PathLike], format: Optional[str], buffer_size: int = 4096) -> AudioMetaData:
-        if hasattr(uri, "read"):
-            raise ValueError(
-                "SoX backend does not support reading from file-like objects. ",
-                "Please use an alternative backend that does support reading from file-like objects, e.g. FFmpeg.",
-            )
-        else:
-            sinfo = sox_ext.get_info(uri, format)
-            if sinfo:
-                return AudioMetaData(*sinfo)
-            else:
-                raise RuntimeError(f"Failed to fetch metadata for {uri}.")
-
-    @staticmethod
-    def load(
-        uri: Union[BinaryIO, str, os.PathLike],
-        frame_offset: int = 0,
-        num_frames: int = -1,
-        normalize: bool = True,
-        channels_first: bool = True,
-        format: Optional[str] = None,
-        buffer_size: int = 4096,
-    ) -> Tuple[torch.Tensor, int]:
-        if hasattr(uri, "read"):
-            raise ValueError(
-                "SoX backend does not support loading from file-like objects. ",
-                "Please use an alternative backend that does support loading from file-like objects, e.g. FFmpeg.",
-            )
-        else:
-            ret = sox_ext.load_audio_file(str(uri), frame_offset, num_frames, normalize, channels_first, format)
-            if not ret:
-                raise RuntimeError(f"Failed to load audio from {uri}.")
-            return ret
-
-    @staticmethod
-    def save(
-        uri: Union[BinaryIO, str, os.PathLike],
-        src: torch.Tensor,
-        sample_rate: int,
-        channels_first: bool = True,
-        format: Optional[str] = None,
-        encoding: Optional[str] = None,
-        bits_per_sample: Optional[int] = None,
-        buffer_size: int = 4096,
-        compression: Optional[Union[torchaudio.io.CodecConfig, float, int]] = None,
-    ) -> None:
-        if not isinstance(compression, (float, int, type(None))):
-            raise ValueError(
-                "SoX backend expects non-`None` value for argument `compression` to be of ",
-                f"type `float` or `int`, but received value of type {type(compression)}",
-            )
-        if hasattr(uri, "write"):
-            raise ValueError(
-                "SoX backend does not support writing to file-like objects. ",
-                "Please use an alternative backend that does support writing to file-like objects, e.g. FFmpeg.",
-            )
-        else:
-            sox_ext.save_audio_file(
-                str(uri),
-                src,
-                sample_rate,
-                channels_first,
-                compression,
-                format,
-                encoding,
-                bits_per_sample,
-            )
-
-    @staticmethod
-    def can_decode(uri: Union[BinaryIO, str, os.PathLike], format: Optional[str]) -> bool:
-        # i.e. not a file-like object.
-        return not hasattr(uri, "read")
-
-    @staticmethod
-    def can_encode(uri: Union[BinaryIO, str, os.PathLike], format: Optional[str]) -> bool:
-        # i.e. not a file-like object.
-        return not hasattr(uri, "write")
diff --git a/src/torchaudio/_backend/utils.py b/src/torchaudio/_backend/utils.py
index eb7c51f0cb..4cc26ab7ae 100644
--- a/src/torchaudio/_backend/utils.py
+++ b/src/torchaudio/_backend/utils.py
@@ -5,7 +5,6 @@
 
 import torch
 
-from torchaudio._extension import lazy_import_sox_ext
 from torchaudio.io import CodecConfig
 from torio._extension import lazy_import_ffmpeg_ext
 
@@ -15,7 +14,6 @@
 from .common import AudioMetaData
 from .ffmpeg import FFmpegBackend
 from .soundfile import SoundfileBackend
-from .sox import SoXBackend
 
 
 @lru_cache(None)
@@ -23,8 +21,6 @@ def get_available_backends() -> Dict[str, Type[Backend]]:
     backend_specs: Dict[str, Type[Backend]] = {}
     if lazy_import_ffmpeg_ext().is_available():
         backend_specs["ffmpeg"] = FFmpegBackend
-    if lazy_import_sox_ext().is_available():
-        backend_specs["sox"] = SoXBackend
     if soundfile_backend._IS_SOUNDFILE_AVAILABLE:
         backend_specs["soundfile"] = SoundfileBackend
     return backend_specs
@@ -86,7 +82,7 @@ def info(
             backend (str or None, optional):
                 I/O backend to use.
                 If ``None``, function selects backend given input and available backends.
-                Otherwise, must be one of [``"ffmpeg"``, ``"sox"``, ``"soundfile"``],
+                Otherwise, must be one of [``"ffmpeg"``, ``"soundfile"``],
                 with the corresponding backend available.
                 (Default: ``None``)
 
diff --git a/src/torchaudio/_extension/__init__.py b/src/torchaudio/_extension/__init__.py
index 5c2ff55583..b7e19fa38c 100644
--- a/src/torchaudio/_extension/__init__.py
+++ b/src/torchaudio/_extension/__init__.py
@@ -4,7 +4,7 @@
 
 from torchaudio._internal.module_utils import fail_with_message, is_module_available, no_op
 
-from .utils import _check_cuda_version, _init_dll_path, _init_sox, _LazyImporter, _load_lib
+from .utils import _check_cuda_version, _init_dll_path, _LazyImporter, _load_lib
 
 _LG = logging.getLogger(__name__)
 
@@ -17,7 +17,6 @@
     "_check_cuda_version",
     "_IS_TORCHAUDIO_EXT_AVAILABLE",
     "_IS_RIR_AVAILABLE",
-    "lazy_import_sox_ext",
 ]
 
 
@@ -44,18 +43,6 @@
     _IS_ALIGN_AVAILABLE = torchaudio.lib._torchaudio.is_align_available()
 
 
-_SOX_EXT = None
-
-
-def lazy_import_sox_ext():
-    """Load SoX integration based on availability in lazy manner"""
-
-    global _SOX_EXT
-    if _SOX_EXT is None:
-        _SOX_EXT = _LazyImporter("_torchaudio_sox", _init_sox)
-    return _SOX_EXT
-
-
 fail_if_no_rir = (
     no_op
     if _IS_RIR_AVAILABLE
diff --git a/src/torchaudio/_extension/utils.py b/src/torchaudio/_extension/utils.py
index c5660a1e22..8820c68e47 100644
--- a/src/torchaudio/_extension/utils.py
+++ b/src/torchaudio/_extension/utils.py
@@ -60,52 +60,6 @@ def _load_lib(lib: str) -> bool:
     torch.ops.load_library(path)
     return True
 
-
-def _import_sox_ext():
-    if os.name == "nt":
-        raise RuntimeError("sox extension is not supported on Windows")
-    if not eval_env("TORCHAUDIO_USE_SOX", True):
-        raise RuntimeError("sox extension is disabled. (TORCHAUDIO_USE_SOX=0)")
-
-    ext = "torchaudio.lib._torchaudio_sox"
-
-    if not importlib.util.find_spec(ext):
-        raise RuntimeError(
-            # fmt: off
-            "TorchAudio is not built with sox extension. "
-            "Please build TorchAudio with libsox support. (BUILD_SOX=1)"
-            # fmt: on
-        )
-
-    _load_lib("libtorchaudio_sox")
-    return importlib.import_module(ext)
-
-
-def _init_sox():
-    ext = _import_sox_ext()
-    ext.set_verbosity(0)
-
-    import atexit
-
-    torch.ops.torchaudio_sox.initialize_sox_effects()
-    atexit.register(torch.ops.torchaudio_sox.shutdown_sox_effects)
-
-    # Bundle functions registered with TORCH_LIBRARY into extension
-    # so that they can also be accessed in the same (lazy) manner
-    # from the extension.
-    keys = [
-        "get_info",
-        "load_audio_file",
-        "save_audio_file",
-        "apply_effects_tensor",
-        "apply_effects_file",
-    ]
-    for key in keys:
-        setattr(ext, key, getattr(torch.ops.torchaudio_sox, key))
-
-    return ext
-
-
 class _LazyImporter(types.ModuleType):
     """Lazily import module/extension."""
 
diff --git a/src/torchaudio/backend/__init__.py b/src/torchaudio/backend/__init__.py
index 84df7e7d69..dc3f6a3668 100644
--- a/src/torchaudio/backend/__init__.py
+++ b/src/torchaudio/backend/__init__.py
@@ -3,6 +3,6 @@
 # New things should be added to `torchaudio._backend`.
 # Only things related to backward compatibility should be placed here.
 
-from . import common, no_backend, soundfile_backend, sox_io_backend  # noqa
+from . import common, no_backend, soundfile_backend  # noqa
 
 __all__ = []
diff --git a/src/torchaudio/backend/sox_io_backend.py b/src/torchaudio/backend/sox_io_backend.py
deleted file mode 100644
index 7e83b8fbf4..0000000000
--- a/src/torchaudio/backend/sox_io_backend.py
+++ /dev/null
@@ -1,14 +0,0 @@
-def __getattr__(name: str):
-    import warnings
-
-    warnings.warn(
-        "Torchaudio's I/O functions now support per-call backend dispatch. "
-        "Importing backend implementation directly is no longer guaranteed to work. "
-        "Please use `backend` keyword with load/save/info function, instead of "
-        "calling the underlying implementation directly.",
-        stacklevel=2,
-    )
-
-    from . import _sox_io_backend
-
-    return getattr(_sox_io_backend, name)
diff --git a/src/torchaudio/functional/__init__.py b/src/torchaudio/functional/__init__.py
index 1c3b86b5da..1227b932c8 100644
--- a/src/torchaudio/functional/__init__.py
+++ b/src/torchaudio/functional/__init__.py
@@ -32,7 +32,6 @@
     add_noise,
     amplitude_to_DB,
     apply_beamforming,
-    apply_codec,
     compute_deltas,
     convolve,
     create_dct,
@@ -111,7 +110,6 @@
     "riaa_biquad",
     "treble_biquad",
     "vad",
-    "apply_codec",
     "resample",
     "edit_distance",
     "pitch_shift",
diff --git a/src/torchaudio/functional/functional.py b/src/torchaudio/functional/functional.py
index 810d1f51fc..d1b2f630b2 100644
--- a/src/torchaudio/functional/functional.py
+++ b/src/torchaudio/functional/functional.py
@@ -34,7 +34,6 @@
     "mask_along_axis_iid",
     "sliding_window_cmn",
     "spectral_centroid",
-    "apply_codec",
     "resample",
     "edit_distance",
     "loudness",
@@ -1295,52 +1294,6 @@ def spectral_centroid(
     freq_dim = -2
     return (freqs * specgram).sum(dim=freq_dim) / specgram.sum(dim=freq_dim)
 
-
-@deprecated("Please migrate to :py:class:`torchaudio.io.AudioEffector`.", remove=False)
-def apply_codec(
-    waveform: Tensor,
-    sample_rate: int,
-    format: str,
-    channels_first: bool = True,
-    compression: Optional[float] = None,
-    encoding: Optional[str] = None,
-    bits_per_sample: Optional[int] = None,
-) -> Tensor:
-    r"""
-    Apply codecs as a form of augmentation.
-
-    .. devices:: CPU
-
-    Args:
-        waveform (Tensor): Audio data. Must be 2 dimensional. See also ```channels_first```.
-        sample_rate (int): Sample rate of the audio waveform.
-        format (str): File format.
-        channels_first (bool, optional):
-            When True, both the input and output Tensor have dimension `(channel, time)`.
-            Otherwise, they have dimension `(time, channel)`.
-        compression (float or None, optional): Used for formats other than WAV.
-            For more details see :py:func:`torchaudio.backend.sox_io_backend.save`.
-        encoding (str or None, optional): Changes the encoding for the supported formats.
-            For more details see :py:func:`torchaudio.backend.sox_io_backend.save`.
-        bits_per_sample (int or None, optional): Changes the bit depth for the supported formats.
-            For more details see :py:func:`torchaudio.backend.sox_io_backend.save`.
-
-    Returns:
-        Tensor: Resulting Tensor.
-        If ``channels_first=True``, it has `(channel, time)` else `(time, channel)`.
-    """
-    from torchaudio.backend import _sox_io_backend
-
-    with tempfile.NamedTemporaryFile() as f:
-        torchaudio.backend._sox_io_backend.save(
-            f.name, waveform, sample_rate, channels_first, compression, format, encoding, bits_per_sample
-        )
-        augmented, sr = _sox_io_backend.load(f.name, channels_first=channels_first, format=format)
-    if sr != sample_rate:
-        augmented = resample(augmented, sr, sample_rate)
-    return augmented
-
-
 _CPU = torch.device("cpu")
 
 
diff --git a/src/torchaudio/sox_effects/__init__.py b/src/torchaudio/sox_effects/__init__.py
deleted file mode 100644
index 93c63cae1d..0000000000
--- a/src/torchaudio/sox_effects/__init__.py
+++ /dev/null
@@ -1,10 +0,0 @@
-from .sox_effects import apply_effects_file, apply_effects_tensor, effect_names, init_sox_effects, shutdown_sox_effects
-
-
-__all__ = [
-    "init_sox_effects",
-    "shutdown_sox_effects",
-    "effect_names",
-    "apply_effects_tensor",
-    "apply_effects_file",
-]
diff --git a/src/torchaudio/sox_effects/sox_effects.py b/src/torchaudio/sox_effects/sox_effects.py
deleted file mode 100644
index 256c461edc..0000000000
--- a/src/torchaudio/sox_effects/sox_effects.py
+++ /dev/null
@@ -1,275 +0,0 @@
-import os
-from typing import List, Optional, Tuple
-
-import torch
-import torchaudio
-from torchaudio._internal.module_utils import deprecated, dropping_support
-from torchaudio.utils.sox_utils import list_effects
-
-
-sox_ext = torchaudio._extension.lazy_import_sox_ext()
-
-
-@deprecated("Please remove the call. This function is called automatically.")
-def init_sox_effects():
-    """Initialize resources required to use sox effects.
-
-    Note:
-        You do not need to call this function manually. It is called automatically.
-
-    Once initialized, you do not need to call this function again across the multiple uses of
-    sox effects though it is safe to do so as long as :func:`shutdown_sox_effects` is not called yet.
-    Once :func:`shutdown_sox_effects` is called, you can no longer use SoX effects and initializing
-    again will result in error.
-    """
-    pass
-
-
-@deprecated("Please remove the call. This function is called automatically.")
-def shutdown_sox_effects():
-    """Clean up resources required to use sox effects.
-
-    Note:
-        You do not need to call this function manually. It is called automatically.
-
-    It is safe to call this function multiple times.
-    Once :py:func:`shutdown_sox_effects` is called, you can no longer use SoX effects and
-    initializing again will result in error.
-    """
-    pass
-
-
-@dropping_support
-def effect_names() -> List[str]:
-    """Gets list of valid sox effect names
-
-    Returns:
-        List[str]: list of available effect names.
-
-    Example
-        >>> torchaudio.sox_effects.effect_names()
-        ['allpass', 'band', 'bandpass', ... ]
-    """
-    return list(list_effects().keys())
-
-
-@dropping_support
-def apply_effects_tensor(
-    tensor: torch.Tensor,
-    sample_rate: int,
-    effects: List[List[str]],
-    channels_first: bool = True,
-) -> Tuple[torch.Tensor, int]:
-    """Apply sox effects to given Tensor
-
-    .. devices:: CPU
-
-    .. properties:: TorchScript
-
-    Note:
-        This function only works on CPU Tensors.
-        This function works in the way very similar to ``sox`` command, however there are slight
-        differences. For example, ``sox`` command adds certain effects automatically (such as
-        ``rate`` effect after ``speed`` and ``pitch`` and other effects), but this function does
-        only applies the given effects. (Therefore, to actually apply ``speed`` effect, you also
-        need to give ``rate`` effect with desired sampling rate.).
-
-    Args:
-        tensor (torch.Tensor): Input 2D CPU Tensor.
-        sample_rate (int): Sample rate
-        effects (List[List[str]]): List of effects.
-        channels_first (bool, optional): Indicates if the input Tensor's dimension is
-            `[channels, time]` or `[time, channels]`
-
-    Returns:
-        (Tensor, int): Resulting Tensor and sample rate.
-        The resulting Tensor has the same ``dtype`` as the input Tensor, and
-        the same channels order. The shape of the Tensor can be different based on the
-        effects applied. Sample rate can also be different based on the effects applied.
-
-    Example - Basic usage
-        >>>
-        >>> # Defines the effects to apply
-        >>> effects = [
-        ...     ['gain', '-n'],  # normalises to 0dB
-        ...     ['pitch', '5'],  # 5 cent pitch shift
-        ...     ['rate', '8000'],  # resample to 8000 Hz
-        ... ]
-        >>>
-        >>> # Generate pseudo wave:
-        >>> # normalized, channels first, 2ch, sampling rate 16000, 1 second
-        >>> sample_rate = 16000
-        >>> waveform = 2 * torch.rand([2, sample_rate * 1]) - 1
-        >>> waveform.shape
-        torch.Size([2, 16000])
-        >>> waveform
-        tensor([[ 0.3138,  0.7620, -0.9019,  ..., -0.7495, -0.4935,  0.5442],
-                [-0.0832,  0.0061,  0.8233,  ..., -0.5176, -0.9140, -0.2434]])
-        >>>
-        >>> # Apply effects
-        >>> waveform, sample_rate = apply_effects_tensor(
-        ...     wave_form, sample_rate, effects, channels_first=True)
-        >>>
-        >>> # Check the result
-        >>> # The new waveform is sampling rate 8000, 1 second.
-        >>> # normalization and channel order are preserved
-        >>> waveform.shape
-        torch.Size([2, 8000])
-        >>> waveform
-        tensor([[ 0.5054, -0.5518, -0.4800,  ..., -0.0076,  0.0096, -0.0110],
-                [ 0.1331,  0.0436, -0.3783,  ..., -0.0035,  0.0012,  0.0008]])
-        >>> sample_rate
-        8000
-
-    Example - Torchscript-able transform
-        >>>
-        >>> # Use `apply_effects_tensor` in `torch.nn.Module` and dump it to file,
-        >>> # then run sox effect via Torchscript runtime.
-        >>>
-        >>> class SoxEffectTransform(torch.nn.Module):
-        ...     effects: List[List[str]]
-        ...
-        ...     def __init__(self, effects: List[List[str]]):
-        ...         super().__init__()
-        ...         self.effects = effects
-        ...
-        ...     def forward(self, tensor: torch.Tensor, sample_rate: int):
-        ...         return sox_effects.apply_effects_tensor(
-        ...             tensor, sample_rate, self.effects)
-        ...
-        ...
-        >>> # Create transform object
-        >>> effects = [
-        ...     ["lowpass", "-1", "300"],  # apply single-pole lowpass filter
-        ...     ["rate", "8000"],  # change sample rate to 8000
-        ... ]
-        >>> transform = SoxEffectTensorTransform(effects, input_sample_rate)
-        >>>
-        >>> # Dump it to file and load
-        >>> path = 'sox_effect.zip'
-        >>> torch.jit.script(trans).save(path)
-        >>> transform = torch.jit.load(path)
-        >>>
-        >>>> # Run transform
-        >>> waveform, input_sample_rate = torchaudio.load("input.wav")
-        >>> waveform, sample_rate = transform(waveform, input_sample_rate)
-        >>> assert sample_rate == 8000
-    """
-    return sox_ext.apply_effects_tensor(tensor, sample_rate, effects, channels_first)
-
-
-@dropping_support
-def apply_effects_file(
-    path: str,
-    effects: List[List[str]],
-    normalize: bool = True,
-    channels_first: bool = True,
-    format: Optional[str] = None,
-) -> Tuple[torch.Tensor, int]:
-    """Apply sox effects to the audio file and load the resulting data as Tensor
-
-    .. devices:: CPU
-
-    .. properties:: TorchScript
-
-    Note:
-        This function works in the way very similar to ``sox`` command, however there are slight
-        differences. For example, ``sox`` commnad adds certain effects automatically (such as
-        ``rate`` effect after ``speed``, ``pitch`` etc), but this function only applies the given
-        effects. Therefore, to actually apply ``speed`` effect, you also need to give ``rate``
-        effect with desired sampling rate, because internally, ``speed`` effects only alter sampling
-        rate and leave samples untouched.
-
-    Args:
-        path (path-like object):
-            Source of audio data.
-        effects (List[List[str]]): List of effects.
-        normalize (bool, optional):
-            When ``True``, this function converts the native sample type to ``float32``.
-            Default: ``True``.
-
-            If input file is integer WAV, giving ``False`` will change the resulting Tensor type to
-            integer type.
-            This argument has no effect for formats other than integer WAV type.
-
-        channels_first (bool, optional): When True, the returned Tensor has dimension `[channel, time]`.
-            Otherwise, the returned Tensor's dimension is `[time, channel]`.
-        format (str or None, optional):
-            Override the format detection with the given format.
-            Providing the argument might help when libsox can not infer the format
-            from header or extension,
-
-    Returns:
-        (Tensor, int): Resulting Tensor and sample rate.
-        If ``normalize=True``, the resulting Tensor is always ``float32`` type.
-        If ``normalize=False`` and the input audio file is of integer WAV file, then the
-        resulting Tensor has corresponding integer type. (Note 24 bit integer type is not supported)
-        If ``channels_first=True``, the resulting Tensor has dimension `[channel, time]`,
-        otherwise `[time, channel]`.
-
-    Example - Basic usage
-        >>>
-        >>> # Defines the effects to apply
-        >>> effects = [
-        ...     ['gain', '-n'],  # normalises to 0dB
-        ...     ['pitch', '5'],  # 5 cent pitch shift
-        ...     ['rate', '8000'],  # resample to 8000 Hz
-        ... ]
-        >>>
-        >>> # Apply effects and load data with channels_first=True
-        >>> waveform, sample_rate = apply_effects_file("data.wav", effects, channels_first=True)
-        >>>
-        >>> # Check the result
-        >>> waveform.shape
-        torch.Size([2, 8000])
-        >>> waveform
-        tensor([[ 5.1151e-03,  1.8073e-02,  2.2188e-02,  ...,  1.0431e-07,
-                 -1.4761e-07,  1.8114e-07],
-                [-2.6924e-03,  2.1860e-03,  1.0650e-02,  ...,  6.4122e-07,
-                 -5.6159e-07,  4.8103e-07]])
-        >>> sample_rate
-        8000
-
-    Example - Apply random speed perturbation to dataset
-        >>>
-        >>> # Load data from file, apply random speed perturbation
-        >>> class RandomPerturbationFile(torch.utils.data.Dataset):
-        ...     \"\"\"Given flist, apply random speed perturbation
-        ...
-        ...     Suppose all the input files are at least one second long.
-        ...     \"\"\"
-        ...     def __init__(self, flist: List[str], sample_rate: int):
-        ...         super().__init__()
-        ...         self.flist = flist
-        ...         self.sample_rate = sample_rate
-        ...
-        ...     def __getitem__(self, index):
-        ...         speed = 0.5 + 1.5 * random.randn()
-        ...         effects = [
-        ...             ['gain', '-n', '-10'],  # apply 10 db attenuation
-        ...             ['remix', '-'],  # merge all the channels
-        ...             ['speed', f'{speed:.5f}'],  # duration is now 0.5 ~ 2.0 seconds.
-        ...             ['rate', f'{self.sample_rate}'],
-        ...             ['pad', '0', '1.5'],  # add 1.5 seconds silence at the end
-        ...             ['trim', '0', '2'],  # get the first 2 seconds
-        ...         ]
-        ...         waveform, _ = torchaudio.sox_effects.apply_effects_file(
-        ...             self.flist[index], effects)
-        ...         return waveform
-        ...
-        ...     def __len__(self):
-        ...         return len(self.flist)
-        ...
-        >>> dataset = RandomPerturbationFile(file_list, sample_rate=8000)
-        >>> loader = torch.utils.data.DataLoader(dataset, batch_size=32)
-        >>> for batch in loader:
-        >>>     pass
-    """
-    if not torch.jit.is_scripting():
-        if hasattr(path, "read"):
-            raise RuntimeError(
-                "apply_effects_file function does not support file-like object. "
-                "Please use torchaudio.io.AudioEffector."
-            )
-        path = os.fspath(path)
-    return sox_ext.apply_effects_file(path, effects, normalize, channels_first, format)
diff --git a/src/torchaudio/utils/__init__.py b/src/torchaudio/utils/__init__.py
index 9d4dd2dd72..be1f0bad21 100644
--- a/src/torchaudio/utils/__init__.py
+++ b/src/torchaudio/utils/__init__.py
@@ -1,10 +1,8 @@
 from torio.utils import ffmpeg_utils
 
-from . import sox_utils
 from .download import _download_asset
 
 
 __all__ = [
-    "sox_utils",
     "ffmpeg_utils",
 ]
diff --git a/src/torchaudio/utils/sox_utils.py b/src/torchaudio/utils/sox_utils.py
deleted file mode 100644
index 8cc68361d5..0000000000
--- a/src/torchaudio/utils/sox_utils.py
+++ /dev/null
@@ -1,118 +0,0 @@
-"""Module to change the configuration of libsox, which is used by I/O functions like
-:py:mod:`~torchaudio.backend.sox_io_backend` and :py:mod:`~torchaudio.sox_effects`.
-
-.. warning::
-    Starting with version 2.8, we are refactoring TorchAudio to transition it
-    into a maintenance phase. As a result:
-
-    - Some APIs are deprecated in 2.8 and will be removed in 2.9.
-    - The decoding and encoding capabilities of PyTorch for both audio and video
-      are being consolidated into TorchCodec.
-
-    Please see https://github.com/pytorch/audio/issues/3902 for more information.
-"""
-
-from typing import Dict, List
-
-import torchaudio
-
-sox_ext = torchaudio._extension.lazy_import_sox_ext()
-
-from torchaudio._internal.module_utils import dropping_support
-
-@dropping_support
-def set_seed(seed: int):
-    """Set libsox's PRNG
-
-    Args:
-        seed (int): seed value. valid range is int32.
-
-    See Also:
-        http://sox.sourceforge.net/sox.html
-    """
-    sox_ext.set_seed(seed)
-
-
-@dropping_support
-def set_verbosity(verbosity: int):
-    """Set libsox's verbosity
-
-    Args:
-        verbosity (int): Set verbosity level of libsox.
-
-            * ``1`` failure messages
-            * ``2`` warnings
-            * ``3`` details of processing
-            * ``4``-``6`` increasing levels of debug messages
-
-    See Also:
-        http://sox.sourceforge.net/sox.html
-    """
-    sox_ext.set_verbosity(verbosity)
-
-
-@dropping_support
-def set_buffer_size(buffer_size: int):
-    """Set buffer size for sox effect chain
-
-    Args:
-        buffer_size (int): Set the size in bytes of the buffers used for processing audio.
-
-    See Also:
-        http://sox.sourceforge.net/sox.html
-    """
-    sox_ext.set_buffer_size(buffer_size)
-
-
-@dropping_support
-def set_use_threads(use_threads: bool):
-    """Set multithread option for sox effect chain
-
-    Args:
-        use_threads (bool): When ``True``, enables ``libsox``'s parallel effects channels processing.
-            To use mutlithread, the underlying ``libsox`` has to be compiled with OpenMP support.
-
-    See Also:
-        http://sox.sourceforge.net/sox.html
-    """
-    sox_ext.set_use_threads(use_threads)
-
-
-@dropping_support
-def list_effects() -> Dict[str, str]:
-    """List the available sox effect names
-
-    Returns:
-        Dict[str, str]: Mapping from ``effect name`` to ``usage``
-    """
-    return dict(sox_ext.list_effects())
-
-
-@dropping_support
-def list_read_formats() -> List[str]:
-    """List the supported audio formats for read
-
-    Returns:
-        List[str]: List of supported audio formats
-    """
-    return sox_ext.list_read_formats()
-
-
-@dropping_support
-def list_write_formats() -> List[str]:
-    """List the supported audio formats for write
-
-    Returns:
-        List[str]: List of supported audio formats
-    """
-    return sox_ext.list_write_formats()
-
-
-@dropping_support
-def get_buffer_size() -> int:
-    """Get buffer size for sox effect chain
-
-    Returns:
-        int: size in bytes of buffers used for processing audio.
-    """
-    return sox_ext.get_buffer_size()
diff --git a/test/torchaudio_unittest/common_utils/__init__.py b/test/torchaudio_unittest/common_utils/__init__.py
index ff58db8f6c..509d5208df 100644
--- a/test/torchaudio_unittest/common_utils/__init__.py
+++ b/test/torchaudio_unittest/common_utils/__init__.py
@@ -15,9 +15,6 @@
     skipIfNoModule,
     skipIfNoQengine,
     skipIfNoRIR,
-    skipIfNoSox,
-    skipIfNoSoxDecoder,
-    skipIfNoSoxEncoder,
     skipIfPy310,
     skipIfRocm,
     TempDirMixin,
@@ -63,9 +60,6 @@ def inject_request(self, request):
     "skipIfNoMacOS",
     "skipIfNoModule",
     "skipIfNoRIR",
-    "skipIfNoSox",
-    "skipIfNoSoxDecoder",
-    "skipIfNoSoxEncoder",
     "skipIfRocm",
     "skipIfNoQengine",
     "skipIfNoFFmpeg",
diff --git a/test/torchaudio_unittest/common_utils/case_utils.py b/test/torchaudio_unittest/common_utils/case_utils.py
index ae8ab05cee..7ce9c89dd3 100644
--- a/test/torchaudio_unittest/common_utils/case_utils.py
+++ b/test/torchaudio_unittest/common_utils/case_utils.py
@@ -109,7 +109,6 @@ class TorchaudioTestCase(TestBaseMixin, PytorchTestCase):
 
 
 _IS_FFMPEG_AVAILABLE = torio._extension.lazy_import_ffmpeg_ext().is_available()
-_IS_SOX_AVAILABLE = torchaudio._extension.lazy_import_sox_ext().is_available()
 _IS_CTC_DECODER_AVAILABLE = None
 _IS_CUDA_CTC_DECODER_AVAILABLE = None
 
@@ -206,28 +205,6 @@ def skipIfNoModule(module, display_name=None):
     reason="CUDA does not have enough memory.",
     key="CUDA_SMALL_MEMORY",
 )
-skipIfNoSox = _skipIf(
-    not _IS_SOX_AVAILABLE,
-    reason="Sox features are not available.",
-    key="NO_SOX",
-)
-
-
-def skipIfNoSoxDecoder(ext):
-    return _skipIf(
-        not _IS_SOX_AVAILABLE or ext not in torchaudio.utils.sox_utils.list_read_formats(),
-        f'sox does not handle "{ext}" for read.',
-        key="NO_SOX_DECODER",
-    )
-
-
-def skipIfNoSoxEncoder(ext):
-    return _skipIf(
-        not _IS_SOX_AVAILABLE or ext not in torchaudio.utils.sox_utils.list_write_formats(),
-        f'sox does not handle "{ext}" for write.',
-        key="NO_SOX_ENCODER",
-    )
-
 
 skipIfNoRIR = _skipIf(
     not torchaudio._extension._IS_RIR_AVAILABLE,
diff --git a/test/torchaudio_unittest/deprecation_test.py b/test/torchaudio_unittest/deprecation_test.py
index 04493c8dc3..c44d1907f9 100644
--- a/test/torchaudio_unittest/deprecation_test.py
+++ b/test/torchaudio_unittest/deprecation_test.py
@@ -3,7 +3,7 @@
 import torch
 
 from torchaudio._internal.module_utils import UNSUPPORTED
-from torchaudio.sox_effects import apply_effects_tensor
+from torchaudio.prototype.functional import exp_sigmoid
 
 # Importing prototype modules is needed to trigger the registration of the
 # corresponding APIs in the UNSUPPORTED register.
@@ -25,10 +25,8 @@ def test_deprecations(func):
 # deprecated for years.
 @pytest.mark.parametrize("scripted", (True, False))
 def test_torchscript_fails(scripted):
-    f = apply_effects_tensor
+    f = exp_sigmoid
     if scripted:
         pytest.xfail("Deprecation decorator breaks torchscript")
         f = torch.jit.script(f)
-    _, out_sample_rate = f(torch.rand(2, 1000), sample_rate=16_000, effects=[["rate", "8000"]])
-    assert out_sample_rate == 8000
-
+    f(torch.rand(2, 1000))
diff --git a/test/torchaudio_unittest/functional/functional_cpu_test.py b/test/torchaudio_unittest/functional/functional_cpu_test.py
index 7b81cc92ac..9a6ad0a63d 100644
--- a/test/torchaudio_unittest/functional/functional_cpu_test.py
+++ b/test/torchaudio_unittest/functional/functional_cpu_test.py
@@ -4,7 +4,7 @@
 import torchaudio.functional as F
 from parameterized import parameterized
 import unittest
-from torchaudio_unittest.common_utils import PytorchTestCase, skipIfNoSox, TorchaudioTestCase
+from torchaudio_unittest.common_utils import PytorchTestCase
 
 from .functional_impl import Functional, FunctionalCPUOnly
 
@@ -21,38 +21,3 @@ def test_lfilter_9th_order_filter_stability(self):
 class TestFunctionalFloat64(Functional, PytorchTestCase):
     dtype = torch.float64
     device = torch.device("cpu")
-
-
-@unittest.skip("deprecated")
-@skipIfNoSox
-class TestApplyCodec(TorchaudioTestCase):
-    def _smoke_test(self, format, compression, check_num_frames):
-        """
-        The purpose of this test suite is to verify that apply_codec functionalities do not exhibit
-        abnormal behaviors.
-        """
-        sample_rate = 8000
-        num_frames = 3 * sample_rate
-        num_channels = 2
-        waveform = torch.rand(num_channels, num_frames)
-
-        augmented = F.apply_codec(waveform, sample_rate, format, True, compression)
-        assert augmented.dtype == waveform.dtype
-        assert augmented.shape[0] == num_channels
-        if check_num_frames:
-            assert augmented.shape[1] == num_frames
-
-    def test_wave(self):
-        self._smoke_test("wav", compression=None, check_num_frames=True)
-
-    @parameterized.expand([(96,), (128,), (160,), (192,), (224,), (256,), (320,)])
-    def test_mp3(self, compression):
-        self._smoke_test("mp3", compression, check_num_frames=False)
-
-    @parameterized.expand([(0,), (1,), (2,), (3,), (4,), (5,), (6,), (7,), (8,)])
-    def test_flac(self, compression):
-        self._smoke_test("flac", compression, check_num_frames=False)
-
-    @parameterized.expand([(-1,), (0,), (1,), (2,), (3,), (3.6,), (5,), (10,)])
-    def test_vorbis(self, compression):
-        self._smoke_test("vorbis", compression, check_num_frames=False)
diff --git a/test/torchaudio_unittest/functional/sox_compatibility_test.py b/test/torchaudio_unittest/functional/sox_compatibility_test.py
index 9ec8383272..51abb899bb 100644
--- a/test/torchaudio_unittest/functional/sox_compatibility_test.py
+++ b/test/torchaudio_unittest/functional/sox_compatibility_test.py
@@ -6,16 +6,11 @@
     load_wav,
     save_wav,
     skipIfNoExec,
-    skipIfNoSox,
-    sox_utils,
     TempDirMixin,
     TorchaudioTestCase,
     RequestMixin
 )
 
-
-@skipIfNoSox
-@skipIfNoExec("sox")
 class TestFunctionalFiltering(TempDirMixin, TorchaudioTestCase, RequestMixin):
     def run_sox_effect(self, input_file, effect):
         output_file = self.get_temp_path("expected.wav")
diff --git a/test/torchaudio_unittest/transforms/sox_compatibility_test.py b/test/torchaudio_unittest/transforms/sox_compatibility_test.py
index 222bb463b3..3460b71bf4 100644
--- a/test/torchaudio_unittest/transforms/sox_compatibility_test.py
+++ b/test/torchaudio_unittest/transforms/sox_compatibility_test.py
@@ -9,7 +9,6 @@
     load_wav,
     save_wav,
     skipIfNoExec,
-    skipIfNoSox,
     sox_utils,
     TempDirMixin,
     TorchaudioTestCase,
@@ -17,8 +16,6 @@
 )
 
 
-@skipIfNoSox
-@skipIfNoExec("sox")
 class TestFunctionalFiltering(TempDirMixin, TorchaudioTestCase, RequestMixin):
     def run_sox_effect(self, input_file, effect):
         output_file = self.get_temp_path("expected.wav")
diff --git a/test/torchaudio_unittest/utils/sox_utils_test.py b/test/torchaudio_unittest/utils/sox_utils_test.py
deleted file mode 100644
index 8b88d966c3..0000000000
--- a/test/torchaudio_unittest/utils/sox_utils_test.py
+++ /dev/null
@@ -1,46 +0,0 @@
-from torchaudio.utils import sox_utils
-from torchaudio_unittest.common_utils import PytorchTestCase, skipIfNoSox
-
-
-@skipIfNoSox
-class TestSoxUtils(PytorchTestCase):
-    """Smoke tests for sox_util module"""
-
-    def test_set_seed(self):
-        """`set_seed` does not crush"""
-        sox_utils.set_seed(0)
-
-    def test_set_verbosity(self):
-        """`set_verbosity` does not crush"""
-        for val in range(6, 0, -1):
-            sox_utils.set_verbosity(val)
-
-    def test_set_buffer_size(self):
-        """`set_buffer_size` does not crush"""
-        sox_utils.set_buffer_size(131072)
-        # back to default
-        sox_utils.set_buffer_size(8192)
-
-    def test_set_use_threads(self):
-        """`set_use_threads` does not crush"""
-        sox_utils.set_use_threads(True)
-        # back to default
-        sox_utils.set_use_threads(False)
-
-    def test_list_effects(self):
-        """`list_effects` returns the list of available effects"""
-        effects = sox_utils.list_effects()
-        # We cannot infer what effects are available, so only check some of them.
-        assert "highpass" in effects
-        assert "phaser" in effects
-        assert "gain" in effects
-
-    def test_list_read_formats(self):
-        """`list_read_formats` returns the list of supported formats"""
-        formats = sox_utils.list_read_formats()
-        assert "wav" in formats
-
-    def test_list_write_formats(self):
-        """`list_write_formats` returns the list of supported formats"""
-        formats = sox_utils.list_write_formats()
-        assert "opus" not in formats
diff --git a/tools/setup_helpers/extension.py b/tools/setup_helpers/extension.py
index 58f5087854..b322541e36 100644
--- a/tools/setup_helpers/extension.py
+++ b/tools/setup_helpers/extension.py
@@ -51,13 +51,6 @@ def get_ext_modules():
         Extension(name="torchaudio.lib.libtorchaudio", sources=[]),
         Extension(name="torchaudio.lib._torchaudio", sources=[]),
     ]
-    if _BUILD_SOX:
-        modules.extend(
-            [
-                Extension(name="torchaudio.lib.libtorchaudio_sox", sources=[]),
-                Extension(name="torchaudio.lib._torchaudio_sox", sources=[]),
-            ]
-        )
     if _BUILD_CUDA_CTC_DECODER:
         modules.extend(
             [