diff --git a/backends/qualcomm/CMakeLists.txt b/backends/qualcomm/CMakeLists.txt
index 33f150413a3..0ef1ecdaf65 100644
--- a/backends/qualcomm/CMakeLists.txt
+++ b/backends/qualcomm/CMakeLists.txt
@@ -39,17 +39,6 @@ if(${ANDROID})
   find_library(android_log log)
 endif()
 
-set(qcir_schema_include_dir ${CMAKE_CURRENT_LIST_DIR}/aot/ir)
-set(qcir_schema_output ${qcir_schema_include_dir}/qcir_generated.h)
-add_custom_command(
-  OUTPUT qcir_schema_output
-  COMMAND flatc --cpp --cpp-std c++11 --scoped-enums -o
-          ${qcir_schema_include_dir} ${qcir_schema_include_dir}/qcir.fbs
-  DEPENDS flatc
-  COMMENT "Generating qualcomm ir schema headers"
-  VERBATIM
-)
-
 add_compile_options("-Wall" "-Werror" "-Wno-sign-compare")
 add_compile_definitions(C10_USING_CUSTOM_GENERATED_MACROS)
 
@@ -73,7 +62,6 @@ include_directories(
   ${_common_include_directories}
   ${QNN_SDK_ROOT}/include/QNN
   ${QNN_SDK_ROOT}/share/QNN/converter/jni
-  ${EXECUTORCH_SOURCE_DIR}/third-party/flatbuffers/include
   ${EXECUTORCH_SOURCE_DIR}/runtime/core/portable_type/c10
 )
 
@@ -112,8 +100,6 @@ include_directories(
 # declare targets
 #
 add_library(executorch_backend INTERFACE)
-add_library(qcir INTERFACE qcir_schema_output)
-add_library(qcir_utils STATIC)
 add_library(qnn_backend STATIC)
 add_library(qnn_backend_cache STATIC)
 add_library(qnn_context STATIC)
@@ -142,7 +128,6 @@ add_library(utils STATIC)
 #
 # declare dependency
 #
-target_link_libraries(qcir_utils PRIVATE qcir)
 target_link_libraries(wrappers PRIVATE qnn_executorch_logging)
 target_link_libraries(
   qnn_implementation PRIVATE qnn_function_interface qnn_executorch_logging
@@ -225,10 +210,6 @@ add_subdirectory(
   ${QNN_EXECUTORCH_ROOT_DIR}/aot/wrappers
   ${CMAKE_CURRENT_BINARY_DIR}/qnn_executorch/wrappers
 )
-add_subdirectory(
-  ${QNN_EXECUTORCH_ROOT_DIR}/aot/ir
-  ${CMAKE_CURRENT_BINARY_DIR}/qnn_executorch/ir
-)
 install(
   TARGETS qnn_executorch_backend
   EXPORT ExecuTorchTargets
diff --git a/backends/qualcomm/aot/ir/CMakeLists.txt b/backends/qualcomm/aot/ir/CMakeLists.txt
deleted file mode 100755
index 48cb07c5dd2..00000000000
--- a/backends/qualcomm/aot/ir/CMakeLists.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-# Copyright (c) Qualcomm Innovation Center, Inc.
-# All rights reserved
-#
-# This source code is licensed under the BSD-style license found in the
-# LICENSE file in the root directory of this source tree.
-
-# QCIR
-target_sources(
-  qcir_utils PRIVATE ${CMAKE_CURRENT_LIST_DIR}/qcir_utils.h
-                     ${CMAKE_CURRENT_LIST_DIR}/qcir_utils.cpp
-)
diff --git a/backends/qualcomm/aot/ir/TARGETS b/backends/qualcomm/aot/ir/TARGETS
deleted file mode 100644
index 0a42614a385..00000000000
--- a/backends/qualcomm/aot/ir/TARGETS
+++ /dev/null
@@ -1,5 +0,0 @@
-load(":targets.bzl", "define_common_targets")
-
-oncall("executorch")
-
-define_common_targets()
diff --git a/backends/qualcomm/aot/ir/qcir.fbs b/backends/qualcomm/aot/ir/qcir.fbs
deleted file mode 100755
index 82e56c405cc..00000000000
--- a/backends/qualcomm/aot/ir/qcir.fbs
+++ /dev/null
@@ -1,119 +0,0 @@
-//
-// Copyright (c) Qualcomm Innovation Center, Inc.
-// All rights reserved.
-//
-// This source code is licensed under the BSD-style license found in the
-// LICENSE file in the root directory of this source tree.
-//
-
-namespace qcir;
-
-enum TensorType : byte {
-    WRITE = 0,
-    READ,
-    READWRITE,
-    NATIVE,
-    STATIC,
-    OPTIONAL,
-    UNDEFINED,
-}
-
-enum DataType : byte {
-    INT8 = 0,
-    INT16,
-    INT32,
-    INT64,
-    UINT8,
-    UINT16,
-    UINT32,
-    UINT64,
-    FLOAT16,
-    FLOAT32,
-    FLOAT64,
-    SFIXED4,
-    SFIXED8,
-    SFIXED16,
-    SFIXED32,
-    UFIXED4,
-    UFIXED8,
-    UFIXED16,
-    UFIXED32,
-    BOOL,
-    STRING,
-    UNDEFINED,
-}
-
-enum QuantizeDef : byte {
-    IMPL_GENERATED = 0,
-    DEFINED,
-    UNDEFINED,
-}
-
-enum QuantizeType : byte {
-    SCALE_OFFSET = 0,
-    AXIS_SCALE_OFFSET,
-    BW_SCALE_OFFSET,
-    BW_AXIS_SCALE_OFFSET,
-    BLOCKWISE_EXPANSION,
-    UNDEFINED,
-}
-
-enum BlockScaleStorageType: byte {
-    BITWIDTH_SCALE_STORAGE_8 = 0,
-    BITWIDTH_SCALE_STORAGE_16,
-    UNDEFINED,
-}
-
-struct ScaleOffset {
-    scale: float;
-    offset: int;
-}
-
-table QuantizeParam {
-    def: QuantizeDef;
-    type: QuantizeType;
-    bitwidth: uint;
-    axis: int;
-    // used by bitwidth quantization
-    scales: [float];
-    offsets: [int];
-    // used by general quantization
-    data: [ScaleOffset];
-    // used by block quantization
-    num_blocks_per_axis: uint;
-    block_scale_storage_type: BlockScaleStorageType;
-    block_scale: [ubyte];
-}
-
-table Tensor {
-    name: string;
-    shape: [uint];
-    dynamic_dims: [ubyte];
-    type: TensorType;
-    dtype: DataType;
-    qparam: QuantizeParam;
-    size: uint;
-    offset: ulong;
-}
-
-table Operator {
-    name: string;
-    package_name: string;
-    type_name: string;
-    // keep only tensor indexes
-    inputs: [uint];
-    outputs: [uint];
-    params: [uint];
-}
-
-table Graph {
-    name: string;
-    nodes: [Operator];
-    tensors: [Tensor];
-}
-
-table Context {
-    graphs: [Graph];
-}
-
-root_type Context;
diff --git a/backends/qualcomm/aot/ir/qcir_utils.cpp b/backends/qualcomm/aot/ir/qcir_utils.cpp
deleted file mode 100755
index de9e349abe7..00000000000
--- a/backends/qualcomm/aot/ir/qcir_utils.cpp
+++ /dev/null
@@ -1,345 +0,0 @@
-/*
- * Copyright (c) Qualcomm Innovation Center, Inc.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree.
- */
-
-#include <executorch/backends/qualcomm/aot/ir/qcir_utils.h>
-#include <executorch/backends/qualcomm/aot/wrappers/TensorWrapper.h>
-
-#include <unordered_map>
-
-namespace executorch {
-namespace backends {
-namespace qnn {
-
-qcir::TensorType ToTensorType(Qnn_TensorType_t type) {
-  static const std::unordered_map<Qnn_TensorType_t, qcir::TensorType> type_map{
-      {QNN_TENSOR_TYPE_APP_WRITE, qcir::TensorType::WRITE},
-      {QNN_TENSOR_TYPE_APP_READ, qcir::TensorType::READ},
-      {QNN_TENSOR_TYPE_APP_READWRITE, qcir::TensorType::READWRITE},
-      {QNN_TENSOR_TYPE_NATIVE, qcir::TensorType::NATIVE},
-      {QNN_TENSOR_TYPE_STATIC, qcir::TensorType::STATIC},
-      {QNN_TENSOR_TYPE_NULL, qcir::TensorType::OPTIONAL},
-      {QNN_TENSOR_TYPE_UNDEFINED, qcir::TensorType::UNDEFINED},
-  };
-  return type_map.at(type);
-}
-
-Qnn_TensorType_t ToTensorType(qcir::TensorType type) {
-  static const std::unordered_map<qcir::TensorType, Qnn_TensorType_t> type_map{
-      {qcir::TensorType::WRITE, QNN_TENSOR_TYPE_APP_WRITE},
-      {qcir::TensorType::READ, QNN_TENSOR_TYPE_APP_READ},
-      {qcir::TensorType::READWRITE, QNN_TENSOR_TYPE_APP_READWRITE},
-      {qcir::TensorType::NATIVE, QNN_TENSOR_TYPE_NATIVE},
-      {qcir::TensorType::STATIC, QNN_TENSOR_TYPE_STATIC},
-      {qcir::TensorType::OPTIONAL, QNN_TENSOR_TYPE_NULL},
-      {qcir::TensorType::UNDEFINED, QNN_TENSOR_TYPE_UNDEFINED},
-  };
-  return type_map.at(type);
-}
-
-// TODO: enable commented type by QNN version control
-qcir::DataType ToDataType(Qnn_DataType_t type) {
-  static const std::unordered_map<Qnn_DataType_t, qcir::DataType> type_map{
-      {QNN_DATATYPE_INT_8, qcir::DataType::INT8},
-      {QNN_DATATYPE_INT_16, qcir::DataType::INT16},
-      {QNN_DATATYPE_INT_32, qcir::DataType::INT32},
-      {QNN_DATATYPE_INT_64, qcir::DataType::INT64},
-      {QNN_DATATYPE_UINT_8, qcir::DataType::UINT8},
-      {QNN_DATATYPE_UINT_16, qcir::DataType::UINT16},
-      {QNN_DATATYPE_UINT_32, qcir::DataType::UINT32},
-      {QNN_DATATYPE_UINT_64, qcir::DataType::UINT64},
-      {QNN_DATATYPE_FLOAT_16, qcir::DataType::FLOAT16},
-      {QNN_DATATYPE_FLOAT_32, qcir::DataType::FLOAT32},
-      // {QNN_DATATYPE_FLOAT_64, qcir::DataType::FLOAT64},
-      {QNN_DATATYPE_SFIXED_POINT_4, qcir::DataType::SFIXED4},
-      {QNN_DATATYPE_SFIXED_POINT_8, qcir::DataType::SFIXED8},
-      {QNN_DATATYPE_SFIXED_POINT_16, qcir::DataType::SFIXED16},
-      {QNN_DATATYPE_SFIXED_POINT_32, qcir::DataType::SFIXED32},
-      {QNN_DATATYPE_UFIXED_POINT_4, qcir::DataType::UFIXED4},
-      {QNN_DATATYPE_UFIXED_POINT_8, qcir::DataType::UFIXED8},
-      {QNN_DATATYPE_UFIXED_POINT_16, qcir::DataType::UFIXED16},
-      {QNN_DATATYPE_UFIXED_POINT_32, qcir::DataType::UFIXED32},
-      {QNN_DATATYPE_BOOL_8, qcir::DataType::BOOL},
-      // {QNN_DATATYPE_STRING, qcir::DataType::STRING},
-      {QNN_DATATYPE_UNDEFINED, qcir::DataType::UNDEFINED},
-  };
-  return type_map.at(type);
-}
-
-// TODO: enable commented type by QNN version control
-Qnn_DataType_t ToDataType(qcir::DataType type) {
-  static const std::unordered_map<qcir::DataType, Qnn_DataType_t> type_map{
-      {qcir::DataType::INT8, QNN_DATATYPE_INT_8},
-      {qcir::DataType::INT16, QNN_DATATYPE_INT_16},
-      {qcir::DataType::INT32, QNN_DATATYPE_INT_32},
-      {qcir::DataType::INT64, QNN_DATATYPE_INT_64},
-      {qcir::DataType::UINT8, QNN_DATATYPE_UINT_8},
-      {qcir::DataType::UINT16, QNN_DATATYPE_UINT_16},
-      {qcir::DataType::UINT32, QNN_DATATYPE_UINT_32},
-      {qcir::DataType::UINT64, QNN_DATATYPE_UINT_64},
-      {qcir::DataType::FLOAT16, QNN_DATATYPE_FLOAT_16},
-      {qcir::DataType::FLOAT32, QNN_DATATYPE_FLOAT_32},
-      // {qcir::DataType::FLOAT64, QNN_DATATYPE_FLOAT_64},
-      {qcir::DataType::SFIXED4, QNN_DATATYPE_SFIXED_POINT_4},
-      {qcir::DataType::SFIXED8, QNN_DATATYPE_SFIXED_POINT_8},
-      {qcir::DataType::SFIXED16, QNN_DATATYPE_SFIXED_POINT_16},
-      {qcir::DataType::SFIXED32, QNN_DATATYPE_SFIXED_POINT_32},
-      {qcir::DataType::UFIXED4, QNN_DATATYPE_UFIXED_POINT_4},
-      {qcir::DataType::UFIXED8, QNN_DATATYPE_UFIXED_POINT_8},
-      {qcir::DataType::UFIXED16, QNN_DATATYPE_UFIXED_POINT_16},
-      {qcir::DataType::UFIXED32, QNN_DATATYPE_UFIXED_POINT_32},
-      {qcir::DataType::BOOL, QNN_DATATYPE_BOOL_8},
-      // {qcir::DataType::STRING, QNN_DATATYPE_STRING},
-      {qcir::DataType::UNDEFINED, QNN_DATATYPE_UNDEFINED},
-  };
-  return type_map.at(type);
-}
-
-flatbuffers::Offset<qcir::QuantizeParam> ToQuantizeParam(
-    const Qnn_Tensor_t& tensor,
-    flatbuffers::FlatBufferBuilder* builder) {
-  static const std::unordered_map<Qnn_Definition_t, qcir::QuantizeDef> def_map{
-      {QNN_DEFINITION_IMPL_GENERATED, qcir::QuantizeDef::IMPL_GENERATED},
-      {QNN_DEFINITION_DEFINED, qcir::QuantizeDef::DEFINED},
-      {QNN_DEFINITION_UNDEFINED, qcir::QuantizeDef::UNDEFINED},
-  };
-  static const std::
-      unordered_map<Qnn_QuantizationEncoding_t, qcir::QuantizeType>
-          type_map{
-              {QNN_QUANTIZATION_ENCODING_SCALE_OFFSET,
-               qcir::QuantizeType::SCALE_OFFSET},
-              {QNN_QUANTIZATION_ENCODING_AXIS_SCALE_OFFSET,
-               qcir::QuantizeType::AXIS_SCALE_OFFSET},
-              {QNN_QUANTIZATION_ENCODING_BW_SCALE_OFFSET,
-               qcir::QuantizeType::BW_SCALE_OFFSET},
-              {QNN_QUANTIZATION_ENCODING_BW_AXIS_SCALE_OFFSET,
-               qcir::QuantizeType::BW_AXIS_SCALE_OFFSET},
-              {QNN_QUANTIZATION_ENCODING_BLOCKWISE_EXPANSION,
-               qcir::QuantizeType::BLOCKWISE_EXPANSION},
-              {QNN_QUANTIZATION_ENCODING_UNDEFINED,
-               qcir::QuantizeType::UNDEFINED},
-          };
-
-  int32_t axis = 0;
-  uint32_t bitwidth = 0, num_blocks_per_axis = 0;
-  auto param = QNN_TENSOR_VER_PTR(tensor)->quantizeParams;
-  auto quant_type = type_map.at(param.quantizationEncoding);
-  std::vector<qcir::ScaleOffset> data;
-  std::vector<uint8_t> block_scale;
-  std::vector<float> scales;
-  std::vector<int32_t> offsets;
-  qcir::BlockScaleStorageType block_scale_storage_type =
-      qcir::BlockScaleStorageType::BITWIDTH_SCALE_STORAGE_8;
-  switch (quant_type) {
-    case qcir::QuantizeType::SCALE_OFFSET: {
-      data.emplace_back(qcir::ScaleOffset(
-          param.scaleOffsetEncoding.scale, param.scaleOffsetEncoding.offset));
-    } break;
-    case qcir::QuantizeType::AXIS_SCALE_OFFSET: {
-      size_t len = param.axisScaleOffsetEncoding.numScaleOffsets;
-      axis = param.axisScaleOffsetEncoding.axis;
-      data.reserve(len);
-      for (uint i = 0; i < len; ++i) {
-        data.emplace_back(qcir::ScaleOffset(
-            param.axisScaleOffsetEncoding.scaleOffset[i].scale,
-            param.axisScaleOffsetEncoding.scaleOffset[i].offset));
-      }
-    } break;
-    case qcir::QuantizeType::BW_SCALE_OFFSET: {
-      bitwidth = param.bwScaleOffsetEncoding.bitwidth;
-      scales.push_back(param.bwScaleOffsetEncoding.scale);
-      offsets.push_back(param.bwScaleOffsetEncoding.offset);
-    } break;
-    case qcir::QuantizeType::BW_AXIS_SCALE_OFFSET: {
-      bitwidth = param.bwAxisScaleOffsetEncoding.bitwidth;
-      axis = param.bwAxisScaleOffsetEncoding.axis;
-      size_t len = param.bwAxisScaleOffsetEncoding.numElements;
-      scales.reserve(len);
-      offsets.reserve(len);
-      for (size_t i = 0; i < len; ++i) {
-        scales.push_back(param.bwAxisScaleOffsetEncoding.scales[i]);
-        offsets.push_back(param.bwAxisScaleOffsetEncoding.offsets[i]);
-      }
-    } break;
-    case qcir::QuantizeType::BLOCKWISE_EXPANSION: {
-      bitwidth = param.blockwiseExpansion->blockScaleBitwidth;
-      axis = param.blockwiseExpansion->axis;
-      uint num_channels = QNN_TENSOR_VER_PTR(tensor)->dimensions[axis];
-      for (uint i = 0; i < num_channels; ++i) {
-        data.emplace_back(qcir::ScaleOffset(
-            param.blockwiseExpansion->scaleOffsets[i].scale,
-            param.blockwiseExpansion->scaleOffsets[i].offset));
-      }
-      num_blocks_per_axis = param.blockwiseExpansion->numBlocksPerAxis;
-      uint multiplier = 1;
-      if (param.blockwiseExpansion->blockScaleStorageType ==
-          QNN_BLOCKWISE_EXPANSION_BITWIDTH_SCALE_STORAGE_16) {
-        multiplier = 2;
-        block_scale_storage_type =
-            qcir::BlockScaleStorageType::BITWIDTH_SCALE_STORAGE_16;
-      }
-      uint total_bytes = num_channels * num_blocks_per_axis * multiplier;
-      block_scale = std::vector<uint8_t>(
-          param.blockwiseExpansion->blocksScale8,
-          param.blockwiseExpansion->blocksScale8 + total_bytes);
-    } break;
-    default:
-      // encodings are not required if lowering with floating point precision
-      break;
-  }
-  return CreateQuantizeParamDirect(
-      *builder,
-      def_map.at(param.encodingDefinition),
-      quant_type,
-      bitwidth,
-      axis,
-      &scales,
-      &offsets,
-      &data,
-      num_blocks_per_axis,
-      block_scale_storage_type,
-      &block_scale);
-}
-
-Qnn_QuantizeParams_t ToQuantizeParam(const tensor_type& tensor) {
-  static const std::unordered_map<qcir::QuantizeDef, Qnn_Definition_t> def_map{
-      {qcir::QuantizeDef::IMPL_GENERATED, QNN_DEFINITION_IMPL_GENERATED},
-      {qcir::QuantizeDef::DEFINED, QNN_DEFINITION_DEFINED},
-      {qcir::QuantizeDef::UNDEFINED, QNN_DEFINITION_UNDEFINED},
-  };
-  static const std::
-      unordered_map<qcir::QuantizeType, Qnn_QuantizationEncoding_t>
-          type_map{
-              {qcir::QuantizeType::SCALE_OFFSET,
-               QNN_QUANTIZATION_ENCODING_SCALE_OFFSET},
-              {qcir::QuantizeType::AXIS_SCALE_OFFSET,
-               QNN_QUANTIZATION_ENCODING_AXIS_SCALE_OFFSET},
-              {qcir::QuantizeType::BW_SCALE_OFFSET,
-               QNN_QUANTIZATION_ENCODING_BW_SCALE_OFFSET},
-              {qcir::QuantizeType::BW_AXIS_SCALE_OFFSET,
-               QNN_QUANTIZATION_ENCODING_BW_AXIS_SCALE_OFFSET},
-              {qcir::QuantizeType::BLOCKWISE_EXPANSION,
-               QNN_QUANTIZATION_ENCODING_BLOCKWISE_EXPANSION},
-              {qcir::QuantizeType::UNDEFINED,
-               QNN_QUANTIZATION_ENCODING_UNDEFINED},
-          };
-  // Qnn_BlockwiseExpansion_t is a pointer type in Qnn_QuantizeParams_t
-  // need a bookkeeper for guarding life cycle
-  static std::vector<std::unique_ptr<Qnn_BlockwiseExpansion_t>> block_param;
-
-  Qnn_QuantizeParams_t p = QNN_QUANTIZE_PARAMS_INIT;
-  auto param = tensor->qparam();
-  p.encodingDefinition = def_map.at(param->def());
-  p.quantizationEncoding = type_map.at(param->type());
-  switch (p.quantizationEncoding) {
-    case QNN_QUANTIZATION_ENCODING_SCALE_OFFSET: {
-      p.scaleOffsetEncoding.scale = param->data()->Get(0)->scale();
-      p.scaleOffsetEncoding.offset = param->data()->Get(0)->offset();
-    } break;
-    case QNN_QUANTIZATION_ENCODING_AXIS_SCALE_OFFSET: {
-      p.axisScaleOffsetEncoding.axis = param->axis();
-      p.axisScaleOffsetEncoding.numScaleOffsets = param->data()->size();
-      p.axisScaleOffsetEncoding.scaleOffset =
-          reinterpret_cast<Qnn_ScaleOffset_t*>(
-              const_cast<uint8_t*>(param->data()->Data()));
-    } break;
-    case QNN_QUANTIZATION_ENCODING_BW_SCALE_OFFSET: {
-      p.bwAxisScaleOffsetEncoding.bitwidth = param->bitwidth();
-      p.bwScaleOffsetEncoding.scale = param->scales()->Get(0);
-      p.bwScaleOffsetEncoding.offset = param->offsets()->Get(0);
-    } break;
-    case QNN_QUANTIZATION_ENCODING_BW_AXIS_SCALE_OFFSET: {
-      p.bwAxisScaleOffsetEncoding.bitwidth = param->bitwidth();
-      p.bwAxisScaleOffsetEncoding.axis = param->axis();
-      p.bwAxisScaleOffsetEncoding.numElements = param->scales()->size();
-      p.bwAxisScaleOffsetEncoding.scales =
-          const_cast<float*>(param->scales()->data());
-      p.bwAxisScaleOffsetEncoding.offsets =
-          const_cast<int32_t*>(param->offsets()->data());
-    } break;
-    case QNN_QUANTIZATION_ENCODING_BLOCKWISE_EXPANSION: {
-      block_param.emplace_back(std::make_unique<Qnn_BlockwiseExpansion_t>());
-      p.blockwiseExpansion = block_param.back().get();
-      p.blockwiseExpansion->axis = param->axis();
-      p.blockwiseExpansion->scaleOffsets = reinterpret_cast<Qnn_ScaleOffset_t*>(
-          const_cast<uint8_t*>(param->data()->Data()));
-      p.blockwiseExpansion->numBlocksPerAxis = param->num_blocks_per_axis();
-      switch (param->block_scale_storage_type()) {
-        case qcir::BlockScaleStorageType::BITWIDTH_SCALE_STORAGE_8:
-          p.blockwiseExpansion->blockScaleStorageType =
-              QNN_BLOCKWISE_EXPANSION_BITWIDTH_SCALE_STORAGE_8;
-          break;
-        case qcir::BlockScaleStorageType::BITWIDTH_SCALE_STORAGE_16:
-          p.blockwiseExpansion->blockScaleStorageType =
-              QNN_BLOCKWISE_EXPANSION_BITWIDTH_SCALE_STORAGE_16;
-          break;
-        default:
-          p.blockwiseExpansion->blockScaleStorageType =
-              QNN_BLOCKWISE_EXPANSION_BITWIDTH_SCALE_STORAGE_UNDEFINED;
-          break;
-      }
-      p.blockwiseExpansion->blocksScale8 =
-          const_cast<uint8_t*>(param->block_scale()->Data());
-    } break;
-    default:
-      // encodings are not required if lowering with floating point precision
-      break;
-  }
-  return p;
-}
-
-flatbuffers::Offset<qcir::Tensor> ToTensor(
-    const Qnn_Tensor_t& tensor,
-    const uint64_t data_offset,
-    flatbuffers::FlatBufferBuilder* builder) {
-  std::vector<uint32_t> shape(
-      QNN_TENSOR_VER_PTR(tensor)->dimensions,
-      QNN_TENSOR_VER_PTR(tensor)->dimensions +
-          QNN_TENSOR_VER_PTR(tensor)->rank);
-  std::vector<uint8_t> dynamic_dims(
-      QNN_TENSOR_VER_PTR(tensor)->isDynamicDimensions,
-      QNN_TENSOR_VER_PTR(tensor)->isDynamicDimensions +
-          QNN_TENSOR_VER_PTR(tensor)->rank);
-
-  return qcir::CreateTensorDirect(
-      *builder,
-      QNN_TENSOR_VER_PTR(tensor)->name,
-      &shape,
-      &dynamic_dims,
-      ToTensorType(QNN_TENSOR_VER_PTR(tensor)->type),
-      ToDataType(QNN_TENSOR_VER_PTR(tensor)->dataType),
-      ToQuantizeParam(tensor, builder),
-      QNN_TENSOR_VER_PTR(tensor)->clientBuf.dataSize,
-      data_offset);
-}
-
-Qnn_Tensor_t ToTensor(const tensor_type& tensor, const uint8_t* data_ptr) {
-  auto is_io_tensor = [](Qnn_TensorType_t type) {
-    return type < QNN_TENSOR_TYPE_STATIC;
-  };
-
-  Qnn_Tensor_t t({.version = QNN_TENSOR_VERSION_2, .v2 = QNN_TENSOR_V2_INIT});
-  QNN_TENSOR_VER_PTR(t)->name = tensor->name()->c_str();
-  QNN_TENSOR_VER_PTR(t)->type = ToTensorType(tensor->type());
-  QNN_TENSOR_VER_PTR(t)->dataType = ToDataType(tensor->dtype());
-  QNN_TENSOR_VER_PTR(t)->quantizeParams = ToQuantizeParam(tensor);
-  QNN_TENSOR_VER_PTR(t)->rank = tensor->shape()->size();
-  QNN_TENSOR_VER_PTR(t)->dimensions =
-      const_cast<uint32_t*>(tensor->shape()->data());
-  QNN_TENSOR_VER_PTR(t)->isDynamicDimensions =
-      const_cast<uint8_t*>(tensor->dynamic_dims()->data());
-  QNN_TENSOR_VER_PTR(t)->clientBuf.dataSize = tensor->size();
-  QNN_TENSOR_VER_PTR(t)->clientBuf.data =
-      is_io_tensor(QNN_TENSOR_VER_PTR(t)->type)
-      ? nullptr
-      : static_cast<void*>(const_cast<uint8_t*>(data_ptr));
-  return t;
-}
-
-} // namespace qnn
-} // namespace backends
-} // namespace executorch
diff --git a/backends/qualcomm/aot/ir/qcir_utils.h b/backends/qualcomm/aot/ir/qcir_utils.h
deleted file mode 100755
index 085f09bf145..00000000000
--- a/backends/qualcomm/aot/ir/qcir_utils.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright (c) Qualcomm Innovation Center, Inc.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree.
- */
-
-#pragma once
-
-#include <executorch/backends/qualcomm/aot/ir/qcir_generated.h>
-#include "QnnTypes.h"
-
-namespace executorch {
-namespace backends {
-namespace qnn {
-
-typedef flatbuffers::Vector<::flatbuffers::Offset<qcir::Tensor>>::return_type
-    tensor_type;
-typedef flatbuffers::Vector<
-    ::flatbuffers::Offset<qcir::QuantizeParam>>::return_type qparam_type;
-
-qcir::TensorType ToTensorType(Qnn_TensorType_t type);
-Qnn_TensorType_t ToTensorType(qcir::TensorType type);
-qcir::DataType ToDataType(Qnn_DataType_t type);
-Qnn_DataType_t ToDataType(qcir::DataType type);
-
-flatbuffers::Offset<qcir::QuantizeParam> ToQuantizeParam(
-    const Qnn_Tensor_t& tensor,
-    flatbuffers::FlatBufferBuilder* builder);
-Qnn_QuantizeParams_t ToQuantizeParam(const tensor_type& tensor);
-
-flatbuffers::Offset<qcir::Tensor> ToTensor(
-    const Qnn_Tensor_t& tensor,
-    const uint64_t data_offset,
-    flatbuffers::FlatBufferBuilder* builder);
-Qnn_Tensor_t ToTensor(const tensor_type& tensor, const uint8_t* data_ptr);
-
-} // namespace qnn
-} // namespace backends
-} // namespace executorch
diff --git a/backends/qualcomm/aot/ir/targets.bzl b/backends/qualcomm/aot/ir/targets.bzl
deleted file mode 100644
index 2405af35d6c..00000000000
--- a/backends/qualcomm/aot/ir/targets.bzl
+++ /dev/null
@@ -1,68 +0,0 @@
-load(
-    "@fbsource//tools/build_defs:default_platform_defs.bzl",
-    "ANDROID",
-)
-load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
-load("@fbsource//xplat/executorch/backends/qualcomm:targets.bzl", "generate_schema_header")
-load("@fbsource//xplat/executorch/backends/qualcomm/qnn_version.bzl", "get_qnn_library_version")
-
-QCIR_NAME = "qcir"
-INPUT_QCIR = QCIR_NAME + ".fbs"
-OUTPUT_QCIR_HEADER = QCIR_NAME + "_generated.h"
-QCIR_GEN_RULE_NAME = "qcir_generated"
-
-def define_common_targets():
-    """Defines targets that should be shared between fbcode and xplat.
-    The directory containing this targets.bzl file should also contain both
-    TARGETS and BUCK files that call this function.
-    """
-
-    generate_schema_header(
-        QCIR_GEN_RULE_NAME,
-        [INPUT_QCIR],
-        [OUTPUT_QCIR_HEADER],
-        OUTPUT_QCIR_HEADER,
-    )
-
-    # Header-only library target with the generate executorch program schema header.
-    runtime.cxx_library(
-        name = "qcir_schema",
-        srcs = [],
-        exported_headers = {
-            OUTPUT_QCIR_HEADER: ":{}[{}]".format(QCIR_GEN_RULE_NAME, OUTPUT_QCIR_HEADER),
-        },
-        visibility = [
-            # Lock this down as tightly as possible to ensure that flatbuffers
-            # are an implementation detail. Ideally this list would only include
-            # //executorch/runtime/executor/...
-            "//executorch/backends/qualcomm/...",
-            "//executorch/backends/qualcomm/aot/ir/...",
-        ],
-        exported_external_deps = ["flatbuffers-api"],
-        define_static_target = True,
-        platforms = [ANDROID],
-    )
-
-
-    runtime.cxx_library(
-        name = "qcir_utils",
-        srcs = [
-            "qcir_utils.cpp",
-        ],
-        exported_headers = [
-            "qcir_utils.h",
-        ],
-        define_static_target = True,
-        platforms = [ANDROID],
-        visibility = ["@EXECUTORCH_CLIENTS"],
-        deps = [
-            "fbsource//third-party/qualcomm/qnn/qnn-{0}:api".format(get_qnn_library_version()),
-            "fbsource//third-party/qualcomm/qnn/qnn-{0}:app_sources".format(get_qnn_library_version()),
-            "//executorch/runtime/backend:interface",
-            "//executorch/runtime/core:core",
-            "//executorch/backends/qualcomm/aot/wrappers:wrappers",
-        ],
-        exported_deps = [
-            ":qcir_schema",
-        ],
-    )
diff --git a/backends/qualcomm/aot/python/PyQnnManagerAdaptor.h b/backends/qualcomm/aot/python/PyQnnManagerAdaptor.h
index 409ec1a4294..c8044e5db0e 100644
--- a/backends/qualcomm/aot/python/PyQnnManagerAdaptor.h
+++ b/backends/qualcomm/aot/python/PyQnnManagerAdaptor.h
@@ -132,16 +132,6 @@ class PyQnnManager {
     return qnn_manager_->GetSpillFillBufferSize();
   }
 
-  QnnExecuTorchContextBinary MakeQcirCustomBinaryInfo(
-      const QnnExecuTorchContextBinary& ctx_bin,
-      const std::vector<uint8_t>& tensor_data) {
-    custom_qcir_protocol_buffer_ =
-        QnnQcirCustomProtocol(ctx_bin.nbytes, tensor_data.size());
-    custom_qcir_protocol_buffer_.BuildQcirCustomBuffer(ctx_bin, tensor_data);
-    auto [ptr, size] = custom_qcir_protocol_buffer_.GetCustomProtocolBuffer();
-    return {ptr, size};
-  }
-
   py::array_t<char> MakeBinaryInfo(const py::bytes& ctx_bin) {
     py::buffer_info info(py::buffer(ctx_bin).request());
     QnnExecuTorchContextBinary binary(
@@ -171,22 +161,10 @@ class PyQnnManager {
       buf_size = ctx_size;
       buf_ptr = ctx_bin;
     } else {
-      // check if it's a qcir flatbuffers, return fbs if matched
-      auto
-          [status,
-           qcir_fbs_size,
-           qcir_tensor_size,
-           qcir_fbs_ptr,
-           qcir_tensor_ptr] =
-              QnnQcirCustomProtocol().DeserializeQcirCustomBuffer(info.ptr);
-      if (status == Error::Ok) {
-        buf_size = qcir_fbs_size;
-        buf_ptr = qcir_fbs_ptr;
-      } else {
-        // the format should be DLC, return nothing here
-        return py::array_t<char>(0);
-      }
+      // The format should be DLC; return an empty array here.
+      return py::array_t<char>(0);
     }
+
     auto result = py::array_t<char>(buf_size);
     auto result_buffer = result.request();
     std::memcpy(result_buffer.ptr, buf_ptr, buf_size);
@@ -199,7 +177,6 @@ class PyQnnManager {
   const py::bytes qnn_executorch_option_ptr_;
   QnnExecuTorchContextBinary qnn_executorch_context_binary_;
   std::shared_ptr<QnnManager> qnn_manager_;
-  QnnQcirCustomProtocol custom_qcir_protocol_buffer_;
   QnnContextCustomProtocol custom_context_custom_buffer_;
   flatbuffers::FlatBufferBuilder builder_;
 };
diff --git a/backends/qualcomm/aot/python/targets.bzl b/backends/qualcomm/aot/python/targets.bzl
index da27997808b..74fbd1da511 100644
--- a/backends/qualcomm/aot/python/targets.bzl
+++ b/backends/qualcomm/aot/python/targets.bzl
@@ -31,7 +31,6 @@ def define_common_targets():
             "//executorch/backends/qualcomm/aot/wrappers:wrappers",
             "//executorch/backends/qualcomm/runtime:logging",
             "//executorch/backends/qualcomm:schema",
-            "//executorch/backends/qualcomm/aot/ir:qcir_utils",
             "//executorch/backends/qualcomm/runtime:runtime",
             "fbsource//third-party/pybind11:pybind11",
             "fbsource//third-party/qualcomm/qnn/qnn-{0}:api".format(get_qnn_library_version()),
@@ -65,7 +64,6 @@ def define_common_targets():
             "//executorch/backends/qualcomm/aot/wrappers:wrappers",
             "//executorch/backends/qualcomm/runtime:logging",
             "//executorch/backends/qualcomm:schema",
-            "//executorch/backends/qualcomm/aot/ir:qcir_utils",
             "//executorch/backends/qualcomm/runtime:runtime",
             "fbsource//third-party/pybind11:pybind11",
             "fbsource//third-party/qualcomm/qnn/qnn-{0}:api".format(get_qnn_library_version()),
@@ -94,7 +92,6 @@ def define_common_targets():
             "//executorch/backends/qualcomm/aot/wrappers:wrappers",
             "//executorch/backends/qualcomm/runtime:logging",
             "//executorch/backends/qualcomm:schema",
-            "//executorch/backends/qualcomm/aot/ir:qcir_utils",
             "//executorch/backends/qualcomm/runtime:runtime",
             "fbsource//third-party/pybind11:pybind11",
             "fbsource//third-party/qualcomm/qnn/qnn-{0}:api".format(get_qnn_library_version()),
diff --git a/backends/qualcomm/runtime/QnnExecuTorch.h b/backends/qualcomm/runtime/QnnExecuTorch.h
index 2ca0cd61cd5..83776937411 100644
--- a/backends/qualcomm/runtime/QnnExecuTorch.h
+++ b/backends/qualcomm/runtime/QnnExecuTorch.h
@@ -22,8 +22,7 @@ extern "C" {
 
 // This could be:
 // 1. qnn_context_binary
-// 2. QnnQcirCustomProtocol
-// 3. QnnContextCustomProtocol
+// 2. QnnContextCustomProtocol
 // To check if it is custom protocol, users can deserialize the binary using
 // QnnCustomProtocol and check the status
 typedef struct {
diff --git a/backends/qualcomm/runtime/QnnExecuTorchBackend.cpp b/backends/qualcomm/runtime/QnnExecuTorchBackend.cpp
index 01bf13603d6..751e5d18119 100644
--- a/backends/qualcomm/runtime/QnnExecuTorchBackend.cpp
+++ b/backends/qualcomm/runtime/QnnExecuTorchBackend.cpp
@@ -48,8 +48,7 @@ Result<DelegateHandle*> QnnExecuTorchBackend::init(
     qnn_context_blob.buffer = ctx_bin;
   } else {
     // This buffer will be verified again in QnnBackendCache.
-    QNN_EXECUTORCH_LOG_INFO(
-        "Deserializing processed data using QnnQcirCustomProtocol");
+    QNN_EXECUTORCH_LOG_INFO("Deserializing processed data using Dlc");
     qnn_context_blob.buffer = const_cast<void*>(processed->data());
     qnn_context_blob.nbytes = processed->size();
   }
diff --git a/backends/qualcomm/runtime/backends/QnnBackendCache.cpp b/backends/qualcomm/runtime/backends/QnnBackendCache.cpp
index 4387d61ab7c..3dd1738d33b 100644
--- a/backends/qualcomm/runtime/backends/QnnBackendCache.cpp
+++ b/backends/qualcomm/runtime/backends/QnnBackendCache.cpp
@@ -113,7 +113,6 @@ Error QnnBackendCache::Configure(const std::vector<std::string>& graph_names) {
   // DO DESERIALIZE
   state_ = DESERIALIZE;
   QNN_EXECUTORCH_LOG_INFO("Caching: Caching is in RESTORE MODE.");
-
   auto [status, _, context_size, context_ptr] =
       QnnContextCustomProtocol().DeserializeContextCustomBuffer(
           qnn_context_blob_.buffer);
diff --git a/backends/qualcomm/runtime/backends/QnnCustomProtocol.cpp b/backends/qualcomm/runtime/backends/QnnCustomProtocol.cpp
index 12de1b3e705..b01d7ab6d80 100644
--- a/backends/qualcomm/runtime/backends/QnnCustomProtocol.cpp
+++ b/backends/qualcomm/runtime/backends/QnnCustomProtocol.cpp
@@ -12,87 +12,6 @@ namespace executorch {
 namespace backends {
 namespace qnn {
 
-// we still need this for on-device op validation of other backends
-void QnnQcirCustomProtocol::BuildQcirCustomBuffer(
-    const QnnExecuTorchContextBinary& qcir_binary,
-    const std::vector<uint8_t>& tensor_data) {
-  if (qnn_custom_buffer_.size() == 0) {
-    uint8_t magic_number_proto_size = sizeof(magic_number_);
-    uint8_t qcir_fbs_proto_size = sizeof(qcir_fbs_size_);
-    uint8_t tensor_proto_size = sizeof(tensor_size_);
-
-    uint64_t buffer_size = magic_number_proto_size + qcir_fbs_proto_size +
-        tensor_proto_size + qcir_fbs_size_ + tensor_size_;
-    qnn_custom_buffer_.resize(buffer_size, 0);
-
-    size_t pos = 0;
-    // magic number itself
-    std::memcpy(
-        qnn_custom_buffer_.data(), &magic_number_, magic_number_proto_size);
-    pos += magic_number_proto_size;
-
-    // size of qcir_fbs, should be 4 bytes
-    std::memcpy(
-        qnn_custom_buffer_.data() + pos, &qcir_fbs_size_, qcir_fbs_proto_size);
-    pos += qcir_fbs_proto_size;
-
-    // size of tensor, should be 8 bytes
-    std::memcpy(
-        qnn_custom_buffer_.data() + pos, &tensor_size_, tensor_proto_size);
-    pos += tensor_proto_size;
-
-    // qcir.fbs buffer
-    uint8_t* qcir_ptr = static_cast<uint8_t*>(qcir_binary.buffer);
-
-    std::memcpy(qnn_custom_buffer_.data() + pos, qcir_ptr, qcir_fbs_size_);
-    pos += qcir_fbs_size_;
-
-    // tensor data
-    std::memcpy(
-        qnn_custom_buffer_.data() + pos, tensor_data.data(), tensor_size_);
-  }
-}
-
-std::tuple<Error, uint32_t, uint64_t, void*, void*>
-QnnQcirCustomProtocol::DeserializeQcirCustomBuffer(void* processed_data) {
-  Error status = Error::Ok;
-  uint8_t* ptr = static_cast<uint8_t*>(processed_data);
-  size_t magic_number_proto_size = sizeof(magic_number_);
-  uint8_t qcir_fbs_proto_size = sizeof(qcir_fbs_size_);
-  uint8_t tensor_proto_size = sizeof(tensor_size_);
-
-  uint32_t magic_number;
-  std::memcpy(&magic_number, ptr, magic_number_proto_size);
-  ptr += magic_number_proto_size;
-
-  if (magic_number != magic_number_) {
-    QNN_EXECUTORCH_LOG_INFO(
-        "QnnQcirCustomProtocol expected magic number: 0x%x but get: 0x%x",
-        magic_number_,
-        magic_number);
-    status = Error::Internal;
-  }
-
-  // Retrieve size of qcir.fbs
-  uint32_t qcir_fbs_size;
-  std::memcpy(&qcir_fbs_size, ptr, qcir_fbs_proto_size);
-  ptr += qcir_fbs_proto_size;
-
-  // Retrieve size of tensor
-  uint64_t tensor_size;
-  std::memcpy(&tensor_size, ptr, tensor_proto_size);
-  ptr += tensor_proto_size;
-
-  // Retrieve qcir.fbs pointer
-  void* qcir_fbs_ptr = static_cast<void*>(ptr);
-  ptr += qcir_fbs_size;
-
-  // Retrieve tensor
-  void* tensor_ptr = static_cast<void*>(ptr);
-
-  return {status, qcir_fbs_size, tensor_size, qcir_fbs_ptr, tensor_ptr};
-}
-
 void QnnContextCustomProtocol::BuildContextCustomBuffer() {
   if (qnn_custom_buffer_.size() == 0) {
     signature_ =
diff --git a/backends/qualcomm/runtime/backends/QnnCustomProtocol.h b/backends/qualcomm/runtime/backends/QnnCustomProtocol.h
index 6ea556899f5..3cc6a6e25dc 100644
--- a/backends/qualcomm/runtime/backends/QnnCustomProtocol.h
+++ b/backends/qualcomm/runtime/backends/QnnCustomProtocol.h
@@ -24,13 +24,8 @@ namespace qnn {
 
 using executorch::runtime::Error;
 
-// We have 2 kinds of protocol here: custom_qcir_protocol,
-// custom_context_protocol. We need this class due to limitation of 32bits
-// flatbuffer. Since larger models can exceed the maximum size for 32bits
-// flatbuffer, we need to define our own protocol and store some information
-// outside of the flatbuffer. The magic number helps determine if we are getting
-// the correct custom protocol buffer and differentiate custom_qcir_protocol
-// from custom_context_protocol.
+// Required for multi-graph support to retrieve the qnn manager handle via a
+// unique signature.
 class QnnCustomProtocol {
  public:
   QnnCustomProtocol() {}
@@ -47,48 +42,6 @@ class QnnCustomProtocol {
   std::vector<uint8_t> qnn_custom_buffer_;
 };
 
-// For custom_qcir_protocol, we expect the following format:
-//
-// ------------------------------
-// | qcir magic number (4 bytes)|
-// ------------------------------
-// | qcir.fbs size (4 bytes)    |
-// ------------------------------
-// | tensor size (8 bytes)      |
-// ------------------------------
-// | qcir.fbs (flatbuffer)      |
-// ------------------------------
-// | tensor.data                |
-// ------------------------------
-class QnnQcirCustomProtocol : public QnnCustomProtocol {
- public:
-  // Constructor for Serialize
-  QnnQcirCustomProtocol(uint32_t qcir_fbs_size, uint64_t tensor_size)
-      : QnnCustomProtocol(),
-        qcir_fbs_size_(qcir_fbs_size),
-        tensor_size_(tensor_size) {}
-
-  // Constructor for Deserialize
-  QnnQcirCustomProtocol() : QnnCustomProtocol() {}
-
-  void BuildQcirCustomBuffer(
-      const QnnExecuTorchContextBinary& qcir_binary,
-      const std::vector<uint8_t>& tensor_data);
-  // Return a tuple with 5 elements:
-  // 1) Error: Status of whether deserializing is successful.
-  // 2) uint32_t: Size of qcir fbs
-  // 3) uint64_t: Size of tensor
-  // 4) void*: Pointer pointing to the start of qcir fbs
-  // 5) void*: Pointer pointing to the start of tensor
-  std::tuple<Error, uint32_t, uint64_t, void*, void*>
-  DeserializeQcirCustomBuffer(void* processed_data);
-
- private:
-  static constexpr uint32_t magic_number_ = 0x1234ABCD;
-  uint32_t qcir_fbs_size_{0};
-  uint64_t tensor_size_{0};
-};
-
 // For custom context binary protocol, we expect the following format:
 //
 // ---------------------------------
diff --git a/backends/qualcomm/runtime/targets.bzl b/backends/qualcomm/runtime/targets.bzl
index 1bd82f8f913..a74b16ad4a2 100644
--- a/backends/qualcomm/runtime/targets.bzl
+++ b/backends/qualcomm/runtime/targets.bzl
@@ -73,7 +73,6 @@ def define_common_targets():
                 "fbsource//third-party/qualcomm/qnn/qnn-{0}:app_sources".format(get_qnn_library_version()),
                 ":logging",
                 "//executorch/backends/qualcomm:schema",
-                "//executorch/backends/qualcomm/aot/ir:qcir_utils",
                 "//executorch/backends/qualcomm/aot/wrappers:wrappers",
                 "//executorch/runtime/backend:interface",
                 "//executorch/runtime/core:core",
diff --git a/backends/qualcomm/tests/utils.py b/backends/qualcomm/tests/utils.py
index fd2d10e2b93..051e2c78184 100644
--- a/backends/qualcomm/tests/utils.py
+++ b/backends/qualcomm/tests/utils.py
@@ -144,30 +144,6 @@ def validate_context_binary(ctx_bin: bytes):
         assert os.path.isfile(f"{tmp_dir}/ctx.json"), print(result.stderr)
 
 
-def validate_qcir(qcir: bytes):
-    with tempfile.TemporaryDirectory() as tmp_dir:
-        with open(f"{tmp_dir}/qcir.bin", "wb") as binary_file:
-            binary_file.write(qcir)
-
-        cmds = [
-            "flatc",
-            "-o",
-            tmp_dir,
-            "--raw-binary",
-            "-t",
-            f"{os.path.dirname(__file__)}/../aot/ir/qcir.fbs",
-            "--",
-            f"{tmp_dir}/qcir.bin",
-        ]
-        result = subprocess.run(
-            " ".join(cmds),
-            shell=True,
-            executable="/bin/bash",
-            capture_output=True,
-        )
-        assert os.path.isfile(f"{tmp_dir}/qcir.json"), print(result.stderr)
-
-
 class TestQNN(unittest.TestCase):
     rtol: float = 0
     atol: float = 0