
Commit d043124

Revert "Add XPU graph support (#4705)" (#4999)
Signed-off-by: majing <[email protected]>
1 parent: c814ead | commit: d043124

File tree: 12 files changed (+13, -1456 lines)


csrc/gpu/runtime/CachingDeviceAllocator.cpp

Lines changed: 13 additions & 120 deletions
```diff
@@ -96,8 +96,7 @@ CachingDeviceAllocator::Block::Block(
       m_allocated(0),
       m_prev(nullptr),
       m_next(nullptr),
-      m_event_cnt(0),
-      m_owner_private_pool(nullptr) {
+      m_event_cnt(0) {
   auto device_cnt = at::xpu::device_count();
   std::vector<DeviceStats> dev_stats;
 }
@@ -117,8 +116,7 @@ CachingDeviceAllocator::Block::Block(
       m_allocated(false),
       m_prev(nullptr),
       m_next(nullptr),
-      m_event_cnt(0),
-      m_owner_private_pool(nullptr) {}
+      m_event_cnt(0) {}
 
 bool CachingDeviceAllocator::Block::is_split() const {
   return (m_prev != nullptr) || (m_next != nullptr);
@@ -202,39 +200,13 @@ void CachingDeviceAllocator::malloc(
   }
 
   BlockPool* pool = nullptr;
-  PrivatePool* private_pool = nullptr;
   PoolType pool_type = PoolType::UNDEF;
-  Block* block = nullptr;
-
-  if (recordings_underway.size()) {
-    // graph path, try to find the blocks pointer which related to the
-    // PrivatePool who is recording graph on current queue.
-    for (auto& entry : recordings_underway) {
-      if (entry.second(queue)) {
-        auto it1 = graph_pools.find(entry.first);
-        TORCH_INTERNAL_ASSERT(it1 != graph_pools.end());
-        if (size <= kSmallSize) {
-          pool_type = PoolType::SMALL_POOL;
-          pool = &it1->second->small_blocks;
-        } else {
-          pool_type = PoolType::LARGE_POOL;
-          pool = &it1->second->large_blocks;
-        }
-        private_pool = it1->second.get();
-      }
-    }
-  }
-  // fallback check. It's not suitable to change it to 'else' statement.
-  if (pool == nullptr) {
-    // normal path, search and return aiming block for allocation in
-    // DeviceCachingAllocator's own pool.
-    if (size <= kSmallSize) {
-      pool_type = PoolType::SMALL_POOL;
-      pool = &small_blocks;
-    } else {
-      pool_type = PoolType::LARGE_POOL;
-      pool = &large_blocks;
-    }
+  if (size <= kSmallSize) {
+    pool_type = PoolType::SMALL_POOL;
+    pool = &small_blocks;
+  } else {
+    pool_type = PoolType::LARGE_POOL;
+    pool = &large_blocks;
   }
 
   Block search_key(curDevID, *queue, size);
@@ -253,7 +225,7 @@ void CachingDeviceAllocator::malloc(
   stat_types[static_cast<size_t>(StatType::AGGREGATE)] = true;
   stat_types[static_cast<size_t>(get_stat_type_for_pool(pool_type))] = true;
   DeviceStats& stats = get_stats_for_device(curDevID);
-  block = find_free_block();
+  Block* block = find_free_block();
 
   if (block == nullptr) {
     void* buffer;
@@ -297,9 +269,6 @@ void CachingDeviceAllocator::malloc(
   Block* remaining = nullptr;
   AT_ASSERT(block);
 
-  // need to record the block's owner pool for lazy releasing
-  block->m_owner_private_pool = private_pool;
-
   const bool already_split = block->is_split();
   if (block->should_split(size)) {
     remaining = block;
@@ -448,18 +417,10 @@ void CachingDeviceAllocator::free_block(Block* block) {
   size_t original_block_size = block->m_size;
 
   BlockPool* pool = nullptr;
-  if (block->m_owner_private_pool == nullptr) {
-    if (block->m_pool_type == PoolType::LARGE_POOL) {
-      pool = &large_blocks;
-    } else if (block->m_pool_type == PoolType::SMALL_POOL) {
-      pool = &small_blocks;
-    }
-  } else {
-    if (block->m_pool_type == PoolType::LARGE_POOL) {
-      pool = &block->m_owner_private_pool->large_blocks;
-    } else if (block->m_pool_type == PoolType::SMALL_POOL) {
-      pool = &block->m_owner_private_pool->small_blocks;
-    }
+  if (block->m_pool_type == PoolType::LARGE_POOL) {
+    pool = &large_blocks;
+  } else if (block->m_pool_type == PoolType::SMALL_POOL) {
+    pool = &small_blocks;
   }
 
   int64_t net_change_inactive_split_blocks = 0;
@@ -673,17 +634,6 @@ void CachingDeviceAllocator::free_cached_blocks(DeviceId di) {
   free_blocks(large_blocks, begin, end);
   find_cached_blocks_bound(di, small_blocks, begin, end);
   free_blocks(small_blocks, begin, end);
-
-  // Release graph private pools
-  for (auto it = graph_pools_freeable.begin();
-       it != graph_pools_freeable.end();) {
-    TORCH_INTERNAL_ASSERT(it->second->use_count == 0);
-    free_blocks(it->second->small_blocks, begin, end);
-    free_blocks(it->second->large_blocks, begin, end);
-    auto erase_count = graph_pools.erase(it->first);
-    TORCH_INTERNAL_ASSERT(erase_count == 1);
-    it = graph_pools_freeable.erase(it);
-  }
 }
 
 void CachingDeviceAllocator::synchronize_and_free_events(
@@ -833,62 +783,5 @@ void CachingDeviceAllocator::dumpMemoryStatus(DeviceId deviceIndex) {
       .current));
 }
 
-// Called by XPUGraph::begin_recording
-void CachingDeviceAllocator::beginAllocateToPool(
-    DeviceId deviceIndex,
-    MempoolId_t mempoolId,
-    std::function<bool(sycl::queue*)> filter) {
-  std::lock_guard<std::recursive_mutex> lock(mutex);
-  auto search_key = std::make_pair(deviceIndex, mempoolId);
-  auto it = graph_pools.find(search_key);
-  if (it == graph_pools.end()) {
-    graph_pools.emplace(search_key, std::make_unique<PrivatePool>());
-  } else {
-    TORCH_INTERNAL_ASSERT(it->second->use_count > 0);
-    it->second->use_count += 1;
-  }
-  for (auto it2 = recordings_underway.begin(); it2 != recordings_underway.end();
-       ++it2) {
-    TORCH_CHECK(
-        it2->first != search_key,
-        "beginAllocateToPool: already recording to mempool_id");
-  }
-  recordings_underway.emplace_back(search_key, std::move(filter));
-}
-
-// Called by XPUGraph::end_recording
-void CachingDeviceAllocator::endAllocateToPool(
-    DeviceId deviceIndex,
-    MempoolId_t mempoolId) {
-  std::lock_guard<std::recursive_mutex> lock(mutex);
-  auto search_key = std::make_pair(deviceIndex, mempoolId);
-  for (auto it = recordings_underway.begin(); it != recordings_underway.end();
-       ++it) {
-    if (it->first == search_key) {
-      recordings_underway.erase(it);
-      return;
-    }
-  }
-  TORCH_CHECK(
-      false, "endAllocateToPool: not currently recording to mempool_id");
-}
-
-// Called by XPUGraph::reset
-void CachingDeviceAllocator::releasePool(
-    DeviceId deviceIndex,
-    MempoolId_t mempoolId) {
-  std::lock_guard<std::recursive_mutex> lock(mutex);
-  auto search_key = std::make_pair(deviceIndex, mempoolId);
-  auto it = graph_pools.find(search_key);
-  TORCH_INTERNAL_ASSERT(it != graph_pools.end());
-  auto uc = --(it->second->use_count);
-  TORCH_INTERNAL_ASSERT(uc >= 0);
-  if (uc == 0) {
-    bool inserted =
-        graph_pools_freeable.insert({search_key, it->second.get()}).second;
-    TORCH_INTERNAL_ASSERT(inserted);
-  }
-}
-
 } // namespace dpcpp
 } // namespace torch_ipex::xpu
```
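For orientation, the three functions deleted in the final hunk above were the allocator-side half of the graph-capture lifecycle. The sketch below is a hypothetical illustration of how a capture object might have driven them before the revert, pieced together from the removed comments ("Called by XPUGraph::begin_recording", "::end_recording", "::reset"). The free-standing helper functions, the `capture_queue` filter, the include path, and the namespace placement are assumptions for illustration, not the actual XPUGraph implementation.

```cpp
// Hypothetical driver for the removed allocator hooks (illustration only).
// The hook signatures come from the diff above; everything else is assumed
// about the pre-revert tree.
#include <runtime/CachingDeviceAllocator.h>

using namespace torch_ipex::xpu::dpcpp;  // assumed namespace of the allocator

void begin_recording(CachingDeviceAllocator& allocator,
                     DeviceId device,
                     MempoolId_t pool_id,
                     sycl::queue* capture_queue) {
  // While recording, allocations made on the capture queue are routed into
  // the graph's private pool instead of the global small/large block pools.
  allocator.beginAllocateToPool(
      device, pool_id,
      [capture_queue](sycl::queue* q) { return q == capture_queue; });
}

void end_recording(CachingDeviceAllocator& allocator,
                   DeviceId device,
                   MempoolId_t pool_id) {
  // Stop redirecting allocations; blocks already placed in the private pool
  // stay there so graph replays keep reusing the same memory.
  allocator.endAllocateToPool(device, pool_id);
}

void reset_graph(CachingDeviceAllocator& allocator,
                 DeviceId device,
                 MempoolId_t pool_id) {
  // Drop this graph's reference. Once use_count reaches zero the pool is
  // queued in graph_pools_freeable and reclaimed by free_cached_blocks.
  allocator.releasePool(device, pool_id);
}
```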

csrc/gpu/runtime/CachingDeviceAllocator.h

Lines changed: 0 additions & 80 deletions
```diff
@@ -5,9 +5,6 @@
 
 #include <core/AllocationInfo.h>
 #include <runtime/Device.h>
-#include <runtime/XPUGraph.h>
-
-#include <c10/util/flat_hash_map.h>
 
 #include <algorithm>
 #include <bitset>
@@ -33,8 +30,6 @@ class CachingDeviceAllocator final {
     SMALL_POOL = 2,
   };
 
-  struct PrivatePool;
-
   struct Block {
     Block(DeviceId device, sycl::queue queue, size_t size);
 
@@ -76,8 +71,6 @@ class CachingDeviceAllocator final {
     Block* m_prev;
     Block* m_next;
     int m_event_cnt;
-    // Store pointer to private pool for tracing back
-    PrivatePool* m_owner_private_pool;
   };
 
   using BlockPool = std::set<Block*, decltype(Block::Comparator)*>;
@@ -91,70 +84,6 @@ class CachingDeviceAllocator final {
   std::unordered_map<void*, Block*> allocated_blocks;
   std::deque<std::pair<sycl::event, Block*>> dpcpp_events;
 
-  // Members specific to XPU graphs
-
-  // A recorded graph should retain its memory pool in order all graphs to
-  // replay many times on the same active zone, which should not be freed
-  // or replaced or modified by other tensors.
-  //
-  // To identify the pools used only for graphs and should be kept unless
-  // no graphs related remained, the graph mechanism has MempoolId_t to
-  // mark each pool either created by user or by other graphs. But our
-  // allocator is global for all devices but ids are not unique across
-  // devices, so there is a need to combine DeviceId together in a key.
-  struct PrivatePool {
-    PrivatePool()
-        : use_count(1),
-          large_blocks(Block::Comparator),
-          small_blocks(Block::Comparator) {}
-    PrivatePool(const PrivatePool&) = delete;
-    PrivatePool(PrivatePool&&) = delete;
-    PrivatePool& operator=(const PrivatePool&) = delete;
-    // Number of live graphs using this pool. When use_count
-    // equals to 0, this pool can be destroyed safely.
-    // Because SYCL doesn't has the ability to unmap blocks instead of
-    // freeing them immediately, there is no need to count remained
-    // blocks here as all of them should be considered to be freed once
-    // no graph would use this pool anymore.
-    int use_count;
-    // Totally a mirror copy of a normal block pool, and will always be
-    // initialized as empty set when newly create a PrivatePool instance.
-    BlockPool large_blocks;
-    BlockPool small_blocks;
-  };
-
-  struct MempoolHash {
-    std::size_t operator()(const std::pair<DeviceId, MempoolId_t>& p) const {
-      auto h1 = std::hash<DeviceId>{}(p.first);
-      auto h2 = std::hash<CaptureId_t>{}(p.second.first);
-      auto h3 = std::hash<CaptureId_t>{}(p.second.second);
-      return h1 ^ (h2 << 1) ^ (h3 << 2);
-    }
-  };
-
-  // Private pools for XPU graphs
-  // As DeviceCachingAllocator in IPEX is designed as an singleton running on
-  // multi-devices, which is different to the allocator upstream to PyTorch,
-  // there is a must to add `DeviceId` into the maping keys list.
-  ska::flat_hash_map<
-      std::pair<DeviceId, MempoolId_t>,
-      std::unique_ptr<PrivatePool>,
-      MempoolHash>
-      graph_pools;
-  // Pools no longer referenced by any graph. Their BlockPools are eligible for
-  // free_blocks. The reason to use map here is the need to erase PrivatePools
-  // in graph_pools at the same time with same search keys.
-  ska::
-      flat_hash_map<std::pair<DeviceId, MempoolId_t>, PrivatePool*, MempoolHash>
-          graph_pools_freeable;
-  // Store pools underway in recording.
-  std::vector<std::pair<
-      std::pair<DeviceId, MempoolId_t>,
-      std::function<bool(sycl::queue*)>>>
-      recordings_underway;
-
-  MempoolId_t get_mempool_id(DeviceId device);
-
   DeviceStats& get_stats_for_device(DeviceId device);
 
   void update_stat_array(
@@ -229,15 +158,6 @@ class CachingDeviceAllocator final {
   std::vector<SegmentInfo> snapshot() const;
 
   void dumpMemoryStatus(DeviceId deviceIndex);
-
-  void beginAllocateToPool(
-      DeviceId deviceIndex,
-      MempoolId_t mempoolId,
-      std::function<bool(sycl::queue*)> filter);
-
-  void endAllocateToPool(DeviceId deviceIndex, MempoolId_t mempoolId);
-
-  void releasePool(DeviceId deviceIndex, MempoolId_t mempoolId);
 };
 
 } // namespace dpcpp
```
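The block comment removed from this header explains the keying scheme: the IPEX device caching allocator is a single instance serving every device, while a MempoolId_t is only unique within one device, so graph pools have to be looked up by a (DeviceId, MempoolId_t) pair. Below is a small self-contained sketch of that keying pattern; it substitutes std::unordered_map for ska::flat_hash_map and uses stand-in type aliases (DeviceId, CaptureId_t, PrivatePool), so it is illustrative only.

```cpp
// Self-contained illustration of the (DeviceId, MempoolId_t) keying used by
// the removed graph_pools map. Type aliases are stand-ins (assumptions); the
// hash-combining logic matches the removed MempoolHash.
#include <cstdint>
#include <functional>
#include <memory>
#include <unordered_map>
#include <utility>

using DeviceId = int16_t;                                 // stand-in
using CaptureId_t = unsigned long long;                   // stand-in
using MempoolId_t = std::pair<CaptureId_t, CaptureId_t>;  // inferred from MempoolHash

struct PrivatePool {  // stand-in for the removed reference-counted pool
  int use_count = 1;
};

struct MempoolHash {
  std::size_t operator()(const std::pair<DeviceId, MempoolId_t>& p) const {
    auto h1 = std::hash<DeviceId>{}(p.first);
    auto h2 = std::hash<CaptureId_t>{}(p.second.first);
    auto h3 = std::hash<CaptureId_t>{}(p.second.second);
    return h1 ^ (h2 << 1) ^ (h3 << 2);
  }
};

// Keying by device *and* mempool id keeps pools from different devices with
// the same capture id from colliding in the single global allocator.
using GraphPools = std::unordered_map<std::pair<DeviceId, MempoolId_t>,
                                      std::unique_ptr<PrivatePool>,
                                      MempoolHash>;

int main() {
  GraphPools graph_pools;
  DeviceId device = 0;
  MempoolId_t pool_id{1, 0};  // hypothetical capture id
  graph_pools.emplace(std::make_pair(device, pool_id),
                      std::make_unique<PrivatePool>());
  return graph_pools.count({device, pool_id}) == 1 ? 0 : 1;
}
```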
