-
Notifications
You must be signed in to change notification settings - Fork 31
refactor(dipu): add static_value_array and simplify code #920
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,29 @@ | ||
| #pragma once | ||
|
|
||
| #include <array> | ||
| #include <utility> | ||
|
|
||
| namespace dipu { | ||
|
|
||
| // Create an array of functor. Each functor should provide a 'value()' function | ||
| // to return a reference of a static variable. Thus the value could be lazily | ||
| // initialized. | ||
| template <typename T, std::size_t N> | ||
| struct make_static_function_array { | ||
| template <typename U = std::make_index_sequence<N>> | ||
| struct slot {}; | ||
| template <std::size_t... I> | ||
| struct slot<std::index_sequence<I...>> { | ||
| // deduction friendly | ||
| using type = std::decay_t<decltype(T::template value<0>)>; | ||
| // the actual array | ||
| auto inline static constexpr value = | ||
| std::array<type, sizeof...(I)>{T::template value<I>...}; | ||
| }; | ||
| }; | ||
|
|
||
| template <typename T, std::size_t N> | ||
| using static_function_array = | ||
| typename make_static_function_array<T, N>::template slot<>; | ||
|
|
||
| } // namespace dipu | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,102 +1,81 @@ | ||
| #include "DIPUEventPool.h" | ||
|
|
||
| #include <array> | ||
| #include <deque> | ||
| #include <functional> | ||
| #include <iostream> | ||
| #include <mutex> | ||
|
|
||
| namespace dipu { | ||
| #include "csrc_dipu/base/utility.h" | ||
| #include "csrc_dipu/runtime/device/deviceapis.h" | ||
|
|
||
| template <typename T> | ||
| class EventPool final { | ||
| protected: | ||
| std::deque<T> event_pool_; | ||
| unsigned int allocate_num_ = 0; | ||
| namespace { | ||
|
|
||
| std::function<void(T&)> allocator_; | ||
| std::function<void(T&)> deleter_; | ||
| using mutex_t = std::recursive_mutex; | ||
| mutex_t event_mutex_; | ||
| class EventPool { | ||
| std::deque<dipu::deviceEvent_t> pool; | ||
| std::recursive_mutex mutex; | ||
|
|
||
| public: | ||
| EventPool(const std::function<void(T&)>& allocator, | ||
| const std::function<void(T&)>& deleter) | ||
| : allocator_(allocator), deleter_(deleter) {} | ||
|
|
||
| EventPool(const EventPool&) = delete; | ||
| EventPool(EventPool&&) = delete; | ||
| EventPool& operator=(const EventPool&) = delete; | ||
| EventPool& operator=(EventPool&&) = delete; | ||
|
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 有锁作为成员,EventPool 自动变得无法拷贝和移动,所以我都删掉了 |
||
|
|
||
| ~EventPool() = default; | ||
|
|
||
| void release() { | ||
| std::lock_guard<mutex_t> _(event_mutex_); | ||
| for (auto& event : event_pool_) { | ||
| deleter_(event); | ||
| allocate_num_--; | ||
| } | ||
| event_pool_.clear(); | ||
| } | ||
|
|
||
| void get(T& event) { | ||
| bool need_allocator = false; | ||
| void acquire(dipu::deviceEvent_t& event) { | ||
| { | ||
| std::lock_guard<mutex_t> _(event_mutex_); | ||
| if (event_pool_.empty()) { | ||
| need_allocator = true; | ||
| } else { | ||
| event = event_pool_.back(); | ||
| event_pool_.pop_back(); | ||
| std::scoped_lock _(mutex); | ||
| if (!pool.empty()) { | ||
| event = pool.back(); | ||
| pool.pop_back(); | ||
| return; | ||
| } | ||
| } | ||
| if (need_allocator) { | ||
| allocator_(event); | ||
| } | ||
|
|
||
| dipu::devapis::createEvent(&event); | ||
| } | ||
|
|
||
| void release(dipu::deviceEvent_t& event) { | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 新的名字确实比原来的规范 |
||
| std::scoped_lock _(mutex); | ||
| pool.emplace_back(event); | ||
| } | ||
|
|
||
| void restore(T& event) { | ||
| std::lock_guard<mutex_t> _(event_mutex_); | ||
| event_pool_.emplace_back(event); | ||
| void clear() { // should it called inside destructor? | ||
| std::scoped_lock _(mutex); | ||
| for (auto& event : pool) { | ||
| dipu::devapis::destroyEvent(event); | ||
| } | ||
| pool.clear(); | ||
| } | ||
| }; | ||
|
|
||
| EventPool<deviceEvent_t>* getEventPool() { | ||
| const int index = devproxy::current_device(); | ||
|
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 我是觉得 event pool 不应该调用 devproxy 命名空间的方法,所以把它移到外面了 |
||
| // GlobalEventPool for different cards , construct when really needed | ||
| #define dispatch_event_pool(device_id) \ | ||
| if (index == (device_id)) { \ | ||
| static EventPool<deviceEvent_t> gDIPUEventPool( \ | ||
| [](deviceEvent_t& event) { devapis::createEvent(&event); }, \ | ||
| [](deviceEvent_t& event) { devapis::destroyEvent(event); }); \ | ||
| return &gDIPUEventPool; \ | ||
| struct EventPoolHolder { | ||
| template <std::size_t I> | ||
| inline static auto& value() { | ||
| auto static instance = EventPool(); | ||
| return instance; | ||
| } | ||
| }; | ||
|
|
||
| auto constexpr max_card_number = 16; | ||
wiryls marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| using EventPoolHolderArray = | ||
| dipu::static_function_array<EventPoolHolder, max_card_number>; | ||
|
|
||
| dispatch_event_pool(0); | ||
| dispatch_event_pool(1); | ||
| dispatch_event_pool(2); | ||
| dispatch_event_pool(3); | ||
| dispatch_event_pool(4); | ||
| dispatch_event_pool(5); | ||
| dispatch_event_pool(6); | ||
| dispatch_event_pool(7); | ||
| dispatch_event_pool(8); | ||
| dispatch_event_pool(9); | ||
| dispatch_event_pool(10); | ||
| dispatch_event_pool(11); | ||
| dispatch_event_pool(12); | ||
| dispatch_event_pool(13); | ||
| dispatch_event_pool(14); | ||
| dispatch_event_pool(15); | ||
| TORCH_CHECK(false, "support up to 16 cards"); | ||
| auto event_pool(int index) -> EventPool& { | ||
| TORCH_CHECK(0 <= index and index < max_card_number, "support up to 16 cards"); | ||
| return EventPoolHolderArray::value[index](); | ||
| } | ||
|
|
||
| void getEventFromPool(deviceEvent_t& event) { getEventPool()->get(event); } | ||
| } // namespace | ||
|
|
||
| namespace dipu { | ||
|
|
||
| void event_pool_acquire(int index, deviceEvent_t& event) { | ||
| event_pool(index).acquire(event); | ||
| } | ||
|
|
||
| void restoreEventToPool(deviceEvent_t& event) { | ||
| getEventPool()->restore(event); | ||
| void event_pool_release(int index, deviceEvent_t& event) { | ||
| event_pool(index).release(event); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 参数加了 index, 上下层的职责划分也比原来明确些。 |
||
| } | ||
|
|
||
| void releaseAllEvent() { getEventPool()->release(); } | ||
| void event_pool_clear() { | ||
| for (auto& pool : EventPoolHolderArray::value) { | ||
|
||
| pool().clear(); | ||
| } | ||
| } | ||
|
|
||
| } // namespace dipu | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -6,10 +6,8 @@ | |
|
|
||
| namespace dipu { | ||
|
|
||
| void getEventFromPool(deviceEvent_t& event); | ||
|
|
||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 其实原来的驼峰也没哈问题,真没必要都改成下划线,纯个人习惯问题。。。
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 确实,这里是习惯了 |
||
| void restoreEventToPool(deviceEvent_t& event); | ||
|
|
||
| void releaseAllEvent(); | ||
| void event_pool_acquire(int index, deviceEvent_t& event); | ||
| void event_pool_release(int index, deviceEvent_t& event); | ||
| void event_pool_clear(); | ||
|
|
||
| } // namespace dipu | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,6 +5,7 @@ | |
| #include <c10/core/Device.h> | ||
| #include <c10/util/flat_hash_map.h> | ||
|
|
||
| #include "csrc_dipu/base/utility.h" | ||
| #include "csrc_dipu/runtime/core/DIPUEvent.h" | ||
|
|
||
| #include "DIPUAsyncResourcePool.h" | ||
|
|
@@ -213,48 +214,32 @@ struct RawAllocator<at::DeviceType::CPU> { | |
| using type = DIPURawHostAllocator; | ||
| }; | ||
|
|
||
| template <typename AllocatorImpl, class AsyncMemPoolImpl, int device_id> | ||
| c10::Allocator* get_allocator_impl(c10::Allocator* raw_allocator) { | ||
| // Construct when really needed | ||
| // async_mem_pool is used when cache_allocator being destructed so it should | ||
| // be destructed after cache_allocator | ||
| static AsyncMemPoolImpl async_mem_pool; | ||
| static AllocatorImpl cache_allocator; | ||
| static int n = [&]() { | ||
| cache_allocator.set_raw_allocator(raw_allocator); | ||
| cache_allocator.set_async_mem_pool(&async_mem_pool); | ||
| return 0; | ||
| }(); | ||
| return &cache_allocator; | ||
| } | ||
|
|
||
| template <class AllocatorImpl, class AsyncMemPoolImpl> | ||
| c10::Allocator* get_allocator(int device_id, c10::Allocator* raw_allocator) { | ||
| #define DIPU_ALLOCATOR_DISPATCH_DEVICE_ID(id) \ | ||
| if (device_id == (id)) { \ | ||
| return get_allocator_impl<AllocatorImpl, AsyncMemPoolImpl, id>( \ | ||
| raw_allocator); \ | ||
| template <typename AllocatorImpl, class AsyncMemPoolImpl> | ||
| struct AllocatorHolder { | ||
| template <std::size_t I> | ||
| inline static c10::Allocator* value(c10::Allocator* raw_allocator) { | ||
| // Construct when really needed | ||
| // async_mem_pool is used when cache_allocator being destructed so it should | ||
| // be destructed after cache_allocator | ||
| static AsyncMemPoolImpl async_mem_pool; | ||
| static AllocatorImpl cache_allocator; | ||
| static int n = [&]() { | ||
| cache_allocator.set_raw_allocator(raw_allocator); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 这个 precondition 好强啊,没有任何警示吗
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 这块没想到怎么改合适……索性直接复制之前的实现 |
||
| cache_allocator.set_async_mem_pool(&async_mem_pool); | ||
| return 0; | ||
| }(); | ||
| return &cache_allocator; | ||
| } | ||
| }; | ||
|
|
||
| DIPU_ALLOCATOR_DISPATCH_DEVICE_ID(0); | ||
| DIPU_ALLOCATOR_DISPATCH_DEVICE_ID(1); | ||
| DIPU_ALLOCATOR_DISPATCH_DEVICE_ID(2); | ||
| DIPU_ALLOCATOR_DISPATCH_DEVICE_ID(3); | ||
| DIPU_ALLOCATOR_DISPATCH_DEVICE_ID(4); | ||
| DIPU_ALLOCATOR_DISPATCH_DEVICE_ID(5); | ||
| DIPU_ALLOCATOR_DISPATCH_DEVICE_ID(6); | ||
| DIPU_ALLOCATOR_DISPATCH_DEVICE_ID(7); | ||
| DIPU_ALLOCATOR_DISPATCH_DEVICE_ID(8); | ||
| DIPU_ALLOCATOR_DISPATCH_DEVICE_ID(9); | ||
| DIPU_ALLOCATOR_DISPATCH_DEVICE_ID(10); | ||
| DIPU_ALLOCATOR_DISPATCH_DEVICE_ID(11); | ||
| DIPU_ALLOCATOR_DISPATCH_DEVICE_ID(12); | ||
| DIPU_ALLOCATOR_DISPATCH_DEVICE_ID(13); | ||
| DIPU_ALLOCATOR_DISPATCH_DEVICE_ID(14); | ||
| DIPU_ALLOCATOR_DISPATCH_DEVICE_ID(15); | ||
| TORCH_CHECK(false, "support up to 16 cards"); | ||
| template <class AllocatorImpl, class AsyncMemPoolImpl> | ||
| c10::Allocator* get_allocator(int index, c10::Allocator* raw_allocator) { | ||
| auto constexpr max_card_number = 16; | ||
| TORCH_CHECK(0 <= index and index < max_card_number, "support up to 16 cards"); | ||
| return dipu::static_function_array< | ||
| AllocatorHolder<AllocatorImpl, AsyncMemPoolImpl>, | ||
| max_card_number>::value[index](raw_allocator); | ||
| } | ||
| #undef DIPU_ALLOCATOR_DISPATCH_DEVICE_ID | ||
|
|
||
| #define DIPU_REGISTER_ALLOCATOR(name, device_type, CachingAllocator, \ | ||
| algorithm, priority) \ | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
是否应该放到 utils/ 目录下
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
我也在犹豫放哪里,有没有啥规范或者惯例,还有就是文件名字有没有更好的
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
附议