Skip to content

Commit c4ef409

Browse files
keith2018 authored and pytorchmergebot committed
Fix segfault on exit in CachingHostAllocator by signaling background thread to exit (pytorch#154117)
Fixes pytorch#152008. This PR fixes a segmentation fault that occurred when exiting the program due to improper background thread management in CachingHostAllocator. Previously, the background thread continued running and called process_events() even after the allocator object was destroyed, leading to a crash on exit. https://github.com/pytorch/pytorch/blob/f12d8d60b19083123d810ebda1eb1591dbe3dd3d/aten/src/ATen/core/CachingHostAllocator.h#L218 ```cpp // Launch the background thread and process events in a loop. static bool background_thread_flag [[maybe_unused]] = [this] { getBackgroundThreadPool()->run([&]() { while (true) { process_events(); // <-- This line may cause segfault on exit std::this_thread::sleep_for(std::chrono::microseconds(100)); } }); return true; }(); ``` The fix adds a mechanism to signal the background thread to exit before the object is destructed, ensuring the thread stops safely. Pull Request resolved: pytorch#154117 Approved by: https://github.com/ngimel, https://github.com/cyyever
1 parent 9d922b5 commit c4ef409

File tree

2 files changed

+24
-2
lines changed

2 files changed

+24
-2
lines changed

aten/src/ATen/core/CachingHostAllocator.h

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,12 @@ template <
177177
typename E,
178178
typename B = HostBlock<S>>
179179
struct CachingHostAllocatorImpl {
180-
virtual ~CachingHostAllocatorImpl() = default;
180+
virtual ~CachingHostAllocatorImpl() {
181+
active_ = false;
182+
if (pinned_use_background_threads()) {
183+
getBackgroundThreadPool()->waitWorkComplete();
184+
}
185+
}
181186

182187
public:
183188
// return data_ptr and block pair.
@@ -214,7 +219,7 @@ struct CachingHostAllocatorImpl {
214219
// Launch the background thread and process events in a loop.
215220
static bool background_thread_flag [[maybe_unused]] = [this] {
216221
getBackgroundThreadPool()->run([&]() {
217-
while (true) {
222+
while (active_) {
218223
process_events();
219224
std::this_thread::sleep_for(std::chrono::microseconds(100));
220225
}
@@ -620,6 +625,10 @@ struct CachingHostAllocatorImpl {
620625

621626
alignas(64) std::mutex events_mutex_;
622627
std::deque<std::pair<E, B*>> events_; // event queue paired with block
628+
629+
// Indicates whether the object is active.
630+
// Set to false in the destructor to signal background threads to stop.
631+
std::atomic<bool> active_{true};
623632
protected:
624633
alignas(64) HostStatsStaged stats_;
625634
};

test/test_cuda.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -328,6 +328,19 @@ def test_pinned_memory_empty_cache(self):
328328
"pinned_use_cuda_host_register:False"
329329
)
330330

331+
def test_pinned_memory_use_background_threads(self):
332+
script = """
333+
import torch
334+
335+
torch.cuda.memory._set_allocator_settings(
336+
f"pinned_use_background_threads:True"
337+
)
338+
t = torch.ones(1024 * 1024, pin_memory=True)
339+
print(t.is_pinned())
340+
"""
341+
proc = subprocess.run([sys.executable, "-c", script], capture_output=True)
342+
self.assertEqual(proc.returncode, 0)
343+
331344
def test_cudart_register(self):
332345
t = torch.ones(20)
333346
self.assertFalse(t.is_pinned())

0 commit comments

Comments
 (0)