Skip to content

Commit 9f94c7b

Browse files
guangyeypytorchmergebot
authored andcommitted
[fix] Assign CUDAEvent external member properly (pytorch#167711)
# Motivation This PR aims to fix the bug that the moved-to object's `external_` member is not assigned correctly. # Additional Context It's not fine to swap the valid value and the invalid value. We'd just need to prevent double-free. Pull Request resolved: pytorch#167711 Approved by: https://github.com/albanD
1 parent e5a766e commit 9f94c7b

File tree

3 files changed

+49
-5
lines changed

3 files changed

+49
-5
lines changed

aten/src/ATen/cuda/CUDAEvent.h

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -238,11 +238,18 @@ struct TORCH_CUDA_CPP_API CUDAEvent {
238238
}
239239

240240
void moveHelper(CUDAEvent&& other) {
241-
std::swap(flags_, other.flags_);
242-
std::swap(is_created_, other.is_created_);
243-
std::swap(was_recorded_, other.was_recorded_);
244-
std::swap(device_index_, other.device_index_);
245-
std::swap(event_, other.event_);
241+
// Transfer ownership of all state from other to this
242+
flags_ = other.flags_;
243+
is_created_ = other.is_created_;
244+
was_recorded_ = other.was_recorded_;
245+
external_ = other.external_;
246+
device_index_ = other.device_index_;
247+
event_ = other.event_;
248+
249+
// Reset other to a valid empty state to prevent double-free
250+
// The moved-from object must not attempt to destroy the event
251+
other.is_created_ = false;
252+
other.event_ = cudaEvent_t{};
246253
}
247254
};
248255

aten/src/ATen/test/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ list(APPEND ATen_CUDA_TEST_SRCS
6565
${CMAKE_CURRENT_SOURCE_DIR}/cuda_device_test.cpp
6666
${CMAKE_CURRENT_SOURCE_DIR}/cuda_distributions_test.cu
6767
${CMAKE_CURRENT_SOURCE_DIR}/cuda_dlconvertor_test.cpp
68+
${CMAKE_CURRENT_SOURCE_DIR}/cuda_event_test.cpp
6869
${CMAKE_CURRENT_SOURCE_DIR}/cuda_exchange_device_test.cpp
6970
${CMAKE_CURRENT_SOURCE_DIR}/cuda_generator_test.cu
7071
${CMAKE_CURRENT_SOURCE_DIR}/cuda_half_test.cu
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
#include <gtest/gtest.h>
2+
3+
#include <ATen/cuda/CUDAEvent.h>
4+
#include <ATen/cuda/CUDAGraph.h>
5+
#include <ATen/cuda/Sleep.h>
6+
7+
TEST(CUDAEventTest, testCUDAExternalEvent) {
8+
if (!at::cuda::is_available()) {
9+
return;
10+
}
11+
12+
// Create two external CUDA events
13+
unsigned int flags = cudaEventDefault | cudaEventExternal;
14+
auto event1 = at::cuda::CUDAEvent(flags);
15+
auto event2 = at::cuda::CUDAEvent(flags);
16+
// Ensure external CUDAEvent remain valid and functional after being moved.
17+
auto start_event = std::move(event1);
18+
auto end_event = std::move(event2);
19+
20+
auto stream = at::cuda::getStreamFromPool();
21+
at::cuda::setCurrentCUDAStream(stream);
22+
23+
auto graph = at::cuda::CUDAGraph();
24+
graph.capture_begin();
25+
start_event.record();
26+
at::cuda::sleep(100000);
27+
end_event.record();
28+
graph.capture_end();
29+
30+
// External events should correctly record timestamps even when used inside
31+
// CUDA graphs, and elapsed_time() between them should be positive.
32+
stream.synchronize();
33+
graph.replay();
34+
at::cuda::device_synchronize();
35+
EXPECT_TRUE(start_event.elapsed_time(end_event) > 0);
36+
}

0 commit comments

Comments
 (0)