Skip to content

Commit 5277276

Browse files
yushangdi
authored and pytorchmergebot committed
Change AOTI_RUNTIME_DEVICE_CHECK to be device-specific (pytorch#157818)
Summary: Change AOTI_RUNTIME_DEVICE_CHECK to the following depending on device: AOTI_RUNTIME_CUDA_CHECK, AOTI_RUNTIME_XPU_CHECK, AOTI_RUNTIME_CPU_CHECK. Currently in the codebase, only `AOTI_RUNTIME_CUDA_CHECK` is used. This shouldn't change anything as of now, but we do this to prepare for simultaneously loading multiple backends (e.g., CPU and CUDA) in AOTI standalone. We don't want people writing `AOTI_RUNTIME_DEVICE_CHECK` for both CPU and CUDA checks. This could cause compilation problems when we statically link both CPU and CUDA models. Test Plan: CI Rollback Plan: Reviewed By: muchulee8 Differential Revision: D77742977 Pull Request resolved: pytorch#157818 Approved by: https://github.com/jingsh
1 parent c547786 commit 5277276

File tree

3 files changed

+14
-14
lines changed

3 files changed

+14
-14
lines changed

torch/csrc/inductor/aoti_runtime/device_utils.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
#include <cuda.h>
1515
#include <cuda_runtime_api.h>
1616

17-
#define AOTI_RUNTIME_DEVICE_CHECK(EXPR) \
17+
#define AOTI_RUNTIME_CUDA_CHECK(EXPR) \
1818
do { \
1919
const cudaError_t code = EXPR; \
2020
const char* msg = cudaGetErrorString(code); \
@@ -34,7 +34,7 @@ using DeviceStreamType = cudaStream_t;
3434
#include <level_zero/ze_api.h>
3535
#include <sycl/sycl.hpp>
3636
#include <sstream>
37-
#define AOTI_RUNTIME_DEVICE_CHECK(EXPR) \
37+
#define AOTI_RUNTIME_XPU_CHECK(EXPR) \
3838
do { \
3939
const ze_result_t status = EXPR; \
4040
if (status != ZE_RESULT_SUCCESS) { \
@@ -52,7 +52,7 @@ using DeviceStreamType = sycl::queue*;
5252

5353
#else
5454

55-
#define AOTI_RUNTIME_DEVICE_CHECK(EXPR) \
55+
#define AOTI_RUNTIME_CPU_CHECK(EXPR) \
5656
bool ok = EXPR; \
5757
if (!ok) { \
5858
throw std::runtime_error("CPU runtime error"); \

torch/csrc/inductor/aoti_runtime/model_base.h

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -63,8 +63,8 @@ using RAIIDataPtr = std::unique_ptr<void, std::function<void(void*)>>;
6363
// NOLINTNEXTLINE(clang-diagnostic-unneeded-internal-declaration)
6464
RAIIDataPtr RAII_gpuMalloc(size_t num_bytes) {
6565
void* data_ptr = nullptr;
66-
AOTI_RUNTIME_DEVICE_CHECK(cudaMalloc((void**)&data_ptr, num_bytes));
67-
auto deleter = [](void* ptr) { AOTI_RUNTIME_DEVICE_CHECK(cudaFree(ptr)); };
66+
AOTI_RUNTIME_CUDA_CHECK(cudaMalloc((void**)&data_ptr, num_bytes));
67+
auto deleter = [](void* ptr) { AOTI_RUNTIME_CUDA_CHECK(cudaFree(ptr)); };
6868
return RAIIDataPtr(data_ptr, deleter);
6969
}
7070

@@ -165,10 +165,10 @@ class AOTInductorModelBase {
165165

166166
#ifdef USE_CUDA
167167
if (device_idx_ == -1) {
168-
AOTI_RUNTIME_DEVICE_CHECK(cudaGetDevice(&device_idx_));
168+
AOTI_RUNTIME_CUDA_CHECK(cudaGetDevice(&device_idx_));
169169
} else {
170170
// If device_idx_ is passed in, we need to set the current device to it
171-
AOTI_RUNTIME_DEVICE_CHECK(cudaSetDevice(device_idx_));
171+
AOTI_RUNTIME_CUDA_CHECK(cudaSetDevice(device_idx_));
172172
}
173173
#endif // USE_CUDA
174174
#ifdef USE_XPU
@@ -222,7 +222,7 @@ class AOTInductorModelBase {
222222
#ifdef USE_CUDA
223223
if (!run_finished_) {
224224
cudaEvent_t run_finished = nullptr;
225-
AOTI_RUNTIME_DEVICE_CHECK(cudaEventCreate(&run_finished));
225+
AOTI_RUNTIME_CUDA_CHECK(cudaEventCreate(&run_finished));
226226
run_finished_.emplace(run_finished);
227227
}
228228
#elif defined(USE_XPU)
@@ -239,7 +239,7 @@ class AOTInductorModelBase {
239239
model->run_impl(input_handles, output_handles, stream, proxy_executor);
240240

241241
#ifdef USE_CUDA
242-
AOTI_RUNTIME_DEVICE_CHECK(cudaEventRecord(*run_finished_, stream));
242+
AOTI_RUNTIME_CUDA_CHECK(cudaEventRecord(*run_finished_, stream));
243243
#elif defined(USE_XPU)
244244
run_finished_ = std::make_optional<sycl::event*>(new sycl::event(
245245
static_cast<sycl::queue*>(stream)->ext_oneapi_submit_barrier()));
@@ -273,7 +273,7 @@ class AOTInductorModelBase {
273273
#ifdef USE_CUDA
274274
if (!run_finished_) {
275275
cudaEvent_t run_finished = nullptr;
276-
AOTI_RUNTIME_DEVICE_CHECK(cudaEventCreate(&run_finished));
276+
AOTI_RUNTIME_CUDA_CHECK(cudaEventCreate(&run_finished));
277277
run_finished_.emplace(run_finished);
278278
}
279279
#elif defined(USE_XPU)
@@ -291,7 +291,7 @@ class AOTInductorModelBase {
291291
model->const_run_impl(stream, proxy_executor, initialization);
292292

293293
#ifdef USE_CUDA
294-
AOTI_RUNTIME_DEVICE_CHECK(cudaEventRecord(*run_finished_, stream));
294+
AOTI_RUNTIME_CUDA_CHECK(cudaEventRecord(*run_finished_, stream));
295295
#elif defined(USE_XPU)
296296
// sycl::queue* queue_ptr = nullptr;
297297
// aoti_torch_get_current_sycl_queue((void**)&queue_ptr);
@@ -408,7 +408,7 @@ class AOTInductorModelBase {
408408
->memcpy(internal_ptr, _get_constants_start() + bytes_read, data_size)
409409
.wait();
410410
#elif USE_CUDA
411-
AOTI_RUNTIME_DEVICE_CHECK(cudaMemcpy(
411+
AOTI_RUNTIME_CUDA_CHECK(cudaMemcpy(
412412
internal_ptr,
413413
_get_constants_start() + bytes_read,
414414
data_size,
@@ -613,7 +613,7 @@ class AOTInductorModelBase {
613613
throw std::runtime_error{"Model event was not initialized"};
614614
}
615615

616-
AOTI_RUNTIME_DEVICE_CHECK(cudaEventSynchronize(*run_finished_));
616+
AOTI_RUNTIME_CUDA_CHECK(cudaEventSynchronize(*run_finished_));
617617
#endif // USE_CUDA
618618
#ifdef USE_XPU
619619
if (!run_finished_) {

torch/csrc/inductor/aoti_runtime/model_container.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -476,7 +476,7 @@ class AOTInductorModelContainer {
476476
->memcpy(internal_constants_ptr, user_constant_ptr, constant_size)
477477
.wait();
478478
#elif USE_CUDA
479-
AOTI_RUNTIME_DEVICE_CHECK(cudaMemcpy(
479+
AOTI_RUNTIME_CUDA_CHECK(cudaMemcpy(
480480
internal_constants_ptr,
481481
user_constant_ptr,
482482
constant_size,

0 commit comments

Comments
 (0)