
Commit 7bc36c2

pytorchmergebot authored and amathewc committed
Revert "[ROCm] enable HIPMallocAsyncAllocator (pytorch#149145)"
This reverts commit ee1a2b7. Reverted pytorch#149145 on behalf of https://github.com/izaitsevfb due to breaking internal builds ([comment](pytorch#149145 (comment)))
1 parent 888bb4c commit 7bc36c2

6 files changed: +8 −80 lines

c10/cuda/CUDAAllocatorConfig.cpp

Lines changed: 3 additions & 35 deletions
@@ -220,47 +220,15 @@ size_t CUDAAllocatorConfig::parseAllocatorConfig(
     const std::vector<std::string>& config,
     size_t i,
     bool& used_cudaMallocAsync) {
-  // For ease of maintenance and understanding, the CUDA and ROCm
-  // implementations of this function are separated. This avoids having many
-  // #ifdef's throughout.
-#ifdef USE_ROCM
-  // Ease burden on ROCm users by allowing either cuda or hip tokens.
-  // cuda token is broken up to prevent hipify matching it.
-#define PYTORCH_TOKEN1 \
-  "cud" \
-  "aMallocAsync"
-#define PYTORCH_TOKEN2 "hipMallocAsync"
-  consumeToken(config, ++i, ':');
-  if (++i < config.size()) {
-    TORCH_CHECK(
-        ((config[i] == "native") || (config[i] == PYTORCH_TOKEN1) ||
-         (config[i] == PYTORCH_TOKEN2)),
-        "Unknown allocator backend, "
-        "options are native, " PYTORCH_TOKEN1 ", and " PYTORCH_TOKEN2);
-    used_cudaMallocAsync =
-        (config[i] == PYTORCH_TOKEN1 || config[i] == PYTORCH_TOKEN2);
-    TORCH_INTERNAL_ASSERT(
-        config[i] == get()->name() ||
-            (config[i] == PYTORCH_TOKEN1 && get()->name() == PYTORCH_TOKEN2),
-        "Allocator backend parsed at runtime != "
-        "allocator backend parsed at load time, ",
-        config[i],
-        " != ",
-        get()->name());
-  } else {
-    TORCH_CHECK(false, "Error parsing backend value", "");
-  }
-  return i;
-#undef PYTORCH_TOKEN1
-#undef PYTORCH_TOKEN2
-#else // USE_ROCM
   consumeToken(config, ++i, ':');
   if (++i < config.size()) {
     TORCH_CHECK(
         ((config[i] == "native") || (config[i] == "cudaMallocAsync")),
         "Unknown allocator backend, "
         "options are native and cudaMallocAsync");
     used_cudaMallocAsync = (config[i] == "cudaMallocAsync");
+#ifndef USE_ROCM
+    // HIP supports hipMallocAsync and does not need to check versions
     if (used_cudaMallocAsync) {
 #if CUDA_VERSION >= 11040
       int version = 0;
@@ -278,6 +246,7 @@ size_t CUDAAllocatorConfig::parseAllocatorConfig(
           CUDA_VERSION);
 #endif
     }
+#endif
     TORCH_INTERNAL_ASSERT(
         config[i] == get()->name(),
         "Allocator backend parsed at runtime != "
@@ -286,7 +255,6 @@ size_t CUDAAllocatorConfig::parseAllocatorConfig(
     TORCH_CHECK(false, "Error parsing backend value", "");
  }
   return i;
-#endif // USE_ROCM
 }
 
 void CUDAAllocatorConfig::parseArgs(const char* env) {
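
For context, the backend token parsed here comes from the documented PYTORCH_CUDA_ALLOC_CONF environment variable, which must be set before the allocator is initialized. A minimal usage sketch (assumes a CUDA build of PyTorch):

import os

# Must be set before torch is imported, since the backend is chosen
# when the CUDA allocator is statically initialized.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "backend:cudaMallocAsync"

import torch

# Returns "native" or "cudaMallocAsync", matching the token parsed above.
print(torch.cuda.get_allocator_backend())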

c10/cuda/CUDAAllocatorConfig.h

Lines changed: 0 additions & 6 deletions
@@ -81,12 +81,6 @@ class C10_CUDA_API CUDAAllocatorConfig {
   static CUDAAllocatorConfig* s_instance = ([]() {
     auto inst = new CUDAAllocatorConfig();
     const char* env = getenv("PYTORCH_CUDA_ALLOC_CONF");
-#ifdef USE_ROCM
-    // convenience for ROCm users, allow alternative HIP token
-    if (!env) {
-      env = getenv("PYTORCH_HIP_ALLOC_CONF");
-    }
-#endif
     inst->parseArgs(env);
     return inst;
   })();
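
With the fallback removed, only PYTORCH_CUDA_ALLOC_CONF is consulted at load time, even on ROCm builds. A hypothetical check of the post-revert behavior (assumes a ROCm build; the variable names come from this diff):

import os

os.environ.pop("PYTORCH_CUDA_ALLOC_CONF", None)
# After this revert, the HIP-named variable is no longer read as a fallback.
os.environ["PYTORCH_HIP_ALLOC_CONF"] = "backend:hipMallocAsync"

import torch

# Expected to print the default backend ("native"), since the HIP-named
# variable above is ignored.
print(torch.cuda.get_allocator_backend())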

c10/cuda/CUDACachingAllocator.cpp

Lines changed: 1 addition & 15 deletions
@@ -3955,13 +3955,7 @@ struct BackendStaticInitializer {
   // version checks, to CUDAAllocatorConfig's runtime doublecheck. If this
   // works, maybe we should move all of CUDAAllocatorConfig here?
   CUDAAllocator* parseEnvForBackend() {
-    auto val = c10::utils::get_env("PYTORCH_CUDA_ALLOC_CONF");
-#ifdef USE_ROCM
-    // convenience for ROCm users to allow either CUDA or HIP env var
-    if (!val.has_value()) {
-      val = c10::utils::get_env("PYTORCH_HIP_ALLOC_CONF");
-    }
-#endif
+    const auto val = c10::utils::get_env("PYTORCH_CUDA_ALLOC_CONF");
     if (val.has_value()) {
       const std::string& config = val.value();
 
@@ -3977,15 +3971,7 @@ struct BackendStaticInitializer {
         std::vector<std::string> kv(it2, end2);
         if (kv.size() >= 2) {
           if (kv[0] == "backend") {
-#ifdef USE_ROCM
-            // convenience for ROCm users to allow either CUDA or HIP env var
-            if (kv[1] ==
-                    "cud"
-                    "aMallocAsync" ||
-                kv[1] == "hipMallocAsync")
-#else
             if (kv[1] == "cudaMallocAsync")
-#endif
               return CudaMallocAsync::allocator();
             if (kv[1] == "native")
               return &Native::allocator;
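
The surrounding code splits the config string into comma-separated options and each option into a colon-separated key:value pair before matching the backend value. A rough Python sketch of that parsing scheme (illustrative only; the real parser is the C++ above):

def parse_backend(conf: str) -> str:
    # Options are comma-separated; each option is a key:value pair.
    for option in conf.split(","):
        kv = option.split(":")
        if len(kv) >= 2 and kv[0] == "backend":
            return kv[1]
    return "native"  # default backend when none is requested

# expandable_segments is another documented PYTORCH_CUDA_ALLOC_CONF option.
assert parse_backend("expandable_segments:True,backend:cudaMallocAsync") == "cudaMallocAsync"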

c10/cuda/CUDAMallocAsyncAllocator.cpp

Lines changed: 4 additions & 4 deletions
@@ -14,7 +14,7 @@ namespace c10::cuda::CUDACachingAllocator::CudaMallocAsync {
 using namespace c10::CachingAllocator;
 using namespace c10::CachingDeviceAllocator;
 
-#if CUDA_VERSION >= 11040 || defined(USE_ROCM)
+#if CUDA_VERSION >= 11040
 // CUDA device allocator that uses cudaMallocAsync to implement
 // the same interface as CUDACachingAllocator.cpp.
 
@@ -504,9 +504,9 @@ struct CudaMallocAsyncAllocator : public CUDAAllocator {
       CUDAGuard g(static_cast<c10::DeviceIndex>(dev));
 
       cudaMemPool_t mempool = nullptr;
-      C10_CUDA_CHECK(cudaDeviceGetDefaultMemPool(&mempool, dev));
-      C10_CUDA_CHECK(cudaDeviceSynchronize());
-      C10_CUDA_CHECK(cudaMemPoolTrimTo(mempool, 0));
+      cudaDeviceGetDefaultMemPool(&mempool, dev);
+      cudaDeviceSynchronize();
+      cudaMemPoolTrimTo(mempool, 0);
     }
   }
 }
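
Note that the revert also drops the C10_CUDA_CHECK wrappers, so errors from this per-device mempool trim loop are silently ignored again. The loop appears to back the allocator's cache-release path, which from Python would typically be reached as in this sketch (assumes the cudaMallocAsync backend is active and a CUDA device is present):

import os
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "backend:cudaMallocAsync"
import torch

x = torch.empty(1024, 1024, device="cuda")
del x
# With the async backend, releasing cached memory goes through
# cudaMemPoolTrimTo; after this revert, failures there are unchecked.
torch.cuda.empty_cache()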

torch/utils/collect_env.py

Lines changed: 0 additions & 2 deletions
@@ -456,8 +456,6 @@ def get_pip_packages(run_lambda, patterns=None):
 
 def get_cachingallocator_config():
     ca_config = os.environ.get('PYTORCH_CUDA_ALLOC_CONF', '')
-    if not ca_config:
-        ca_config = os.environ.get('PYTORCH_HIP_ALLOC_CONF', '')
     return ca_config
 
 
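
After this change, the environment report covers only the CUDA-named variable. The function is module-level in torch.utils.collect_env, so it can be exercised directly, or through the usual python -m torch.utils.collect_env invocation:

from torch.utils.collect_env import get_cachingallocator_config

# Reports PYTORCH_CUDA_ALLOC_CONF only; a PYTORCH_HIP_ALLOC_CONF value
# set on a ROCm system no longer appears in the report.
print(get_cachingallocator_config())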

torch/utils/hipify/cuda_to_hip_mappings.py

Lines changed: 0 additions & 18 deletions
@@ -4051,23 +4051,6 @@
         ("hipMemset3DAsync", CONV_MEM, API_RUNTIME, HIP_UNSUPPORTED),
     ),
     ("cudaMemGetInfo", ("hipMemGetInfo", CONV_MEM, API_RUNTIME)),
-    ("cudaDeviceGetDefaultMemPool", ("hipDeviceGetDefaultMemPool", CONV_MEM, API_RUNTIME)),
-    ("cudaMemAccessDesc", ("hipMemAccessDesc", CONV_MEM, API_RUNTIME)),
-    ("cudaMemAccessFlagsProtReadWrite", ("hipMemAccessFlagsProtReadWrite", CONV_MEM, API_RUNTIME)),
-    ("cudaMemLocationTypeDevice", ("hipMemLocationTypeDevice", CONV_MEM, API_RUNTIME)),
-    ("cudaMemPoolAttrReleaseThreshold", ("hipMemPoolAttrReleaseThreshold", CONV_MEM, API_RUNTIME)),
-    ("cudaMemPoolAttrReservedMemCurrent", ("hipMemPoolAttrReservedMemCurrent", CONV_MEM, API_RUNTIME)),
-    ("cudaMemPoolAttrReservedMemHigh", ("hipMemPoolAttrReservedMemHigh", CONV_MEM, API_RUNTIME)),
-    ("cudaMemPoolAttrUsedMemCurrent", ("hipMemPoolAttrUsedMemCurrent", CONV_MEM, API_RUNTIME)),
-    ("cudaMemPoolAttrUsedMemHigh", ("hipMemPoolAttrUsedMemHigh", CONV_MEM, API_RUNTIME)),
-    ("cudaMemPoolGetAttribute", ("hipMemPoolGetAttribute", CONV_MEM, API_RUNTIME)),
-    ("cudaMemPoolReuseAllowInternalDependencies", ("hipMemPoolReuseAllowInternalDependencies", CONV_MEM, API_RUNTIME)),
-    ("cudaMemPoolReuseAllowOpportunistic", ("hipMemPoolReuseAllowOpportunistic", CONV_MEM, API_RUNTIME)),
-    ("cudaMemPoolReuseFollowEventDependencies", ("hipMemPoolReuseFollowEventDependencies", CONV_MEM, API_RUNTIME)),
-    ("cudaMemPoolSetAccess", ("hipMemPoolSetAccess", CONV_MEM, API_RUNTIME)),
-    ("cudaMemPoolSetAttribute", ("hipMemPoolSetAttribute", CONV_MEM, API_RUNTIME)),
-    ("cudaMemPoolTrimTo", ("hipMemPoolTrimTo", CONV_MEM, API_RUNTIME)),
-    ("cudaMemPool_t", ("hipMemPool_t", CONV_MEM, API_RUNTIME)),
     (
         "cudaArrayGetInfo",
         ("hipArrayGetInfo", CONV_MEM, API_RUNTIME, HIP_UNSUPPORTED),
@@ -8608,7 +8591,6 @@
 CAFFE2_SPECIFIC_MAPPINGS = collections.OrderedDict(
     [
         ("PYTORCH_NO_CUDA_MEMORY_CACHING", ("PYTORCH_NO_CUDA_MEMORY_CACHING", API_CAFFE2)),
-        ("PYTORCH_CUDA_ALLOC_CONF", ("PYTORCH_CUDA_ALLOC_CONF", API_CAFFE2)),
         ("cuda_stream", ("hip_stream", API_CAFFE2)),
         # if the header is a native hip folder (under hip directory),
         # there is no need to add a hip path to it; the trie in hipify script
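
These tuples drive hipify's token-for-token rewriting of CUDA sources for ROCm; with the cudaMemPool* entries removed, those identifiers now pass through hipify untranslated. A simplified sketch of how such a mapping table is applied (the real tool matches tokens via a trie, as the comment above notes; this is illustrative only):

# Simplified, illustrative model of hipify-style token replacement.
MAPPINGS = {
    "cudaMemGetInfo": "hipMemGetInfo",
    # The cudaMemPool* entries deleted by this commit are absent, so
    # e.g. cudaMemPoolTrimTo would now be left unchanged.
}

def hipify_line(line: str) -> str:
    for cuda_token, hip_token in MAPPINGS.items():
        line = line.replace(cuda_token, hip_token)
    return line

print(hipify_line("cudaMemGetInfo(&free_mem, &total_mem);"))  # hipMemGetInfo(...)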
