
Commit fa64d1b

dstaay-fb authored and facebook-github-bot committed
Relax requirement to use CUDA Caching Allocator (#1368)
Summary:
Pull Request resolved: #1368

Long story short: it's OK in practice not to use the CUDA caching allocator; creating RdmaBuffers just takes ~2ms extra. This change opens up that path to reduce adoption friction, while emitting a strong warning.

Reviewed By: zdevito

Differential Revision: D83483138

fbshipit-source-id: e4e6effe2d48e494e1214500a1906d54b3e35af7
1 parent a4da785 commit fa64d1b
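
For reference, the setup that the new warning recommends (taken from the warning text in the diff below) looks like this; the allocator setting must be in place before PyTorch initializes its CUDA allocator:

```python
# Enable expandable segments BEFORE any PyTorch import, so the CUDA
# caching allocator picks the setting up when it initializes.
import os
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

import torch  # must come after setting the env var
```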

File tree

1 file changed (+12 −11 lines):

  • python/monarch/_src/tensor_engine/rdma.py

python/monarch/_src/tensor_engine/rdma.py

Lines changed: 12 additions & 11 deletions
```diff
@@ -158,37 +158,38 @@ def _check_cuda_expandable_segments_enabled() -> bool:
 
     Returns:
         bool: True if expandable segments are enabled, False otherwise
-
-    Raises:
-        RuntimeError: If expandable segments are not enabled but required for RDMA
     """
     try:
         # Use the new Rust utility function that calls the C++ pt_cuda_allocator_compatibility()
         pt_cuda_compat = _RdmaBuffer.pt_cuda_allocator_compatibility()
 
         if not pt_cuda_compat:
-            raise RuntimeError(
+            warnings.warn(
                 "CUDA caching allocator is not using expandable segments.\n"
-                "This is required for RDMA to work correctly with CUDA tensors.\n\n"
+                "This is required to maximize RDMA performance with CUDA tensors.\n\n"
                 "To fix this, set the environment variable BEFORE importing PyTorch:\n"
                 "1. In shell:\n"
                 '   export PYTORCH_CUDA_ALLOC_CONF="expandable_segments:True"\n'
                 "2. Or in Python script (BEFORE any PyTorch imports):\n"
                 "   import os\n"
                 '   os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"\n'
-                "   import torch  # Must come after setting the env var\n\n"
-                "Note: This setting must be configured before PyTorch's CUDA allocator is initialized."
+                "   import torch  # Must come after setting the env var\n\n",
+                UserWarning,
+                stacklevel=2,
             )
+            return False
         return True
 
     except Exception as e:
-        logging.error(f"Failed to check CUDA allocator configuration: {e}")
-        raise RuntimeError(
+        warnings.warn(
             "Unable to verify CUDA allocator configuration.\n"
-            "Please ensure expandable segments are enabled:\n"
+            "Please ensure expandable segments are enabled for best RDMA performance with CUDA tensors:\n"
             '   export PYTORCH_CUDA_ALLOC_CONF="expandable_segments:True"\n'
-            "Set this environment variable before importing PyTorch."
+            "Set this environment variable before importing PyTorch.",
+            UserWarning,
+            stacklevel=2,
         )
+        return False
 
 
 class RDMABuffer:
```
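
A minimal sketch of the caller-visible change: the check now emits a UserWarning and returns False instead of raising RuntimeError, so RDMA setup can proceed (at the cost of roughly 2ms extra per RdmaBuffer creation, per the summary). The direct import of the private helper `_check_cuda_expandable_segments_enabled` is for illustration only:

```python
import warnings

from monarch._src.tensor_engine.rdma import _check_cuda_expandable_segments_enabled

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    ok = _check_cuda_expandable_segments_enabled()

if not ok:
    # Before this commit: RuntimeError raised here. After: a UserWarning
    # was recorded and RdmaBuffers can still be created, just a bit slower.
    print("Proceeding without expandable segments:", caught[0].message)
```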
