Commit 17b25ad

device type helper

1 parent c7e59dd

3 files changed: 48 additions, 3 deletions

src/accelerate/launchers.py

Lines changed: 3 additions & 2 deletions
```diff
@@ -24,6 +24,7 @@
     PrepareForLaunch,
     are_libraries_initialized,
     check_cuda_p2p_ib_support,
+    get_current_device_type,
     get_gpu_info,
     is_mps_available,
     is_torch_version,
@@ -203,8 +204,8 @@ def train(*args):
         # process here (the other ones will be set be the launcher).
         with patch_environment(**patched_env):
             # First dummy launch
-            device_type = torch.accelerator.current_accelerator().type if hasattr(torch, "accelerator") else "cuda"
-            distributed_type = "MULTI_XPU" if device_type == "xpu" else "MULTI_GPU"
+            # Determine device type without initializing any device (which would break fork)
+            device_type, distributed_type = get_current_device_type()
             if os.environ.get("ACCELERATE_DEBUG_MODE", "false").lower() == "true":
                 launcher = PrepareForLaunch(test_launch, distributed_type=distributed_type)
             try:
```
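The two deleted lines show why the helper exists: `torch.accelerator.current_accelerator()` can initialize the accelerator in the parent process, which breaks fork-based launching, as the new comment notes. A minimal repro of that failure mode on a CUDA machine (illustrative sketch, not part of this commit):

```python
# Illustrative sketch, not from this commit: a CUDA context created in the
# parent process is unusable in fork()ed children.
import torch
import torch.multiprocessing as mp


def worker(rank):
    # Raises "Cannot re-initialize CUDA in forked subprocess" because the
    # parent already holds a CUDA context.
    print(rank, torch.ones(1, device="cuda"))


if __name__ == "__main__":
    torch.ones(1, device="cuda")  # initializes CUDA in the parent process
    mp.start_processes(worker, nprocs=2, start_method="fork")
```

The replacement calls `get_current_device_type()`, which relies only on availability checks and so leaves the device uninitialized until the workers have forked.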

src/accelerate/utils/__init__.py

Lines changed: 1 addition & 0 deletions
```diff
@@ -75,6 +75,7 @@
     clear_environment,
     convert_dict_to_env_variables,
     get_cpu_distributed_information,
+    get_current_device_type,
     get_gpu_info,
     get_int_from_env,
     parse_choice_from_env,
```

src/accelerate/utils/environment.py

Lines changed: 44 additions & 1 deletion
````diff
@@ -98,6 +98,49 @@ def are_libraries_initialized(*library_names: str) -> list[str]:
     return [lib_name for lib_name in library_names if lib_name in sys.modules.keys()]
 
 
+def get_current_device_type() -> tuple[str, str]:
+    """
+    Determines the current device type and distributed type without initializing any device.
+
+    This is particularly important when using fork-based multiprocessing, as device initialization
+    before forking can cause errors.
+
+    The device detection order follows the same priority as state.py:_prepare_backend():
+    MLU -> SDAA -> MUSA -> NPU -> HPU -> CUDA -> XPU
+
+    Returns:
+        tuple[str, str]: A tuple of (device_type, distributed_type)
+            - device_type: The device string (e.g., "cuda", "npu", "xpu")
+            - distributed_type: The distributed type string (e.g., "MULTI_GPU", "MULTI_NPU")
+
+    Example:
+    ```python
+    >>> device_type, distributed_type = get_current_device_type()
+    >>> print(device_type)  # "cuda"
+    >>> print(distributed_type)  # "MULTI_GPU"
+    ```
+    """
+    from .imports import is_hpu_available, is_mlu_available, is_musa_available, is_npu_available, is_sdaa_available, is_xpu_available
+
+    if is_mlu_available():
+        return "mlu", "MULTI_MLU"
+    elif is_sdaa_available():
+        return "sdaa", "MULTI_SDAA"
+    elif is_musa_available():
+        return "musa", "MULTI_MUSA"
+    elif is_npu_available():
+        return "npu", "MULTI_NPU"
+    elif is_hpu_available():
+        return "hpu", "MULTI_HPU"
+    elif torch.cuda.is_available():
+        return "cuda", "MULTI_GPU"
+    elif is_xpu_available():
+        return "xpu", "MULTI_XPU"
+    else:
+        # Default to CUDA even if not available (for CPU-only scenarios where CUDA code paths are still used)
+        return "cuda", "MULTI_GPU"
+
+
 def _nvidia_smi():
     """
     Returns the right nvidia-smi command based on the system.
````
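Since `get_current_device_type` is re-exported from `accelerate.utils` (the `__init__.py` hunk above), callers can use it directly; a quick sketch of the expected behavior:

```python
from accelerate.utils import get_current_device_type

# Walks the MLU -> SDAA -> MUSA -> NPU -> HPU -> CUDA -> XPU priority chain;
# on a CPU-only machine it falls back to ("cuda", "MULTI_GPU").
device_type, distributed_type = get_current_device_type()
print(device_type, distributed_type)  # "cuda MULTI_GPU" when CUDA is available
```

The file's second hunk, below, updates the install hint raised by the NUMA-affinity helper.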
```diff
@@ -248,7 +291,7 @@ def override_numa_affinity(local_process_index: int, verbose: Optional[bool] = None):
 
     if not is_pynvml_available():
         raise ImportError(
-            "To set CPU affinity on CUDA GPUs the `pynvml` package must be available. (`pip install pynvml`)"
+            "To set CPU affinity on CUDA GPUs the `nvidia-ml-py` package must be available. (`pip install nvidia-ml-py`)"
         )
     import pynvml as nvml
 
```
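The message change tracks the PyPI packaging situation: `nvidia-ml-py` is NVIDIA's maintained distribution of the NVML bindings, and it still installs the `pynvml` module, so the `import pynvml as nvml` on the following line keeps working. A small sanity check (assumed environment, not part of this commit):

```python
# After `pip install nvidia-ml-py`, the import name is still `pynvml`.
import pynvml as nvml

nvml.nvmlInit()
print("GPUs visible to NVML:", nvml.nvmlDeviceGetCount())
nvml.nvmlShutdown()
```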

0 commit comments