
Commit 205cd61

feature(pu): adapt to npu
1 parent d0b21d0 commit 205cd61

File tree

4 files changed (+219, -12 lines)


ding/policy/base_policy.py

Lines changed: 33 additions & 11 deletions

@@ -9,6 +9,7 @@
 from ding.model import create_model
 from ding.utils import import_module, allreduce, allreduce_with_indicator, broadcast, get_rank, allreduce_async, \
     synchronize, deep_merge_dicts, POLICY_REGISTRY
+from ding.torch_utils import auto_device_init, move_to_device


 class Policy(ABC):

@@ -83,8 +84,12 @@ def default_config(cls: type) -> EasyDict:
     config = dict(
         # (bool) Whether the learning policy is the same as the collecting data policy (on-policy).
         on_policy=False,
-        # (bool) Whether to use cuda in policy.
+        # (bool) Whether to use cuda in policy (deprecated, use 'device' instead).
         cuda=False,
+        # (str) Device to use for policy. Can be 'auto', 'cuda', 'npu', or 'cpu'.
+        # 'auto' will automatically detect NPU > GPU > CPU.
+        # If not specified, will use 'cuda' config for backward compatibility.
+        device='auto',
         # (bool) Whether to use data parallel multi-gpu mode in policy.
         multi_gpu=False,
         # (bool) Whether to synchronize update the model parameters after allreduce the gradients of model parameters.
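
Since Policy.default_config() returns a deep copy of this class-level dict, the new key can be inspected without constructing a policy. A minimal sketch (assuming a DI-engine checkout with this commit applied):

    from ding.policy import Policy

    cfg = Policy.default_config()
    # Both knobs coexist: 'cuda' is kept for backward compatibility,
    # 'device' is the new entry point.
    print(cfg.cuda, cfg.device)  # -> False auto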
@@ -136,25 +141,42 @@ def __init__(
 
         if len(set(self._enable_field).intersection(set(['learn', 'collect', 'eval']))) > 0:
             model = self._create_model(cfg, model)
-            self._cuda = cfg.cuda and torch.cuda.is_available()
+
+            # Device initialization with auto-detection support for NPU/GPU/CPU
+            # Backward compatibility: if 'device' not in cfg, use 'cuda' config
+            if hasattr(cfg, 'device') and cfg.device is not None:
+                # New way: use 'device' config for auto-detection or explicit setting
+                cfg_device = cfg.device
+            else:
+                # Legacy way: convert 'cuda' boolean to device string
+                cfg_device = 'cuda' if (hasattr(cfg, 'cuda') and cfg.cuda) else 'cpu'
+
             # now only support multi-gpu for only enable learn mode
             if len(set(self._enable_field).intersection(set(['learn']))) > 0:
                 multi_gpu = self._cfg.multi_gpu
                 self._rank = get_rank() if multi_gpu else 0
-                if self._cuda:
-                    # model.cuda() is an in-place operation.
-                    model.cuda()
+            else:
+                self._rank = 0
+
+            # Auto-detect or set device
+            self._device_type, self._use_accelerator, self._device = auto_device_init(cfg_device, self._rank)
+
+            # Keep backward compatibility with _cuda attribute
+            self._cuda = self._use_accelerator and self._device_type == 'cuda'
+
+            # Move model to the detected/configured device
+            if self._use_accelerator:
+                move_to_device(model, self._device_type, self._rank)
+
+            # Multi-GPU initialization
+            if len(set(self._enable_field).intersection(set(['learn']))) > 0:
+                multi_gpu = self._cfg.multi_gpu
                 if multi_gpu:
                     bp_update_sync = self._cfg.bp_update_sync
                     self._bp_update_sync = bp_update_sync
                     self._init_multi_gpu_setting(model, bp_update_sync)
-            else:
-                self._rank = 0
-                if self._cuda:
-                    # model.cuda() is an in-place operation.
-                    model.cuda()
+
             self._model = model
-            self._device = 'cuda:{}'.format(self._rank % torch.cuda.device_count()) if self._cuda else 'cpu'
         else:
             self._cuda = False
             self._rank = 0
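
The resolution order above can be exercised on its own: an explicit 'device' entry wins, otherwise the legacy boolean 'cuda' flag is translated. A minimal sketch (the EasyDict configs are made up for illustration; assumes this commit is applied):

    from easydict import EasyDict
    from ding.torch_utils import auto_device_init

    new_cfg = EasyDict(dict(device='auto'))   # new style: 'device' takes precedence
    legacy_cfg = EasyDict(dict(cuda=True))    # old style: only the boolean flag

    for cfg in (new_cfg, legacy_cfg):
        # Mirror the resolution logic from Policy.__init__ above.
        if hasattr(cfg, 'device') and cfg.device is not None:
            cfg_device = cfg.device
        else:
            cfg_device = 'cuda' if (hasattr(cfg, 'cuda') and cfg.cuda) else 'cpu'
        device_type, use_accelerator, device_str = auto_device_init(cfg_device, rank=0)
        print(cfg_device, '->', device_type, use_accelerator, device_str)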

ding/torch_utils/__init__.py

Lines changed: 2 additions & 0 deletions

@@ -12,3 +12,5 @@
 from .dataparallel import DataParallel
 from .reshape_helper import fold_batch, unfold_batch, unsqueeze_repeat
 from .parameter import NonegativeParameter, TanhParameter
+from .device_helper import get_available_device, get_device_count, move_to_device, get_device_string, \
+    auto_device_init, is_npu_available, is_cuda_available
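
With the re-export in place, callers can import the helpers from ding.torch_utils directly instead of reaching into ding.torch_utils.device_helper. A quick check (sketch, assuming the commit is applied):

    from ding.torch_utils import get_available_device, is_cuda_available, is_npu_available

    print('NPU available:', is_npu_available())
    print('CUDA available:', is_cuda_available())
    # Detection priority is NPU > GPU > CPU.
    print('Auto-detected:', get_available_device())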

ding/torch_utils/device_helper.py

Lines changed: 183 additions & 0 deletions

@@ -0,0 +1,183 @@
+"""
+Copyright 2020 Sensetime X-lab. All Rights Reserved.
+
+Device helper utilities for automatic detection of NPU and GPU devices.
+Supports Huawei Ascend NPU (torch_npu) and NVIDIA GPU (torch.cuda).
+"""
+
+import torch
+from typing import Tuple, Optional
+import logging
+
+# Try to import torch_npu for Huawei NPU support
+try:
+    import torch_npu
+    TORCH_NPU_AVAILABLE = True
+except ImportError:
+    TORCH_NPU_AVAILABLE = False
+
+logger = logging.getLogger(__name__)
+
+
+def get_available_device() -> Tuple[str, bool]:
+    """
+    Overview:
+        Automatically detect the available device (NPU or GPU or CPU).
+        Priority: NPU > GPU > CPU
+    Returns:
+        - device_type (:obj:`str`): Device type string, one of 'npu', 'cuda', 'cpu'
+        - is_accelerator (:obj:`bool`): Whether an accelerator (NPU/GPU) is available
+    Examples:
+        >>> device_type, is_accelerator = get_available_device()
+        >>> print(f"Using device: {device_type}")
+    """
+    # Check for NPU first (Huawei Ascend)
+    if TORCH_NPU_AVAILABLE and torch.npu.is_available():
+        npu_count = torch.npu.device_count()
+        logger.info(f"Detected {npu_count} NPU device(s), using NPU")
+        return 'npu', True
+
+    # Check for CUDA GPU
+    if torch.cuda.is_available():
+        gpu_count = torch.cuda.device_count()
+        logger.info(f"Detected {gpu_count} CUDA GPU device(s), using GPU")
+        return 'cuda', True
+
+    # Fallback to CPU
+    logger.info("No NPU or GPU detected, using CPU")
+    return 'cpu', False
+
+
+def get_device_count(device_type: str) -> int:
+    """
+    Overview:
+        Get the number of available devices for the specified device type.
+    Arguments:
+        - device_type (:obj:`str`): Device type, one of 'npu', 'cuda', 'cpu'
+    Returns:
+        - count (:obj:`int`): Number of available devices
+    """
+    if device_type == 'npu' and TORCH_NPU_AVAILABLE:
+        return torch.npu.device_count()
+    elif device_type == 'cuda':
+        return torch.cuda.device_count()
+    else:
+        return 1  # CPU always has 1 "device"
+
+
+def move_to_device(model: torch.nn.Module, device_type: str, rank: int = 0) -> torch.nn.Module:
+    """
+    Overview:
+        Move a PyTorch model to the specified device.
+        Supports NPU, CUDA, and CPU devices.
+    Arguments:
+        - model (:obj:`torch.nn.Module`): The model to move
+        - device_type (:obj:`str`): Device type, one of 'npu', 'cuda', 'cpu'
+        - rank (:obj:`int`): Device rank for multi-device setups
+    Returns:
+        - model (:obj:`torch.nn.Module`): The model moved to the device (in-place operation)
+    """
+    if device_type == 'npu' and TORCH_NPU_AVAILABLE:
+        device_count = torch.npu.device_count()
+        device_id = rank % device_count if device_count > 0 else 0
+        model.npu(device_id)
+        logger.debug(f"Moved model to NPU device {device_id}")
+    elif device_type == 'cuda':
+        device_count = torch.cuda.device_count()
+        device_id = rank % device_count if device_count > 0 else 0
+        model.cuda(device_id)
+        logger.debug(f"Moved model to CUDA device {device_id}")
+    # CPU case: no need to move
+    return model
+
+
+def get_device_string(device_type: str, rank: int = 0) -> str:
+    """
+    Overview:
+        Get the device string for PyTorch tensor operations.
+    Arguments:
+        - device_type (:obj:`str`): Device type, one of 'npu', 'cuda', 'cpu'
+        - rank (:obj:`int`): Device rank for multi-device setups
+    Returns:
+        - device_str (:obj:`str`): Device string like 'npu:0', 'cuda:0', or 'cpu'
+    """
+    if device_type in ['npu', 'cuda']:
+        device_count = get_device_count(device_type)
+        device_id = rank % device_count if device_count > 0 else 0
+        return f'{device_type}:{device_id}'
+    else:
+        return 'cpu'
+
+
+def auto_device_init(cfg_device: Optional[str], rank: int = 0) -> Tuple[str, bool, str]:
+    """
+    Overview:
+        Initialize device settings based on config.
+        Supports automatic detection, explicit device type, or legacy 'cuda' boolean.
+    Arguments:
+        - cfg_device (:obj:`Optional[str]`): Device configuration from config.
+          Can be 'auto', 'npu', 'cuda', 'cpu', or None (defaults to 'auto')
+        - rank (:obj:`int`): Device rank for multi-device setups
+    Returns:
+        - device_type (:obj:`str`): Detected device type ('npu', 'cuda', or 'cpu')
+        - use_accelerator (:obj:`bool`): Whether an accelerator is being used
+        - device_str (:obj:`str`): Full device string for PyTorch operations
+    Examples:
+        >>> device_type, use_accelerator, device_str = auto_device_init('auto')
+        >>> # Returns ('npu', True, 'npu:0') if NPU available
+        >>> # Returns ('cuda', True, 'cuda:0') if GPU available
+        >>> # Returns ('cpu', False, 'cpu') otherwise
+    """
+    # Default to auto detection if not specified
+    if cfg_device is None or cfg_device == 'auto':
+        device_type, use_accelerator = get_available_device()
+    else:
+        # Explicit device type specified
+        device_type = cfg_device.lower()
+
+        # Validate the device type is available
+        if device_type == 'npu':
+            if TORCH_NPU_AVAILABLE and torch.npu.is_available():
+                use_accelerator = True
+                logger.info("Using NPU as explicitly configured")
+            else:
+                logger.warning("NPU requested but not available, falling back to CPU")
+                device_type = 'cpu'
+                use_accelerator = False
+        elif device_type == 'cuda':
+            if torch.cuda.is_available():
+                use_accelerator = True
+                logger.info("Using CUDA GPU as explicitly configured")
+            else:
+                logger.warning("CUDA requested but not available, falling back to CPU")
+                device_type = 'cpu'
+                use_accelerator = False
+        else:
+            # CPU or any other value
+            device_type = 'cpu'
+            use_accelerator = False
+            logger.info("Using CPU as configured")
+
+    device_str = get_device_string(device_type, rank)
+
+    return device_type, use_accelerator, device_str
+
+
+def is_npu_available() -> bool:
+    """
+    Overview:
+        Check if Huawei NPU is available.
+    Returns:
+        - available (:obj:`bool`): True if NPU is available
+    """
+    return TORCH_NPU_AVAILABLE and torch.npu.is_available()
+
+
+def is_cuda_available() -> bool:
+    """
+    Overview:
+        Check if NVIDIA CUDA GPU is available.
+    Returns:
+        - available (:obj:`bool`): True if CUDA is available
+    """
+    return torch.cuda.is_available()
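
Taken together, the helpers cover the whole flow: detect a backend, move the model, and create tensors on a matching device string. A short end-to-end sketch (on a machine without accelerators everything degrades to 'cpu'):

    import torch
    from ding.torch_utils import auto_device_init, move_to_device

    model = torch.nn.Linear(4, 2)
    device_type, use_accelerator, device_str = auto_device_init('auto', rank=0)
    if use_accelerator:
        # In-place move, mirroring what Policy.__init__ now does.
        move_to_device(model, device_type, rank=0)
    # Tensors built from the returned string land on the same device as the model.
    obs = torch.randn(8, 4, device=device_str)
    logits = model(obs)
    print(device_str, logits.shape)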

dizoo/classic_control/cartpole/config/cartpole_ppo_config.py

Lines changed: 1 addition & 1 deletion

@@ -9,7 +9,7 @@
         stop_value=195,
     ),
     policy=dict(
-        cuda=False,
+        device='auto',  # Auto-detect NPU > GPU > CPU
         action_space='discrete',
         model=dict(
             obs_shape=4,
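
'auto' is only one option here: a config can also pin a backend explicitly, in which case auto_device_init warns and falls back to CPU when that backend is absent, while configs that still set the boolean cuda flag resolve through the legacy branch in Policy.__init__. Hypothetical fragments, for illustration only:

    from easydict import EasyDict

    pinned = EasyDict(dict(policy=dict(device='npu')))  # explicit NPU; CPU fallback with a warning if unavailable
    legacy = EasyDict(dict(policy=dict(cuda=True)))     # old-style flag, still honored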
