Pass the current device id to cuda() calls (replace bare .cuda() with .cuda(get_current_device_id()))

jayfeather9 · jayfeather9 · commit 64ccb4feaddd · 2025-02-16T15:59:38.000+08:00
diff --git a/lightllm/common/basemodel/layer_weights/meta_weights/mm_weight.py b/lightllm/common/basemodel/layer_weights/meta_weights/mm_weight.py
@@ -76,7 +76,7 @@ def _post_load_weights(self) -> None:
                     if self.weight_scale.ndim > 1:
                         self.weight_scale = self.weight_scale.transpose(0, 1).cuda(get_current_device_id())
                     self.weight = [
-                        self.weight.transpose(0, 1).cuda(),
+                        self.weight.transpose(0, 1).cuda(get_current_device_id()),
                         self.weight_scale,
                         self.input_scale,
                     ]
@@ -151,7 +151,7 @@ def load_hf_weights(self, weights: Dict[str, torch.Tensor]) -> None:
 
         if self.act_scale_name is not None and self.act_scale_name in weights:
             input_scale = weights[self.act_scale_name].to(torch.float)
-            self.input_scale = input_scale.cuda()
+            self.input_scale = input_scale.cuda(get_current_device_id())
 
         if weight is None and weight_scale is None and input_scale is None:
             return
@@ -213,7 +213,7 @@ def load_hf_weights(self, weights: Dict[str, torch.Tensor]) -> None:
 
         if self.static_activation and self.act_scale_name in weights:
             input_scale = weights[self.act_scale_name].to(torch.float)
-            self.input_scale = input_scale.cuda()
+            self.input_scale = input_scale.cuda(get_current_device_id())
 
         if weight is None and weight_scale is None and input_scale is None:
             return
@@ -291,13 +291,13 @@ def _fuse(self) -> None:
             delattr(self, "weights")
 
         if self.weight_scale is None and (None not in self.weight_scales):
-            self.weight_scale = torch.cat(self.weight_scales, dim=0).cuda()
+            self.weight_scale = torch.cat(self.weight_scales, dim=0).cuda(get_current_device_id())
             self._post_load_weights()
             delattr(self, "weight_scales")
 
         if self.static_activation and self.input_scale is None and (None not in self.input_scales):
             input_scales = torch.stack(self.input_scales, dim=0)
-            self.input_scale = torch.max(input_scales).cuda()
+            self.input_scale = torch.max(input_scales).cuda(get_current_device_id())
             self._post_load_weights()
             delattr(self, "input_scales")
 
@@ -528,7 +528,7 @@ def load_hf_weights(self, weights: Dict[str, torch.Tensor]) -> None:
 
         if self.act_scale_name is not None and self.act_scale_name in weights:
             input_scale = weights[self.act_scale_name].to(torch.float)
-            self.input_scale = input_scale.cuda()
+            self.input_scale = input_scale.cuda(get_current_device_id())
 
         if weight is None and weight_scale is None and input_scale is None:
             return