diff --git a/swift/llm/model/utils.py b/swift/llm/model/utils.py
index 2eceef8c0c..8e89afa281 100644
--- a/swift/llm/model/utils.py
+++ b/swift/llm/model/utils.py
@@ -254,22 +254,12 @@ def safe_snapshot_download(model_id_or_path: str,
     hub = get_hub(use_hf)
     if model_id_or_path.startswith('~'):
         model_id_or_path = os.path.abspath(os.path.expanduser(model_id_or_path))
-    with safe_ddp_context(hash_id=model_id_or_path):
-        if os.path.exists(model_id_or_path):
-            model_dir = model_id_or_path
-            sub_folder = None
-        else:
-            if model_id_or_path.startswith('/'):  # startswith
-                raise ValueError(f"path: '{model_id_or_path}' not found")
-            model_id_or_path = model_id_or_path.split(':', 1)  # get sub_folder
-            if len(model_id_or_path) == 1:
-                model_id_or_path = [model_id_or_path[0], None]
-            model_id_or_path, sub_folder = model_id_or_path
-            if sub_folder is not None:
-                kwargs['allow_patterns'] = [f"{sub_folder.rstrip('/')}/*"]
-            model_dir = hub.download_model(model_id_or_path, revision, ignore_patterns, token=hub_token, **kwargs)
-
-        logger.info(f'Loading the model using model_dir: {model_dir}')
+
+    if os.path.exists(model_id_or_path):
+        model_dir = model_id_or_path
+        sub_folder = None
+    else:
+        raise SystemError(f'Model path {model_id_or_path} does not exist.')
 
     model_dir = os.path.abspath(os.path.expanduser(model_dir))
     if sub_folder:
diff --git a/swift/llm/template/template/qwen.py b/swift/llm/template/template/qwen.py
index 73fb0ce420..2884cb784c 100644
--- a/swift/llm/template/template/qwen.py
+++ b/swift/llm/template/template/qwen.py
@@ -211,6 +211,7 @@ def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]:
         processor = self.processor
         input_ids = encoded['input_ids']
         labels = encoded['labels']
+        loss_scale = encoded.get('loss_scale', None)
         images = inputs.images
         videos = inputs.videos
         for media_type in ['images', 'videos']:
@@ -232,6 +233,7 @@ def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]:
                         ] * len(media_grid_thw)
                 idx_list = findall(input_ids, media_token)
                 added_tokens_len = 0
+
                 for i, idx in enumerate(idx_list):
                     merge_length = processor.image_processor.merge_size**2
                     token_len = (media_grid_thw[i].prod() // merge_length)
@@ -241,11 +243,19 @@ def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]:
                     if labels:
                         labels = labels[:idx + added_tokens_len] + [-100] * token_len + labels[added_tokens_len + idx
                                                                                               + 1:]
+                    if loss_scale:
+                        scale_idx = loss_scale[idx+added_tokens_len]
+                        loss_scale = loss_scale[:idx + added_tokens_len] + \
+                            [scale_idx] * token_len + \
+                            loss_scale[added_tokens_len + idx + 1:]
+
                     added_tokens_len += token_len - 1
                 encoded.update(media_inputs)
 
         encoded['input_ids'] = input_ids
         encoded['labels'] = labels
+        if loss_scale:
+            encoded['loss_scale'] = loss_scale
         return encoded
 
     def _post_encode(self, model, inputs: Dict[str, Any]) -> Dict[str, Any]:
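
Not part of the patch: a minimal, standalone sketch of the alignment the qwen.py hunks maintain when one media placeholder token is expanded into `token_len` tokens, with `labels` padded by -100 and `loss_scale` repeating the placeholder's scale. The function name, token ids, and scale values below are hypothetical illustrations, and the `added_tokens_len` bookkeeping that the real loop uses across multiple placeholders is omitted.

# Sketch only; assumes one placeholder per sequence.
from typing import List, Optional


def expand_media_token(input_ids: List[int],
                       labels: Optional[List[int]],
                       loss_scale: Optional[List[float]],
                       idx: int,
                       media_token: int,
                       token_len: int):
    # Replace the single placeholder at `idx` with `token_len` repeated media tokens.
    input_ids = input_ids[:idx] + [media_token] * token_len + input_ids[idx + 1:]
    if labels:
        # Media tokens never contribute to the loss, mirroring the -100 padding in the patch.
        labels = labels[:idx] + [-100] * token_len + labels[idx + 1:]
    if loss_scale:
        # Repeat the placeholder's own scale, as the patch does via scale_idx.
        scale = loss_scale[idx]
        loss_scale = loss_scale[:idx] + [scale] * token_len + loss_scale[idx + 1:]
    return input_ids, labels, loss_scale


# Toy values: 151655 stands in for the image token id.
ids, lab, scale = expand_media_token(
    input_ids=[1, 151655, 2],
    labels=[-100, -100, 9],
    loss_scale=[0.0, 0.0, 1.0],
    idx=1,
    media_token=151655,
    token_len=4)
assert len(ids) == len(lab) == len(scale) == 6  # all three sequences stay aligned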