
Commit a2038ca

Merge branch 'main' into release/2.6
2 parents: f934283 + 5521907

18 files changed (+65, -110 lines)

requirements/framework.txt

Lines changed: 2 additions & 0 deletions
@@ -4,10 +4,12 @@ aiohttp
 attrdict
 binpacking
 dacite
+datasets>=3.0
 einops
 importlib_metadata
 jieba
 matplotlib
+modelscope[datasets]>=1.19
 nltk
 numpy<2.0
 oss2

setup.py

Lines changed: 0 additions & 20 deletions
@@ -1,12 +1,9 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.
 # !/usr/bin/env python
 import os
-import shutil
 from setuptools import find_packages, setup
 from typing import List
 
-from packaging import version
-
 
 def readme():
     with open('README.md', encoding='utf-8') as f:
@@ -118,25 +115,8 @@ def gen_packages_items():
     return gen_packages_items()
 
 
-def add_modelscope_requirement(install_requires: List[str]) -> None:
-    # The future version will remove.
-    try:
-        import modelscope
-        modelscope_version = modelscope.__version__
-    except ImportError:
-        modelscope_version = '1.18'
-
-    if version.parse(modelscope_version) >= version.parse('1.19'):
-        install_requires.append('datasets>=3.0')
-        install_requires.append('modelscope[datasets]>=1.19')
-    else:
-        install_requires.append('datasets<3.0')
-        install_requires.append('modelscope[datasets]>=1.17,<1.19')
-
-
 if __name__ == '__main__':
     install_requires, deps_link = parse_requirements('requirements.txt')
-    add_modelscope_requirement(install_requires)
     extra_requires = {}
     all_requires = []
     extra_requires['llm'], _ = parse_requirements('requirements/llm.txt')
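
The removed helper picked datasets/modelscope pins at build time based on the locally installed modelscope version; those pins are now declared statically in requirements/framework.txt (see the first diff above). A minimal sketch, assuming both packages are installed in the current environment, of checking that the new static pins are satisfied; this snippet is illustrative and not part of the commit:

from importlib.metadata import version
from packaging.version import Version

# These mirror the new pins in requirements/framework.txt.
assert Version(version('modelscope')) >= Version('1.19')
assert Version(version('datasets')) >= Version('3.0')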

swift/llm/utils/template.py

Lines changed: 6 additions & 8 deletions
@@ -1627,18 +1627,20 @@ def _encode(self, example: Dict[str, Any]) -> Tuple[Dict[str, Any], Dict[str, Any]]:
                     + 1:]
                 added_tokens_len += token_len - 1
         data.update(media_inputs)
-
-        inputs['input_ids'] = input_ids
+        # The architecture will be optimized in ms-swift3.0
+        data['input_ids'] = input_ids
         inputs['labels'] = labels
-        data['input_ids'] = torch.tensor(input_ids)[None]
         inputs['_data'] = data
+        inputs.update(data)
         return inputs, {}
 
     def _post_encode(self, model, data: Any) -> Dict[str, Any]:
+        if not self._is_training:
+            return data
         _model = model.model
         if not hasattr(_model, 'embed_tokens'):
             _model = _model.model  # LoRA
-        input_ids = data['input_ids']
+        input_ids = torch.tensor(data['input_ids'], device=model.device)[None]
         pixel_values = data.get('pixel_values')
         pixel_values_videos = data.get('pixel_values_videos')
         inputs_embeds = _model.embed_tokens(input_ids)
@@ -1685,10 +1687,6 @@ def data_collator(self, batch: List[Dict[str, Any]], padding_to: Optional[int] = None)
         res['position_ids'] = position_ids.contiguous()
         return res
 
-    @staticmethod
-    def _get_generate_ids(generate_ids: List[int], input_token_len: int) -> List[int]:
-        return generate_ids
-
 
 class Qwen2VLTemplate(_Qwen2VLTemplateMixin, QwenTemplate):
     pass
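
The net effect: `_encode` now keeps `input_ids` as a plain list inside `_data`, and `_post_encode` materializes it as a batched tensor on the model's device (skipping the custom embedding path entirely outside training). A minimal sketch, with illustrative values, of the tensor construction used above:

import torch

input_ids = [101, 2009, 2003, 102]      # token ids as produced by _encode
device = torch.device('cpu')            # stand-in for model.device
batched = torch.tensor(input_ids, device=device)[None]  # [None] adds the batch dim
print(batched.shape)                    # torch.Size([1, 4])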

swift/torchacc_utils.py

Lines changed: 16 additions & 4 deletions
@@ -27,14 +27,26 @@ def get_bucket_sizes(max_length: int) -> List[int]:
         the bucket sizes. If not set, we use a normal distribution bucketing with
         8 buckets.
     """
+    padding_p_base = 2
     if os.getenv('TORCHACC_DATA_BUCKETS') is not None:
         bucket_sizes = [int(x) for x in os.getenv('TORCHACC_DATA_BUCKETS').split(',')]
         bucket_sizes.append(max_length)
-    else:  # default normal distribution bucketing.
-        mean = max_length // 2
-        var = max_length // 8
-        bucket_sizes = [mean + i * var for i in range(-3, 4)]
+    else:
+        if os.getenv('TORCHACC_CACHE_PATH') is not None:  # padding strategy when persistent cache is enabled
+            padding_p_base = 1.4
+        padding_p_base = os.getenv('TORCHACC_PADDING_P_BASE', padding_p_base)
+        try:
+            padding_p_base = float(padding_p_base)
+        except ValueError as e:
+            logger.error(f'Expect TORCHACC_PADDING_P_BASE to be a float number, but encountered {padding_p_base}')
+            raise e
+        bucket_sizes = [16, 32, 48, 64, 96, 128]
+        base_size = 256
+        while base_size < max_length:
+            bucket_sizes.append((int(base_size) + 127) // 128 * 128)
+            base_size *= padding_p_base
         bucket_sizes.append(max_length)
+
     return bucket_sizes
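
The new default replaces the old ±3σ normal-distribution buckets with a geometric ladder: a handful of small fixed buckets, then sizes that grow by a factor of padding_p_base and are rounded up to multiples of 128 until max_length is reached. A standalone sketch (not the library function itself) showing the sizes this produces:

def geometric_buckets(max_length: int, padding_p_base: float = 2.0) -> list:
    # Fixed small buckets, then geometric growth rounded up to multiples of 128.
    bucket_sizes = [16, 32, 48, 64, 96, 128]
    base_size = 256
    while base_size < max_length:
        bucket_sizes.append((int(base_size) + 127) // 128 * 128)
        base_size *= padding_p_base
    bucket_sizes.append(max_length)
    return bucket_sizes

print(geometric_buckets(1024, 1.4))
# [16, 32, 48, 64, 96, 128, 256, 384, 512, 768, 1024, 1024]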

swift/trainers/trainers.py

Lines changed: 3 additions & 1 deletion
@@ -213,7 +213,9 @@ def compute_loss(self, model, inputs, return_outputs=None, num_items_in_batch=None):
             acc = torch.tensor(acc_list, device=preds.device).float().mean()
         else:
             if use_torchacc():
-                ta_trim_graph()
+                # Only enabled during evaluation/test
+                if not model.training:
+                    ta_trim_graph()
             preds = preds.to('cpu')
             masks = masks.to('cpu')
             labels = labels.to('cpu')
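
This guard relies on the standard PyTorch module flag: model.training is True after model.train() and False after model.eval(), so graph trimming now only fires during evaluation or test passes. A tiny illustration, assuming any nn.Module:

import torch.nn as nn

model = nn.Linear(4, 2)
model.train()
assert model.training       # training mode: ta_trim_graph() would be skipped
model.eval()
assert not model.training   # eval mode: ta_trim_graph() would run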

swift/tuners/adapter.py

Lines changed: 1 addition & 1 deletion
@@ -111,7 +111,7 @@ def _feed_forward_chunk(self, attention_output):
         setattr(module, f'adapter_{adapter_name}', adapter_module)
         logger.info(f'Adapter modules(module_key): {module_key}.adapter_{adapter_name}')
 
-        def state_dict_callback(state_dict, adapter_name: str):
+        def state_dict_callback(state_dict, adapter_name: str, **kwargs):
             return {key: value for key, value in state_dict.items() if f'adapter_{adapter_name}' in key}
 
         def mark_trainable_callback(model):
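
The same signature change is applied to state_dict_callback in llamapro.py, longlora.py, and lora.py below: accepting **kwargs lets the framework pass new keyword arguments to all callbacks without breaking tuners that ignore them. A minimal sketch with an illustrative extra argument (hypothetical, not from the commit):

def state_dict_callback(state_dict, adapter_name: str, **kwargs):
    # Extra keywords (e.g. a hypothetical future `replace_key=`) are accepted
    # and simply ignored by callbacks that do not need them.
    return {k: v for k, v in state_dict.items() if f'adapter_{adapter_name}' in k}

sd = {'layer.adapter_default.weight': 1, 'layer.weight': 2}
print(state_dict_callback(sd, 'default', replace_key=False))
# {'layer.adapter_default.weight': 1}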

swift/tuners/base.py

Lines changed: 0 additions & 23 deletions
@@ -359,29 +359,6 @@ def from_pretrained(cls,
                 continue
             state_dict = cls.load_state_file(sub_folder)
             if state_dict is not None:
-                model_is_qlora = len([
-                    k for k in self.state_dict().keys()
-                    if k.endswith(f'.lora_A.{_adapter}.weight') or k.endswith(f'.lora_B.{_adapter}.weight')
-                ])
-                if not model_is_qlora:
-                    # model is lora, state_dict: qlora->lora
-                    state_dict = {
-                        k[:-len(f'.{_name}.weight') if k.endswith(f'.lora_A.{_name}.weight') or k.
-                        endswith(f'.lora_B.{_name}.weight') else None]: v
-                        for k, v in state_dict.items()
-                    }
-                if any(['loramodule' in key for key in state_dict]):
-                    # Compatible with old checkpoints before ms-swift:1.5.0
-                    state_dict = {
-                        key.replace(f'loramodule_{_name}.lora_A', 'lora_A') if f'loramodule_{_name}.lora_A.{_name}'
-                        in key else key.replace(f'loramodule_{_name}.lora_A', f'lora_A.{_name}.weight'): value
-                        for key, value in state_dict.items()
-                    }
-                    state_dict = {
-                        key.replace(f'loramodule_{_name}.lora_B', 'lora_B') if f'loramodule_{_name}.lora_B.{_name}'
-                        in key else key.replace(f'loramodule_{_name}.lora_B', f'lora_B.{_name}.weight'): value
-                        for key, value in state_dict.items()
-                    }
                 if isinstance(adapter_name, dict):
                     # TODO this logic is fragile! replace `_name` may cause other parts replaced
                     state_dict = {key.replace(_name, adapter_name[_name]): value for key, value in state_dict.items()}
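
What remains after this deletion is the plain renaming step flagged by the TODO: every occurrence of the stored adapter name is substring-replaced in each key, which can clobber unrelated parts of a key containing the same text. An illustrative sketch of both the intended behavior and the hazard:

state_dict = {'model.lora_A.default.weight': 1.0}
adapter_name = {'default': 'my_adapter'}   # requested rename
_name = 'default'
renamed = {k.replace(_name, adapter_name[_name]): v for k, v in state_dict.items()}
print(renamed)  # {'model.lora_A.my_adapter.weight': 1.0}
# Hazard: a key like 'model.default_head.lora_A.default.weight' would have
# both occurrences of 'default' replaced, not just the adapter segment.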

swift/tuners/llamapro.py

Lines changed: 1 addition & 1 deletion
@@ -77,7 +77,7 @@ def prepare_model(model: nn.Module, config: LLaMAProConfig, adapter_name: str)
         model.config.num_hidden_layers = len(new_module_list)
         LLaMAPro._set_module_list(config, model, new_module_list)
 
-        def state_dict_callback(state_dict, adapter_name):
+        def state_dict_callback(state_dict, adapter_name, **kwargs):
             model_key_mapping = LLaMAPro.get_model_key_mapping(config.model_type, config)
             new_module_list = [model_key_mapping.module_list + f'.{i}' for i in new_module_idx]
             return {

swift/tuners/longlora/longlora.py

Lines changed: 1 addition & 1 deletion
@@ -51,7 +51,7 @@ def prepare_model(model: nn.Module, config: LongLoRAConfig, adapter_name: str):
         """Prepare a model with `LongLoRAConfig`"""
         LoraModel(model, config, adapter_name)
 
-        def state_dict_callback(state_dict, adapter_name):
+        def state_dict_callback(state_dict, adapter_name, **kwargs):
             _state_dict = lora_state_dict(state_dict, adapter_name, config.bias)
             for name, value in state_dict.items():
                 if isinstance(config.embedder_and_normalizer, str):

swift/tuners/lora.py

Lines changed: 1 addition & 1 deletion
@@ -81,7 +81,7 @@ def prepare_model(model: nn.Module, config: LoRAConfig, adapter_name: str):
             config.group_size = getattr(auto_gptq_config, 'group_size', None)
         LoraModel(model, config, adapter_name)
 
-        def state_dict_callback(state_dict, adapter_name, cfg=None):
+        def state_dict_callback(state_dict, adapter_name, cfg=None, **kwargs):
             return lora_state_dict(state_dict, adapter_name, cfg.bias if cfg else config.bias)
 
         def mark_trainable_callback(model, cfg=None):
